--- /dev/null
+From stable+bounces-188302-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:58 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:42 +0200
+Subject: block: fix race between set_blocksize and read paths
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Luis Chamberlain <mcgrof@kernel.org>, Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>, "Jens Axboe" <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Hannes Reinecke" <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-9-mngyadam@amazon.de>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit c0e473a0d226479e8e925d5ba93f751d8df628e9 upstream.
+
+With the new large sector size support, it's now the case that
+set_blocksize can change i_blksize and the folio order in a manner that
+conflicts with a concurrent reader and causes a kernel crash.
+
+Specifically, let's say that udev-worker calls libblkid to detect the
+labels on a block device. The read call can create an order-0 folio to
+read the first 4096 bytes from the disk. But then udev is preempted.
+
+Next, someone tries to mount an 8k-sectorsize filesystem from the same
+block device. The filesystem calls set_blocksize, which sets i_blksize to
+8192 and the minimum folio order to 1.
+
+Now udev resumes, still holding the order-0 folio it allocated. It then
+tries to schedule a read bio and do_mpage_readpage tries to create
+bufferheads for the folio. Unfortunately, blocks_per_folio == 0 because
+the page size is 4096 but the blocksize is 8192 so no bufferheads are
+attached and the bh walk never sets bdev. We then submit the bio with a
+NULL block device and crash.
+
+Therefore, truncate the page cache after flushing but before updating
+i_blksize. However, that's not enough -- we also need to lock out file
+IO and page faults during the update. Take both the i_rwsem and the
+invalidate_lock in exclusive mode for invalidations, and in shared mode
+for read/write operations.
+
+I don't know if this is the correct fix, but xfs/259 found it.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Link: https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@frogsfrogsfrogs
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[use bdev->bd_inode instead & fix small contextual changes]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bdev.c | 17 +++++++++++++++++
+ block/blk-zoned.c | 5 ++++-
+ block/fops.c | 16 ++++++++++++++++
+ block/ioctl.c | 6 ++++++
+ 4 files changed, 43 insertions(+), 1 deletion(-)
+
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -147,9 +147,26 @@ int set_blocksize(struct block_device *b
+
+ /* Don't change the size if it is same as current */
+ if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
++ /*
++ * Flush and truncate the pagecache before we reconfigure the
++ * mapping geometry because folio sizes are variable now. If a
++ * reader has already allocated a folio whose size is smaller
++ * than the new min_order but invokes readahead after the new
++ * min_order becomes visible, readahead will think there are
++ * "zero" blocks per folio and crash. Take the inode and
++ * invalidation locks to avoid racing with
++ * read/write/fallocate.
++ */
++ inode_lock(bdev->bd_inode);
++ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
++
+ sync_blockdev(bdev);
++ kill_bdev(bdev);
++
+ bdev->bd_inode->i_blkbits = blksize_bits(size);
+ kill_bdev(bdev);
++ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++ inode_unlock(bdev->bd_inode);
+ }
+ return 0;
+ }
+--- a/block/blk-zoned.c
++++ b/block/blk-zoned.c
+@@ -417,6 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_
+ op = REQ_OP_ZONE_RESET;
+
+ /* Invalidate the page cache, including dirty pages. */
++ inode_lock(bdev->bd_inode);
+ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+ if (ret)
+@@ -439,8 +440,10 @@ int blkdev_zone_mgmt_ioctl(struct block_
+ GFP_KERNEL);
+
+ fail:
+- if (cmd == BLKRESETZONE)
++ if (cmd == BLKRESETZONE) {
+ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++ inode_unlock(bdev->bd_inode);
++ }
+
+ return ret;
+ }
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -592,7 +592,14 @@ static ssize_t blkdev_write_iter(struct
+ ret = direct_write_fallback(iocb, from, ret,
+ generic_perform_write(iocb, from));
+ } else {
++ /*
++ * Take i_rwsem and invalidate_lock to avoid racing with
++ * set_blocksize changing i_blkbits/folio order and punching
++ * out the pagecache.
++ */
++ inode_lock_shared(bd_inode);
+ ret = generic_perform_write(iocb, from);
++ inode_unlock_shared(bd_inode);
+ }
+
+ if (ret > 0)
+@@ -605,6 +612,7 @@ static ssize_t blkdev_write_iter(struct
+ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ {
+ struct block_device *bdev = iocb->ki_filp->private_data;
++ struct inode *bd_inode = bdev->bd_inode;
+ loff_t size = bdev_nr_bytes(bdev);
+ loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+@@ -652,7 +660,13 @@ static ssize_t blkdev_read_iter(struct k
+ goto reexpand;
+ }
+
++ /*
++ * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
++ * changing i_blkbits/folio order and punching out the pagecache.
++ */
++ inode_lock_shared(bd_inode);
+ ret = filemap_read(iocb, to, ret);
++ inode_unlock_shared(bd_inode);
+
+ reexpand:
+ if (unlikely(shorted))
+@@ -695,6 +709,7 @@ static long blkdev_fallocate(struct file
+ if ((start | len) & (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
++ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+
+ /*
+@@ -735,6 +750,7 @@ static long blkdev_fallocate(struct file
+
+ fail:
+ filemap_invalidate_unlock(inode->i_mapping);
++ inode_unlock(inode);
+ return error;
+ }
+
+--- a/block/ioctl.c
++++ b/block/ioctl.c
+@@ -114,6 +114,7 @@ static int blk_ioctl_discard(struct bloc
+ end > bdev_nr_bytes(bdev))
+ return -EINVAL;
+
++ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, end - 1);
+ if (err)
+@@ -121,6 +122,7 @@ static int blk_ioctl_discard(struct bloc
+ err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+ fail:
+ filemap_invalidate_unlock(inode->i_mapping);
++ inode_unlock(inode);
+ return err;
+ }
+
+@@ -146,12 +148,14 @@ static int blk_ioctl_secure_erase(struct
+ end > bdev_nr_bytes(bdev))
+ return -EINVAL;
+
++ inode_lock(bdev->bd_inode);
+ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, end - 1);
+ if (!err)
+ err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+ GFP_KERNEL);
+ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++ inode_unlock(bdev->bd_inode);
+ return err;
+ }
+
+@@ -184,6 +188,7 @@ static int blk_ioctl_zeroout(struct bloc
+ return -EINVAL;
+
+ /* Invalidate the page cache, including dirty pages */
++ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, end);
+ if (err)
+@@ -194,6 +199,7 @@ static int blk_ioctl_zeroout(struct bloc
+
+ fail:
+ filemap_invalidate_unlock(inode->i_mapping);
++ inode_unlock(inode);
+ return err;
+ }
+
--- /dev/null
+From stable+bounces-188301-greg=kroah.com@vger.kernel.org Tue Oct 21 16:18:34 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:41 +0200
+Subject: block: open code __generic_file_write_iter for blkdev writes
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, "Christian Brauner" <brauner@kernel.org>, Hannes Reinecke <hare@suse.de>, "Luis Chamberlain" <mcgrof@kernel.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Damien Le Moal" <dlemoal@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-8-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 727cfe976758b79f8d2f8051c75a5ccb14539a56 upstream.
+
+Open code __generic_file_write_iter to remove the indirect call into
+->direct_IO and to prepare using the iomap based write code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Link: https://lore.kernel.org/r/20230801172201.1923299-4-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[fix contextual changes]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/fops.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -515,6 +515,30 @@ static int blkdev_close(struct inode *in
+ return 0;
+ }
+
++static ssize_t
++blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
++{
++ size_t count = iov_iter_count(from);
++ ssize_t written;
++
++ written = kiocb_invalidate_pages(iocb, count);
++ if (written) {
++ if (written == -EBUSY)
++ return 0;
++ return written;
++ }
++
++ written = blkdev_direct_IO(iocb, from);
++ if (written > 0) {
++ kiocb_invalidate_post_direct_write(iocb, count);
++ iocb->ki_pos += written;
++ count -= written;
++ }
++ if (written != -EIOCBQUEUED)
++ iov_iter_revert(from, count - iov_iter_count(from));
++ return written;
++}
++
+ /*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+@@ -524,7 +548,8 @@ static int blkdev_close(struct inode *in
+ */
+ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ {
+- struct block_device *bdev = iocb->ki_filp->private_data;
++ struct file *file = iocb->ki_filp;
++ struct block_device *bdev = file->private_data;
+ struct inode *bd_inode = bdev->bd_inode;
+ loff_t size = bdev_nr_bytes(bdev);
+ struct blk_plug plug;
+@@ -553,7 +578,23 @@ static ssize_t blkdev_write_iter(struct
+ }
+
+ blk_start_plug(&plug);
+- ret = __generic_file_write_iter(iocb, from);
++ ret = file_remove_privs(file);
++ if (ret)
++ return ret;
++
++ ret = file_update_time(file);
++ if (ret)
++ return ret;
++
++ if (iocb->ki_flags & IOCB_DIRECT) {
++ ret = blkdev_direct_write(iocb, from);
++ if (ret >= 0 && iov_iter_count(from))
++ ret = direct_write_fallback(iocb, from, ret,
++ generic_perform_write(iocb, from));
++ } else {
++ ret = generic_perform_write(iocb, from);
++ }
++
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
--- /dev/null
+From stable+bounces-188300-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:26 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:40 +0200
+Subject: direct_write_fallback(): on error revert the ->ki_pos update from buffered write
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Hannes Reinecke" <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, "Luis Chamberlain" <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-7-mngyadam@amazon.de>
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 8287474aa5ffb41df52552c4ae4748e791d2faf2 upstream.
+
+If we fail filemap_write_and_wait_range() on the range the buffered write went
+into, we only report the "number of bytes which we direct-written", to quote
+the comment in there. Which is fine, but buffered write has already advanced
+iocb->ki_pos, so we need to roll that back. Otherwise we end up with e.g.
+write(2) advancing position by more than the amount it reports having written.
+
+Fixes: 182c25e9c157 ("filemap: update ki_pos in generic_perform_write")
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Message-Id: <20230827214518.GU3390869@ZenIV>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/libfs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1615,6 +1615,7 @@ ssize_t direct_write_fallback(struct kio
+ * We don't know how much we wrote, so just return the number of
+ * bytes which were direct-written
+ */
++ iocb->ki_pos -= buffered_written;
+ if (direct_written)
+ return direct_written;
+ return err;
--- /dev/null
+From stable+bounces-188296-greg=kroah.com@vger.kernel.org Tue Oct 21 16:11:59 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:36 +0200
+Subject: filemap: add a kiocb_invalidate_pages helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-3-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit e003f74afbd2feadbb9ffbf9135e2d2fb5d320a5 upstream.
+
+Factor out a helper that calls filemap_write_and_wait_range and
+invalidate_inode_pages2_range for the range covered by a write kiocb or
+returns -EAGAIN if the kiocb is marked as nowait and there would be pages
+to write or invalidate.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-6-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pagemap.h | 1 +
+ mm/filemap.c | 48 ++++++++++++++++++++++++++++--------------------
+ 2 files changed, 29 insertions(+), 20 deletions(-)
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -30,6 +30,7 @@ static inline void invalidate_remote_ino
+ int invalidate_inode_pages2(struct address_space *mapping);
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
++int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
+ int write_inode_now(struct inode *, int sync);
+ int filemap_fdatawrite(struct address_space *);
+ int filemap_flush(struct address_space *);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2839,6 +2839,33 @@ put_folios:
+ }
+ EXPORT_SYMBOL_GPL(filemap_read);
+
++int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
++{
++ struct address_space *mapping = iocb->ki_filp->f_mapping;
++ loff_t pos = iocb->ki_pos;
++ loff_t end = pos + count - 1;
++ int ret;
++
++ if (iocb->ki_flags & IOCB_NOWAIT) {
++ /* we could block if there are any pages in the range */
++ if (filemap_range_has_page(mapping, pos, end))
++ return -EAGAIN;
++ } else {
++ ret = filemap_write_and_wait_range(mapping, pos, end);
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * After a write we want buffered reads to be sure to go to disk to get
++ * the new data. We invalidate clean cached page from the region we're
++ * about to write. We do this *before* the write so that we can return
++ * without clobbering -EIOCBQUEUED from ->direct_IO().
++ */
++ return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
++ end >> PAGE_SHIFT);
++}
++
+ /**
+ * generic_file_read_iter - generic filesystem read routine
+ * @iocb: kernel I/O control block
+@@ -3737,30 +3764,11 @@ generic_file_direct_write(struct kiocb *
+ write_len = iov_iter_count(from);
+ end = (pos + write_len - 1) >> PAGE_SHIFT;
+
+- if (iocb->ki_flags & IOCB_NOWAIT) {
+- /* If there are pages to writeback, return */
+- if (filemap_range_has_page(file->f_mapping, pos,
+- pos + write_len - 1))
+- return -EAGAIN;
+- } else {
+- written = filemap_write_and_wait_range(mapping, pos,
+- pos + write_len - 1);
+- if (written)
+- goto out;
+- }
+-
+- /*
+- * After a write we want buffered reads to be sure to go to disk to get
+- * the new data. We invalidate clean cached page from the region we're
+- * about to write. We do this *before* the write so that we can return
+- * without clobbering -EIOCBQUEUED from ->direct_IO().
+- */
+- written = invalidate_inode_pages2_range(mapping,
+- pos >> PAGE_SHIFT, end);
+ /*
+ * If a page can not be invalidated, return 0 to fall back
+ * to buffered write.
+ */
++ written = kiocb_invalidate_pages(iocb, write_len);
+ if (written) {
+ if (written == -EBUSY)
+ return 0;
--- /dev/null
+From stable+bounces-188297-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:23 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:37 +0200
+Subject: filemap: add a kiocb_invalidate_post_direct_write helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-4-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit c402a9a9430b670926decbb284b756ee6f47c1ec upstream.
+
+Add a helper to invalidate page cache after a dio write.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-7-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/direct-io.c | 10 ++--------
+ fs/iomap/direct-io.c | 12 ++----------
+ include/linux/fs.h | 5 -----
+ include/linux/pagemap.h | 1 +
+ mm/filemap.c | 37 ++++++++++++++++++++-----------------
+ 5 files changed, 25 insertions(+), 40 deletions(-)
+
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -286,14 +286,8 @@ static ssize_t dio_complete(struct dio *
+ * zeros from unwritten extents.
+ */
+ if (flags & DIO_COMPLETE_INVALIDATE &&
+- ret > 0 && dio_op == REQ_OP_WRITE &&
+- dio->inode->i_mapping->nrpages) {
+- err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+- offset >> PAGE_SHIFT,
+- (offset + ret - 1) >> PAGE_SHIFT);
+- if (err)
+- dio_warn_stale_pagecache(dio->iocb->ki_filp);
+- }
++ ret > 0 && dio_op == REQ_OP_WRITE)
++ kiocb_invalidate_post_direct_write(dio->iocb, ret);
+
+ inode_dio_end(dio->inode);
+
+--- a/fs/iomap/direct-io.c
++++ b/fs/iomap/direct-io.c
+@@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_
+ {
+ const struct iomap_dio_ops *dops = dio->dops;
+ struct kiocb *iocb = dio->iocb;
+- struct inode *inode = file_inode(iocb->ki_filp);
+ loff_t offset = iocb->ki_pos;
+ ssize_t ret = dio->error;
+
+@@ -108,15 +107,8 @@ ssize_t iomap_dio_complete(struct iomap_
+ * ->end_io() when necessary, otherwise a racing buffer read would cache
+ * zeros from unwritten extents.
+ */
+- if (!dio->error && dio->size &&
+- (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+- int err;
+- err = invalidate_inode_pages2_range(inode->i_mapping,
+- offset >> PAGE_SHIFT,
+- (offset + dio->size - 1) >> PAGE_SHIFT);
+- if (err)
+- dio_warn_stale_pagecache(iocb->ki_filp);
+- }
++ if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
++ kiocb_invalidate_post_direct_write(iocb, dio->size);
+
+ inode_dio_end(file_inode(iocb->ki_filp));
+ if (ret > 0) {
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3371,11 +3371,6 @@ static inline void inode_dio_end(struct
+ wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+ }
+
+-/*
+- * Warn about a page cache invalidation failure diring a direct I/O write.
+- */
+-void dio_warn_stale_pagecache(struct file *filp);
+-
+ extern void inode_set_flags(struct inode *inode, unsigned int flags,
+ unsigned int mask);
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -31,6 +31,7 @@ int invalidate_inode_pages2(struct addre
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
++void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
+ int write_inode_now(struct inode *, int sync);
+ int filemap_fdatawrite(struct address_space *);
+ int filemap_flush(struct address_space *);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3733,7 +3733,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
+ /*
+ * Warn about a page cache invalidation failure during a direct I/O write.
+ */
+-void dio_warn_stale_pagecache(struct file *filp)
++static void dio_warn_stale_pagecache(struct file *filp)
+ {
+ static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
+ char pathname[128];
+@@ -3750,19 +3750,23 @@ void dio_warn_stale_pagecache(struct fil
+ }
+ }
+
++void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count)
++{
++ struct address_space *mapping = iocb->ki_filp->f_mapping;
++
++ if (mapping->nrpages &&
++ invalidate_inode_pages2_range(mapping,
++ iocb->ki_pos >> PAGE_SHIFT,
++ (iocb->ki_pos + count - 1) >> PAGE_SHIFT))
++ dio_warn_stale_pagecache(iocb->ki_filp);
++}
++
+ ssize_t
+ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
+ {
+- struct file *file = iocb->ki_filp;
+- struct address_space *mapping = file->f_mapping;
+- struct inode *inode = mapping->host;
+- loff_t pos = iocb->ki_pos;
+- ssize_t written;
+- size_t write_len;
+- pgoff_t end;
+-
+- write_len = iov_iter_count(from);
+- end = (pos + write_len - 1) >> PAGE_SHIFT;
++ struct address_space *mapping = iocb->ki_filp->f_mapping;
++ size_t write_len = iov_iter_count(from);
++ ssize_t written;
+
+ /*
+ * If a page can not be invalidated, return 0 to fall back
+@@ -3772,7 +3776,7 @@ generic_file_direct_write(struct kiocb *
+ if (written) {
+ if (written == -EBUSY)
+ return 0;
+- goto out;
++ return written;
+ }
+
+ written = mapping->a_ops->direct_IO(iocb, from);
+@@ -3794,11 +3798,11 @@ generic_file_direct_write(struct kiocb *
+ *
+ * Skip invalidation for async writes or if mapping has no pages.
+ */
+- if (written > 0 && mapping->nrpages &&
+- invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
+- dio_warn_stale_pagecache(file);
+-
+ if (written > 0) {
++ struct inode *inode = mapping->host;
++ loff_t pos = iocb->ki_pos;
++
++ kiocb_invalidate_post_direct_write(iocb, written);
+ pos += written;
+ write_len -= written;
+ if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
+@@ -3809,7 +3813,6 @@ generic_file_direct_write(struct kiocb *
+ }
+ if (written != -EIOCBQUEUED)
+ iov_iter_revert(from, write_len - iov_iter_count(from));
+-out:
+ return written;
+ }
+ EXPORT_SYMBOL(generic_file_direct_write);
--- /dev/null
+From stable+bounces-188298-greg=kroah.com@vger.kernel.org Tue Oct 21 16:16:51 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:38 +0200
+Subject: filemap: update ki_pos in generic_perform_write
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Xiubo Li <xiubli@redhat.com>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, Theodore Ts'o <tytso@mit.edu>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Trond Myklebust" <trond.myklebust@hammerspace.com>, Andrew Morton <akpm@linux-foundation.org>, Jeff Layton <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-5-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 182c25e9c157f37bd0ab5a82fe2417e2223df459 upstream.
+
+All callers of generic_perform_write need to update ki_pos; move it into
+common code.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-4-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Theodore Ts'o <tytso@mit.edu>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c | 2 --
+ fs/ext4/file.c | 9 +++------
+ fs/f2fs/file.c | 1 -
+ fs/nfs/file.c | 1 -
+ mm/filemap.c | 8 ++++----
+ 5 files changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1891,8 +1891,6 @@ retry_snap:
+ * can not run at the same time
+ */
+ written = generic_perform_write(iocb, from);
+- if (likely(written >= 0))
+- iocb->ki_pos = pos + written;
+ ceph_end_io_write(inode);
+ }
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -287,12 +287,9 @@ static ssize_t ext4_buffered_write_iter(
+
+ out:
+ inode_unlock(inode);
+- if (likely(ret > 0)) {
+- iocb->ki_pos += ret;
+- ret = generic_write_sync(iocb, ret);
+- }
+-
+- return ret;
++ if (unlikely(ret <= 0))
++ return ret;
++ return generic_write_sync(iocb, ret);
+ }
+
+ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -4659,7 +4659,6 @@ static ssize_t f2fs_buffered_write_iter(
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+- iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), inode,
+ APP_BUFFERED_IO, ret);
+ }
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -645,7 +645,6 @@ ssize_t nfs_file_write(struct kiocb *ioc
+ goto out;
+
+ written = result;
+- iocb->ki_pos += written;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+
+ if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3891,7 +3891,10 @@ again:
+ balance_dirty_pages_ratelimited(mapping);
+ } while (iov_iter_count(i));
+
+- return written ? written : status;
++ if (!written)
++ return status;
++ iocb->ki_pos += written;
++ return written;
+ }
+ EXPORT_SYMBOL(generic_perform_write);
+
+@@ -3970,7 +3973,6 @@ ssize_t __generic_file_write_iter(struct
+ endbyte = pos + status - 1;
+ err = filemap_write_and_wait_range(mapping, pos, endbyte);
+ if (err == 0) {
+- iocb->ki_pos = endbyte + 1;
+ written += status;
+ invalidate_mapping_pages(mapping,
+ pos >> PAGE_SHIFT,
+@@ -3983,8 +3985,6 @@ ssize_t __generic_file_write_iter(struct
+ }
+ } else {
+ written = generic_perform_write(iocb, from);
+- if (likely(written > 0))
+- iocb->ki_pos += written;
+ }
+ out:
+ current->backing_dev_info = NULL;
--- /dev/null
+From stable+bounces-188299-greg=kroah.com@vger.kernel.org Tue Oct 21 16:17:20 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:39 +0200
+Subject: fs: factor out a direct_write_fallback helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Miklos Szeredi <mszeredi@redhat.com>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Hannes Reinecke <hare@suse.de>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, "Matthew Wilcox" <willy@infradead.org>, Miklos Szeredi <miklos@szeredi.hu>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-6-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 44fff0fa08ec5a6d9d5fb05443a36d854d0ece4d upstream.
+
+Add a helper dealing with handling the syncing of a buffered write
+fallback for direct I/O.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-10-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backing_dev_info is still in use here, so make small changes to the patch
+to keep the out label, which means replacing all returns with goto out.]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/libfs.c | 41 +++++++++++++++++++++++++++++++++++
+ include/linux/fs.h | 2 +
+ mm/filemap.c | 61 ++++++++++++-----------------------------------------
+ 3 files changed, 57 insertions(+), 47 deletions(-)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1582,3 +1582,44 @@ bool inode_maybe_inc_iversion(struct ino
+ return true;
+ }
+ EXPORT_SYMBOL(inode_maybe_inc_iversion);
++
++ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
++ ssize_t direct_written, ssize_t buffered_written)
++{
++ struct address_space *mapping = iocb->ki_filp->f_mapping;
++ loff_t pos = iocb->ki_pos - buffered_written;
++ loff_t end = iocb->ki_pos - 1;
++ int err;
++
++ /*
++ * If the buffered write fallback returned an error, we want to return
++ * the number of bytes which were written by direct I/O, or the error
++ * code if that was zero.
++ *
++ * Note that this differs from normal direct-io semantics, which will
++ * return -EFOO even if some bytes were written.
++ */
++ if (unlikely(buffered_written < 0)) {
++ if (direct_written)
++ return direct_written;
++ return buffered_written;
++ }
++
++ /*
++ * We need to ensure that the page cache pages are written to disk and
++ * invalidated to preserve the expected O_DIRECT semantics.
++ */
++ err = filemap_write_and_wait_range(mapping, pos, end);
++ if (err < 0) {
++ /*
++ * We don't know how much we wrote, so just return the number of
++ * bytes which were direct-written
++ */
++ if (direct_written)
++ return direct_written;
++ return err;
++ }
++ invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
++ return direct_written + buffered_written;
++}
++EXPORT_SYMBOL_GPL(direct_write_fallback);
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3278,6 +3278,8 @@ extern ssize_t __generic_file_write_iter
+ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
+ ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
++ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
++ ssize_t direct_written, ssize_t buffered_written);
+
+ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ rwf_t flags);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3923,25 +3923,21 @@ ssize_t __generic_file_write_iter(struct
+ {
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+- struct inode *inode = mapping->host;
+- ssize_t written = 0;
+- ssize_t err;
+- ssize_t status;
++ struct inode *inode = mapping->host;
++ ssize_t ret;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = inode_to_bdi(inode);
+- err = file_remove_privs(file);
+- if (err)
++ ret = file_remove_privs(file);
++ if (ret)
+ goto out;
+
+- err = file_update_time(file);
+- if (err)
++ ret = file_update_time(file);
++ if (ret)
+ goto out;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+- loff_t pos, endbyte;
+-
+- written = generic_file_direct_write(iocb, from);
++ ret = generic_file_direct_write(iocb, from);
+ /*
+ * If the write stopped short of completing, fall back to
+ * buffered writes. Some filesystems do this for writes to
+@@ -3949,46 +3945,17 @@ ssize_t __generic_file_write_iter(struct
+ * not succeed (even if it did, DAX does not handle dirty
+ * page-cache pages correctly).
+ */
+- if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
+- goto out;
+-
+- pos = iocb->ki_pos;
+- status = generic_perform_write(iocb, from);
+- /*
+- * If generic_perform_write() returned a synchronous error
+- * then we want to return the number of bytes which were
+- * direct-written, or the error code if that was zero. Note
+- * that this differs from normal direct-io semantics, which
+- * will return -EFOO even if some bytes were written.
+- */
+- if (unlikely(status < 0)) {
+- err = status;
++ if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
+ goto out;
+- }
+- /*
+- * We need to ensure that the page cache pages are written to
+- * disk and invalidated to preserve the expected O_DIRECT
+- * semantics.
+- */
+- endbyte = pos + status - 1;
+- err = filemap_write_and_wait_range(mapping, pos, endbyte);
+- if (err == 0) {
+- written += status;
+- invalidate_mapping_pages(mapping,
+- pos >> PAGE_SHIFT,
+- endbyte >> PAGE_SHIFT);
+- } else {
+- /*
+- * We don't know how much we wrote, so just return
+- * the number of bytes which were direct-written
+- */
+- }
+- } else {
+- written = generic_perform_write(iocb, from);
++ ret = direct_write_fallback(iocb, from, ret,
++ generic_perform_write(iocb, from));
++ goto out;
+ }
++
++ ret = generic_perform_write(iocb, from);
+ out:
+ current->backing_dev_info = NULL;
+- return written ? written : err;
++ return ret;
+ }
+ EXPORT_SYMBOL(__generic_file_write_iter);
+
--- /dev/null
+From stable+bounces-188303-greg=kroah.com@vger.kernel.org Tue Oct 21 16:19:13 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:43 +0200
+Subject: nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, <syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com>, <syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com>, Andrew Morton <akpm@linux-foundation.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Hannes Reinecke <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-10-mngyadam@amazon.de>
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream.
+
+After commit c0e473a0d226 ("block: fix race between set_blocksize and read
+paths") was merged, set_blocksize() called by sb_set_blocksize() now locks
+the inode of the backing device file. As a result of this change, syzbot
+started reporting deadlock warnings due to a circular dependency involving
+the semaphore "ns_sem" of the nilfs object, the inode lock of the backing
+device file, and the locks that this inode lock is transitively dependent
+on.
+
+This is caused by a new lock dependency added by the above change, since
+init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem".
+However, these warnings are false positives because init_nilfs() is called
+in the early stage of the mount operation and the filesystem has not yet
+started.
+
+The reason why "ns_sem" is locked in init_nilfs() was to avoid a race
+condition in nilfs_fill_super() caused by sharing a nilfs object among
+multiple filesystem instances (super block structures) in the early
+implementation. However, nilfs objects and super block structures have
+long ago become one-to-one, and there is no longer any need to use the
+semaphore there.
+
+So, fix this issue by removing the use of the semaphore "ns_sem" in
+init_nilfs().
+
+Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com
+Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773
+Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b
+Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/the_nilfs.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -680,8 +680,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+ int blocksize;
+ int err;
+
+- down_write(&nilfs->ns_sem);
+-
+ blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
+ if (!blocksize) {
+ nilfs_err(sb, "unable to set blocksize");
+@@ -757,7 +755,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+ set_nilfs_init(nilfs);
+ err = 0;
+ out:
+- up_write(&nilfs->ns_sem);
+ return err;
+
+ failed_sbh:
cacheinfo-fix-llc-is-not-exported-through-sysfs.patch
drivers-base-cacheinfo-update-cpu_map_populated-during-cpu-hotplug.patch
arm64-tegra-update-cache-properties.patch
+filemap-add-a-kiocb_invalidate_pages-helper.patch
+filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch
+filemap-update-ki_pos-in-generic_perform_write.patch
+fs-factor-out-a-direct_write_fallback-helper.patch
+direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch
+block-open-code-__generic_file_write_iter-for-blkdev-writes.patch
+block-fix-race-between-set_blocksize-and-read-paths.patch
+nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch