git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 3 Nov 2025 01:46:56 +0000 (10:46 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 3 Nov 2025 01:46:56 +0000 (10:46 +0900)
added patches:
block-fix-race-between-set_blocksize-and-read-paths.patch
block-open-code-__generic_file_write_iter-for-blkdev-writes.patch
direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch
filemap-add-a-kiocb_invalidate_pages-helper.patch
filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch
filemap-update-ki_pos-in-generic_perform_write.patch
fs-factor-out-a-direct_write_fallback-helper.patch
nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch

queue-6.1/block-fix-race-between-set_blocksize-and-read-paths.patch [new file with mode: 0644]
queue-6.1/block-open-code-__generic_file_write_iter-for-blkdev-writes.patch [new file with mode: 0644]
queue-6.1/direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch [new file with mode: 0644]
queue-6.1/filemap-add-a-kiocb_invalidate_pages-helper.patch [new file with mode: 0644]
queue-6.1/filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch [new file with mode: 0644]
queue-6.1/filemap-update-ki_pos-in-generic_perform_write.patch [new file with mode: 0644]
queue-6.1/fs-factor-out-a-direct_write_fallback-helper.patch [new file with mode: 0644]
queue-6.1/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/block-fix-race-between-set_blocksize-and-read-paths.patch b/queue-6.1/block-fix-race-between-set_blocksize-and-read-paths.patch
new file mode 100644 (file)
index 0000000..70e63b1
--- /dev/null
@@ -0,0 +1,210 @@
+From stable+bounces-188302-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:58 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:42 +0200
+Subject: block: fix race between set_blocksize and read paths
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Luis Chamberlain <mcgrof@kernel.org>, Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>, "Jens Axboe" <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Hannes Reinecke" <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-9-mngyadam@amazon.de>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit c0e473a0d226479e8e925d5ba93f751d8df628e9 upstream.
+
+With the new large sector size support, it's now the case that
+set_blocksize can change i_blksize and the folio order in a manner that
+conflicts with a concurrent reader and causes a kernel crash.
+
+Specifically, let's say that udev-worker calls libblkid to detect the
+labels on a block device.  The read call can create an order-0 folio to
+read the first 4096 bytes from the disk.  But then udev is preempted.
+
+Next, someone tries to mount an 8k-sectorsize filesystem from the same
+block device.  The filesystem calls set_blocksize, which sets i_blksize to
+8192 and the minimum folio order to 1.
+
+Now udev resumes, still holding the order-0 folio it allocated.  It then
+tries to schedule a read bio and do_mpage_readahead tries to create
+bufferheads for the folio.  Unfortunately, blocks_per_folio == 0 because
+the page size is 4096 but the blocksize is 8192 so no bufferheads are
+attached and the bh walk never sets bdev.  We then submit the bio with a
+NULL block device and crash.
+
+Therefore, truncate the page cache after flushing but before updating
+i_blksize.  However, that's not enough -- we also need to lock out file
+IO and page faults during the update.  Take both the i_rwsem and the
+invalidate_lock in exclusive mode for invalidations, and in shared mode
+for read/write operations.
+
+I don't know if this is the correct fix, but xfs/259 found it.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Link: https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@frogsfrogsfrogs
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[use bdev->bd_inode instead & fix small contextual changes]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bdev.c      |   17 +++++++++++++++++
+ block/blk-zoned.c |    5 ++++-
+ block/fops.c      |   16 ++++++++++++++++
+ block/ioctl.c     |    6 ++++++
+ 4 files changed, 43 insertions(+), 1 deletion(-)
+
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -147,9 +147,26 @@ int set_blocksize(struct block_device *b
+       /* Don't change the size if it is same as current */
+       if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
++              /*
++               * Flush and truncate the pagecache before we reconfigure the
++               * mapping geometry because folio sizes are variable now.  If a
++               * reader has already allocated a folio whose size is smaller
++               * than the new min_order but invokes readahead after the new
++               * min_order becomes visible, readahead will think there are
++               * "zero" blocks per folio and crash.  Take the inode and
++               * invalidation locks to avoid racing with
++               * read/write/fallocate.
++               */
++              inode_lock(bdev->bd_inode);
++              filemap_invalidate_lock(bdev->bd_inode->i_mapping);
++
+               sync_blockdev(bdev);
++              kill_bdev(bdev);
++
+               bdev->bd_inode->i_blkbits = blksize_bits(size);
+               kill_bdev(bdev);
++              filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++              inode_unlock(bdev->bd_inode);
+       }
+       return 0;
+ }
+--- a/block/blk-zoned.c
++++ b/block/blk-zoned.c
+@@ -417,6 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_
+               op = REQ_OP_ZONE_RESET;
+               /* Invalidate the page cache, including dirty pages. */
++              inode_lock(bdev->bd_inode);
+               filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+               ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+               if (ret)
+@@ -439,8 +440,10 @@ int blkdev_zone_mgmt_ioctl(struct block_
+                              GFP_KERNEL);
+ fail:
+-      if (cmd == BLKRESETZONE)
++      if (cmd == BLKRESETZONE) {
+               filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++              inode_unlock(bdev->bd_inode);
++      }
+       return ret;
+ }
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -592,7 +592,14 @@ static ssize_t blkdev_write_iter(struct
+                       ret = direct_write_fallback(iocb, from, ret,
+                                       generic_perform_write(iocb, from));
+       } else {
++              /*
++               * Take i_rwsem and invalidate_lock to avoid racing with
++               * set_blocksize changing i_blkbits/folio order and punching
++               * out the pagecache.
++               */
++              inode_lock_shared(bd_inode);
+               ret = generic_perform_write(iocb, from);
++              inode_unlock_shared(bd_inode);
+       }
+       if (ret > 0)
+@@ -605,6 +612,7 @@ static ssize_t blkdev_write_iter(struct
+ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ {
+       struct block_device *bdev = iocb->ki_filp->private_data;
++      struct inode *bd_inode = bdev->bd_inode;
+       loff_t size = bdev_nr_bytes(bdev);
+       loff_t pos = iocb->ki_pos;
+       size_t shorted = 0;
+@@ -652,7 +660,13 @@ static ssize_t blkdev_read_iter(struct k
+                       goto reexpand;
+       }
++      /*
++       * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
++       * changing i_blkbits/folio order and punching out the pagecache.
++       */
++      inode_lock_shared(bd_inode);
+       ret = filemap_read(iocb, to, ret);
++      inode_unlock_shared(bd_inode);
+ reexpand:
+       if (unlikely(shorted))
+@@ -695,6 +709,7 @@ static long blkdev_fallocate(struct file
+       if ((start | len) & (bdev_logical_block_size(bdev) - 1))
+               return -EINVAL;
++      inode_lock(inode);
+       filemap_invalidate_lock(inode->i_mapping);
+       /*
+@@ -735,6 +750,7 @@ static long blkdev_fallocate(struct file
+  fail:
+       filemap_invalidate_unlock(inode->i_mapping);
++      inode_unlock(inode);
+       return error;
+ }
+--- a/block/ioctl.c
++++ b/block/ioctl.c
+@@ -114,6 +114,7 @@ static int blk_ioctl_discard(struct bloc
+           end > bdev_nr_bytes(bdev))
+               return -EINVAL;
++      inode_lock(inode);
+       filemap_invalidate_lock(inode->i_mapping);
+       err = truncate_bdev_range(bdev, mode, start, end - 1);
+       if (err)
+@@ -121,6 +122,7 @@ static int blk_ioctl_discard(struct bloc
+       err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+ fail:
+       filemap_invalidate_unlock(inode->i_mapping);
++      inode_unlock(inode);
+       return err;
+ }
+@@ -146,12 +148,14 @@ static int blk_ioctl_secure_erase(struct
+           end > bdev_nr_bytes(bdev))
+               return -EINVAL;
++      inode_lock(bdev->bd_inode);
+       filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+       err = truncate_bdev_range(bdev, mode, start, end - 1);
+       if (!err)
+               err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+                                               GFP_KERNEL);
+       filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
++      inode_unlock(bdev->bd_inode);
+       return err;
+ }
+@@ -184,6 +188,7 @@ static int blk_ioctl_zeroout(struct bloc
+               return -EINVAL;
+       /* Invalidate the page cache, including dirty pages */
++      inode_lock(inode);
+       filemap_invalidate_lock(inode->i_mapping);
+       err = truncate_bdev_range(bdev, mode, start, end);
+       if (err)
+@@ -194,6 +199,7 @@ static int blk_ioctl_zeroout(struct bloc
+ fail:
+       filemap_invalidate_unlock(inode->i_mapping);
++      inode_unlock(inode);
+       return err;
+ }
diff --git a/queue-6.1/block-open-code-__generic_file_write_iter-for-blkdev-writes.patch b/queue-6.1/block-open-code-__generic_file_write_iter-for-blkdev-writes.patch
new file mode 100644 (file)
index 0000000..e76aeac
--- /dev/null
@@ -0,0 +1,97 @@
+From stable+bounces-188301-greg=kroah.com@vger.kernel.org Tue Oct 21 16:18:34 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:41 +0200
+Subject: block: open code __generic_file_write_iter for blkdev writes
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, "Christian Brauner" <brauner@kernel.org>, Hannes Reinecke <hare@suse.de>, "Luis Chamberlain" <mcgrof@kernel.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Damien Le Moal" <dlemoal@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-8-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 727cfe976758b79f8d2f8051c75a5ccb14539a56 upstream.
+
+Open code __generic_file_write_iter to remove the indirect call into
+->direct_IO and to prepare using the iomap based write code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Link: https://lore.kernel.org/r/20230801172201.1923299-4-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[fix contextual changes]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/fops.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -515,6 +515,30 @@ static int blkdev_close(struct inode *in
+       return 0;
+ }
++static ssize_t
++blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
++{
++      size_t count = iov_iter_count(from);
++      ssize_t written;
++
++      written = kiocb_invalidate_pages(iocb, count);
++      if (written) {
++              if (written == -EBUSY)
++                      return 0;
++              return written;
++      }
++
++      written = blkdev_direct_IO(iocb, from);
++      if (written > 0) {
++              kiocb_invalidate_post_direct_write(iocb, count);
++              iocb->ki_pos += written;
++              count -= written;
++      }
++      if (written != -EIOCBQUEUED)
++              iov_iter_revert(from, count - iov_iter_count(from));
++      return written;
++}
++
+ /*
+  * Write data to the block device.  Only intended for the block device itself
+  * and the raw driver which basically is a fake block device.
+@@ -524,7 +548,8 @@ static int blkdev_close(struct inode *in
+  */
+ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ {
+-      struct block_device *bdev = iocb->ki_filp->private_data;
++      struct file *file = iocb->ki_filp;
++      struct block_device *bdev = file->private_data;
+       struct inode *bd_inode = bdev->bd_inode;
+       loff_t size = bdev_nr_bytes(bdev);
+       struct blk_plug plug;
+@@ -553,7 +578,23 @@ static ssize_t blkdev_write_iter(struct
+       }
+       blk_start_plug(&plug);
+-      ret = __generic_file_write_iter(iocb, from);
++      ret = file_remove_privs(file);
++      if (ret)
++              return ret;
++
++      ret = file_update_time(file);
++      if (ret)
++              return ret;
++
++      if (iocb->ki_flags & IOCB_DIRECT) {
++              ret = blkdev_direct_write(iocb, from);
++              if (ret >= 0 && iov_iter_count(from))
++                      ret = direct_write_fallback(iocb, from, ret,
++                                      generic_perform_write(iocb, from));
++      } else {
++              ret = generic_perform_write(iocb, from);
++      }
++
+       if (ret > 0)
+               ret = generic_write_sync(iocb, ret);
+       iov_iter_reexpand(from, iov_iter_count(from) + shorted);
diff --git a/queue-6.1/direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch b/queue-6.1/direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch
new file mode 100644 (file)
index 0000000..f31d37d
--- /dev/null
@@ -0,0 +1,38 @@
+From stable+bounces-188300-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:26 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:40 +0200
+Subject: direct_write_fallback(): on error revert the ->ki_pos update from buffered write
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Andrew Morton <akpm@linux-foundation.org>, "Hannes Reinecke" <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, "Luis Chamberlain" <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-7-mngyadam@amazon.de>
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 8287474aa5ffb41df52552c4ae4748e791d2faf2 upstream.
+
+If we fail filemap_write_and_wait_range() on the range the buffered write went
+into, we only report the "number of bytes which we direct-written", to quote
+the comment in there.  Which is fine, but buffered write has already advanced
+iocb->ki_pos, so we need to roll that back.  Otherwise we end up with e.g.
+write(2) advancing position by more than the amount it reports having written.
+
+Fixes: 182c25e9c157 "filemap: update ki_pos in generic_perform_write"
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Message-Id: <20230827214518.GU3390869@ZenIV>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/libfs.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1615,6 +1615,7 @@ ssize_t direct_write_fallback(struct kio
+                * We don't know how much we wrote, so just return the number of
+                * bytes which were direct-written
+                */
++              iocb->ki_pos -= buffered_written;
+               if (direct_written)
+                       return direct_written;
+               return err;
diff --git a/queue-6.1/filemap-add-a-kiocb_invalidate_pages-helper.patch b/queue-6.1/filemap-add-a-kiocb_invalidate_pages-helper.patch
new file mode 100644 (file)
index 0000000..8f00e37
--- /dev/null
@@ -0,0 +1,123 @@
+From stable+bounces-188296-greg=kroah.com@vger.kernel.org Tue Oct 21 16:11:59 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:36 +0200
+Subject: filemap: add a kiocb_invalidate_pages helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-3-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit e003f74afbd2feadbb9ffbf9135e2d2fb5d320a5 upstream.
+
+Factor out a helper that calls filemap_write_and_wait_range and
+invalidate_inode_pages2_range for the range covered by a write kiocb or
+returns -EAGAIN if the kiocb is marked as nowait and there would be pages
+to write or invalidate.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-6-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pagemap.h |    1 +
+ mm/filemap.c            |   48 ++++++++++++++++++++++++++++--------------------
+ 2 files changed, 29 insertions(+), 20 deletions(-)
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -30,6 +30,7 @@ static inline void invalidate_remote_ino
+ int invalidate_inode_pages2(struct address_space *mapping);
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+               pgoff_t start, pgoff_t end);
++int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
+ int write_inode_now(struct inode *, int sync);
+ int filemap_fdatawrite(struct address_space *);
+ int filemap_flush(struct address_space *);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2839,6 +2839,33 @@ put_folios:
+ }
+ EXPORT_SYMBOL_GPL(filemap_read);
++int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
++{
++      struct address_space *mapping = iocb->ki_filp->f_mapping;
++      loff_t pos = iocb->ki_pos;
++      loff_t end = pos + count - 1;
++      int ret;
++
++      if (iocb->ki_flags & IOCB_NOWAIT) {
++              /* we could block if there are any pages in the range */
++              if (filemap_range_has_page(mapping, pos, end))
++                      return -EAGAIN;
++      } else {
++              ret = filemap_write_and_wait_range(mapping, pos, end);
++              if (ret)
++                      return ret;
++      }
++
++      /*
++       * After a write we want buffered reads to be sure to go to disk to get
++       * the new data.  We invalidate clean cached page from the region we're
++       * about to write.  We do this *before* the write so that we can return
++       * without clobbering -EIOCBQUEUED from ->direct_IO().
++       */
++      return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
++                                           end >> PAGE_SHIFT);
++}
++
+ /**
+  * generic_file_read_iter - generic filesystem read routine
+  * @iocb:     kernel I/O control block
+@@ -3737,30 +3764,11 @@ generic_file_direct_write(struct kiocb *
+       write_len = iov_iter_count(from);
+       end = (pos + write_len - 1) >> PAGE_SHIFT;
+-      if (iocb->ki_flags & IOCB_NOWAIT) {
+-              /* If there are pages to writeback, return */
+-              if (filemap_range_has_page(file->f_mapping, pos,
+-                                         pos + write_len - 1))
+-                      return -EAGAIN;
+-      } else {
+-              written = filemap_write_and_wait_range(mapping, pos,
+-                                                      pos + write_len - 1);
+-              if (written)
+-                      goto out;
+-      }
+-
+-      /*
+-       * After a write we want buffered reads to be sure to go to disk to get
+-       * the new data.  We invalidate clean cached page from the region we're
+-       * about to write.  We do this *before* the write so that we can return
+-       * without clobbering -EIOCBQUEUED from ->direct_IO().
+-       */
+-      written = invalidate_inode_pages2_range(mapping,
+-                                      pos >> PAGE_SHIFT, end);
+       /*
+        * If a page can not be invalidated, return 0 to fall back
+        * to buffered write.
+        */
++      written = kiocb_invalidate_pages(iocb, write_len);
+       if (written) {
+               if (written == -EBUSY)
+                       return 0;
diff --git a/queue-6.1/filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch b/queue-6.1/filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch
new file mode 100644 (file)
index 0000000..e3cc7f4
--- /dev/null
@@ -0,0 +1,194 @@
+From stable+bounces-188297-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:23 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:37 +0200
+Subject: filemap: add a kiocb_invalidate_post_direct_write helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-4-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit c402a9a9430b670926decbb284b756ee6f47c1ec upstream.
+
+Add a helper to invalidate page cache after a dio write.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-7-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/direct-io.c          |   10 ++--------
+ fs/iomap/direct-io.c    |   12 ++----------
+ include/linux/fs.h      |    5 -----
+ include/linux/pagemap.h |    1 +
+ mm/filemap.c            |   37 ++++++++++++++++++++-----------------
+ 5 files changed, 25 insertions(+), 40 deletions(-)
+
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -286,14 +286,8 @@ static ssize_t dio_complete(struct dio *
+        * zeros from unwritten extents.
+        */
+       if (flags & DIO_COMPLETE_INVALIDATE &&
+-          ret > 0 && dio_op == REQ_OP_WRITE &&
+-          dio->inode->i_mapping->nrpages) {
+-              err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+-                                      offset >> PAGE_SHIFT,
+-                                      (offset + ret - 1) >> PAGE_SHIFT);
+-              if (err)
+-                      dio_warn_stale_pagecache(dio->iocb->ki_filp);
+-      }
++          ret > 0 && dio_op == REQ_OP_WRITE)
++              kiocb_invalidate_post_direct_write(dio->iocb, ret);
+       inode_dio_end(dio->inode);
+--- a/fs/iomap/direct-io.c
++++ b/fs/iomap/direct-io.c
+@@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_
+ {
+       const struct iomap_dio_ops *dops = dio->dops;
+       struct kiocb *iocb = dio->iocb;
+-      struct inode *inode = file_inode(iocb->ki_filp);
+       loff_t offset = iocb->ki_pos;
+       ssize_t ret = dio->error;
+@@ -108,15 +107,8 @@ ssize_t iomap_dio_complete(struct iomap_
+        * ->end_io() when necessary, otherwise a racing buffer read would cache
+        * zeros from unwritten extents.
+        */
+-      if (!dio->error && dio->size &&
+-          (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+-              int err;
+-              err = invalidate_inode_pages2_range(inode->i_mapping,
+-                              offset >> PAGE_SHIFT,
+-                              (offset + dio->size - 1) >> PAGE_SHIFT);
+-              if (err)
+-                      dio_warn_stale_pagecache(iocb->ki_filp);
+-      }
++      if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
++              kiocb_invalidate_post_direct_write(iocb, dio->size);
+       inode_dio_end(file_inode(iocb->ki_filp));
+       if (ret > 0) {
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3371,11 +3371,6 @@ static inline void inode_dio_end(struct
+               wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+ }
+-/*
+- * Warn about a page cache invalidation failure diring a direct I/O write.
+- */
+-void dio_warn_stale_pagecache(struct file *filp);
+-
+ extern void inode_set_flags(struct inode *inode, unsigned int flags,
+                           unsigned int mask);
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -31,6 +31,7 @@ int invalidate_inode_pages2(struct addre
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+               pgoff_t start, pgoff_t end);
+ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
++void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
+ int write_inode_now(struct inode *, int sync);
+ int filemap_fdatawrite(struct address_space *);
+ int filemap_flush(struct address_space *);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3733,7 +3733,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
+ /*
+  * Warn about a page cache invalidation failure during a direct I/O write.
+  */
+-void dio_warn_stale_pagecache(struct file *filp)
++static void dio_warn_stale_pagecache(struct file *filp)
+ {
+       static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
+       char pathname[128];
+@@ -3750,19 +3750,23 @@ void dio_warn_stale_pagecache(struct fil
+       }
+ }
++void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count)
++{
++      struct address_space *mapping = iocb->ki_filp->f_mapping;
++
++      if (mapping->nrpages &&
++          invalidate_inode_pages2_range(mapping,
++                      iocb->ki_pos >> PAGE_SHIFT,
++                      (iocb->ki_pos + count - 1) >> PAGE_SHIFT))
++              dio_warn_stale_pagecache(iocb->ki_filp);
++}
++
+ ssize_t
+ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
+ {
+-      struct file     *file = iocb->ki_filp;
+-      struct address_space *mapping = file->f_mapping;
+-      struct inode    *inode = mapping->host;
+-      loff_t          pos = iocb->ki_pos;
+-      ssize_t         written;
+-      size_t          write_len;
+-      pgoff_t         end;
+-
+-      write_len = iov_iter_count(from);
+-      end = (pos + write_len - 1) >> PAGE_SHIFT;
++      struct address_space *mapping = iocb->ki_filp->f_mapping;
++      size_t write_len = iov_iter_count(from);
++      ssize_t written;
+       /*
+        * If a page can not be invalidated, return 0 to fall back
+@@ -3772,7 +3776,7 @@ generic_file_direct_write(struct kiocb *
+       if (written) {
+               if (written == -EBUSY)
+                       return 0;
+-              goto out;
++              return written;
+       }
+       written = mapping->a_ops->direct_IO(iocb, from);
+@@ -3794,11 +3798,11 @@ generic_file_direct_write(struct kiocb *
+        *
+        * Skip invalidation for async writes or if mapping has no pages.
+        */
+-      if (written > 0 && mapping->nrpages &&
+-          invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
+-              dio_warn_stale_pagecache(file);
+-
+       if (written > 0) {
++              struct inode *inode = mapping->host;
++              loff_t pos = iocb->ki_pos;
++
++              kiocb_invalidate_post_direct_write(iocb, written);
+               pos += written;
+               write_len -= written;
+               if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
+@@ -3809,7 +3813,6 @@ generic_file_direct_write(struct kiocb *
+       }
+       if (written != -EIOCBQUEUED)
+               iov_iter_revert(from, write_len - iov_iter_count(from));
+-out:
+       return written;
+ }
+ EXPORT_SYMBOL(generic_file_direct_write);
diff --git a/queue-6.1/filemap-update-ki_pos-in-generic_perform_write.patch b/queue-6.1/filemap-update-ki_pos-in-generic_perform_write.patch
new file mode 100644 (file)
index 0000000..222b4a1
--- /dev/null
@@ -0,0 +1,126 @@
+From stable+bounces-188298-greg=kroah.com@vger.kernel.org Tue Oct 21 16:16:51 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:38 +0200
+Subject: filemap: update ki_pos in generic_perform_write
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Xiubo Li <xiubli@redhat.com>, Damien Le Moal <dlemoal@kernel.org>, Hannes Reinecke <hare@suse.de>, Theodore Ts'o <tytso@mit.edu>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Matthew Wilcox <willy@infradead.org>, "Miklos Szeredi" <miklos@szeredi.hu>, Miklos Szeredi <mszeredi@redhat.com>, "Trond Myklebust" <trond.myklebust@hammerspace.com>, Andrew Morton <akpm@linux-foundation.org>, Jeff Layton <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, "Ryusuke Konishi" <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-5-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 182c25e9c157f37bd0ab5a82fe2417e2223df459 upstream.
+
+All callers of generic_perform_write need to update ki_pos, so move it into
+common code.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-4-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Theodore Ts'o <tytso@mit.edu>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c |    2 --
+ fs/ext4/file.c |    9 +++------
+ fs/f2fs/file.c |    1 -
+ fs/nfs/file.c  |    1 -
+ mm/filemap.c   |    8 ++++----
+ 5 files changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1891,8 +1891,6 @@ retry_snap:
+                * can not run at the same time
+                */
+               written = generic_perform_write(iocb, from);
+-              if (likely(written >= 0))
+-                      iocb->ki_pos = pos + written;
+               ceph_end_io_write(inode);
+       }
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -287,12 +287,9 @@ static ssize_t ext4_buffered_write_iter(
+ out:
+       inode_unlock(inode);
+-      if (likely(ret > 0)) {
+-              iocb->ki_pos += ret;
+-              ret = generic_write_sync(iocb, ret);
+-      }
+-
+-      return ret;
++      if (unlikely(ret <= 0))
++              return ret;
++      return generic_write_sync(iocb, ret);
+ }
+ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -4659,7 +4659,6 @@ static ssize_t f2fs_buffered_write_iter(
+       current->backing_dev_info = NULL;
+       if (ret > 0) {
+-              iocb->ki_pos += ret;
+               f2fs_update_iostat(F2FS_I_SB(inode), inode,
+                                               APP_BUFFERED_IO, ret);
+       }
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -645,7 +645,6 @@ ssize_t nfs_file_write(struct kiocb *ioc
+               goto out;
+       written = result;
+-      iocb->ki_pos += written;
+       nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+       if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3891,7 +3891,10 @@ again:
+               balance_dirty_pages_ratelimited(mapping);
+       } while (iov_iter_count(i));
+-      return written ? written : status;
++      if (!written)
++              return status;
++      iocb->ki_pos += written;
++      return written;
+ }
+ EXPORT_SYMBOL(generic_perform_write);
+@@ -3970,7 +3973,6 @@ ssize_t __generic_file_write_iter(struct
+               endbyte = pos + status - 1;
+               err = filemap_write_and_wait_range(mapping, pos, endbyte);
+               if (err == 0) {
+-                      iocb->ki_pos = endbyte + 1;
+                       written += status;
+                       invalidate_mapping_pages(mapping,
+                                                pos >> PAGE_SHIFT,
+@@ -3983,8 +3985,6 @@ ssize_t __generic_file_write_iter(struct
+               }
+       } else {
+               written = generic_perform_write(iocb, from);
+-              if (likely(written > 0))
+-                      iocb->ki_pos += written;
+       }
+ out:
+       current->backing_dev_info = NULL;
diff --git a/queue-6.1/fs-factor-out-a-direct_write_fallback-helper.patch b/queue-6.1/fs-factor-out-a-direct_write_fallback-helper.patch
new file mode 100644 (file)
index 0000000..7a7c231
--- /dev/null
@@ -0,0 +1,193 @@
+From stable+bounces-188299-greg=kroah.com@vger.kernel.org Tue Oct 21 16:17:20 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:39 +0200
+Subject: fs: factor out a direct_write_fallback helper
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Christoph Hellwig <hch@lst.de>, Damien Le Moal <dlemoal@kernel.org>, Miklos Szeredi <mszeredi@redhat.com>, "Darrick J. Wong" <djwong@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Andreas Gruenbacher <agruenba@redhat.com>, "Anna Schumaker" <anna@kernel.org>, Chao Yu <chao@kernel.org>, Christian Brauner <brauner@kernel.org>, Hannes Reinecke <hare@suse.de>, Ilya Dryomov <idryomov@gmail.com>, Jaegeuk Kim <jaegeuk@kernel.org>, Jens Axboe <axboe@kernel.dk>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, "Matthew Wilcox" <willy@infradead.org>, Miklos Szeredi <miklos@szeredi.hu>, "Theodore Ts'o" <tytso@mit.edu>, Trond Myklebust <trond.myklebust@hammerspace.com>, Xiubo Li <xiubli@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, "Jeff Layton" <jlayton@kernel.org>, Andreas Dilger <adilger.kernel@dilger.ca>, Christoph Hellwig <hch@infradead.org>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-6-mngyadam@amazon.de>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 44fff0fa08ec5a6d9d5fb05443a36d854d0ece4d upstream.
+
+Add a helper dealing with handling the syncing of a buffered write
+fallback for direct I/O.
+
+Link: https://lkml.kernel.org/r/20230601145904.1385409-10-hch@lst.de
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chao Yu <chao@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[backing_dev_info is still being used here, so make small changes to the
+patch to keep the out label, which means replacing all returns with goto out.]
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/libfs.c         |   41 +++++++++++++++++++++++++++++++++++
+ include/linux/fs.h |    2 +
+ mm/filemap.c       |   61 ++++++++++++-----------------------------------------
+ 3 files changed, 57 insertions(+), 47 deletions(-)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1582,3 +1582,44 @@ bool inode_maybe_inc_iversion(struct ino
+       return true;
+ }
+ EXPORT_SYMBOL(inode_maybe_inc_iversion);
++
++ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
++              ssize_t direct_written, ssize_t buffered_written)
++{
++      struct address_space *mapping = iocb->ki_filp->f_mapping;
++      loff_t pos = iocb->ki_pos - buffered_written;
++      loff_t end = iocb->ki_pos - 1;
++      int err;
++
++      /*
++       * If the buffered write fallback returned an error, we want to return
++       * the number of bytes which were written by direct I/O, or the error
++       * code if that was zero.
++       *
++       * Note that this differs from normal direct-io semantics, which will
++       * return -EFOO even if some bytes were written.
++       */
++      if (unlikely(buffered_written < 0)) {
++              if (direct_written)
++                      return direct_written;
++              return buffered_written;
++      }
++
++      /*
++       * We need to ensure that the page cache pages are written to disk and
++       * invalidated to preserve the expected O_DIRECT semantics.
++       */
++      err = filemap_write_and_wait_range(mapping, pos, end);
++      if (err < 0) {
++              /*
++               * We don't know how much we wrote, so just return the number of
++               * bytes which were direct-written
++               */
++              if (direct_written)
++                      return direct_written;
++              return err;
++      }
++      invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
++      return direct_written + buffered_written;
++}
++EXPORT_SYMBOL_GPL(direct_write_fallback);
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3278,6 +3278,8 @@ extern ssize_t __generic_file_write_iter
+ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
+ ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
++ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
++              ssize_t direct_written, ssize_t buffered_written);
+ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+               rwf_t flags);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3923,25 +3923,21 @@ ssize_t __generic_file_write_iter(struct
+ {
+       struct file *file = iocb->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+-      struct inode    *inode = mapping->host;
+-      ssize_t         written = 0;
+-      ssize_t         err;
+-      ssize_t         status;
++      struct inode *inode = mapping->host;
++      ssize_t ret;
+       /* We can write back this queue in page reclaim */
+       current->backing_dev_info = inode_to_bdi(inode);
+-      err = file_remove_privs(file);
+-      if (err)
++      ret = file_remove_privs(file);
++      if (ret)
+               goto out;
+-      err = file_update_time(file);
+-      if (err)
++      ret = file_update_time(file);
++      if (ret)
+               goto out;
+       if (iocb->ki_flags & IOCB_DIRECT) {
+-              loff_t pos, endbyte;
+-
+-              written = generic_file_direct_write(iocb, from);
++              ret = generic_file_direct_write(iocb, from);
+               /*
+                * If the write stopped short of completing, fall back to
+                * buffered writes.  Some filesystems do this for writes to
+@@ -3949,46 +3945,17 @@ ssize_t __generic_file_write_iter(struct
+                * not succeed (even if it did, DAX does not handle dirty
+                * page-cache pages correctly).
+                */
+-              if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
+-                      goto out;
+-
+-              pos = iocb->ki_pos;
+-              status = generic_perform_write(iocb, from);
+-              /*
+-               * If generic_perform_write() returned a synchronous error
+-               * then we want to return the number of bytes which were
+-               * direct-written, or the error code if that was zero.  Note
+-               * that this differs from normal direct-io semantics, which
+-               * will return -EFOO even if some bytes were written.
+-               */
+-              if (unlikely(status < 0)) {
+-                      err = status;
++              if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
+                       goto out;
+-              }
+-              /*
+-               * We need to ensure that the page cache pages are written to
+-               * disk and invalidated to preserve the expected O_DIRECT
+-               * semantics.
+-               */
+-              endbyte = pos + status - 1;
+-              err = filemap_write_and_wait_range(mapping, pos, endbyte);
+-              if (err == 0) {
+-                      written += status;
+-                      invalidate_mapping_pages(mapping,
+-                                               pos >> PAGE_SHIFT,
+-                                               endbyte >> PAGE_SHIFT);
+-              } else {
+-                      /*
+-                       * We don't know how much we wrote, so just return
+-                       * the number of bytes which were direct-written
+-                       */
+-              }
+-      } else {
+-              written = generic_perform_write(iocb, from);
++              ret = direct_write_fallback(iocb, from, ret,
++                              generic_perform_write(iocb, from));
++              goto out;
+       }
++
++      ret = generic_perform_write(iocb, from);
+ out:
+       current->backing_dev_info = NULL;
+-      return written ? written : err;
++      return ret;
+ }
+ EXPORT_SYMBOL(__generic_file_write_iter);
diff --git a/queue-6.1/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch b/queue-6.1/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch
new file mode 100644 (file)
index 0000000..bfb741a
--- /dev/null
@@ -0,0 +1,71 @@
+From stable+bounces-188303-greg=kroah.com@vger.kernel.org Tue Oct 21 16:19:13 2025
+From: Mahmoud Adam <mngyadam@amazon.de>
+Date: Tue, 21 Oct 2025 09:03:43 +0200
+Subject: nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <nagy@khwaternagy.com>, Ryusuke Konishi <konishi.ryusuke@gmail.com>, <syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com>, <syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com>, Andrew Morton <akpm@linux-foundation.org>, Jens Axboe <axboe@kernel.dk>, Xiubo Li <xiubli@redhat.com>, Ilya Dryomov <idryomov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Alexander Viro <viro@zeniv.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Andreas Dilger <adilger.kernel@dilger.ca>, Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>, Christoph Hellwig <hch@infradead.org>, "Darrick J. Wong" <djwong@kernel.org>, Trond Myklebust <trond.myklebust@hammerspace.com>, Anna Schumaker <anna@kernel.org>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Hannes Reinecke <hare@suse.de>, Damien Le Moal <dlemoal@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <ceph-devel@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>, <linux-f2fs-devel@lists.sourceforge.net>, <linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>, <linux-nilfs@vger.kernel.org>, <linux-mm@kvack.org>
+Message-ID: <20251021070353.96705-10-mngyadam@amazon.de>
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream.
+
+After commit c0e473a0d226 ("block: fix race between set_blocksize and read
+paths") was merged, set_blocksize() called by sb_set_blocksize() now locks
+the inode of the backing device file.  As a result of this change, syzbot
+started reporting deadlock warnings due to a circular dependency involving
+the semaphore "ns_sem" of the nilfs object, the inode lock of the backing
+device file, and the locks that this inode lock is transitively dependent
+on.
+
+This is caused by a new lock dependency added by the above change, since
+init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem".
+However, these warnings are false positives because init_nilfs() is called
+in the early stage of the mount operation and the filesystem has not yet
+started.
+
+The reason why "ns_sem" is locked in init_nilfs() was to avoid a race
+condition in nilfs_fill_super() caused by sharing a nilfs object among
+multiple filesystem instances (super block structures) in the early
+implementation.  However, nilfs objects and super block structures have
+long ago become one-to-one, and there is no longer any need to use the
+semaphore there.
+
+So, fix this issue by removing the use of the semaphore "ns_sem" in
+init_nilfs().
+
+Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com
+Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773
+Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b
+Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/the_nilfs.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -680,8 +680,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+       int blocksize;
+       int err;
+-      down_write(&nilfs->ns_sem);
+-
+       blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
+       if (!blocksize) {
+               nilfs_err(sb, "unable to set blocksize");
+@@ -757,7 +755,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+       set_nilfs_init(nilfs);
+       err = 0;
+  out:
+-      up_write(&nilfs->ns_sem);
+       return err;
+  failed_sbh:
index 675c0d094b176a788af188ee7863ce7b552cb3ab..3d0c4de8050e11b850e1aa6b46a962777d32fc47 100644 (file)
@@ -72,3 +72,11 @@ cacheinfo-initialize-variables-in-fetch_cache_info.patch
 cacheinfo-fix-llc-is-not-exported-through-sysfs.patch
 drivers-base-cacheinfo-update-cpu_map_populated-during-cpu-hotplug.patch
 arm64-tegra-update-cache-properties.patch
+filemap-add-a-kiocb_invalidate_pages-helper.patch
+filemap-add-a-kiocb_invalidate_post_direct_write-helper.patch
+filemap-update-ki_pos-in-generic_perform_write.patch
+fs-factor-out-a-direct_write_fallback-helper.patch
+direct_write_fallback-on-error-revert-the-ki_pos-update-from-buffered-write.patch
+block-open-code-__generic_file_write_iter-for-blkdev-writes.patch
+block-fix-race-between-set_blocksize-and-read-paths.patch
+nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch