From: Greg Kroah-Hartman Date: Fri, 23 Feb 2024 16:00:55 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.19.308~99 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2c9a6f5b0cde1a87715c80918c48383566e4d53d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch zonefs-improve-error-handling.patch --- diff --git a/queue-5.10/series b/queue-5.10/series index 39d0d6259cc..285c8a1eeb0 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -5,3 +5,5 @@ smb-client-fix-oob-in-receive_encrypted_standard.patch smb-client-fix-potential-oobs-in-smb2_parse_contexts.patch smb-client-fix-parsing-of-smb3.1.1-posix-create-context.patch sched-rt-sysctl_sched_rr_timeslice-show-default-timeslice-after-reset.patch +userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch +zonefs-improve-error-handling.patch diff --git a/queue-5.10/userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch b/queue-5.10/userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch new file mode 100644 index 00000000000..0242def129c --- /dev/null +++ b/queue-5.10/userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch @@ -0,0 +1,80 @@ +From 67695f18d55924b2013534ef3bdc363bc9e14605 Mon Sep 17 00:00:00 2001 +From: Lokesh Gidra +Date: Wed, 17 Jan 2024 14:37:29 -0800 +Subject: userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb + +From: Lokesh Gidra + +commit 67695f18d55924b2013534ef3bdc363bc9e14605 upstream. + +In mfill_atomic_hugetlb(), mmap_changing isn't being checked +again if we drop mmap_lock and reacquire it. When the lock is not held, +mmap_changing could have been incremented. This is also inconsistent +with the behavior in mfill_atomic(). 
+ +Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com +Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") +Signed-off-by: Lokesh Gidra +Cc: Andrea Arcangeli +Cc: Mike Rapoport +Cc: Axel Rasmussen +Cc: Brian Geffon +Cc: David Hildenbrand +Cc: Jann Horn +Cc: Kalesh Singh +Cc: Matthew Wilcox (Oracle) +Cc: Nicolas Geoffray +Cc: Peter Xu +Cc: Suren Baghdasaryan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Mike Rapoport (IBM) +Signed-off-by: Greg Kroah-Hartman +--- + mm/userfaultfd.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -209,6 +209,7 @@ static __always_inline ssize_t __mcopy_a + unsigned long dst_start, + unsigned long src_start, + unsigned long len, ++ bool *mmap_changing, + bool zeropage) + { + int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED; +@@ -329,6 +330,15 @@ retry: + goto out; + } + mmap_read_lock(dst_mm); ++ /* ++ * If memory mappings are changing because of non-cooperative ++ * operation (e.g. 
mremap) running in parallel, bail out and ++ * request the user to retry later ++ */ ++ if (mmap_changing && READ_ONCE(*mmap_changing)) { ++ err = -EAGAIN; ++ break; ++ } + + dst_vma = NULL; + goto retry; +@@ -410,6 +420,7 @@ extern ssize_t __mcopy_atomic_hugetlb(st + unsigned long dst_start, + unsigned long src_start, + unsigned long len, ++ bool *mmap_changing, + bool zeropage); + #endif /* CONFIG_HUGETLB_PAGE */ + +@@ -529,7 +540,8 @@ retry: + */ + if (is_vm_hugetlb_page(dst_vma)) + return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, +- src_start, len, zeropage); ++ src_start, len, mmap_changing, ++ zeropage); + + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) + goto out_unlock; diff --git a/queue-5.10/zonefs-improve-error-handling.patch b/queue-5.10/zonefs-improve-error-handling.patch new file mode 100644 index 00000000000..b72b6a909cf --- /dev/null +++ b/queue-5.10/zonefs-improve-error-handling.patch @@ -0,0 +1,188 @@ +From 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 8 Feb 2024 17:26:59 +0900 +Subject: zonefs: Improve error handling + +From: Damien Le Moal + +commit 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 upstream. + +Write error handling is racy and can sometimes lead to the error recovery +path wrongly changing the inode size of a sequential zone file to an +incorrect value which results in garbage data being readable at the end +of a file. There are 2 problems: + +1) zonefs_file_dio_write() updates a zone file write pointer offset + after issuing a direct IO with iomap_dio_rw(). This update is done + only if the IO succeeds for synchronous direct writes. However, for + asynchronous direct writes, the update is done without waiting for + the IO completion so that the next asynchronous IO can be + immediately issued. 
However, if an asynchronous IO completes with a + failure right before the i_truncate_mutex lock protecting the update, + the update may change the value of the inode write pointer offset + that was corrected by the error path (zonefs_io_error() function). + +2) zonefs_io_error() is called when a read or write error occurs. This + function executes a report zone operation using the callback function + zonefs_io_error_cb(), which does all the error recovery handling + based on the current zone condition, write pointer position and + according to the mount options being used. However, depending on the + zoned device being used, a report zone callback may be executed in a + context that is different from the context of __zonefs_io_error(). As + a result, zonefs_io_error_cb() may be executed without the inode + truncate mutex lock held, which can lead to invalid error processing. + +Fix both problems as follows: +- Problem 1: Perform the inode write pointer offset update before a + direct write is issued with iomap_dio_rw(). This is safe to do as + partial direct writes are not supported (IOMAP_DIO_PARTIAL is not + set) and any failed IO will trigger the execution of zonefs_io_error() + which will correct the inode write pointer offset to reflect the + current state of the one on the device. +- Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() + and call this function directly from __zonefs_io_error() after + obtaining the zone information using blkdev_report_zones() with a + simple callback function that copies to a local stack variable the + struct blk_zone obtained from the device. This ensures that error + handling is performed holding the inode truncate mutex. + This change also simplifies error handling for conventional zone files + by bypassing the execution of report zones entirely. 
This is safe to + do because the condition of conventional zones cannot be read-only or + offline and conventional zone files are always fully mapped with a + constant file size. + +Reported-by: Shin'ichiro Kawasaki +Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Tested-by: Shin'ichiro Kawasaki +Reviewed-by: Johannes Thumshirn +Reviewed-by: Himanshu Madhani +Signed-off-by: Greg Kroah-Hartman +--- + fs/zonefs/super.c | 70 ++++++++++++++++++++++++++++++------------------------ + 1 file changed, 40 insertions(+), 30 deletions(-) + +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -319,16 +319,18 @@ static loff_t zonefs_check_zone_conditio + } + } + +-struct zonefs_ioerr_data { +- struct inode *inode; +- bool write; +-}; +- + static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + void *data) + { +- struct zonefs_ioerr_data *err = data; +- struct inode *inode = err->inode; ++ struct blk_zone *z = data; ++ ++ *z = *zone; ++ return 0; ++} ++ ++static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, ++ bool write) ++{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +@@ -344,8 +346,8 @@ static int zonefs_io_error_cb(struct blk + isize = i_size_read(inode); + if (zone->cond != BLK_ZONE_COND_OFFLINE && + zone->cond != BLK_ZONE_COND_READONLY && +- !err->write && isize == data_size) +- return 0; ++ !write && isize == data_size) ++ return; + + /* + * At this point, we detected either a bad zone or an inconsistency +@@ -366,8 +368,9 @@ static int zonefs_io_error_cb(struct blk + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. 
+ */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) +- zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", ++ if (isize != data_size) ++ zonefs_warn(sb, ++ "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* +@@ -427,8 +430,6 @@ static int zonefs_io_error_cb(struct blk + zonefs_update_stats(inode, data_size); + zonefs_i_size_write(inode, data_size); + zi->i_wpoffset = data_size; +- +- return 0; + } + + /* +@@ -442,23 +443,25 @@ static void __zonefs_io_error(struct ino + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; +- struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; +- unsigned int nr_zones = 1; +- struct zonefs_ioerr_data err = { +- .inode = inode, +- .write = write, +- }; ++ struct blk_zone zone; + int ret; + + /* +- * The only files that have more than one zone are conventional zone +- * files with aggregated conventional zones, for which the inode zone +- * size is always larger than the device zone size. +- */ +- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) +- nr_zones = zi->i_zone_size >> +- (sbi->s_zone_sectors_shift + SECTOR_SHIFT); ++ * Conventional zone have no write pointer and cannot become read-only ++ * or offline. So simply fake a report for a single or aggregated zone ++ * and let zonefs_handle_io_error() correct the zone inode information ++ * according to the mount options. ++ */ ++ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) { ++ zone.start = zi->i_zsector; ++ zone.len = zi->i_max_size >> SECTOR_SHIFT; ++ zone.wp = zone.start + zone.len; ++ zone.type = BLK_ZONE_TYPE_CONVENTIONAL; ++ zone.cond = BLK_ZONE_COND_NOT_WP; ++ zone.capacity = zone.len; ++ goto handle_io_error; ++ } + + /* + * Memory allocations in blkdev_report_zones() can trigger a memory +@@ -469,12 +472,19 @@ static void __zonefs_io_error(struct ino + * the GFP_NOIO context avoids both problems. 
+ */ + noio_flag = memalloc_noio_save(); +- ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, +- zonefs_io_error_cb, &err); +- if (ret != nr_zones) ++ ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, 1, ++ zonefs_io_error_cb, &zone); ++ memalloc_noio_restore(noio_flag); ++ if (ret != 1) { + zonefs_err(sb, "Get inode %lu zone information failed %d\n", + inode->i_ino, ret); +- memalloc_noio_restore(noio_flag); ++ zonefs_warn(sb, "remounting filesystem read-only\n"); ++ sb->s_flags |= SB_RDONLY; ++ return; ++ } ++ ++handle_io_error: ++ zonefs_handle_io_error(inode, &zone, write); + } + + static void zonefs_io_error(struct inode *inode, bool write)