From: Greg Kroah-Hartman Date: Mon, 8 Jul 2024 11:55:45 +0000 (+0200) Subject: 6.9-stable patches X-Git-Tag: v6.6.38~33 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3b73ad8f2832449e681f1575bbba02d9e6118bef;p=thirdparty%2Fkernel%2Fstable-queue.git 6.9-stable patches added patches: mm-avoid-overflows-in-dirty-throttling-logic.patch mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch nilfs2-fix-inode-number-range-checks.patch --- diff --git a/queue-6.9/mm-avoid-overflows-in-dirty-throttling-logic.patch b/queue-6.9/mm-avoid-overflows-in-dirty-throttling-logic.patch new file mode 100644 index 00000000000..e2f21fdfadd --- /dev/null +++ b/queue-6.9/mm-avoid-overflows-in-dirty-throttling-logic.patch @@ -0,0 +1,103 @@ +From 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 21 Jun 2024 16:42:38 +0200 +Subject: mm: avoid overflows in dirty throttling logic + +From: Jan Kara + +commit 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 upstream. + +The dirty throttling logic is interspersed with assumptions that dirty +limits in PAGE_SIZE units fit into 32-bit (so that various multiplications +fit into 64-bits). If limits end up being larger, we will hit overflows, +possible divisions by 0 etc. Fix these problems by never allowing such +large dirty limits as they have dubious practical value anyway. For +dirty_bytes / dirty_background_bytes interfaces we can just refuse to set +such large limits. For dirty_ratio / dirty_background_ratio it isn't so +simple as the dirty limit is computed from the amount of available memory +which can change due to memory hotplug etc. So when converting dirty +limits from ratios to numbers of pages, we just don't allow the result to +exceed UINT_MAX. + +This is a root-only triggerable problem which occurs when the operator +sets dirty limits to >16 TB. 
+ +Link: https://lkml.kernel.org/r/20240621144246.11148-2-jack@suse.cz +Signed-off-by: Jan Kara +Reported-by: Zach O'Keefe +Reviewed-By: Zach O'Keefe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page-writeback.c | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -415,13 +415,20 @@ static void domain_dirty_limits(struct d + else + bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; + +- if (bg_thresh >= thresh) +- bg_thresh = thresh / 2; + tsk = current; + if (rt_task(tsk)) { + bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; + thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; + } ++ /* ++ * Dirty throttling logic assumes the limits in page units fit into ++ * 32-bits. This gives 16TB dirty limits max which is hopefully enough. ++ */ ++ if (thresh > UINT_MAX) ++ thresh = UINT_MAX; ++ /* This makes sure bg_thresh is within 32-bits as well */ ++ if (bg_thresh >= thresh) ++ bg_thresh = thresh / 2; + dtc->thresh = thresh; + dtc->bg_thresh = bg_thresh; + +@@ -471,7 +478,11 @@ static unsigned long node_dirty_limit(st + if (rt_task(tsk)) + dirty += dirty / 4; + +- return dirty; ++ /* ++ * Dirty throttling logic assumes the limits in page units fit into ++ * 32-bits. This gives 16TB dirty limits max which is hopefully enough. 
++ */ ++ return min_t(unsigned long, dirty, UINT_MAX); + } + + /** +@@ -508,10 +519,17 @@ static int dirty_background_bytes_handle + void *buffer, size_t *lenp, loff_t *ppos) + { + int ret; ++ unsigned long old_bytes = dirty_background_bytes; + + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +- if (ret == 0 && write) ++ if (ret == 0 && write) { ++ if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) > ++ UINT_MAX) { ++ dirty_background_bytes = old_bytes; ++ return -ERANGE; ++ } + dirty_background_ratio = 0; ++ } + return ret; + } + +@@ -537,6 +555,10 @@ static int dirty_bytes_handler(struct ct + + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_bytes != old_bytes) { ++ if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) { ++ vm_dirty_bytes = old_bytes; ++ return -ERANGE; ++ } + writeback_set_ratelimit(); + vm_dirty_ratio = 0; + } diff --git a/queue-6.9/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch b/queue-6.9/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch new file mode 100644 index 00000000000..a1c38a00b01 --- /dev/null +++ b/queue-6.9/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch @@ -0,0 +1,47 @@ +From cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 Mon Sep 17 00:00:00 2001 +From: Jinliang Zheng +Date: Thu, 20 Jun 2024 20:21:24 +0800 +Subject: mm: optimize the redundant loop of mm_update_owner_next() + +From: Jinliang Zheng + +commit cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 upstream. + +When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or +/proc or ptrace or page migration (get_task_mm()), it is impossible to +find an appropriate task_struct in the loop whose mm_struct is the same as +the target mm_struct. + +If the above race condition is combined with the stress-ng-zombie and +stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in +write_lock_irq() for tasklist_lock. 
+ +Recognize this situation in advance and exit early. + +Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com +Signed-off-by: Jinliang Zheng +Acked-by: Michal Hocko +Cc: Christian Brauner +Cc: Jens Axboe +Cc: Mateusz Guzik +Cc: Matthew Wilcox (Oracle) +Cc: Oleg Nesterov +Cc: Tycho Andersen +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/exit.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -488,6 +488,8 @@ retry: + * Search through everything else, we should not get here often. + */ + for_each_process(g) { ++ if (atomic_read(&mm->mm_users) <= 1) ++ break; + if (g->flags & PF_KTHREAD) + continue; + for_each_thread(g, c) { diff --git a/queue-6.9/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch b/queue-6.9/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch new file mode 100644 index 00000000000..ba2047bb067 --- /dev/null +++ b/queue-6.9/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch @@ -0,0 +1,80 @@ +From bb76c6c274683c8570ad788f79d4b875bde0e458 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 23 Jun 2024 14:11:34 +0900 +Subject: nilfs2: add missing check for inode numbers on directory entries + +From: Ryusuke Konishi + +commit bb76c6c274683c8570ad788f79d4b875bde0e458 upstream. + +Syzbot reported that mounting and unmounting a specific pattern of +corrupted nilfs2 filesystem images causes a use-after-free of metadata +file inodes, which triggers a kernel bug in lru_add_fn(). + +As Jan Kara pointed out, this is because the link count of a metadata file +gets corrupted to 0, and nilfs_evict_inode(), which is called from iput(), +tries to delete that inode (ifile inode in this case). + +The inconsistency occurs because directories containing the inode numbers +of these metadata files that should not be visible in the namespace are +read without checking. 
+ +Fix this issue by treating the inode numbers of these internal files as +errors in the sanity check helper when reading directory folios/pages. + +Also thanks to Hillf Danton and Matthew Wilcox for their initial mm-layer +analysis. + +Link: https://lkml.kernel.org/r/20240623051135.4180-3-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+d79afb004be235636ee8@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=d79afb004be235636ee8 +Reported-by: Jan Kara +Closes: https://lkml.kernel.org/r/20240617075758.wewhukbrjod5fp5o@quack3 +Tested-by: Ryusuke Konishi +Cc: Hillf Danton +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/dir.c | 6 ++++++ + fs/nilfs2/nilfs.h | 5 +++++ + 2 files changed, 11 insertions(+) + +--- a/fs/nilfs2/dir.c ++++ b/fs/nilfs2/dir.c +@@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct fol + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; ++ if (unlikely(p->inode && ++ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) ++ goto Einumber; + } + if (offs != limit) + goto Eend; +@@ -160,6 +163,9 @@ Enamelen: + goto bad_entry; + Espan: + error = "directory entry across blocks"; ++ goto bad_entry; ++Einumber: ++ error = "disallowed inode number"; + bad_entry: + nilfs_error(sb, + "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -121,6 +121,11 @@ enum { + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + ++#define NILFS_PRIVATE_INODE(ino) ({ \ ++ ino_t __ino = (ino); \ ++ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ ++ (__ino) != NILFS_SKETCH_INO); }) ++ + /** + * struct nilfs_transaction_info: context information for synchronization + * @ti_magic: Magic number diff --git a/queue-6.9/nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch 
b/queue-6.9/nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch new file mode 100644 index 00000000000..cba9c57077a --- /dev/null +++ b/queue-6.9/nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch @@ -0,0 +1,149 @@ +From 93aef9eda1cea9e84ab2453fcceb8addad0e46f1 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 23 Jun 2024 14:11:35 +0900 +Subject: nilfs2: fix incorrect inode allocation from reserved inodes + +From: Ryusuke Konishi + +commit 93aef9eda1cea9e84ab2453fcceb8addad0e46f1 upstream. + +If the bitmap block that manages the inode allocation status is corrupted, +nilfs_ifile_create_inode() may allocate a new inode from the reserved +inode area where it should not be allocated. + +A previous fix, commit d325dc6eb763 ("nilfs2: fix use-after-free bug of +struct nilfs_root"), fixed the problem that reserved inodes with inode +numbers less than NILFS_USER_INO (=11) were incorrectly reallocated due to +bitmap corruption, but the start number of non-reserved inodes is +read from the super block and may change, in which case inode allocation +may still occur from the extended reserved inode area. + +If that happens, access to that inode will cause an IO error, causing the +file system to degrade to an error state. + +Fix this potential issue by adding a wraparound option to the common +metadata object allocation routine and by modifying +nilfs_ifile_create_inode() to disable the option so that it only allocates +inodes with inode numbers greater than or equal to the inode number read +in "nilfs->ns_first_ino", regardless of the bitmap status of reserved +inodes. 
+ +Link: https://lkml.kernel.org/r/20240623051135.4180-4-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Cc: Hillf Danton +Cc: Jan Kara +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/alloc.c | 19 +++++++++++++++---- + fs/nilfs2/alloc.h | 4 ++-- + fs/nilfs2/dat.c | 2 +- + fs/nilfs2/ifile.c | 7 ++----- + 4 files changed, 20 insertions(+), 12 deletions(-) + +--- a/fs/nilfs2/alloc.c ++++ b/fs/nilfs2/alloc.c +@@ -377,11 +377,12 @@ void *nilfs_palloc_block_get_entry(const + * @target: offset number of an entry in the group (start point) + * @bsize: size in bits + * @lock: spin lock protecting @bitmap ++ * @wrap: whether to wrap around + */ + static int nilfs_palloc_find_available_slot(unsigned char *bitmap, + unsigned long target, + unsigned int bsize, +- spinlock_t *lock) ++ spinlock_t *lock, bool wrap) + { + int pos, end = bsize; + +@@ -397,6 +398,8 @@ static int nilfs_palloc_find_available_s + + end = target; + } ++ if (!wrap) ++ return -ENOSPC; + + /* wrap around */ + for (pos = 0; pos < end; pos++) { +@@ -495,9 +498,10 @@ int nilfs_palloc_count_max_entries(struc + * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the allocation ++ * @wrap: whether to wrap around + */ + int nilfs_palloc_prepare_alloc_entry(struct inode *inode, +- struct nilfs_palloc_req *req) ++ struct nilfs_palloc_req *req, bool wrap) + { + struct buffer_head *desc_bh, *bitmap_bh; + struct nilfs_palloc_group_desc *desc; +@@ -516,7 +520,7 @@ int nilfs_palloc_prepare_alloc_entry(str + entries_per_group = nilfs_palloc_entries_per_group(inode); + + for (i = 0; i < ngroups; i += n) { +- if (group >= ngroups) { ++ if (group >= ngroups && wrap) { + /* wrap around */ + group = 0; + maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, +@@ -550,7 +554,14 @@ int nilfs_palloc_prepare_alloc_entry(str 
+ bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + pos = nilfs_palloc_find_available_slot( +- bitmap, group_offset, entries_per_group, lock); ++ bitmap, group_offset, entries_per_group, lock, ++ wrap); ++ /* ++ * Since the search for a free slot in the second and ++ * subsequent bitmap blocks always starts from the ++ * beginning, the wrap flag only has an effect on the ++ * first search. ++ */ + kunmap_local(bitmap_kaddr); + if (pos >= 0) + goto found; +--- a/fs/nilfs2/alloc.h ++++ b/fs/nilfs2/alloc.h +@@ -50,8 +50,8 @@ struct nilfs_palloc_req { + struct buffer_head *pr_entry_bh; + }; + +-int nilfs_palloc_prepare_alloc_entry(struct inode *, +- struct nilfs_palloc_req *); ++int nilfs_palloc_prepare_alloc_entry(struct inode *inode, ++ struct nilfs_palloc_req *req, bool wrap); + void nilfs_palloc_commit_alloc_entry(struct inode *, + struct nilfs_palloc_req *); + void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); +--- a/fs/nilfs2/dat.c ++++ b/fs/nilfs2/dat.c +@@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode + { + int ret; + +- ret = nilfs_palloc_prepare_alloc_entry(dat, req); ++ ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); + if (ret < 0) + return ret; + +--- a/fs/nilfs2/ifile.c ++++ b/fs/nilfs2/ifile.c +@@ -56,13 +56,10 @@ int nilfs_ifile_create_inode(struct inod + struct nilfs_palloc_req req; + int ret; + +- req.pr_entry_nr = 0; /* +- * 0 says find free inode from beginning +- * of a group. dull code!! 
+- */ ++ req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); + req.pr_entry_bh = NULL; + +- ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); ++ ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); + if (!ret) { + ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, + &req.pr_entry_bh); diff --git a/queue-6.9/nilfs2-fix-inode-number-range-checks.patch b/queue-6.9/nilfs2-fix-inode-number-range-checks.patch new file mode 100644 index 00000000000..30a9d2431c3 --- /dev/null +++ b/queue-6.9/nilfs2-fix-inode-number-range-checks.patch @@ -0,0 +1,100 @@ +From e2fec219a36e0993642844be0f345513507031f4 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 23 Jun 2024 14:11:33 +0900 +Subject: nilfs2: fix inode number range checks + +From: Ryusuke Konishi + +commit e2fec219a36e0993642844be0f345513507031f4 upstream. + +Patch series "nilfs2: fix potential issues related to reserved inodes". + +This series fixes one use-after-free issue reported by syzbot, caused by +nilfs2's internal inode being exposed in the namespace on a corrupted +filesystem, and a couple of flaws that cause problems if the starting +number of non-reserved inodes written in the on-disk super block is +intentionally (or corruptly) changed from its default value. + + +This patch (of 3): + +In the current implementation of nilfs2, "nilfs->ns_first_ino", which +gives the first non-reserved inode number, is read from the superblock, +but its lower limit is not checked. + +As a result, if a number that overlaps with the inode number range of +reserved inodes such as the root directory or metadata files is set in the +super block parameter, the inode number test macros (NILFS_MDT_INODE and +NILFS_VALID_INODE) will not function properly. 
+ +In addition, these test macros use left bit-shift calculations with +the inode number as the shift count via the BIT macro, but the result of a +shift calculation that exceeds the bit width of an integer is undefined in +the C specification, so if "ns_first_ino" is set to a large value other +than the default value NILFS_USER_INO (=11), the macros may potentially +malfunction depending on the environment. + +Fix these issues by checking the lower bound of "nilfs->ns_first_ino" and +by preventing bit shifts equal to or greater than the NILFS_USER_INO +constant in the inode number test macros. + +Also, change the type of "ns_first_ino" from signed integer to unsigned +integer to avoid the need for type casting in comparisons such as the +lower bound check introduced this time. + +Link: https://lkml.kernel.org/r/20240623051135.4180-1-konishi.ryusuke@gmail.com +Link: https://lkml.kernel.org/r/20240623051135.4180-2-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Cc: Hillf Danton +Cc: Jan Kara +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/nilfs.h | 5 +++-- + fs/nilfs2/the_nilfs.c | 6 ++++++ + fs/nilfs2/the_nilfs.h | 2 +- + 3 files changed, 10 insertions(+), 3 deletions(-) + +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -116,9 +116,10 @@ enum { + #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) + + #define NILFS_MDT_INODE(sb, ino) \ +- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) ++ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) + #define NILFS_VALID_INODE(sb, ino) \ +- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) ++ ((ino) >= NILFS_FIRST_INO(sb) || \ ++ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + + /** + * struct nilfs_transaction_info: context information for synchronization +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -452,6 +452,12 @@ static int 
nilfs_store_disk_layout(struc + } + + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); ++ if (nilfs->ns_first_ino < NILFS_USER_INO) { ++ nilfs_err(nilfs->ns_sb, ++ "too small lower limit for non-reserved inode numbers: %u", ++ nilfs->ns_first_ino); ++ return -EINVAL; ++ } + + nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { +--- a/fs/nilfs2/the_nilfs.h ++++ b/fs/nilfs2/the_nilfs.h +@@ -182,7 +182,7 @@ struct the_nilfs { + unsigned long ns_nrsvsegs; + unsigned long ns_first_data_block; + int ns_inode_size; +- int ns_first_ino; ++ unsigned int ns_first_ino; + u32 ns_crc_seed; + + /* /sys/fs// */ diff --git a/queue-6.9/series b/queue-6.9/series index c9bdd9d47ea..b4f56fc622f 100644 --- a/queue-6.9/series +++ b/queue-6.9/series @@ -130,3 +130,8 @@ bnxt_en-fix-the-resource-check-condition-for-rss-con.patch gpiolib-of-add-polarity-quirk-for-tsc2005.patch platform-x86-toshiba_acpi-fix-quickstart-quirk-handling.patch revert-igc-fix-a-log-entry-using-uninitialized-netdev.patch +nilfs2-fix-inode-number-range-checks.patch +nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch +nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch +mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch +mm-avoid-overflows-in-dirty-throttling-logic.patch