From: Greg Kroah-Hartman Date: Mon, 8 Jul 2024 11:55:23 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.6.38~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1c97820651bc36d557d3cecb828b444a0094f6f9;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: mm-avoid-overflows-in-dirty-throttling-logic.patch mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch nilfs2-fix-inode-number-range-checks.patch --- diff --git a/queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch b/queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch new file mode 100644 index 00000000000..c5d2fb2d169 --- /dev/null +++ b/queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch @@ -0,0 +1,103 @@ +From 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 21 Jun 2024 16:42:38 +0200 +Subject: mm: avoid overflows in dirty throttling logic + +From: Jan Kara + +commit 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 upstream. + +The dirty throttling logic is interspersed with assumptions that dirty +limits in PAGE_SIZE units fit into 32-bit (so that various multiplications +fit into 64-bits). If limits end up being larger, we will hit overflows, +possible divisions by 0 etc. Fix these problems by never allowing such +large dirty limits as they have dubious practical value anyway. For +dirty_bytes / dirty_background_bytes interfaces we can just refuse to set +such large limits. For dirty_ratio / dirty_background_ratio it isn't so +simple as the dirty limit is computed from the amount of available memory +which can change due to memory hotplug etc. So when converting dirty +limits from ratios to numbers of pages, we just don't allow the result to +exceed UINT_MAX. + +This is a root-only triggerable problem which occurs when the operator +sets dirty limits to >16 TB. 
+ +Link: https://lkml.kernel.org/r/20240621144246.11148-2-jack@suse.cz +Signed-off-by: Jan Kara +Reported-by: Zach O'Keefe +Reviewed-By: Zach O'Keefe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page-writeback.c | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -414,13 +414,20 @@ static void domain_dirty_limits(struct d + else + bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; + +- if (bg_thresh >= thresh) +- bg_thresh = thresh / 2; + tsk = current; + if (rt_task(tsk)) { + bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; + thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; + } ++ /* ++ * Dirty throttling logic assumes the limits in page units fit into ++ * 32-bits. This gives 16TB dirty limits max which is hopefully enough. ++ */ ++ if (thresh > UINT_MAX) ++ thresh = UINT_MAX; ++ /* This makes sure bg_thresh is within 32-bits as well */ ++ if (bg_thresh >= thresh) ++ bg_thresh = thresh / 2; + dtc->thresh = thresh; + dtc->bg_thresh = bg_thresh; + +@@ -470,7 +477,11 @@ static unsigned long node_dirty_limit(st + if (rt_task(tsk)) + dirty += dirty / 4; + +- return dirty; ++ /* ++ * Dirty throttling logic assumes the limits in page units fit into ++ * 32-bits. This gives 16TB dirty limits max which is hopefully enough. 
++ */ ++ return min_t(unsigned long, dirty, UINT_MAX); + } + + /** +@@ -507,10 +518,17 @@ static int dirty_background_bytes_handle + void *buffer, size_t *lenp, loff_t *ppos) + { + int ret; ++ unsigned long old_bytes = dirty_background_bytes; + + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +- if (ret == 0 && write) ++ if (ret == 0 && write) { ++ if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) > ++ UINT_MAX) { ++ dirty_background_bytes = old_bytes; ++ return -ERANGE; ++ } + dirty_background_ratio = 0; ++ } + return ret; + } + +@@ -536,6 +554,10 @@ static int dirty_bytes_handler(struct ct + + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_bytes != old_bytes) { ++ if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) { ++ vm_dirty_bytes = old_bytes; ++ return -ERANGE; ++ } + writeback_set_ratelimit(); + vm_dirty_ratio = 0; + } diff --git a/queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch b/queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch new file mode 100644 index 00000000000..8a298e4decb --- /dev/null +++ b/queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch @@ -0,0 +1,47 @@ +From cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 Mon Sep 17 00:00:00 2001 +From: Jinliang Zheng +Date: Thu, 20 Jun 2024 20:21:24 +0800 +Subject: mm: optimize the redundant loop of mm_update_owner_next() + +From: Jinliang Zheng + +commit cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 upstream. + +When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or +/proc or ptrace or page migration (get_task_mm()), it is impossible to +find an appropriate task_struct in the loop whose mm_struct is the same as +the target mm_struct. + +If the above race condition is combined with the stress-ng-zombie and +stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in +write_lock_irq() for tasklist_lock. 
+ +Recognize this situation in advance and exit early. + +Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com +Signed-off-by: Jinliang Zheng +Acked-by: Michal Hocko +Cc: Christian Brauner +Cc: Jens Axboe +Cc: Mateusz Guzik +Cc: Matthew Wilcox (Oracle) +Cc: Oleg Nesterov +Cc: Tycho Andersen +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/exit.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -481,6 +481,8 @@ retry: + * Search through everything else, we should not get here often. + */ + for_each_process(g) { ++ if (atomic_read(&mm->mm_users) <= 1) ++ break; + if (g->flags & PF_KTHREAD) + continue; + for_each_thread(g, c) { diff --git a/queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch b/queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch new file mode 100644 index 00000000000..f00dd31f84f --- /dev/null +++ b/queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch @@ -0,0 +1,80 @@ +From bb76c6c274683c8570ad788f79d4b875bde0e458 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 23 Jun 2024 14:11:34 +0900 +Subject: nilfs2: add missing check for inode numbers on directory entries + +From: Ryusuke Konishi + +commit bb76c6c274683c8570ad788f79d4b875bde0e458 upstream. + +Syzbot reported that mounting and unmounting a specific pattern of +corrupted nilfs2 filesystem images causes a use-after-free of metadata +file inodes, which triggers a kernel bug in lru_add_fn(). + +As Jan Kara pointed out, this is because the link count of a metadata file +gets corrupted to 0, and nilfs_evict_inode(), which is called from iput(), +tries to delete that inode (ifile inode in this case). + +The inconsistency occurs because directories containing the inode numbers +of these metadata files that should not be visible in the namespace are +read without checking. 
+ +Fix this issue by treating the inode numbers of these internal files as +errors in the sanity check helper when reading directory folios/pages. + +Also thanks to Hillf Danton and Matthew Wilcox for their initial mm-layer +analysis. + +Link: https://lkml.kernel.org/r/20240623051135.4180-3-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+d79afb004be235636ee8@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=d79afb004be235636ee8 +Reported-by: Jan Kara +Closes: https://lkml.kernel.org/r/20240617075758.wewhukbrjod5fp5o@quack3 +Tested-by: Ryusuke Konishi +Cc: Hillf Danton +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/dir.c | 6 ++++++ + fs/nilfs2/nilfs.h | 5 +++++ + 2 files changed, 11 insertions(+) + +--- a/fs/nilfs2/dir.c ++++ b/fs/nilfs2/dir.c +@@ -143,6 +143,9 @@ static bool nilfs_check_page(struct page + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; ++ if (unlikely(p->inode && ++ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) ++ goto Einumber; + } + if (offs != limit) + goto Eend; +@@ -168,6 +171,9 @@ Enamelen: + goto bad_entry; + Espan: + error = "directory entry across blocks"; ++ goto bad_entry; ++Einumber: ++ error = "disallowed inode number"; + bad_entry: + nilfs_error(sb, + "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%d, name_len=%d", +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -121,6 +121,11 @@ enum { + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + ++#define NILFS_PRIVATE_INODE(ino) ({ \ ++ ino_t __ino = (ino); \ ++ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ ++ (__ino) != NILFS_SKETCH_INO); }) ++ + /** + * struct nilfs_transaction_info: context information for synchronization + * @ti_magic: Magic number diff --git a/queue-6.1/nilfs2-fix-inode-number-range-checks.patch 
b/queue-6.1/nilfs2-fix-inode-number-range-checks.patch new file mode 100644 index 00000000000..30a9d2431c3 --- /dev/null +++ b/queue-6.1/nilfs2-fix-inode-number-range-checks.patch @@ -0,0 +1,100 @@ +From e2fec219a36e0993642844be0f345513507031f4 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 23 Jun 2024 14:11:33 +0900 +Subject: nilfs2: fix inode number range checks + +From: Ryusuke Konishi + +commit e2fec219a36e0993642844be0f345513507031f4 upstream. + +Patch series "nilfs2: fix potential issues related to reserved inodes". + +This series fixes one use-after-free issue reported by syzbot, caused by +nilfs2's internal inode being exposed in the namespace on a corrupted +filesystem, and a couple of flaws that cause problems if the starting +number of non-reserved inodes written in the on-disk super block is +intentionally (or corruptly) changed from its default value. + + +This patch (of 3): + +In the current implementation of nilfs2, "nilfs->ns_first_ino", which +gives the first non-reserved inode number, is read from the superblock, +but its lower limit is not checked. + +As a result, if a number that overlaps with the inode number range of +reserved inodes such as the root directory or metadata files is set in the +super block parameter, the inode number test macros (NILFS_MDT_INODE and +NILFS_VALID_INODE) will not function properly. + +In addition, these test macros use left bit-shift calculations with +the inode number as the shift count via the BIT macro, but the result of a +shift calculation that exceeds the bit width of an integer is undefined in +the C specification, so if "ns_first_ino" is set to a large value other +than the default value NILFS_USER_INO (=11), the macros may potentially +malfunction depending on the environment. + +Fix these issues by checking the lower bound of "nilfs->ns_first_ino" and +by preventing bit shifts equal to or greater than the NILFS_USER_INO +constant in the inode number test macros. 
+ +Also, change the type of "ns_first_ino" from signed integer to unsigned +integer to avoid the need for type casting in comparisons such as the +lower bound check introduced this time. + +Link: https://lkml.kernel.org/r/20240623051135.4180-1-konishi.ryusuke@gmail.com +Link: https://lkml.kernel.org/r/20240623051135.4180-2-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Cc: Hillf Danton +Cc: Jan Kara +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/nilfs.h | 5 +++-- + fs/nilfs2/the_nilfs.c | 6 ++++++ + fs/nilfs2/the_nilfs.h | 2 +- + 3 files changed, 10 insertions(+), 3 deletions(-) + +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -116,9 +116,10 @@ enum { + #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) + + #define NILFS_MDT_INODE(sb, ino) \ +- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) ++ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) + #define NILFS_VALID_INODE(sb, ino) \ +- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) ++ ((ino) >= NILFS_FIRST_INO(sb) || \ ++ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + + /** + * struct nilfs_transaction_info: context information for synchronization +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struc + } + + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); ++ if (nilfs->ns_first_ino < NILFS_USER_INO) { ++ nilfs_err(nilfs->ns_sb, ++ "too small lower limit for non-reserved inode numbers: %u", ++ nilfs->ns_first_ino); ++ return -EINVAL; ++ } + + nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { +--- a/fs/nilfs2/the_nilfs.h ++++ b/fs/nilfs2/the_nilfs.h +@@ -182,7 +182,7 @@ struct the_nilfs { + unsigned long ns_nrsvsegs; + unsigned long ns_first_data_block; + int ns_inode_size; +- int 
ns_first_ino; ++ unsigned int ns_first_ino; + u32 ns_crc_seed; + + /* /sys/fs// */ diff --git a/queue-6.1/series b/queue-6.1/series index e544f9c3eb8..0a2ba7db460 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -69,3 +69,7 @@ gpiolib-of-fix-lookup-quirk-for-mips-lantiq.patch gpiolib-of-add-polarity-quirk-for-tsc2005.patch platform-x86-toshiba_acpi-fix-quickstart-quirk-handling.patch revert-igc-fix-a-log-entry-using-uninitialized-netdev.patch +nilfs2-fix-inode-number-range-checks.patch +nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch +mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch +mm-avoid-overflows-in-dirty-throttling-logic.patch