git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Jul 2024 11:55:23 +0000 (13:55 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Jul 2024 11:55:23 +0000 (13:55 +0200)
added patches:
mm-avoid-overflows-in-dirty-throttling-logic.patch
mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch
nilfs2-fix-inode-number-range-checks.patch

queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch [new file with mode: 0644]
queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch [new file with mode: 0644]
queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch [new file with mode: 0644]
queue-6.1/nilfs2-fix-inode-number-range-checks.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch b/queue-6.1/mm-avoid-overflows-in-dirty-throttling-logic.patch
new file mode 100644 (file)
index 0000000..c5d2fb2
--- /dev/null
@@ -0,0 +1,103 @@
+From 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 21 Jun 2024 16:42:38 +0200
+Subject: mm: avoid overflows in dirty throttling logic
+
+From: Jan Kara <jack@suse.cz>
+
+commit 385d838df280eba6c8680f9777bfa0d0bfe7e8b2 upstream.
+
+The dirty throttling logic is interspersed with assumptions that dirty
+limits in PAGE_SIZE units fit into 32-bit (so that various multiplications
+fit into 64-bits).  If limits end up being larger, we will hit overflows,
+possible divisions by 0 etc.  Fix these problems by never allowing so
+large dirty limits as they have dubious practical value anyway.  For
+dirty_bytes / dirty_background_bytes interfaces we can just refuse to set
+so large limits.  For dirty_ratio / dirty_background_ratio it isn't so
+simple as the dirty limit is computed from the amount of available memory
+which can change due to memory hotplug etc.  So when converting dirty
+limits from ratios to numbers of pages, we just don't allow the result to
+exceed UINT_MAX.
+
+This is a root-only triggerable problem which occurs when the operator
+sets dirty limits to >16 TB.
+
+Link: https://lkml.kernel.org/r/20240621144246.11148-2-jack@suse.cz
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reported-by: Zach O'Keefe <zokeefe@google.com>
+Reviewed-By: Zach O'Keefe <zokeefe@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page-writeback.c |   30 ++++++++++++++++++++++++++----
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -414,13 +414,20 @@ static void domain_dirty_limits(struct d
+       else
+               bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
+-      if (bg_thresh >= thresh)
+-              bg_thresh = thresh / 2;
+       tsk = current;
+       if (rt_task(tsk)) {
+               bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
+               thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
+       }
++      /*
++       * Dirty throttling logic assumes the limits in page units fit into
++       * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
++       */
++      if (thresh > UINT_MAX)
++              thresh = UINT_MAX;
++      /* This makes sure bg_thresh is within 32-bits as well */
++      if (bg_thresh >= thresh)
++              bg_thresh = thresh / 2;
+       dtc->thresh = thresh;
+       dtc->bg_thresh = bg_thresh;
+@@ -470,7 +477,11 @@ static unsigned long node_dirty_limit(st
+       if (rt_task(tsk))
+               dirty += dirty / 4;
+-      return dirty;
++      /*
++       * Dirty throttling logic assumes the limits in page units fit into
++       * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
++       */
++      return min_t(unsigned long, dirty, UINT_MAX);
+ }
+ /**
+@@ -507,10 +518,17 @@ static int dirty_background_bytes_handle
+               void *buffer, size_t *lenp, loff_t *ppos)
+ {
+       int ret;
++      unsigned long old_bytes = dirty_background_bytes;
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+-      if (ret == 0 && write)
++      if (ret == 0 && write) {
++              if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) >
++                                                              UINT_MAX) {
++                      dirty_background_bytes = old_bytes;
++                      return -ERANGE;
++              }
+               dirty_background_ratio = 0;
++      }
+       return ret;
+ }
+@@ -536,6 +554,10 @@ static int dirty_bytes_handler(struct ct
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
++              if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) {
++                      vm_dirty_bytes = old_bytes;
++                      return -ERANGE;
++              }
+               writeback_set_ratelimit();
+               vm_dirty_ratio = 0;
+       }
diff --git a/queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch b/queue-6.1/mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
new file mode 100644 (file)
index 0000000..8a298e4
--- /dev/null
@@ -0,0 +1,47 @@
+From cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 Mon Sep 17 00:00:00 2001
+From: Jinliang Zheng <alexjlzheng@tencent.com>
+Date: Thu, 20 Jun 2024 20:21:24 +0800
+Subject: mm: optimize the redundant loop of mm_update_owner_next()
+
+From: Jinliang Zheng <alexjlzheng@tencent.com>
+
+commit cf3f9a593dab87a032d2b6a6fb205e7f3de4f0a1 upstream.
+
+When mm_update_owner_next() is racing with swapoff (try_to_unuse()) or
+/proc or ptrace or page migration (get_task_mm()), it is impossible to
+find an appropriate task_struct in the loop whose mm_struct is the same as
+the target mm_struct.
+
+If the above race condition is combined with the stress-ng-zombie and
+stress-ng-dup tests, such a long loop can easily cause a Hard Lockup in
+write_lock_irq() for tasklist_lock.
+
+Recognize this situation in advance and exit early.
+
+Link: https://lkml.kernel.org/r/20240620122123.3877432-1-alexjlzheng@tencent.com
+Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Mateusz Guzik <mjguzik@gmail.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Tycho Andersen <tandersen@netflix.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/exit.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -481,6 +481,8 @@ retry:
+        * Search through everything else, we should not get here often.
+        */
+       for_each_process(g) {
++              if (atomic_read(&mm->mm_users) <= 1)
++                      break;
+               if (g->flags & PF_KTHREAD)
+                       continue;
+               for_each_thread(g, c) {
diff --git a/queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch b/queue-6.1/nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch
new file mode 100644 (file)
index 0000000..f00dd31
--- /dev/null
@@ -0,0 +1,80 @@
+From bb76c6c274683c8570ad788f79d4b875bde0e458 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sun, 23 Jun 2024 14:11:34 +0900
+Subject: nilfs2: add missing check for inode numbers on directory entries
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit bb76c6c274683c8570ad788f79d4b875bde0e458 upstream.
+
+Syzbot reported that mounting and unmounting a specific pattern of
+corrupted nilfs2 filesystem images causes a use-after-free of metadata
+file inodes, which triggers a kernel bug in lru_add_fn().
+
+As Jan Kara pointed out, this is because the link count of a metadata file
+gets corrupted to 0, and nilfs_evict_inode(), which is called from iput(),
+tries to delete that inode (ifile inode in this case).
+
+The inconsistency occurs because directories containing the inode numbers
+of these metadata files that should not be visible in the namespace are
+read without checking.
+
+Fix this issue by treating the inode numbers of these internal files as
+errors in the sanity check helper when reading directory folios/pages.
+
+Also thanks to Hillf Danton and Matthew Wilcox for their initial mm-layer
+analysis.
+
+Link: https://lkml.kernel.org/r/20240623051135.4180-3-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+d79afb004be235636ee8@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d79afb004be235636ee8
+Reported-by: Jan Kara <jack@suse.cz>
+Closes: https://lkml.kernel.org/r/20240617075758.wewhukbrjod5fp5o@quack3
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dir.c   |    6 ++++++
+ fs/nilfs2/nilfs.h |    5 +++++
+ 2 files changed, 11 insertions(+)
+
+--- a/fs/nilfs2/dir.c
++++ b/fs/nilfs2/dir.c
+@@ -143,6 +143,9 @@ static bool nilfs_check_page(struct page
+                       goto Enamelen;
+               if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+                       goto Espan;
++              if (unlikely(p->inode &&
++                           NILFS_PRIVATE_INODE(le64_to_cpu(p->inode))))
++                      goto Einumber;
+       }
+       if (offs != limit)
+               goto Eend;
+@@ -168,6 +171,9 @@ Enamelen:
+       goto bad_entry;
+ Espan:
+       error = "directory entry across blocks";
++      goto bad_entry;
++Einumber:
++      error = "disallowed inode number";
+ bad_entry:
+       nilfs_error(sb,
+                   "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
+--- a/fs/nilfs2/nilfs.h
++++ b/fs/nilfs2/nilfs.h
+@@ -121,6 +121,11 @@ enum {
+       ((ino) >= NILFS_FIRST_INO(sb) ||                                \
+        ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
++#define NILFS_PRIVATE_INODE(ino) ({                                   \
++      ino_t __ino = (ino);                                            \
++      ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO &&       \
++       (__ino) != NILFS_SKETCH_INO); })
++
+ /**
+  * struct nilfs_transaction_info: context information for synchronization
+  * @ti_magic: Magic number
diff --git a/queue-6.1/nilfs2-fix-inode-number-range-checks.patch b/queue-6.1/nilfs2-fix-inode-number-range-checks.patch
new file mode 100644 (file)
index 0000000..30a9d24
--- /dev/null
@@ -0,0 +1,100 @@
+From e2fec219a36e0993642844be0f345513507031f4 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sun, 23 Jun 2024 14:11:33 +0900
+Subject: nilfs2: fix inode number range checks
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit e2fec219a36e0993642844be0f345513507031f4 upstream.
+
+Patch series "nilfs2: fix potential issues related to reserved inodes".
+
+This series fixes one use-after-free issue reported by syzbot, caused by
+nilfs2's internal inode being exposed in the namespace on a corrupted
+filesystem, and a couple of flaws that cause problems if the starting
+number of non-reserved inodes written in the on-disk super block is
+intentionally (or corruptly) changed from its default value.
+
+
+This patch (of 3):
+
+In the current implementation of nilfs2, "nilfs->ns_first_ino", which
+gives the first non-reserved inode number, is read from the superblock,
+but its lower limit is not checked.
+
+As a result, if a number that overlaps with the inode number range of
+reserved inodes such as the root directory or metadata files is set in the
+super block parameter, the inode number test macros (NILFS_MDT_INODE and
+NILFS_VALID_INODE) will not function properly.
+
+In addition, these test macros use left bit-shift calculations with
+the inode number as the shift count via the BIT macro, but the result of a
+shift calculation that exceeds the bit width of an integer is undefined in
+the C specification, so if "ns_first_ino" is set to a large value other
+than the default value NILFS_USER_INO (=11), the macros may potentially
+malfunction depending on the environment.
+
+Fix these issues by checking the lower bound of "nilfs->ns_first_ino" and
+by preventing bit shifts equal to or greater than the NILFS_USER_INO
+constant in the inode number test macros.
+
+Also, change the type of "ns_first_ino" from signed integer to unsigned
+integer to avoid the need for type casting in comparisons such as the
+lower bound check introduced this time.
+
+Link: https://lkml.kernel.org/r/20240623051135.4180-1-konishi.ryusuke@gmail.com
+Link: https://lkml.kernel.org/r/20240623051135.4180-2-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/nilfs.h     |    5 +++--
+ fs/nilfs2/the_nilfs.c |    6 ++++++
+ fs/nilfs2/the_nilfs.h |    2 +-
+ 3 files changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/nilfs2/nilfs.h
++++ b/fs/nilfs2/nilfs.h
+@@ -116,9 +116,10 @@ enum {
+ #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
+ #define NILFS_MDT_INODE(sb, ino) \
+-      ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino)))
++      ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino)))
+ #define NILFS_VALID_INODE(sb, ino) \
+-      ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino)))
++      ((ino) >= NILFS_FIRST_INO(sb) ||                                \
++       ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
+ /**
+  * struct nilfs_transaction_info: context information for synchronization
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struc
+       }
+       nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
++      if (nilfs->ns_first_ino < NILFS_USER_INO) {
++              nilfs_err(nilfs->ns_sb,
++                        "too small lower limit for non-reserved inode numbers: %u",
++                        nilfs->ns_first_ino);
++              return -EINVAL;
++      }
+       nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
+       if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
+--- a/fs/nilfs2/the_nilfs.h
++++ b/fs/nilfs2/the_nilfs.h
+@@ -182,7 +182,7 @@ struct the_nilfs {
+       unsigned long           ns_nrsvsegs;
+       unsigned long           ns_first_data_block;
+       int                     ns_inode_size;
+-      int                     ns_first_ino;
++      unsigned int            ns_first_ino;
+       u32                     ns_crc_seed;
+       /* /sys/fs/<nilfs>/<device> */
diff --git a/queue-6.1/series b/queue-6.1/series
index e544f9c3eb8dd4a04c06774fc6ff29fd5eadc49d..0a2ba7db460f5e21336017bb3b77bf0c9d4d378c 100644 (file)
@@ -69,3 +69,7 @@ gpiolib-of-fix-lookup-quirk-for-mips-lantiq.patch
 gpiolib-of-add-polarity-quirk-for-tsc2005.patch
 platform-x86-toshiba_acpi-fix-quickstart-quirk-handling.patch
 revert-igc-fix-a-log-entry-using-uninitialized-netdev.patch
+nilfs2-fix-inode-number-range-checks.patch
+nilfs2-add-missing-check-for-inode-numbers-on-directory-entries.patch
+mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
+mm-avoid-overflows-in-dirty-throttling-logic.patch