git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 07:33:22 +0000 (09:33 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 07:33:22 +0000 (09:33 +0200)
added patches:
acpi-apei-einj-fix-einj_dev-release-leak.patch
filemap-add-helper-mapping_max_folio_size.patch
io_uring-check-for-non-null-file-pointer-in-io_file_can_poll.patch
io_uring-napi-fix-timeout-calculation.patch
iomap-fault-in-smaller-chunks-for-non-large-folio-mappings.patch

queue-6.9/acpi-apei-einj-fix-einj_dev-release-leak.patch [new file with mode: 0644]
queue-6.9/filemap-add-helper-mapping_max_folio_size.patch [new file with mode: 0644]
queue-6.9/io_uring-check-for-non-null-file-pointer-in-io_file_can_poll.patch [new file with mode: 0644]
queue-6.9/io_uring-napi-fix-timeout-calculation.patch [new file with mode: 0644]
queue-6.9/iomap-fault-in-smaller-chunks-for-non-large-folio-mappings.patch [new file with mode: 0644]
queue-6.9/series

diff --git a/queue-6.9/acpi-apei-einj-fix-einj_dev-release-leak.patch b/queue-6.9/acpi-apei-einj-fix-einj_dev-release-leak.patch
new file mode 100644 (file)
index 0000000..b49bbd3
--- /dev/null
@@ -0,0 +1,42 @@
+From 7ff6c798eca05e4a9dcb80163cb454d7787a4bc3 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 21 May 2024 15:46:32 -0700
+Subject: ACPI: APEI: EINJ: Fix einj_dev release leak
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 7ff6c798eca05e4a9dcb80163cb454d7787a4bc3 upstream.
+
+The platform driver conversion of EINJ mistakenly used
+platform_device_del() to unwind platform_device_register_full() at
+module exit. This leads to a small leak of one 'struct platform_device'
+instance per module load/unload cycle. Switch to
+platform_device_unregister() which performs both device_del() and final
+put_device().
+
+Fixes: 5621fafaac00 ("EINJ: Migrate to a platform driver")
+Cc: 6.9+ <stable@vger.kernel.org> # 6.9+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/apei/einj-core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
+index 9515bcfe5e97..73903a497d73 100644
+--- a/drivers/acpi/apei/einj-core.c
++++ b/drivers/acpi/apei/einj-core.c
+@@ -909,7 +909,7 @@ static void __exit einj_exit(void)
+       if (einj_initialized)
+               platform_driver_unregister(&einj_driver);
+-      platform_device_del(einj_dev);
++      platform_device_unregister(einj_dev);
+ }
+ module_init(einj_init);
+-- 
+2.45.2
+
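For illustration, a minimal sketch (not taken from the patch; the module and device names are hypothetical) of the register/unregister pairing the fix restores. The device returned by registration still holds one final reference; platform_device_unregister() performs device_del() plus the last put_device(), whereas platform_device_del() alone leaves that reference behind, which is the leak described above.

#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static struct platform_device *example_dev;	/* hypothetical device */

static int __init example_init(void)
{
	/* EINJ itself uses platform_device_register_full(); the pairing is the same */
	example_dev = platform_device_register_simple("example", -1, NULL, 0);
	return PTR_ERR_OR_ZERO(example_dev);
}

static void __exit example_exit(void)
{
	/*
	 * platform_device_del(example_dev) would only remove the device;
	 * platform_device_unregister() also drops the final reference,
	 * freeing the struct platform_device.
	 */
	platform_device_unregister(example_dev);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");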
diff --git a/queue-6.9/filemap-add-helper-mapping_max_folio_size.patch b/queue-6.9/filemap-add-helper-mapping_max_folio_size.patch
new file mode 100644 (file)
index 0000000..87c1d56
--- /dev/null
@@ -0,0 +1,83 @@
+From 79c137454815ba5554caa8eeb4ad5c94e96e45ce Mon Sep 17 00:00:00 2001
+From: Xu Yang <xu.yang_2@nxp.com>
+Date: Tue, 21 May 2024 19:49:38 +0800
+Subject: filemap: add helper mapping_max_folio_size()
+
+From: Xu Yang <xu.yang_2@nxp.com>
+
+commit 79c137454815ba5554caa8eeb4ad5c94e96e45ce upstream.
+
+Add mapping_max_folio_size() to get the maximum folio size for this
+pagecache mapping.
+
+Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace")
+Cc: stable@vger.kernel.org
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
+Link: https://lore.kernel.org/r/20240521114939.2541461-1-xu.yang_2@nxp.com
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pagemap.h |   34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -344,6 +344,19 @@ static inline void mapping_set_gfp_mask(
+       m->gfp_mask = mask;
+ }
++/*
++ * There are some parts of the kernel which assume that PMD entries
++ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
++ * limit the maximum allocation order to PMD size.  I'm not aware of any
++ * assumptions about maximum order if THP are disabled, but 8 seems like
++ * a good order (that's 1MB if you're using 4kB pages)
++ */
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++#define MAX_PAGECACHE_ORDER   HPAGE_PMD_ORDER
++#else
++#define MAX_PAGECACHE_ORDER   8
++#endif
++
+ /**
+  * mapping_set_large_folios() - Indicate the file supports large folios.
+  * @mapping: The file.
+@@ -370,6 +383,14 @@ static inline bool mapping_large_folio_s
+               test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ }
++/* Return the maximum folio size for this pagecache mapping, in bytes. */
++static inline size_t mapping_max_folio_size(struct address_space *mapping)
++{
++      if (mapping_large_folio_support(mapping))
++              return PAGE_SIZE << MAX_PAGECACHE_ORDER;
++      return PAGE_SIZE;
++}
++
+ static inline int filemap_nr_thps(struct address_space *mapping)
+ {
+ #ifdef CONFIG_READ_ONLY_THP_FOR_FS
+@@ -528,19 +549,6 @@ static inline void *detach_page_private(
+       return folio_detach_private(page_folio(page));
+ }
+-/*
+- * There are some parts of the kernel which assume that PMD entries
+- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+- * limit the maximum allocation order to PMD size.  I'm not aware of any
+- * assumptions about maximum order if THP are disabled, but 8 seems like
+- * a good order (that's 1MB if you're using 4kB pages)
+- */
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-#define MAX_PAGECACHE_ORDER   HPAGE_PMD_ORDER
+-#else
+-#define MAX_PAGECACHE_ORDER   8
+-#endif
+-
+ #ifdef CONFIG_NUMA
+ struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
+ #else
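A hedged usage sketch of the new helper (the caller below is illustrative, not part of the patch): code that sizes its per-iteration work from the mapping instead of assuming large-folio support.

#include <linux/minmax.h>
#include <linux/pagemap.h>

/* Illustrative helper: pick a copy-chunk size for this mapping. */
static size_t example_copy_chunk(struct address_space *mapping, size_t requested)
{
	/*
	 * PAGE_SIZE when the mapping does not support large folios,
	 * PAGE_SIZE << MAX_PAGECACHE_ORDER when it does.
	 */
	size_t chunk = mapping_max_folio_size(mapping);

	return min(requested, chunk);
}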
diff --git a/queue-6.9/io_uring-check-for-non-null-file-pointer-in-io_file_can_poll.patch b/queue-6.9/io_uring-check-for-non-null-file-pointer-in-io_file_can_poll.patch
new file mode 100644 (file)
index 0000000..c404275
--- /dev/null
@@ -0,0 +1,76 @@
+From 5fc16fa5f13b3c06fdb959ef262050bd810416a2 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sat, 1 Jun 2024 12:25:35 -0600
+Subject: io_uring: check for non-NULL file pointer in io_file_can_poll()
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 5fc16fa5f13b3c06fdb959ef262050bd810416a2 upstream.
+
+In earlier kernels, it was possible to trigger a NULL pointer
+dereference off the forced async preparation path, if no file had
+been assigned. The trace leading to that looks as follows:
+
+BUG: kernel NULL pointer dereference, address: 00000000000000b0
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP
+CPU: 67 PID: 1633 Comm: buf-ring-invali Not tainted 6.8.0-rc3+ #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 2/2/2022
+RIP: 0010:io_buffer_select+0xc3/0x210
+Code: 00 00 48 39 d1 0f 82 ae 00 00 00 48 81 4b 48 00 00 01 00 48 89 73 70 0f b7 50 0c 66 89 53 42 85 ed 0f 85 d2 00 00 00 48 8b 13 <48> 8b 92 b0 00 00 00 48 83 7a 40 00 0f 84 21 01 00 00 4c 8b 20 5b
+RSP: 0018:ffffb7bec38c7d88 EFLAGS: 00010246
+RAX: ffff97af2be61000 RBX: ffff97af234f1700 RCX: 0000000000000040
+RDX: 0000000000000000 RSI: ffff97aecfb04820 RDI: ffff97af234f1700
+RBP: 0000000000000000 R08: 0000000000200030 R09: 0000000000000020
+R10: ffffb7bec38c7dc8 R11: 000000000000c000 R12: ffffb7bec38c7db8
+R13: ffff97aecfb05800 R14: ffff97aecfb05800 R15: ffff97af2be5e000
+FS:  00007f852f74b740(0000) GS:ffff97b1eeec0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000000000b0 CR3: 000000016deab005 CR4: 0000000000370ef0
+Call Trace:
+ <TASK>
+ ? __die+0x1f/0x60
+ ? page_fault_oops+0x14d/0x420
+ ? do_user_addr_fault+0x61/0x6a0
+ ? exc_page_fault+0x6c/0x150
+ ? asm_exc_page_fault+0x22/0x30
+ ? io_buffer_select+0xc3/0x210
+ __io_import_iovec+0xb5/0x120
+ io_readv_prep_async+0x36/0x70
+ io_queue_sqe_fallback+0x20/0x260
+ io_submit_sqes+0x314/0x630
+ __do_sys_io_uring_enter+0x339/0xbc0
+ ? __do_sys_io_uring_register+0x11b/0xc50
+ ? vm_mmap_pgoff+0xce/0x160
+ do_syscall_64+0x5f/0x180
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+RIP: 0033:0x55e0a110a67e
+Code: ba cc 00 00 00 45 31 c0 44 0f b6 92 d0 00 00 00 31 d2 41 b9 08 00 00 00 41 83 e2 01 41 c1 e2 04 41 09 c2 b8 aa 01 00 00 0f 05 <c3> 90 89 30 eb a9 0f 1f 40 00 48 8b 42 20 8b 00 a8 06 75 af 85 f6
+
+because the request is marked forced ASYNC and has a bad file fd, and
+hence takes the forced async prep path.
+
+Current kernels with the request async prep cleaned up can no longer hit
+this issue, but for ease of backporting, let's add this safety check in
+here too as it really doesn't hurt. For both cases, this will inevitably
+end with a CQE posted with -EBADF.
+
+Cc: stable@vger.kernel.org
+Fixes: a76c0b31eef5 ("io_uring: commit non-pollable provided mapped buffers upfront")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -442,7 +442,7 @@ static inline bool io_file_can_poll(stru
+ {
+       if (req->flags & REQ_F_CAN_POLL)
+               return true;
+-      if (file_can_poll(req->file)) {
++      if (req->file && file_can_poll(req->file)) {
+               req->flags |= REQ_F_CAN_POLL;
+               return true;
+       }
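A simplified sketch of the guarded check (the struct and flag names below are made up for illustration; only the NULL test mirrors the fix): on the forced-async prep path req->file may not be assigned yet, so it must be checked before file_can_poll() dereferences it.

#include <linux/fs.h>
#include <linux/poll.h>

struct example_req {
	struct file	*file;	/* may be NULL until the file is assigned */
	unsigned int	flags;
};

#define EXAMPLE_F_CAN_POLL	(1U << 0)

static inline bool example_file_can_poll(struct example_req *req)
{
	if (req->flags & EXAMPLE_F_CAN_POLL)
		return true;
	/* guard against the not-yet-assigned file on the forced-async prep path */
	if (req->file && file_can_poll(req->file)) {
		req->flags |= EXAMPLE_F_CAN_POLL;
		return true;
	}
	return false;
}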
diff --git a/queue-6.9/io_uring-napi-fix-timeout-calculation.patch b/queue-6.9/io_uring-napi-fix-timeout-calculation.patch
new file mode 100644 (file)
index 0000000..36e06d1
--- /dev/null
@@ -0,0 +1,84 @@
+From 415ce0ea55c5a3afea501a773e002be9ed7149f5 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 3 Jun 2024 13:56:53 -0600
+Subject: io_uring/napi: fix timeout calculation
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 415ce0ea55c5a3afea501a773e002be9ed7149f5 upstream.
+
+Not quite sure what __io_napi_adjust_timeout() was attempting to do; it's
+adjusting both the NAPI timeout and the general overall timeout, and
+calculating a value that is never used. The overall timeout is a superset
+of the NAPI timeout and doesn't need adjusting. The only thing we really
+need to care about is that the NAPI timeout doesn't exceed the overall
+timeout. If a user asked for a timeout of e.g. 5 usec and the NAPI
+timeout is 10 usec, then we should not spin for 10 usec.
+
+While in there, sanitize the time checking a bit. If we have a negative
+value in the passed in timeout, discard it. Round up the value as well,
+so we don't end up with a NAPI timeout for the majority of the wait,
+with only a tiny sleep value at the end.
+
+Hence the only case we need to care about is if the NAPI timeout is
+larger than the overall timeout. If it is, cap the NAPI timeout at what
+the overall timeout is.
+
+Cc: stable@vger.kernel.org
+Fixes: 8d0c12a80cde ("io-uring: add napi busy poll support")
+Reported-by: Lewis Baker <lewissbaker@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/napi.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/io_uring/napi.c b/io_uring/napi.c
+index 883a1a665907..8c18ede595c4 100644
+--- a/io_uring/napi.c
++++ b/io_uring/napi.c
+@@ -261,12 +261,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+ }
+ /*
+- * __io_napi_adjust_timeout() - Add napi id to the busy poll list
++ * __io_napi_adjust_timeout() - adjust busy loop timeout
+  * @ctx: pointer to io-uring context structure
+  * @iowq: pointer to io wait queue
+  * @ts: pointer to timespec or NULL
+  *
+  * Adjust the busy loop timeout according to timespec and busy poll timeout.
++ * If the specified NAPI timeout is bigger than the wait timeout, then adjust
++ * the NAPI timeout accordingly.
+  */
+ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
+                             struct timespec64 *ts)
+@@ -274,16 +276,16 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
+       unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
+       if (ts) {
+-              struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
++              struct timespec64 poll_to_ts;
+-              if (timespec64_compare(ts, &poll_to_ts) > 0) {
+-                      *ts = timespec64_sub(*ts, poll_to_ts);
+-              } else {
+-                      u64 to = timespec64_to_ns(ts);
+-
+-                      do_div(to, 1000);
+-                      ts->tv_sec = 0;
+-                      ts->tv_nsec = 0;
++              poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
++              if (timespec64_compare(ts, &poll_to_ts) < 0) {
++                      s64 poll_to_ns = timespec64_to_ns(ts);
++                      if (poll_to_ns > 0) {
++                              u64 val = poll_to_ns + 999;
++                              do_div(val, (s64) 1000);
++                              poll_to = val;
++                      }
+               }
+       }
+-- 
+2.45.2
+
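A plain-C sketch of the resulting arithmetic (not the kernel function; names are illustrative): the configured NAPI busy-poll time, in usec, is capped at the overall wait timeout, and the wait is rounded up to whole microseconds so a short wait is not split into a busy-poll phase plus a tiny sleep.

#include <stdint.h>

/*
 * napi_to_us: configured NAPI busy-poll time, in usec.
 * wait_ns:    overall wait timeout, in nsec (<= 0 means "don't adjust").
 * Returns the busy-poll time to actually use, in usec.
 */
static uint32_t example_cap_napi_timeout(uint32_t napi_to_us, int64_t wait_ns)
{
	if (wait_ns > 0 && wait_ns < (int64_t)napi_to_us * 1000) {
		/* round up: a 1500 ns wait still allows 2 usec of polling */
		return (uint32_t)((wait_ns + 999) / 1000);
	}
	return napi_to_us;
}

With napi_to_us = 10 and wait_ns = 5000, this returns 5, matching the "5 usec wait, 10 usec NAPI timeout" example in the message above.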
diff --git a/queue-6.9/iomap-fault-in-smaller-chunks-for-non-large-folio-mappings.patch b/queue-6.9/iomap-fault-in-smaller-chunks-for-non-large-folio-mappings.patch
new file mode 100644 (file)
index 0000000..a776a99
--- /dev/null
@@ -0,0 +1,81 @@
+From 4e527d5841e24623181edc7fd6f6598ffa810e10 Mon Sep 17 00:00:00 2001
+From: Xu Yang <xu.yang_2@nxp.com>
+Date: Tue, 21 May 2024 19:49:39 +0800
+Subject: iomap: fault in smaller chunks for non-large folio mappings
+
+From: Xu Yang <xu.yang_2@nxp.com>
+
+commit 4e527d5841e24623181edc7fd6f6598ffa810e10 upstream.
+
+Since commit 5d8edfb900d5 ("iomap: Copy larger chunks from userspace"),
+iomap will try to copy in larger chunks than PAGE_SIZE. However, if the
+mapping doesn't support large folios, only one page of at most 4KB will
+be created and 4KB of data will be written to the pagecache each time.
+The next 4KB is then handled in the next iteration. This causes a
+potential write performance problem.
+
+If the chunk is 2MB, a total of 512 pages eventually need to be handled.
+On each iteration, fault_in_iov_iter_readable() is called to check that
+the iov_iter is readable. Since only 4KB is handled each time, the address
+ranges below are checked over and over again:
+
+start          end
+-
+buf,           buf+2MB
+buf+4KB,       buf+2MB
+buf+8KB,       buf+2MB
+...
+buf+2044KB     buf+2MB
+
+Obviously the checked size is wrong, since only 4KB is handled each
+time. So use a correctly sized chunk to let iomap work well in the
+non-large-folio case.
+
+With this change, the write speed is stable. Tested on an ARM64 device.
+
+Before:
+
+ - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (334 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (278 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (204 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (170 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (150 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (139 MB/s)
+
+After:
+
+ - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (339 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (330 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (332 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (333 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (333 MB/s)
+ - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (333 MB/s)
+
+Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace")
+Cc: stable@vger.kernel.org
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
+Link: https://lore.kernel.org/r/20240521114939.2541461-2-xu.yang_2@nxp.com
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/iomap/buffered-io.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/iomap/buffered-io.c
++++ b/fs/iomap/buffered-io.c
+@@ -909,11 +909,11 @@ static size_t iomap_write_end(struct iom
+ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
+ {
+       loff_t length = iomap_length(iter);
+-      size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+       loff_t pos = iter->pos;
+       ssize_t written = 0;
+       long status = 0;
+       struct address_space *mapping = iter->inode->i_mapping;
++      size_t chunk = mapping_max_folio_size(mapping);
+       unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
+       do {
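A heavily simplified sketch of the loop shape after the fix (illustrative only, not the real iomap_write_iter(); the folio get/copy/commit steps are elided): the prefault window now matches what a single iteration can actually consume, so fault_in_iov_iter_readable() no longer re-checks the same multi-megabyte span on every 4KB pass.

#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

static ssize_t example_write_loop(struct address_space *mapping,
				  struct iov_iter *i, loff_t pos)
{
	size_t chunk = mapping_max_folio_size(mapping); /* 4KB without large folios */
	ssize_t written = 0;

	while (iov_iter_count(i)) {
		size_t offset = pos & (chunk - 1);
		size_t bytes = min(chunk - offset, iov_iter_count(i));

		/* prefault only the span this iteration will actually copy */
		if (fault_in_iov_iter_readable(i, bytes) == bytes)
			return written ? written : -EFAULT;

		/* ... grab folio, copy 'bytes' from i, mark dirty, unlock ... */
		iov_iter_advance(i, bytes);
		pos += bytes;
		written += bytes;
	}
	return written;
}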
diff --git a/queue-6.9/series b/queue-6.9/series
index 86905c307968e8f950bea5c908b6c3ece6c46bad..f038bce8c9a82cf85eb2efc737748d391bae4948 100644 (file)
@@ -71,3 +71,8 @@ crypto-ecrdsa-fix-module-auto-load-on-add_key.patch
 crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch
 kbuild-remove-support-for-clang-s-thinlto-caching.patch
 mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch
+io_uring-napi-fix-timeout-calculation.patch
+io_uring-check-for-non-null-file-pointer-in-io_file_can_poll.patch
+filemap-add-helper-mapping_max_folio_size.patch
+iomap-fault-in-smaller-chunks-for-non-large-folio-mappings.patch
+acpi-apei-einj-fix-einj_dev-release-leak.patch