From: Greg Kroah-Hartman Date: Fri, 1 May 2026 11:41:41 +0000 (+0200) Subject: 7.0-stable patches X-Git-Tag: v6.12.86~59 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=91623d692dcd632e17304e6c978e2e5a9f348b98;p=thirdparty%2Fkernel%2Fstable-queue.git 7.0-stable patches added patches: block-fix-zone-write-plugs-refcount-handling-in-disk_zone_wplug_schedule_bio_work.patch block-relax-pgmap-check-in-bio_add_page-for-compatible-zone-device-pages.patch erofs-fix-the-out-of-bounds-nameoff-handling-for-trailing-dirents.patch iio-adc-ti-ads7950-use-iio_push_to_buffers_with_ts_unaligned.patch io_uring-poll-ensure-epoll_oneshot-is-propagated-for-epoll_uring_wake.patch io_uring-poll-fix-signed-comparison-in-io_poll_get_ownership.patch io_uring-register-fix-ring-resizing-with-mixed-large-sqes-cqes.patch io_uring-timeout-check-unused-sqe-fields.patch io_uring-zcrx-fix-user_struct-uaf.patch io_uring-zcrx-return-back-two-step-unregistration.patch ipmi-ssif-clean-up-kthread-on-errors.patch jbd2-fix-deadlock-in-jbd2_journal_cancel_revoke.patch kvm-selftests-fix-reserved-value-wrmsr-testcase-for-multi-feature-msrs.patch landlock-allow-tsync-with-log_subdomains_off-and-fd-1.patch landlock-fix-log_subdomains_off-inheritance-across-fork.patch md-raid10-fix-deadlock-with-check-operation-and-nowait-requests.patch media-rc-igorplugusb-heed-coherency-rules.patch media-rockchip-rkcif-comply-with-minimum-number-of-buffers-requirement.patch media-rockchip-rkcif-fix-off-by-one-bugs.patch mfd-stpmic1-attempt-system-shutdown-twice-in-case-pmic-is-confused.patch mm-alloc_tag-clear-codetag-for-pages-allocated-before-page_ext-initialization.patch mm-damon-core-fix-damon_call-vs-kdamond_fn-exit-race.patch mm-damon-core-fix-damos_walk-vs-kdamond_fn-exit-race.patch mm-hugetlb-fix-early-boot-crash-on-parameters-without-separator.patch mm-zone_device-do-not-touch-device-folio-after-calling-folio_free.patch mtd-docg3-fix-use-after-free-in-docg3_release.patch nvme-pci-add-nvme_quirk_disable_write_zeroes-for-kingston-om3sgp4.patch nvme-respect-nvme_quirk_disable_write_zeroes-when-wzsl-is-set.patch parisc-_llseek-syscall-is-only-available-for-32-bit-userspace.patch parisc-drop-ip_fast_csum-inline-assembly-implementation.patch parisc-led-fix-reference-leak-on-failed-device-registration.patch pci-cadence-use-cdns_pcie_read_sz-for-byte-or-word-read-access.patch pci-imx6-fix-reference-clock-source-selection-for-i.mx95.patch perf-annotate-use-jump__delete-when-freeing-loongarch-jumps.patch rbd-fix-null-ptr-deref-when-device_add_disk-fails.patch rdma-mana_ib-disable-rx-steering-on-rss-qp-destroy.patch remoteproc-xlnx-only-access-buffer-information-if-ipi-is-buffered.patch reset-rzv2h-usb2phy-keep-phy-clock-enabled-for-entire-device-lifetime.patch sched-use-u64-for-bandwidth-ratio-calculations.patch selftests-landlock-drain-stale-audit-records-on-init.patch selftests-landlock-fix-format-warning-for-__u64-in-net_test.patch selftests-landlock-fix-snprintf-truncation-checks-in-audit-helpers.patch selftests-landlock-skip-stale-records-in-audit_match_record.patch selftests-mqueue-fix-incorrectly-named-file.patch --- diff --git a/queue-7.0/block-fix-zone-write-plugs-refcount-handling-in-disk_zone_wplug_schedule_bio_work.patch b/queue-7.0/block-fix-zone-write-plugs-refcount-handling-in-disk_zone_wplug_schedule_bio_work.patch new file mode 100644 index 0000000000..d7e73257ca --- /dev/null +++ b/queue-7.0/block-fix-zone-write-plugs-refcount-handling-in-disk_zone_wplug_schedule_bio_work.patch @@ -0,0 +1,57 @@ +From 
0a8b8af896e0ef83e188e1fe20f98f2bbb1c2459 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Fri, 27 Feb 2026 22:19:45 +0900 +Subject: block: fix zone write plugs refcount handling in disk_zone_wplug_schedule_bio_work() + +From: Damien Le Moal + +commit 0a8b8af896e0ef83e188e1fe20f98f2bbb1c2459 upstream. + +The function disk_zone_wplug_schedule_bio_work() always takes a +reference on the zone write plug of the BIO work being scheduled. This +ensures that the zone write plug cannot be freed while the BIO work is +being scheduled but has not run yet. However, this unconditional +reference taking is fragile since the reference taken is released by the +BIO work blk_zone_wplug_bio_work() function, which implies that there +always must be a 1:1 relation between the work being scheduled and the +work running. + +Make sure to drop the reference taken when scheduling the BIO work if +the work is already scheduled, that is, when queue_work() returns false. + +Fixes: 9e78c38ab30b ("block: Hold a reference on zone write plugs to schedule submission") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Christoph Hellwig +Reviewed-by: Bart Van Assche +Reviewed-by: Johannes Thumshirn +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-zoned.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/block/blk-zoned.c ++++ b/block/blk-zoned.c +@@ -1198,13 +1198,17 @@ static void disk_zone_wplug_schedule_bio + lockdep_assert_held(&zwplug->lock); + + /* +- * Take a reference on the zone write plug and schedule the submission +- * of the next plugged BIO. blk_zone_wplug_bio_work() will release the +- * reference we take here. ++ * Schedule the submission of the next plugged BIO. Taking a reference ++ * to the zone write plug is required as the bio_work belongs to the ++ * plug, and thus we must ensure that the write plug does not go away ++ * while the work is being scheduled but has not run yet. ++ * blk_zone_wplug_bio_work() will release the reference we take here, ++ * and we also drop this reference if the work is already scheduled. + */ + WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); + refcount_inc(&zwplug->ref); +- queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); ++ if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work)) ++ disk_put_zone_wplug(zwplug); + } + + static inline void disk_zone_wplug_add_bio(struct gendisk *disk, diff --git a/queue-7.0/block-relax-pgmap-check-in-bio_add_page-for-compatible-zone-device-pages.patch b/queue-7.0/block-relax-pgmap-check-in-bio_add_page-for-compatible-zone-device-pages.patch new file mode 100644 index 0000000000..00a568970e --- /dev/null +++ b/queue-7.0/block-relax-pgmap-check-in-bio_add_page-for-compatible-zone-device-pages.patch @@ -0,0 +1,110 @@ +From 41c665aae2b5dbecddddcc8ace344caf630cc7a4 Mon Sep 17 00:00:00 2001 +From: Naman Jain +Date: Fri, 10 Apr 2026 15:34:14 +0000 +Subject: block: relax pgmap check in bio_add_page for compatible zone device pages + +From: Naman Jain + +commit 41c665aae2b5dbecddddcc8ace344caf630cc7a4 upstream. + +bio_add_page() and bio_integrity_add_page() reject pages from different +dev_pagemaps entirely, returning 0 even when those pages have compatible +DMA mapping requirements. This forces callers to start a new bio when +buffers span pgmap boundaries, even though the pages could safely coexist +as separate bvec entries. 
+ +This matters for guests where memory is registered through +devm_memremap_pages() with MEMORY_DEVICE_GENERIC in multiple calls, +creating separate dev_pagemaps for each chunk. When a direct I/O buffer +spans two such chunks, bio_add_page() rejects the second page, forcing an +unnecessary bio split or I/O failure. + +Introduce zone_device_pages_compatible() in blk.h to check whether two +pages can coexist in the same bio as separate bvec entries. The block DMA +iterator (blk_dma_map_iter_start) caches the P2PDMA mapping state from the +first segment and applies it to all others, so P2PDMA pages from different +pgmaps must not be mixed, and neither must P2PDMA and non-P2PDMA pages. +All other combinations (MEMORY_DEVICE_GENERIC pages from different pgmaps, +or MEMORY_DEVICE_GENERIC with normal RAM) use the same dma_map_phys path +and are safe. + +Replace the blanket zone_device_pages_have_same_pgmap() rejection with +zone_device_pages_compatible(), while keeping +zone_device_pages_have_same_pgmap() as a merge guard. +Pages from different pgmaps can be added as separate bvec entries but +must not be coalesced into the same segment, as that would make +it impossible to recover the correct pgmap via page_pgmap(). + +Fixes: 49580e690755 ("block: add check when merging zone device pages") +Cc: stable@vger.kernel.org +Signed-off-by: Naman Jain +Reviewed-by: Christoph Hellwig +Link: https://patch.msgid.link/20260410153414.4159050-3-namjain@linux.microsoft.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/bio-integrity.c | 6 +++--- + block/bio.c | 6 +++--- + block/blk.h | 19 +++++++++++++++++++ + 3 files changed, 25 insertions(+), 6 deletions(-) + +--- a/block/bio-integrity.c ++++ b/block/bio-integrity.c +@@ -167,10 +167,10 @@ int bio_integrity_add_page(struct bio *b + if (bip->bip_vcnt > 0) { + struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; + +- if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) ++ if (!zone_device_pages_compatible(bv->bv_page, page)) + return 0; +- +- if (bvec_try_merge_hw_page(q, bv, page, len, offset)) { ++ if (zone_device_pages_have_same_pgmap(bv->bv_page, page) && ++ bvec_try_merge_hw_page(q, bv, page, len, offset)) { + bip->bip_iter.bi_size += len; + return len; + } +--- a/block/bio.c ++++ b/block/bio.c +@@ -1070,10 +1070,10 @@ int bio_add_page(struct bio *bio, struct + if (bio->bi_vcnt > 0) { + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; + +- if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) ++ if (!zone_device_pages_compatible(bv->bv_page, page)) + return 0; +- +- if (bvec_try_merge_page(bv, page, len, offset)) { ++ if (zone_device_pages_have_same_pgmap(bv->bv_page, page) && ++ bvec_try_merge_page(bv, page, len, offset)) { + bio->bi_iter.bi_size += len; + return len; + } +--- a/block/blk.h ++++ b/block/blk.h +@@ -139,6 +139,25 @@ static inline bool biovec_phys_mergeable + return true; + } + ++/* ++ * Check if two pages from potentially different zone device pgmaps can ++ * coexist as separate bvec entries in the same bio. ++ * ++ * The block DMA iterator (blk_dma_map_iter_start) caches the P2PDMA mapping ++ * state from the first segment and applies it to all subsequent segments, so ++ * P2PDMA pages from different pgmaps must not be mixed in the same bio. ++ * ++ * Other zone device types (FS_DAX, GENERIC) use the same dma_map_phys() path ++ * as normal RAM. PRIVATE and COHERENT pages never appear in bios. 
++ */ ++static inline bool zone_device_pages_compatible(const struct page *a, ++ const struct page *b) ++{ ++ if (is_pci_p2pdma_page(a) || is_pci_p2pdma_page(b)) ++ return zone_device_pages_have_same_pgmap(a, b); ++ return true; ++} ++ + static inline bool __bvec_gap_to_prev(const struct queue_limits *lim, + struct bio_vec *bprv, unsigned int offset) + { diff --git a/queue-7.0/erofs-fix-the-out-of-bounds-nameoff-handling-for-trailing-dirents.patch b/queue-7.0/erofs-fix-the-out-of-bounds-nameoff-handling-for-trailing-dirents.patch new file mode 100644 index 0000000000..125fc9d93f --- /dev/null +++ b/queue-7.0/erofs-fix-the-out-of-bounds-nameoff-handling-for-trailing-dirents.patch @@ -0,0 +1,88 @@ +From d18a3b5d337fa412a38e776e6b4b857a58836575 Mon Sep 17 00:00:00 2001 +From: Gao Xiang +Date: Tue, 21 Apr 2026 15:59:52 +0800 +Subject: erofs: fix the out-of-bounds nameoff handling for trailing dirents + +From: Gao Xiang + +commit d18a3b5d337fa412a38e776e6b4b857a58836575 upstream. + +Currently we already have boundary-checks for nameoffs, but the trailing +dirents are special since the namelens are calculated with strnlen() +with unchecked nameoffs. + +If a crafted EROFS has a trailing dirent with nameoff >= maxsize, +maxsize - nameoff can underflow, causing strnlen() to read past the +directory block. + +nameoff0 should also be verified to be a multiple of +`sizeof(struct erofs_dirent)` as well [1]. + +[1] https://sashiko.dev/#/patchset/20260416063511.3173774-1-hsiangkao%40linux.alibaba.com + +Fixes: 3aa8ec716e52 ("staging: erofs: add directory operations") +Fixes: 33bac912840f ("staging: erofs: keep corrupted fs from crashing kernel in erofs_readdir()") +Reported-by: Yuhao Jiang +Reported-by: Junrui Luo +Closes: https://lore.kernel.org/r/A0FD7E0F-7558-49B0-8BC8-EB1ECDB2479A@outlook.com +Cc: stable@vger.kernel.org +Signed-off-by: Gao Xiang +Reviewed-by: Chao Yu +Signed-off-by: Greg Kroah-Hartman +--- + fs/erofs/dir.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +--- a/fs/erofs/dir.c ++++ b/fs/erofs/dir.c +@@ -19,20 +19,18 @@ static int erofs_fill_dentries(struct in + const char *de_name = (char *)dentry_blk + nameoff; + unsigned int de_namelen; + +- /* the last dirent in the block? */ +- if (de + 1 >= end) +- de_namelen = strnlen(de_name, maxsize - nameoff); +- else ++ /* non-trailing dirent in the directory block? 
*/ ++ if (de + 1 < end) + de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; ++ else if (maxsize <= nameoff) ++ goto err_bogus; ++ else ++ de_namelen = strnlen(de_name, maxsize - nameoff); + +- /* a corrupted entry is found */ +- if (nameoff + de_namelen > maxsize || +- de_namelen > EROFS_NAME_LEN) { +- erofs_err(dir->i_sb, "bogus dirent @ nid %llu", +- EROFS_I(dir)->nid); +- DBG_BUGON(1); +- return -EFSCORRUPTED; +- } ++ /* a corrupted entry is found (including negative namelen) */ ++ if (!in_range32(de_namelen, 1, EROFS_NAME_LEN) || ++ nameoff + de_namelen > maxsize) ++ goto err_bogus; + + if (!dir_emit(ctx, de_name, de_namelen, + erofs_nid_to_ino64(EROFS_SB(dir->i_sb), +@@ -42,6 +40,10 @@ static int erofs_fill_dentries(struct in + ctx->pos += sizeof(struct erofs_dirent); + } + return 0; ++err_bogus: ++ erofs_err(dir->i_sb, "bogus dirent @ nid %llu", EROFS_I(dir)->nid); ++ DBG_BUGON(1); ++ return -EFSCORRUPTED; + } + + static int erofs_readdir(struct file *f, struct dir_context *ctx) +@@ -88,7 +90,7 @@ static int erofs_readdir(struct file *f, + } + + nameoff = le16_to_cpu(de->nameoff); +- if (nameoff < sizeof(struct erofs_dirent) || nameoff >= bsz) { ++ if (!nameoff || nameoff >= bsz || (nameoff % sizeof(*de))) { + erofs_err(sb, "invalid de[0].nameoff %u @ nid %llu", + nameoff, EROFS_I(dir)->nid); + err = -EFSCORRUPTED; diff --git a/queue-7.0/iio-adc-ti-ads7950-use-iio_push_to_buffers_with_ts_unaligned.patch b/queue-7.0/iio-adc-ti-ads7950-use-iio_push_to_buffers_with_ts_unaligned.patch new file mode 100644 index 0000000000..3fb8760471 --- /dev/null +++ b/queue-7.0/iio-adc-ti-ads7950-use-iio_push_to_buffers_with_ts_unaligned.patch @@ -0,0 +1,59 @@ +From 7806c060cceb2d6895efbb6cff2f2f17cf1ec5de Mon Sep 17 00:00:00 2001 +From: David Lechner +Date: Sat, 14 Mar 2026 16:12:24 -0500 +Subject: iio: adc: ti-ads7950: use iio_push_to_buffers_with_ts_unaligned() + +From: David Lechner + +commit 7806c060cceb2d6895efbb6cff2f2f17cf1ec5de upstream. + +Use iio_push_to_buffers_with_ts_unaligned() to avoid unaligned access +when writing the timestamp in the rx_buf. + +The previous implementation would have been fine on architectures that +support 4-byte alignment of 64-bit integers but could cause issues on +architectures that require 8-byte alignment. + +Fixes: 902c4b2446d4 ("iio: adc: New driver for TI ADS7950 chips") +Signed-off-by: David Lechner +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/adc/ti-ads7950.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/drivers/iio/adc/ti-ads7950.c ++++ b/drivers/iio/adc/ti-ads7950.c +@@ -47,8 +47,6 @@ + #define TI_ADS7950_MAX_CHAN 16 + #define TI_ADS7950_NUM_GPIOS 4 + +-#define TI_ADS7950_TIMESTAMP_SIZE (sizeof(int64_t) / sizeof(__be16)) +- + /* val = value, dec = left shift, bits = number of bits of the mask */ + #define TI_ADS7950_EXTRACT(val, dec, bits) \ + (((val) >> (dec)) & ((1 << (bits)) - 1)) +@@ -105,8 +103,7 @@ struct ti_ads7950_state { + * DMA (thus cache coherency maintenance) may require the + * transfer buffers to live in their own cache lines. 
+ */ +- u16 rx_buf[TI_ADS7950_MAX_CHAN + 2 + TI_ADS7950_TIMESTAMP_SIZE] +- __aligned(IIO_DMA_MINALIGN); ++ u16 rx_buf[TI_ADS7950_MAX_CHAN + 2] __aligned(IIO_DMA_MINALIGN); + u16 tx_buf[TI_ADS7950_MAX_CHAN + 2]; + u16 single_tx; + u16 single_rx; +@@ -313,8 +310,10 @@ static irqreturn_t ti_ads7950_trigger_ha + if (ret < 0) + goto out; + +- iio_push_to_buffers_with_timestamp(indio_dev, &st->rx_buf[2], +- iio_get_time_ns(indio_dev)); ++ iio_push_to_buffers_with_ts_unaligned(indio_dev, &st->rx_buf[2], ++ sizeof(*st->rx_buf) * ++ TI_ADS7950_MAX_CHAN, ++ iio_get_time_ns(indio_dev)); + + out: + mutex_unlock(&st->slock); diff --git a/queue-7.0/io_uring-poll-ensure-epoll_oneshot-is-propagated-for-epoll_uring_wake.patch b/queue-7.0/io_uring-poll-ensure-epoll_oneshot-is-propagated-for-epoll_uring_wake.patch new file mode 100644 index 0000000000..725c536a71 --- /dev/null +++ b/queue-7.0/io_uring-poll-ensure-epoll_oneshot-is-propagated-for-epoll_uring_wake.patch @@ -0,0 +1,52 @@ +From 1967f0b1cafdde37aa9e08e6021c14bcc484b7a5 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 21 Apr 2026 13:24:33 -0600 +Subject: io_uring/poll: ensure EPOLL_ONESHOT is propagated for EPOLL_URING_WAKE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jens Axboe + +commit 1967f0b1cafdde37aa9e08e6021c14bcc484b7a5 upstream. + +Commit: + +aacf2f9f382c ("io_uring: fix req->apoll_events") + +fixed an issue where poll->events and req->apoll_events weren't +synchronized, but then when the commit referenced in Fixes got added, +it didn't ensure the same thing. + +If we mask in EPOLLONESHOT in the regular EPOLL_URING_WAKE path, then +ensure it's done for both. Including a link to the original report +below, even though it's mostly nonsense. But it includes a reproducer +that does show that IORING_CQE_F_MORE is set in the previous CQE, +while no more CQEs will be generated for this request. Just ignore +anything that pretends this is security related in any way, it's just +the typical AI nonsense. + +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/io-uring/CAM0zi7yQzF3eKncgHo4iVM5yFLAjsiob_ucqyWKs=hyd_GqiMg@mail.gmail.com/ +Reported-by: Azizcan Daştan +Fixes: 4464853277d0 ("io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/poll.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -415,8 +415,10 @@ static int io_poll_wake(struct wait_queu + * disable multishot as there is a circular dependency between + * CQ posting and triggering the event. + */ +- if (mask & EPOLL_URING_WAKE) ++ if (mask & EPOLL_URING_WAKE) { + poll->events |= EPOLLONESHOT; ++ req->apoll_events |= EPOLLONESHOT; ++ } + + /* optional, saves extra locking for removal in tw handler */ + if (mask && poll->events & EPOLLONESHOT) { diff --git a/queue-7.0/io_uring-poll-fix-signed-comparison-in-io_poll_get_ownership.patch b/queue-7.0/io_uring-poll-fix-signed-comparison-in-io_poll_get_ownership.patch new file mode 100644 index 0000000000..ec5806a27d --- /dev/null +++ b/queue-7.0/io_uring-poll-fix-signed-comparison-in-io_poll_get_ownership.patch @@ -0,0 +1,52 @@ +From 326941b22806cbf2df1fbfe902b7908b368cce42 Mon Sep 17 00:00:00 2001 +From: Longxuan Yu +Date: Sun, 12 Apr 2026 16:38:20 +0800 +Subject: io_uring/poll: fix signed comparison in io_poll_get_ownership() + +From: Longxuan Yu + +commit 326941b22806cbf2df1fbfe902b7908b368cce42 upstream. 
+ +io_poll_get_ownership() uses a signed comparison to check whether +poll_refs has reached the threshold for the slowpath: + + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + +atomic_read() returns int (signed). When IO_POLL_CANCEL_FLAG +(BIT(31)) is set in poll_refs, the value becomes negative in +signed arithmetic, so the >= 128 comparison always evaluates to +false and the slowpath is never taken. + +Fix this by casting the atomic_read() result to unsigned int +before the comparison, so that the cancel flag is treated as a +large positive value and correctly triggers the slowpath. + +Fixes: a26a35e9019f ("io_uring: make poll refs more robust") +Cc: stable@vger.kernel.org +Reported-by: Yifan Wu +Reported-by: Juefei Pu +Co-developed-by: Yuan Tan +Signed-off-by: Yuan Tan +Suggested-by: Xin Liu +Tested-by: Zhengchuan Liang +Signed-off-by: Longxuan Yu +Signed-off-by: Ren Wei +Reviewed-by: Pavel Begunkov +Link: https://patch.msgid.link/3a3508b08bcd7f1bc3beff848ae6e1d73d355043.1775965597.git.ylong030@ucr.edu +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/poll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -93,7 +93,7 @@ static bool io_poll_get_ownership_slowpa + */ + static inline bool io_poll_get_ownership(struct io_kiocb *req) + { +- if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) ++ if (unlikely((unsigned int)atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); + } diff --git a/queue-7.0/io_uring-register-fix-ring-resizing-with-mixed-large-sqes-cqes.patch b/queue-7.0/io_uring-register-fix-ring-resizing-with-mixed-large-sqes-cqes.patch new file mode 100644 index 0000000000..21624fa235 --- /dev/null +++ b/queue-7.0/io_uring-register-fix-ring-resizing-with-mixed-large-sqes-cqes.patch @@ -0,0 +1,77 @@ +From 45cd95763e198d74d369ede43aef0b1955b8dea4 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 20 Apr 2026 13:41:38 -0600 +Subject: io_uring/register: fix ring resizing with mixed/large SQEs/CQEs + +From: Jens Axboe + +commit 45cd95763e198d74d369ede43aef0b1955b8dea4 upstream. + +The ring resizing only properly handles "normal" sized SQEs or CQEs, if +there are pending entries around a resize. This normally should not be +the case, but the code is supposed to handle this regardless. + +For the mixed SQE/CQE cases, the current copying works fine as they +are indexed in the same way. Each half is just copied separately. But +for fixed large SQEs and CQEs, the iteration and copy need to take that +into account. 
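
As a purely illustrative aside (a standalone userspace model, not the kernel code; the struct and helper names below are invented), the indexing change the following hunks make can be sketched like this: with 128-byte SQEs each logical entry spans two 64-byte slots, so the copy loop doubles the index, the copy size, and the wrap-around masks before applying them.

/*
 * Minimal userspace sketch of the resize copy with large entries.
 * Entry counts are assumed to be powers of two, as in io_uring.
 */
#include <stdio.h>
#include <string.h>

struct slot { unsigned char data[64]; };	/* one 64-byte SQE-sized slot */

static void copy_pending(struct slot *dst, unsigned int dst_entries,
			 const struct slot *src, unsigned int src_entries,
			 unsigned int head, unsigned int tail, int big_sqe)
{
	for (unsigned int i = head; i < tail; i++) {
		unsigned int index = i;
		unsigned int src_mask = src_entries - 1;
		unsigned int dst_mask = dst_entries - 1;
		size_t size = sizeof(struct slot);

		if (big_sqe) {		/* 128-byte entries occupy two slots */
			index <<= 1;
			size <<= 1;
			src_mask = (src_entries << 1) - 1;
			dst_mask = (dst_entries << 1) - 1;
		}
		memcpy(&dst[index & dst_mask], &src[index & src_mask], size);
	}
}

int main(void)
{
	struct slot osq[16] = { 0 };	/* 8 entries x 2 slots (old ring) */
	struct slot nsq[64] = { 0 };	/* 32 entries x 2 slots (new ring) */

	osq[4].data[0] = 0xab;		/* one pending entry at ring index 2 */
	copy_pending(nsq, 32, osq, 8, 2, 3, 1);
	printf("%#x\n", (unsigned int)nsq[4].data[0]);	/* prints 0xab */
	return 0;
}

Because head and tail are free-running counters, masking only after the left shift keeps the wrap-around correct in the doubled slot space, which is exactly what the kernel hunks below do for both the SQ and CQ copies.
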
+ +Cc: stable@kernel.org +Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS") +Reviewed-by: Gabriel Krisman Bertazi +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/register.c | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +--- a/io_uring/register.c ++++ b/io_uring/register.c +@@ -599,10 +599,20 @@ static int io_register_resize_rings(stru + if (tail - old_head > p->sq_entries) + goto overflow; + for (i = old_head; i < tail; i++) { +- unsigned src_head = i & (ctx->sq_entries - 1); +- unsigned dst_head = i & (p->sq_entries - 1); ++ unsigned index, dst_mask, src_mask; ++ size_t sq_size; + +- n.sq_sqes[dst_head] = o.sq_sqes[src_head]; ++ index = i; ++ sq_size = sizeof(struct io_uring_sqe); ++ src_mask = ctx->sq_entries - 1; ++ dst_mask = p->sq_entries - 1; ++ if (ctx->flags & IORING_SETUP_SQE128) { ++ index <<= 1; ++ sq_size <<= 1; ++ src_mask = (ctx->sq_entries << 1) - 1; ++ dst_mask = (p->sq_entries << 1) - 1; ++ } ++ memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size); + } + WRITE_ONCE(n.rings->sq.head, old_head); + WRITE_ONCE(n.rings->sq.tail, tail); +@@ -619,10 +629,20 @@ overflow: + goto out; + } + for (i = old_head; i < tail; i++) { +- unsigned src_head = i & (ctx->cq_entries - 1); +- unsigned dst_head = i & (p->cq_entries - 1); ++ unsigned index, dst_mask, src_mask; ++ size_t cq_size; + +- n.rings->cqes[dst_head] = o.rings->cqes[src_head]; ++ index = i; ++ cq_size = sizeof(struct io_uring_cqe); ++ src_mask = ctx->cq_entries - 1; ++ dst_mask = p->cq_entries - 1; ++ if (ctx->flags & IORING_SETUP_CQE32) { ++ index <<= 1; ++ cq_size <<= 1; ++ src_mask = (ctx->cq_entries << 1) - 1; ++ dst_mask = (p->cq_entries << 1) - 1; ++ } ++ memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size); + } + WRITE_ONCE(n.rings->cq.head, old_head); + WRITE_ONCE(n.rings->cq.tail, tail); diff --git a/queue-7.0/io_uring-timeout-check-unused-sqe-fields.patch b/queue-7.0/io_uring-timeout-check-unused-sqe-fields.patch new file mode 100644 index 0000000000..117609985c --- /dev/null +++ b/queue-7.0/io_uring-timeout-check-unused-sqe-fields.patch @@ -0,0 +1,41 @@ +From 484ae637a3e3d909718de7c07afd3bb34b6b8504 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 2 Mar 2026 13:10:34 +0000 +Subject: io_uring/timeout: check unused sqe fields + +From: Pavel Begunkov + +commit 484ae637a3e3d909718de7c07afd3bb34b6b8504 upstream. + +Zero check unused SQE fields addr3 and pad2 for timeout and timeout +update requests. They're not needed now, but could be used sometime +in the future. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/timeout.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/io_uring/timeout.c ++++ b/io_uring/timeout.c +@@ -449,6 +449,8 @@ int io_timeout_remove_prep(struct io_kio + + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; ++ if (sqe->addr3 || sqe->__pad2[0]) ++ return -EINVAL; + if (sqe->buf_index || sqe->len || sqe->splice_fd_in) + return -EINVAL; + +@@ -521,6 +523,8 @@ static int __io_timeout_prep(struct io_k + unsigned flags; + u32 off = READ_ONCE(sqe->off); + ++ if (sqe->addr3 || sqe->__pad2[0]) ++ return -EINVAL; + if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) + return -EINVAL; + if (off && is_timeout_link) diff --git a/queue-7.0/io_uring-zcrx-fix-user_struct-uaf.patch b/queue-7.0/io_uring-zcrx-fix-user_struct-uaf.patch new file mode 100644 index 0000000000..5e8af7687e --- /dev/null +++ b/queue-7.0/io_uring-zcrx-fix-user_struct-uaf.patch @@ -0,0 +1,39 @@ +From 0fcccfd87152f957fa8312b841f6efef42a05a20 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 21 Apr 2026 09:47:04 +0100 +Subject: io_uring/zcrx: fix user_struct uaf + +From: Pavel Begunkov + +commit 0fcccfd87152f957fa8312b841f6efef42a05a20 upstream. + +io_free_rbuf_ring() usees a struct user_struct, which +io_zcrx_ifq_free() puts it down before destroying the ring. + +Cc: stable@vger.kernel.org +Fixes: 5c686456a4e83 ("io_uring/zcrx: add user_struct and mm_struct to io_zcrx_ifq") +Signed-off-by: Pavel Begunkov +Link: https://patch.msgid.link/e560ae00960d27a810522a7efc0e201c82dff351.1776760917.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/zcrx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/zcrx.c ++++ b/io_uring/zcrx.c +@@ -565,13 +565,13 @@ static void io_zcrx_ifq_free(struct io_z + + if (ifq->area) + io_zcrx_free_area(ifq, ifq->area); +- free_uid(ifq->user); + if (ifq->mm_account) + mmdrop(ifq->mm_account); + if (ifq->dev) + put_device(ifq->dev); + + io_free_rbuf_ring(ifq); ++ free_uid(ifq->user); + mutex_destroy(&ifq->pp_lock); + kfree(ifq); + } diff --git a/queue-7.0/io_uring-zcrx-return-back-two-step-unregistration.patch b/queue-7.0/io_uring-zcrx-return-back-two-step-unregistration.patch new file mode 100644 index 0000000000..125f7e771f --- /dev/null +++ b/queue-7.0/io_uring-zcrx-return-back-two-step-unregistration.patch @@ -0,0 +1,141 @@ +From e5361d25e241ac3a23177fa74ae91d049bad00d3 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 23 Mar 2026 12:43:50 +0000 +Subject: io_uring/zcrx: return back two step unregistration + +From: Pavel Begunkov + +commit e5361d25e241ac3a23177fa74ae91d049bad00d3 upstream. + +There are reports where io_uring instance removal takes too long and an +ifq reallocation by another zcrx instance fails. Split zcrx destruction +into two steps similarly how it was before, first close the queue early +but maintain zcrx alive, and then when all inflight requests are +completed, drop the main zcrx reference. For extra protection, mark +terminated zcrx instances in xarray and warn if we double put them. 
+ +Cc: stable@vger.kernel.org # 6.19+ +Link: https://github.com/axboe/liburing/issues/1550 +Reported-by: Youngmin Choi +Signed-off-by: Pavel Begunkov +Link: https://patch.msgid.link/0ce21f0565ab4358668922a28a8a36922dfebf76.1774261953.git.asml.silence@gmail.com +[axboe: NULL ifq before break inside scoped guard] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 4 ++++ + io_uring/zcrx.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- + io_uring/zcrx.h | 4 ++++ + 3 files changed, 51 insertions(+), 3 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2308,6 +2308,10 @@ static __cold void io_ring_exit_work(str + struct io_tctx_node *node; + int ret; + ++ mutex_lock(&ctx->uring_lock); ++ io_terminate_zcrx(ctx); ++ mutex_unlock(&ctx->uring_lock); ++ + /* + * If we're doing polled IO and end up having requests being + * submitted async (out-of-line), then completions can come in while +--- a/io_uring/zcrx.c ++++ b/io_uring/zcrx.c +@@ -624,12 +624,17 @@ static void io_zcrx_scrub(struct io_zcrx + } + } + +-static void zcrx_unregister(struct io_zcrx_ifq *ifq) ++static void zcrx_unregister_user(struct io_zcrx_ifq *ifq) + { + if (refcount_dec_and_test(&ifq->user_refs)) { + io_close_queue(ifq); + io_zcrx_scrub(ifq); + } ++} ++ ++static void zcrx_unregister(struct io_zcrx_ifq *ifq) ++{ ++ zcrx_unregister_user(ifq); + io_put_zcrx_ifq(ifq); + } + +@@ -885,6 +890,36 @@ static struct net_iov *__io_zcrx_get_fre + return &area->nia.niovs[niov_idx]; + } + ++static inline bool is_zcrx_entry_marked(struct io_ring_ctx *ctx, unsigned long id) ++{ ++ return xa_get_mark(&ctx->zcrx_ctxs, id, XA_MARK_0); ++} ++ ++static inline void set_zcrx_entry_mark(struct io_ring_ctx *ctx, unsigned long id) ++{ ++ xa_set_mark(&ctx->zcrx_ctxs, id, XA_MARK_0); ++} ++ ++void io_terminate_zcrx(struct io_ring_ctx *ctx) ++{ ++ struct io_zcrx_ifq *ifq; ++ unsigned long id = 0; ++ ++ lockdep_assert_held(&ctx->uring_lock); ++ ++ while (1) { ++ scoped_guard(mutex, &ctx->mmap_lock) ++ ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT); ++ if (!ifq) ++ break; ++ if (WARN_ON_ONCE(is_zcrx_entry_marked(ctx, id))) ++ break; ++ set_zcrx_entry_mark(ctx, id); ++ id++; ++ zcrx_unregister_user(ifq); ++ } ++} ++ + void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) + { + struct io_zcrx_ifq *ifq; +@@ -896,12 +931,17 @@ void io_unregister_zcrx_ifqs(struct io_r + unsigned long id = 0; + + ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT); +- if (ifq) ++ if (ifq) { ++ if (WARN_ON_ONCE(!is_zcrx_entry_marked(ctx, id))) { ++ ifq = NULL; ++ break; ++ } + xa_erase(&ctx->zcrx_ctxs, id); ++ } + } + if (!ifq) + break; +- zcrx_unregister(ifq); ++ io_put_zcrx_ifq(ifq); + } + + xa_destroy(&ctx->zcrx_ctxs); +--- a/io_uring/zcrx.h ++++ b/io_uring/zcrx.h +@@ -71,6 +71,7 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx + int io_register_zcrx_ifq(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg); + void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); ++void io_terminate_zcrx(struct io_ring_ctx *ctx); + int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct socket *sock, unsigned int flags, + unsigned issue_flags, unsigned int *len); +@@ -85,6 +86,9 @@ static inline int io_register_zcrx_ifq(s + static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) + { + } ++static inline void io_terminate_zcrx(struct io_ring_ctx *ctx) ++{ ++} + static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct socket *sock, unsigned int 
flags, + unsigned issue_flags, unsigned int *len) diff --git a/queue-7.0/ipmi-ssif-clean-up-kthread-on-errors.patch b/queue-7.0/ipmi-ssif-clean-up-kthread-on-errors.patch new file mode 100644 index 0000000000..f0b680407a --- /dev/null +++ b/queue-7.0/ipmi-ssif-clean-up-kthread-on-errors.patch @@ -0,0 +1,56 @@ +From 75c486cb1bcaa1a3ec3a6438498176a3a4998ae4 Mon Sep 17 00:00:00 2001 +From: Corey Minyard +Date: Mon, 13 Apr 2026 08:00:23 -0500 +Subject: ipmi:ssif: Clean up kthread on errors + +From: Corey Minyard + +commit 75c486cb1bcaa1a3ec3a6438498176a3a4998ae4 upstream. + +If an error occurs after the ssif kthread is created, but before the +main IPMI code starts the ssif interface, the ssif kthread will not +be stopped. + +So make sure the kthread is stopped on an error condition if it is +running. + +Fixes: 259307074bfc ("ipmi: Add SMBus interface driver (SSIF)") +Reported-by: Li Xiao <<252270051@hdu.edu.cn> +Cc: stable@vger.kernel.org +Reviewed-by: Li Xiao <252270051@hdu.edu.cn> +Signed-off-by: Corey Minyard +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/ipmi/ipmi_ssif.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/char/ipmi/ipmi_ssif.c ++++ b/drivers/char/ipmi/ipmi_ssif.c +@@ -1268,8 +1268,10 @@ static void shutdown_ssif(void *send_inf + ssif_info->stopping = true; + timer_delete_sync(&ssif_info->watch_timer); + timer_delete_sync(&ssif_info->retry_timer); +- if (ssif_info->thread) ++ if (ssif_info->thread) { + kthread_stop(ssif_info->thread); ++ ssif_info->thread = NULL; ++ } + } + + static void ssif_remove(struct i2c_client *client) +@@ -1916,6 +1918,15 @@ static int ssif_probe(struct i2c_client + + out: + if (rv) { ++ /* ++ * If ipmi_register_smi() starts the interface, it will ++ * call shutdown and that will free the thread and set ++ * it to NULL. Otherwise it must be freed here. ++ */ ++ if (ssif_info->thread) { ++ kthread_stop(ssif_info->thread); ++ ssif_info->thread = NULL; ++ } + if (addr_info) + addr_info->client = NULL; + diff --git a/queue-7.0/jbd2-fix-deadlock-in-jbd2_journal_cancel_revoke.patch b/queue-7.0/jbd2-fix-deadlock-in-jbd2_journal_cancel_revoke.patch new file mode 100644 index 0000000000..df54310226 --- /dev/null +++ b/queue-7.0/jbd2-fix-deadlock-in-jbd2_journal_cancel_revoke.patch @@ -0,0 +1,78 @@ +From 981fcc5674e67158d24d23e841523eccba19d0e7 Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Thu, 9 Apr 2026 19:42:03 +0800 +Subject: jbd2: fix deadlock in jbd2_journal_cancel_revoke() + +From: Zhang Yi + +commit 981fcc5674e67158d24d23e841523eccba19d0e7 upstream. + +Commit f76d4c28a46a ("fs/jbd2: use sleeping version of +__find_get_block()") changed jbd2_journal_cancel_revoke() to use +__find_get_block_nonatomic() which holds the folio lock instead of +i_private_lock. This breaks the lock ordering (folio -> buffer) and +causes an ABBA deadlock when the filesystem blocksize < pagesize: + + T1 T2 +ext4_mkdir() + ext4_init_new_dir() + ext4_append() + ext4_getblk() + lock_buffer() <- A + sync_blockdev() + blkdev_writepages() + writeback_iter() + writeback_get_folio() + folio_lock() <- B + ext4_journal_get_create_access() + jbd2_journal_cancel_revoke() + __find_get_block_nonatomic() + folio_lock() <- B + block_write_full_folio() + lock_buffer() <- A + +This can occasionally cause generic/013 to hang. + +Fix by only calling __find_get_block_nonatomic() when the passed +buffer_head doesn't belong to the bdev, which is the only case that we +need to look up its bdev alias. 
Otherwise, the lookup is redundant since +the found buffer_head is equal to the one we passed in. + +Fixes: f76d4c28a46a ("fs/jbd2: use sleeping version of __find_get_block()") +Signed-off-by: Zhang Yi +Link: https://patch.msgid.link/20260409114204.917154-1-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/revoke.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/jbd2/revoke.c ++++ b/fs/jbd2/revoke.c +@@ -428,6 +428,7 @@ void jbd2_journal_cancel_revoke(handle_t + journal_t *journal = handle->h_transaction->t_journal; + int need_cancel; + struct buffer_head *bh = jh2bh(jh); ++ struct address_space *bh_mapping = bh->b_folio->mapping; + + jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); + +@@ -464,13 +465,14 @@ void jbd2_journal_cancel_revoke(handle_t + * buffer_head? If so, we'd better make sure we clear the + * revoked status on any hashed alias too, otherwise the revoke + * state machine will get very upset later on. */ +- if (need_cancel) { ++ if (need_cancel && !sb_is_blkdev_sb(bh_mapping->host->i_sb)) { + struct buffer_head *bh2; ++ + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); + if (bh2) { +- if (bh2 != bh) +- clear_buffer_revoked(bh2); ++ WARN_ON_ONCE(bh2 == bh); ++ clear_buffer_revoked(bh2); + __brelse(bh2); + } + } diff --git a/queue-7.0/kvm-selftests-fix-reserved-value-wrmsr-testcase-for-multi-feature-msrs.patch b/queue-7.0/kvm-selftests-fix-reserved-value-wrmsr-testcase-for-multi-feature-msrs.patch new file mode 100644 index 0000000000..b17b69cdd3 --- /dev/null +++ b/queue-7.0/kvm-selftests-fix-reserved-value-wrmsr-testcase-for-multi-feature-msrs.patch @@ -0,0 +1,39 @@ +From 9396cc1e282a280bcba2e932e03994e0aada4cd8 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 12 Feb 2026 18:38:41 +0800 +Subject: KVM: selftests: Fix reserved value WRMSR testcase for multi-feature MSRs + +From: Sean Christopherson + +commit 9396cc1e282a280bcba2e932e03994e0aada4cd8 upstream. + +When determining whether or not a WRMSR with reserved bits will #GP or +succeed due to the WRMSR not existing per the guest virtual CPU model, +expect failure if and only if _all_ features associated with the MSR are +unsupported. Checking only the primary feature results in false failures +when running on AMD and Hygon CPUs with only one of RDPID or RDTSCP, as +AMD/Hygon CPUs ignore MSR_TSC_AUX[63:32], i.e. don't treat the bits as +reserved, and so #GP only if the MSR is unsupported. + +Fixes: 9c38ddb3df94 ("KVM: selftests: Add an MSR test to exercise guest/host and read/write") +Reported-by: Zhiquan Li +Closes: https://lore.kernel.org/all/20260209041305.64906-6-zhiquan_li@163.com +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260212103841.171459-5-zhiquan_li@163.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/kvm/x86/msrs_test.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/kvm/x86/msrs_test.c ++++ b/tools/testing/selftests/kvm/x86/msrs_test.c +@@ -175,7 +175,7 @@ void guest_test_reserved_val(const struc + * If the CPU will truncate the written value (e.g. SYSENTER on AMD), + * expect success and a truncated value, not #GP. 
+ */ +- if (!this_cpu_has(msr->feature) || ++ if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) || + msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { + u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); + diff --git a/queue-7.0/landlock-allow-tsync-with-log_subdomains_off-and-fd-1.patch b/queue-7.0/landlock-allow-tsync-with-log_subdomains_off-and-fd-1.patch new file mode 100644 index 0000000000..5f08e91d84 --- /dev/null +++ b/queue-7.0/landlock-allow-tsync-with-log_subdomains_off-and-fd-1.patch @@ -0,0 +1,426 @@ +From e75e38055b9df5eafd663c6db00e634f534dc426 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Tue, 7 Apr 2026 18:41:05 +0200 +Subject: landlock: Allow TSYNC with LOG_SUBDOMAINS_OFF and fd=-1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit e75e38055b9df5eafd663c6db00e634f534dc426 upstream. + +LANDLOCK_RESTRICT_SELF_TSYNC does not allow +LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ruleset_fd=-1, preventing +a multithreaded process from atomically propagating subdomain log muting +to all threads without creating a domain layer. Relax the fd=-1 +condition to accept TSYNC alongside LOG_SUBDOMAINS_OFF, and update the +documentation accordingly. + +Add flag validation tests for all TSYNC combinations with ruleset_fd=-1, +and audit tests verifying both transition directions: muting via TSYNC +(logged to not logged) and override via TSYNC (not logged to logged). + +Cc: Günther Noack +Cc: stable@vger.kernel.org +Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()") +Reviewed-by: Günther Noack +Link: https://lore.kernel.org/r/20260407164107.2012589-2-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/landlock.h | 4 +- + security/landlock/syscalls.c | 14 +- + tools/testing/selftests/landlock/audit_test.c | 233 ++++++++++++++++++ + tools/testing/selftests/landlock/tsync_test.c | 77 ++++++ + 4 files changed, 322 insertions(+), 6 deletions(-) + +diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h +index f88fa1f68b77..d37603efc273 100644 +--- a/include/uapi/linux/landlock.h ++++ b/include/uapi/linux/landlock.h +@@ -116,7 +116,9 @@ struct landlock_ruleset_attr { + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a +- * domain. ++ * domain. When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a ++ * @ruleset_fd value of -1, this configuration is propagated to all threads ++ * of the current process. + * + * The following flag supports policy enforcement in multithreaded processes: + * +diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c +index 0d66a68677b7..a0bb664e0d31 100644 +--- a/security/landlock/syscalls.c ++++ b/security/landlock/syscalls.c +@@ -512,10 +512,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, + + /* + * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with +- * -1 as ruleset_fd, but no other flag must be set. ++ * -1 as ruleset_fd, optionally combined with ++ * LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all ++ * threads. No other flag must be set. 
+ */ + if (!(ruleset_fd == -1 && +- flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) { ++ (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) == ++ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) { + /* Gets and checks the ruleset. */ + ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ); + if (IS_ERR(ruleset)) +@@ -537,9 +540,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, + + /* + * The only case when a ruleset may not be set is if +- * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1. +- * We could optimize this case by not calling commit_creds() if this flag +- * was already set, but it is not worth the complexity. ++ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with ++ * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1. We could ++ * optimize this case by not calling commit_creds() if this flag was ++ * already set, but it is not worth the complexity. + */ + if (ruleset) { + /* +diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c +index 20099b8667e7..897596cd7c80 100644 +--- a/tools/testing/selftests/landlock/audit_test.c ++++ b/tools/testing/selftests/landlock/audit_test.c +@@ -162,6 +162,7 @@ TEST_F(audit, layers) + struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; ++ bool mute_subdomains; + }; + + static void *thread_audit_test(void *arg) +@@ -367,6 +368,238 @@ TEST_F(audit, log_subdomains_off_fork) + EXPECT_EQ(0, close(ruleset_fd)); + } + ++/* ++ * Thread function: runs two rounds of (create domain, trigger denial, signal ++ * back), waiting for the main thread before each round. When mute_subdomains ++ * is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating ++ * the domain. The ruleset_fd is kept open across both rounds so each ++ * restrict_self call stacks a new domain layer. ++ */ ++static void *thread_sandbox_deny_twice(void *arg) ++{ ++ const struct thread_data *data = (struct thread_data *)arg; ++ uintptr_t err = 0; ++ char buffer; ++ ++ /* Phase 1: optionally mutes, creates a domain, and triggers a denial. */ ++ if (read(data->pipe_parent, &buffer, 1) != 1) { ++ err = 1; ++ goto out; ++ } ++ ++ if (data->mute_subdomains && ++ landlock_restrict_self(-1, ++ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) { ++ err = 2; ++ goto out; ++ } ++ ++ if (landlock_restrict_self(data->ruleset_fd, 0)) { ++ err = 3; ++ goto out; ++ } ++ ++ if (kill(data->parent_pid, 0) != -1 || errno != EPERM) { ++ err = 4; ++ goto out; ++ } ++ ++ if (write(data->pipe_child, ".", 1) != 1) { ++ err = 5; ++ goto out; ++ } ++ ++ /* Phase 2: stacks another domain and triggers a denial. */ ++ if (read(data->pipe_parent, &buffer, 1) != 1) { ++ err = 6; ++ goto out; ++ } ++ ++ if (landlock_restrict_self(data->ruleset_fd, 0)) { ++ err = 7; ++ goto out; ++ } ++ ++ if (kill(data->parent_pid, 0) != -1 || errno != EPERM) { ++ err = 8; ++ goto out; ++ } ++ ++ if (write(data->pipe_child, ".", 1) != 1) { ++ err = 9; ++ goto out; ++ } ++ ++out: ++ close(data->ruleset_fd); ++ close(data->pipe_child); ++ close(data->pipe_parent); ++ return (void *)err; ++} ++ ++/* ++ * Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ++ * LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off ++ * to a sibling thread, suppressing audit logging on domains it subsequently ++ * creates. ++ * ++ * Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a ++ * domain and triggers a denial that IS logged. 
++ * ++ * Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain ++ * and triggers a denial that is NOT logged. ++ */ ++TEST_F(audit, log_subdomains_off_tsync) ++{ ++ const struct landlock_ruleset_attr ruleset_attr = { ++ .scoped = LANDLOCK_SCOPE_SIGNAL, ++ }; ++ struct audit_records records; ++ struct thread_data child_data = {}; ++ int pipe_child[2], pipe_parent[2]; ++ char buffer; ++ pthread_t thread; ++ void *thread_ret; ++ ++ child_data.parent_pid = getppid(); ++ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); ++ child_data.pipe_child = pipe_child[1]; ++ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); ++ child_data.pipe_parent = pipe_parent[0]; ++ child_data.ruleset_fd = ++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, child_data.ruleset_fd); ++ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ++ /* Creates the sibling thread. */ ++ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice, ++ &child_data)); ++ ++ /* ++ * Phase 1: the sibling creates a domain and triggers a denial before ++ * any log muting. This proves the audit path works. ++ */ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); ++ ++ /* The denial must be logged. */ ++ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, ++ child_data.parent_pid, NULL)); ++ ++ /* Drains any remaining records (e.g. domain allocation). */ ++ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); ++ ++ /* ++ * Mutes subdomain logs and propagates to the sibling thread via TSYNC, ++ * without creating a domain. ++ */ ++ ASSERT_EQ(0, landlock_restrict_self( ++ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | ++ LANDLOCK_RESTRICT_SELF_TSYNC)); ++ ++ /* ++ * Phase 2: the sibling stacks another domain and triggers a denial. ++ * Because log_subdomains_off was propagated via TSYNC, the new domain ++ * has log_status=LANDLOCK_LOG_DISABLED. ++ */ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); ++ ++ /* No denial record should appear. */ ++ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd, ++ child_data.parent_pid, NULL)); ++ ++ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); ++ EXPECT_EQ(0, records.access); ++ ++ EXPECT_EQ(0, close(pipe_child[0])); ++ EXPECT_EQ(0, close(pipe_parent[1])); ++ ASSERT_EQ(0, pthread_join(thread, &thread_ret)); ++ EXPECT_EQ(NULL, thread_ret); ++} ++ ++/* ++ * Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without ++ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's ++ * log_subdomains_off, re-enabling audit logging on domains the sibling ++ * subsequently creates. ++ * ++ * Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and ++ * triggers a denial that is NOT logged. ++ * ++ * Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another ++ * domain and triggers a denial that IS logged, proving the muting was ++ * overridden. 
++ */ ++TEST_F(audit, tsync_override_log_subdomains_off) ++{ ++ const struct landlock_ruleset_attr ruleset_attr = { ++ .scoped = LANDLOCK_SCOPE_SIGNAL, ++ }; ++ struct audit_records records; ++ struct thread_data child_data = {}; ++ int pipe_child[2], pipe_parent[2]; ++ char buffer; ++ pthread_t thread; ++ void *thread_ret; ++ ++ child_data.parent_pid = getppid(); ++ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); ++ child_data.pipe_child = pipe_child[1]; ++ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); ++ child_data.pipe_parent = pipe_parent[0]; ++ child_data.ruleset_fd = ++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, child_data.ruleset_fd); ++ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ++ child_data.mute_subdomains = true; ++ ++ /* Creates the sibling thread. */ ++ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice, ++ &child_data)); ++ ++ /* ++ * Phase 1: the sibling mutes subdomain logs, creates a domain, and ++ * triggers a denial. The denial must not be logged. ++ */ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); ++ ++ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd, ++ child_data.parent_pid, NULL)); ++ ++ /* Drains any remaining records. */ ++ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); ++ EXPECT_EQ(0, records.access); ++ ++ /* ++ * Overrides the sibling's log_subdomains_off by calling TSYNC without ++ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF. ++ */ ++ ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd, ++ LANDLOCK_RESTRICT_SELF_TSYNC)); ++ ++ /* ++ * Phase 2: the sibling stacks another domain and triggers a denial. ++ * Because TSYNC replaced its log_subdomains_off with 0, the new domain ++ * has log_status=LANDLOCK_LOG_PENDING. ++ */ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); ++ ++ /* The denial must be logged. 
*/ ++ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, ++ child_data.parent_pid, NULL)); ++ ++ EXPECT_EQ(0, close(pipe_child[0])); ++ EXPECT_EQ(0, close(pipe_parent[1])); ++ ASSERT_EQ(0, pthread_join(thread, &thread_ret)); ++ EXPECT_EQ(NULL, thread_ret); ++} ++ + FIXTURE(audit_flags) + { + struct audit_filter audit_filter; +diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c +index 2b9ad4f154f4..9cf1491bbaaf 100644 +--- a/tools/testing/selftests/landlock/tsync_test.c ++++ b/tools/testing/selftests/landlock/tsync_test.c +@@ -247,4 +247,81 @@ TEST(tsync_interrupt) + EXPECT_EQ(0, close(ruleset_fd)); + } + ++/* clang-format off */ ++FIXTURE(tsync_without_ruleset) {}; ++/* clang-format on */ ++ ++FIXTURE_VARIANT(tsync_without_ruleset) ++{ ++ const __u32 flags; ++ const int expected_errno; ++}; ++ ++/* clang-format off */ ++FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) { ++ /* clang-format on */ ++ .flags = LANDLOCK_RESTRICT_SELF_TSYNC, ++ .expected_errno = EBADF, ++}; ++ ++/* clang-format off */ ++FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) { ++ /* clang-format on */ ++ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | ++ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | ++ LANDLOCK_RESTRICT_SELF_TSYNC, ++ .expected_errno = EBADF, ++}; ++ ++/* clang-format off */ ++FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) { ++ /* clang-format on */ ++ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | ++ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | ++ LANDLOCK_RESTRICT_SELF_TSYNC, ++ .expected_errno = EBADF, ++}; ++ ++/* clang-format off */ ++FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) { ++ /* clang-format on */ ++ .flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | ++ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | ++ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | ++ LANDLOCK_RESTRICT_SELF_TSYNC, ++ .expected_errno = EBADF, ++}; ++ ++/* clang-format off */ ++FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) { ++ /* clang-format on */ ++ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | ++ LANDLOCK_RESTRICT_SELF_TSYNC, ++ .expected_errno = 0, ++}; ++ ++FIXTURE_SETUP(tsync_without_ruleset) ++{ ++ disable_caps(_metadata); ++} ++ ++FIXTURE_TEARDOWN(tsync_without_ruleset) ++{ ++} ++ ++TEST_F(tsync_without_ruleset, check) ++{ ++ int ret; ++ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ++ ret = landlock_restrict_self(-1, variant->flags); ++ if (variant->expected_errno) { ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(variant->expected_errno, errno); ++ } else { ++ EXPECT_EQ(0, ret); ++ } ++} ++ + TEST_HARNESS_MAIN +-- +2.54.0 + diff --git a/queue-7.0/landlock-fix-log_subdomains_off-inheritance-across-fork.patch b/queue-7.0/landlock-fix-log_subdomains_off-inheritance-across-fork.patch new file mode 100644 index 0000000000..7a34db843c --- /dev/null +++ b/queue-7.0/landlock-fix-log_subdomains_off-inheritance-across-fork.patch @@ -0,0 +1,151 @@ +From 874c8f83826c95c62c21d9edfe9ef43e5c346724 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Tue, 7 Apr 2026 18:41:04 +0200 +Subject: landlock: Fix LOG_SUBDOMAINS_OFF inheritance across fork() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit 874c8f83826c95c62c21d9edfe9ef43e5c346724 upstream. + +hook_cred_transfer() only copies the Landlock security blob when the +source credential has a domain. 
This is inconsistent with +landlock_restrict_self() which can set LOG_SUBDOMAINS_OFF on a +credential without creating a domain (via the ruleset_fd=-1 path): the +field is committed but not preserved across fork() because the child's +prepare_creds() calls hook_cred_transfer() which skips the copy when +domain is NULL. + +This breaks the documented use case where a process mutes subdomain logs +before forking sandboxed children: the children lose the muting and +their domains produce unexpected audit records. + +Fix this by unconditionally copying the Landlock credential blob. + +Cc: Günther Noack +Cc: Jann Horn +Cc: stable@vger.kernel.org +Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF") +Reviewed-by: Günther Noack +Link: https://lore.kernel.org/r/20260407164107.2012589-1-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + security/landlock/cred.c | 6 - + tools/testing/selftests/landlock/audit_test.c | 88 ++++++++++++++++++++++++++ + 2 files changed, 90 insertions(+), 4 deletions(-) + +--- a/security/landlock/cred.c ++++ b/security/landlock/cred.c +@@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cr + const struct landlock_cred_security *const old_llcred = + landlock_cred(old); + +- if (old_llcred->domain) { +- landlock_get_ruleset(old_llcred->domain); +- *landlock_cred(new) = *old_llcred; +- } ++ landlock_get_ruleset(old_llcred->domain); ++ *landlock_cred(new) = *old_llcred; + } + + static int hook_cred_prepare(struct cred *const new, +--- a/tools/testing/selftests/landlock/audit_test.c ++++ b/tools/testing/selftests/landlock/audit_test.c +@@ -279,6 +279,94 @@ TEST_F(audit, thread) + &audit_tv_default, sizeof(audit_tv_default))); + } + ++/* ++ * Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without ++ * creating a domain) is inherited by children across fork(). This exercises ++ * the hook_cred_transfer() fix: the Landlock credential blob must be copied ++ * even when the source credential has no domain. ++ * ++ * Phase 1 (baseline): a child without muting creates a domain and triggers a ++ * denial that IS logged. ++ * ++ * Phase 2 (after muting): the parent mutes subdomain logs, forks another child ++ * who creates a domain and triggers a denial that is NOT logged. ++ */ ++TEST_F(audit, log_subdomains_off_fork) ++{ ++ const struct landlock_ruleset_attr ruleset_attr = { ++ .scoped = LANDLOCK_SCOPE_SIGNAL, ++ }; ++ struct audit_records records; ++ int ruleset_fd, status; ++ pid_t child; ++ ++ ruleset_fd = ++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ++ /* ++ * Phase 1: forks a child that creates a domain and triggers a denial ++ * before any muting. This proves the audit path works. ++ */ ++ child = fork(); ++ ASSERT_LE(0, child); ++ if (child == 0) { ++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); ++ ASSERT_EQ(-1, kill(getppid(), 0)); ++ ASSERT_EQ(EPERM, errno); ++ _exit(0); ++ return; ++ } ++ ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(true, WIFEXITED(status)); ++ ASSERT_EQ(0, WEXITSTATUS(status)); ++ ++ /* The denial must be logged (baseline). */ ++ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(), ++ NULL)); ++ ++ /* Drains any remaining records (e.g. domain allocation). */ ++ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); ++ ++ /* ++ * Mutes subdomain logs without creating a domain. 
The parent's ++ * credential has domain=NULL and log_subdomains_off=1. ++ */ ++ ASSERT_EQ(0, landlock_restrict_self( ++ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); ++ ++ /* ++ * Phase 2: forks a child that creates a domain and triggers a denial. ++ * Because log_subdomains_off was inherited via fork(), the child's ++ * domain has log_status=LANDLOCK_LOG_DISABLED. ++ */ ++ child = fork(); ++ ASSERT_LE(0, child); ++ if (child == 0) { ++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); ++ ASSERT_EQ(-1, kill(getppid(), 0)); ++ ASSERT_EQ(EPERM, errno); ++ _exit(0); ++ return; ++ } ++ ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(true, WIFEXITED(status)); ++ ASSERT_EQ(0, WEXITSTATUS(status)); ++ ++ /* No denial record should appear. */ ++ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd, ++ getpid(), NULL)); ++ ++ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); ++ EXPECT_EQ(0, records.access); ++ ++ EXPECT_EQ(0, close(ruleset_fd)); ++} ++ + FIXTURE(audit_flags) + { + struct audit_filter audit_filter; diff --git a/queue-7.0/md-raid10-fix-deadlock-with-check-operation-and-nowait-requests.patch b/queue-7.0/md-raid10-fix-deadlock-with-check-operation-and-nowait-requests.patch new file mode 100644 index 0000000000..c73253919c --- /dev/null +++ b/queue-7.0/md-raid10-fix-deadlock-with-check-operation-and-nowait-requests.patch @@ -0,0 +1,100 @@ +From 7d96f3120a7fb7210d21b520c5b6f495da6ba436 Mon Sep 17 00:00:00 2001 +From: Josh Hunt +Date: Mon, 2 Mar 2026 19:56:19 -0500 +Subject: md/raid10: fix deadlock with check operation and nowait requests + +From: Josh Hunt + +commit 7d96f3120a7fb7210d21b520c5b6f495da6ba436 upstream. + +When an array check is running it will raise the barrier at which point +normal requests will become blocked and increment the nr_pending value to +signal there is work pending inside of wait_barrier(). NOWAIT requests +do not block and so will return immediately with an error, and additionally +do not increment nr_pending in wait_barrier(). Upstream change commit +43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a +call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit +this condition. raid_end_bio_io() eventually calls allow_barrier() and +it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even +though the corresponding increment on nr_pending didn't happen in the +NOWAIT case. + +This can be easily seen by starting a check operation while an application +is doing nowait IO on the same array. This results in a deadlocked state +due to nr_pending value underflowing and so the md resync thread gets stuck +waiting for nr_pending to == 0. 
+ +Output of r10conf state of the array when we hit this condition: + +crash> struct r10conf + barrier = 1, + nr_pending = { + counter = -41 + }, + nr_waiting = 15, + nr_queued = 0, + +Example of md_sync thread stuck waiting on raise_barrier() and other +requests stuck in wait_barrier(): + +md1_resync +[<0>] raise_barrier+0xce/0x1c0 +[<0>] raid10_sync_request+0x1ca/0x1ed0 +[<0>] md_do_sync+0x779/0x1110 +[<0>] md_thread+0x90/0x160 +[<0>] kthread+0xbe/0xf0 +[<0>] ret_from_fork+0x34/0x50 +[<0>] ret_from_fork_asm+0x1a/0x30 + +kworker/u1040:2+flush-253:4 +[<0>] wait_barrier+0x1de/0x220 +[<0>] regular_request_wait+0x30/0x180 +[<0>] raid10_make_request+0x261/0x1000 +[<0>] md_handle_request+0x13b/0x230 +[<0>] __submit_bio+0x107/0x1f0 +[<0>] submit_bio_noacct_nocheck+0x16f/0x390 +[<0>] ext4_io_submit+0x24/0x40 +[<0>] ext4_do_writepages+0x254/0xc80 +[<0>] ext4_writepages+0x84/0x120 +[<0>] do_writepages+0x7a/0x260 +[<0>] __writeback_single_inode+0x3d/0x300 +[<0>] writeback_sb_inodes+0x1dd/0x470 +[<0>] __writeback_inodes_wb+0x4c/0xe0 +[<0>] wb_writeback+0x18b/0x2d0 +[<0>] wb_workfn+0x2a1/0x400 +[<0>] process_one_work+0x149/0x330 +[<0>] worker_thread+0x2d2/0x410 +[<0>] kthread+0xbe/0xf0 +[<0>] ret_from_fork+0x34/0x50 +[<0>] ret_from_fork_asm+0x1a/0x30 + +Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") +Cc: stable@vger.kernel.org +Signed-off-by: Josh Hunt +Link: https://lore.kernel.org/linux-raid/20260303005619.1352958-1-johunt@akamai.com +Signed-off-by: Yu Kuai +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid10.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1184,7 +1184,7 @@ static void raid10_read_request(struct m + } + + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) { +- raid_end_bio_io(r10_bio); ++ free_r10bio(r10_bio); + return; + } + +@@ -1372,7 +1372,7 @@ static void raid10_write_request(struct + + sectors = r10_bio->sectors; + if (!regular_request_wait(mddev, conf, bio, sectors)) { +- raid_end_bio_io(r10_bio); ++ free_r10bio(r10_bio); + return; + } + diff --git a/queue-7.0/media-rc-igorplugusb-heed-coherency-rules.patch b/queue-7.0/media-rc-igorplugusb-heed-coherency-rules.patch new file mode 100644 index 0000000000..04249cbf0f --- /dev/null +++ b/queue-7.0/media-rc-igorplugusb-heed-coherency-rules.patch @@ -0,0 +1,80 @@ +From eac69475b01fe1e861dfe3960b57fa95671c132e Mon Sep 17 00:00:00 2001 +From: Oliver Neukum +Date: Wed, 11 Feb 2026 19:11:51 +0100 +Subject: media: rc: igorplugusb: heed coherency rules + +From: Oliver Neukum + +commit eac69475b01fe1e861dfe3960b57fa95671c132e upstream. + +In a control request, the USB request structure +can be subject to DMA on some HCs. Hence it must obey +the rules for DMA coherency. Allocate it separately. 
+ +Fixes: b1c97193c6437 ("[media] rc: port IgorPlug-USB to rc-core") +Cc: stable@vger.kernel.org +Signed-off-by: Oliver Neukum +Signed-off-by: Sean Young +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/rc/igorplugusb.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/drivers/media/rc/igorplugusb.c ++++ b/drivers/media/rc/igorplugusb.c +@@ -34,7 +34,7 @@ struct igorplugusb { + struct device *dev; + + struct urb *urb; +- struct usb_ctrlrequest request; ++ struct usb_ctrlrequest *request; + + struct timer_list timer; + +@@ -122,7 +122,7 @@ static void igorplugusb_cmd(struct igorp + { + int ret; + +- ir->request.bRequest = cmd; ++ ir->request->bRequest = cmd; + ir->urb->transfer_flags = 0; + ret = usb_submit_urb(ir->urb, GFP_ATOMIC); + if (ret && ret != -EPERM) +@@ -164,13 +164,17 @@ static int igorplugusb_probe(struct usb_ + if (!ir) + return -ENOMEM; + ++ ir->request = kzalloc_obj(*ir->request, GFP_KERNEL); ++ if (!ir->request) ++ goto fail; ++ + ir->dev = &intf->dev; + + timer_setup(&ir->timer, igorplugusb_timer, 0); + +- ir->request.bRequest = GET_INFRACODE; +- ir->request.bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; +- ir->request.wLength = cpu_to_le16(MAX_PACKET); ++ ir->request->bRequest = GET_INFRACODE; ++ ir->request->bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; ++ ir->request->wLength = cpu_to_le16(MAX_PACKET); + + ir->urb = usb_alloc_urb(0, GFP_KERNEL); + if (!ir->urb) +@@ -228,6 +232,7 @@ fail: + usb_free_urb(ir->urb); + rc_free_device(ir->rc); + kfree(ir->buf_in); ++ kfree(ir->request); + + return ret; + } +@@ -243,6 +248,7 @@ static void igorplugusb_disconnect(struc + usb_unpoison_urb(ir->urb); + usb_free_urb(ir->urb); + kfree(ir->buf_in); ++ kfree(ir->request); + } + + static const struct usb_device_id igorplugusb_table[] = { diff --git a/queue-7.0/media-rockchip-rkcif-comply-with-minimum-number-of-buffers-requirement.patch b/queue-7.0/media-rockchip-rkcif-comply-with-minimum-number-of-buffers-requirement.patch new file mode 100644 index 0000000000..3677f1518e --- /dev/null +++ b/queue-7.0/media-rockchip-rkcif-comply-with-minimum-number-of-buffers-requirement.patch @@ -0,0 +1,101 @@ +From 48c8292d4445088d8b3c9d639c7982744a94d819 Mon Sep 17 00:00:00 2001 +From: Michael Riesch +Date: Fri, 20 Feb 2026 09:15:08 +0100 +Subject: media: rockchip: rkcif: comply with minimum number of buffers requirement + +From: Michael Riesch + +commit 48c8292d4445088d8b3c9d639c7982744a94d819 upstream. + +Each stream requires CIF_REQ_BUFS_MIN=1 buffers to enable streaming. +However, it failed with only one buffer provided. + +Comply with the minimum number of buffers requirement and accept +exactly one buffer. 
+ +Fixes: 501802e2ad51 ("media: rockchip: rkcif: add abstraction for dma blocks") +Cc: stable@kernel.org +Tested-by: Paul Elder +Tested-by: Chen-Yu Tsai +Signed-off-by: Michael Riesch +Reviewed-by: Isaac Scott +Reviewed-by: Paul Elder +Signed-off-by: Sakari Ailus +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman +--- + .../platform/rockchip/rkcif/rkcif-stream.c | 44 +++++++++---------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +diff --git a/drivers/media/platform/rockchip/rkcif/rkcif-stream.c b/drivers/media/platform/rockchip/rkcif/rkcif-stream.c +index e00010a91e8b..f15bee4f7cd7 100644 +--- a/drivers/media/platform/rockchip/rkcif/rkcif-stream.c ++++ b/drivers/media/platform/rockchip/rkcif/rkcif-stream.c +@@ -106,42 +106,42 @@ static int rkcif_stream_init_buffers(struct rkcif_stream *stream) + { + struct v4l2_pix_format_mplane *pix = &stream->pix; + +- stream->buffers[0] = rkcif_stream_pop_buffer(stream); +- if (!stream->buffers[0]) +- goto err_buff_0; +- +- stream->buffers[1] = rkcif_stream_pop_buffer(stream); +- if (!stream->buffers[1]) +- goto err_buff_1; +- +- if (stream->queue_buffer) { +- stream->queue_buffer(stream, 0); +- stream->queue_buffer(stream, 1); +- } +- + stream->dummy.size = pix->num_planes * pix->plane_fmt[0].sizeimage; + stream->dummy.vaddr = + dma_alloc_attrs(stream->rkcif->dev, stream->dummy.size, + &stream->dummy.buffer.buff_addr[0], GFP_KERNEL, + DMA_ATTR_NO_KERNEL_MAPPING); + if (!stream->dummy.vaddr) +- goto err_dummy; ++ return -ENOMEM; + + for (unsigned int i = 1; i < pix->num_planes; i++) + stream->dummy.buffer.buff_addr[i] = + stream->dummy.buffer.buff_addr[i - 1] + + pix->plane_fmt[i - 1].bytesperline * pix->height; + ++ stream->buffers[0] = rkcif_stream_pop_buffer(stream); ++ if (!stream->buffers[0]) ++ goto err_dummy_free; ++ ++ stream->buffers[1] = rkcif_stream_pop_buffer(stream); ++ if (!stream->buffers[1]) { ++ stream->buffers[1] = &stream->dummy.buffer; ++ stream->buffers[1]->is_dummy = true; ++ } ++ ++ if (stream->queue_buffer) { ++ stream->queue_buffer(stream, 0); ++ stream->queue_buffer(stream, 1); ++ } ++ + return 0; + +-err_dummy: +- rkcif_stream_return_buffer(stream->buffers[1], VB2_BUF_STATE_QUEUED); +- stream->buffers[1] = NULL; +- +-err_buff_1: +- rkcif_stream_return_buffer(stream->buffers[0], VB2_BUF_STATE_QUEUED); +- stream->buffers[0] = NULL; +-err_buff_0: ++err_dummy_free: ++ dma_free_attrs(stream->rkcif->dev, stream->dummy.size, ++ stream->dummy.vaddr, ++ stream->dummy.buffer.buff_addr[0], ++ DMA_ATTR_NO_KERNEL_MAPPING); ++ stream->dummy.vaddr = NULL; + return -EINVAL; + } + +-- +2.54.0 + diff --git a/queue-7.0/media-rockchip-rkcif-fix-off-by-one-bugs.patch b/queue-7.0/media-rockchip-rkcif-fix-off-by-one-bugs.patch new file mode 100644 index 0000000000..911bab5710 --- /dev/null +++ b/queue-7.0/media-rockchip-rkcif-fix-off-by-one-bugs.patch @@ -0,0 +1,55 @@ +From e4056b84af0fc18c84b4e5741df04ecd8ca17973 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Fri, 20 Feb 2026 09:15:07 +0100 +Subject: media: rockchip: rkcif: fix off by one bugs + +From: Dan Carpenter + +commit e4056b84af0fc18c84b4e5741df04ecd8ca17973 upstream. + +Change these comparisons from > vs >= to avoid accessing one element +beyond the end of the arrays. +While at it, use ARRAY_SIZE instead of the _MAX enum values. 
+ +Fixes: 1f2353f5a1af ("media: rockchip: rkcif: add support for rk3568 vicap mipi capture") +Cc: stable@kernel.org +Signed-off-by: Dan Carpenter +Reviewed-by: Michael Riesch +Reviewed-by: Paul Elder +Reviewed-by: Laurent Pinchart +Tested-by: Chen-Yu Tsai +[fix cosmetic issues] +Signed-off-by: Michael Riesch +Signed-off-by: Sakari Ailus +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/platform/rockchip/rkcif/rkcif-capture-mipi.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/media/platform/rockchip/rkcif/rkcif-capture-mipi.c ++++ b/drivers/media/platform/rockchip/rkcif/rkcif-capture-mipi.c +@@ -489,8 +489,8 @@ static inline unsigned int rkcif_mipi_ge + + block = interface->index - RKCIF_MIPI_BASE; + +- if (WARN_ON_ONCE(block > RKCIF_MIPI_MAX - RKCIF_MIPI_BASE) || +- WARN_ON_ONCE(index > RKCIF_MIPI_REGISTER_MAX)) ++ if (WARN_ON_ONCE(block >= ARRAY_SIZE(rkcif->match_data->mipi->blocks)) || ++ WARN_ON_ONCE(index >= ARRAY_SIZE(rkcif->match_data->mipi->regs))) + return RKCIF_REGISTER_NOTSUPPORTED; + + offset = rkcif->match_data->mipi->blocks[block].offset; +@@ -510,9 +510,9 @@ static inline unsigned int rkcif_mipi_id + block = stream->interface->index - RKCIF_MIPI_BASE; + id = stream->id; + +- if (WARN_ON_ONCE(block > RKCIF_MIPI_MAX - RKCIF_MIPI_BASE) || +- WARN_ON_ONCE(id > RKCIF_ID_MAX) || +- WARN_ON_ONCE(index > RKCIF_MIPI_ID_REGISTER_MAX)) ++ if (WARN_ON_ONCE(block >= ARRAY_SIZE(rkcif->match_data->mipi->blocks)) || ++ WARN_ON_ONCE(id >= ARRAY_SIZE(rkcif->match_data->mipi->regs_id)) || ++ WARN_ON_ONCE(index >= ARRAY_SIZE(rkcif->match_data->mipi->regs_id[id]))) + return RKCIF_REGISTER_NOTSUPPORTED; + + offset = rkcif->match_data->mipi->blocks[block].offset; diff --git a/queue-7.0/mfd-stpmic1-attempt-system-shutdown-twice-in-case-pmic-is-confused.patch b/queue-7.0/mfd-stpmic1-attempt-system-shutdown-twice-in-case-pmic-is-confused.patch new file mode 100644 index 0000000000..251d941ac2 --- /dev/null +++ b/queue-7.0/mfd-stpmic1-attempt-system-shutdown-twice-in-case-pmic-is-confused.patch @@ -0,0 +1,62 @@ +From ffdc5c51f8bcd0e5e8255ca275a0a3b958475d99 Mon Sep 17 00:00:00 2001 +From: Marek Vasut +Date: Thu, 22 Jan 2026 12:13:21 +0100 +Subject: mfd: stpmic1: Attempt system shutdown twice in case PMIC is confused + +From: Marek Vasut + +commit ffdc5c51f8bcd0e5e8255ca275a0a3b958475d99 upstream. + +Attempt to shut down again, in case the first attempt failed. +The STPMIC1 might get confused and the first regmap_update_bits() +returns with -ETIMEDOUT / -110 . If that or similar transient +failure occurs, try to shut down again. If the second attempt +fails, there is some bigger problem, report it to user. 
+ +Cc: stable@vger.kernel.org +Fixes: 6e9df38f359a ("mfd: stpmic1: Add PMIC poweroff via sys-off handler") +Signed-off-by: Marek Vasut +Link: https://patch.msgid.link/20260122111423.62591-1-marex@nabladev.com +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mfd/stpmic1.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/drivers/mfd/stpmic1.c ++++ b/drivers/mfd/stpmic1.c +@@ -16,6 +16,8 @@ + + #include + ++#define STPMIC1_MAX_RETRIES 2 ++ + #define STPMIC1_MAIN_IRQ 0 + + static const struct regmap_range stpmic1_readable_ranges[] = { +@@ -121,9 +123,23 @@ static const struct regmap_irq_chip stpm + static int stpmic1_power_off(struct sys_off_data *data) + { + struct stpmic1 *ddata = data->cb_data; ++ int ret; ++ ++ /* ++ * Attempt to shut down again, in case the first attempt failed. ++ * The STPMIC1 might get confused and the first regmap_update_bits() ++ * returns with -ETIMEDOUT / -110 . If that or similar transient ++ * failure occurs, try to shut down again. If the second attempt ++ * fails, there is some bigger problem, report it to user. ++ */ ++ for (int retries = 0; retries < STPMIC1_MAX_RETRIES; retries++) { ++ ret = regmap_update_bits(ddata->regmap, MAIN_CR, SOFTWARE_SWITCH_OFF, ++ SOFTWARE_SWITCH_OFF); ++ if (!ret) ++ return NOTIFY_DONE; ++ } + +- regmap_update_bits(ddata->regmap, MAIN_CR, +- SOFTWARE_SWITCH_OFF, SOFTWARE_SWITCH_OFF); ++ dev_err(ddata->dev, "Failed to access PMIC I2C bus (%d)\n", ret); + + return NOTIFY_DONE; + } diff --git a/queue-7.0/mm-alloc_tag-clear-codetag-for-pages-allocated-before-page_ext-initialization.patch b/queue-7.0/mm-alloc_tag-clear-codetag-for-pages-allocated-before-page_ext-initialization.patch new file mode 100644 index 0000000000..f46ab1ebc1 --- /dev/null +++ b/queue-7.0/mm-alloc_tag-clear-codetag-for-pages-allocated-before-page_ext-initialization.patch @@ -0,0 +1,282 @@ +From 6b1842775a460245e97d36d3a67d0cfba7c4ff79 Mon Sep 17 00:00:00 2001 +From: Hao Ge +Date: Tue, 31 Mar 2026 16:13:12 +0800 +Subject: mm/alloc_tag: clear codetag for pages allocated before page_ext initialization + +From: Hao Ge + +commit 6b1842775a460245e97d36d3a67d0cfba7c4ff79 upstream. + +Due to initialization ordering, page_ext is allocated and initialized +relatively late during boot. Some pages have already been allocated and +freed before page_ext becomes available, leaving their codetag +uninitialized. + +A clear example is in init_section_page_ext(): alloc_page_ext() calls +kmemleak_alloc(). If the slab cache has no free objects, it falls back to +the buddy allocator to allocate memory. However, at this point page_ext +is not yet fully initialized, so these newly allocated pages have no +codetag set. These pages may later be reclaimed by KASAN, which causes +the warning to trigger when they are freed because their codetag ref is +still empty. + +Use a global array to track pages allocated before page_ext is fully +initialized. The array size is fixed at 8192 entries, and will emit a +warning if this limit is exceeded. When page_ext initialization +completes, set their codetag to empty to avoid warnings when they are +freed later. 
+ +This warning is only observed with CONFIG_MEM_ALLOC_PROFILING_DEBUG=Y and +mem_profiling_compressed disabled: + +[ 9.582133] ------------[ cut here ]------------ +[ 9.582137] alloc_tag was not set +[ 9.582139] WARNING: ./include/linux/alloc_tag.h:164 at __pgalloc_tag_sub+0x40f/0x550, CPU#5: systemd/1 +[ 9.582190] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 7.0.0-rc4 #1 PREEMPT(lazy) +[ 9.582192] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 9.582194] RIP: 0010:__pgalloc_tag_sub+0x40f/0x550 +[ 9.582196] Code: 00 00 4c 29 e5 48 8b 05 1f 88 56 05 48 8d 4c ad 00 48 8d 2c c8 e9 87 fd ff ff 0f 0b 0f 0b e9 f3 fe ff ff 48 8d 3d 61 2f ed 03 <67> 48 0f b9 3a e9 b3 fd ff ff 0f 0b eb e4 e8 5e cd 14 02 4c 89 c7 +[ 9.582197] RSP: 0018:ffffc9000001f940 EFLAGS: 00010246 +[ 9.582200] RAX: dffffc0000000000 RBX: 1ffff92000003f2b RCX: 1ffff110200d806c +[ 9.582201] RDX: ffff8881006c0360 RSI: 0000000000000004 RDI: ffffffff9bc7b460 +[ 9.582202] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff3a62324 +[ 9.582203] R10: ffffffff9d311923 R11: 0000000000000000 R12: ffffea0004001b00 +[ 9.582204] R13: 0000000000002000 R14: ffffea0000000000 R15: ffff8881006c0360 +[ 9.582206] FS: 00007ffbbcf2d940(0000) GS:ffff888450479000(0000) knlGS:0000000000000000 +[ 9.582208] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 9.582210] CR2: 000055ee3aa260d0 CR3: 0000000148b67005 CR4: 0000000000770ef0 +[ 9.582211] PKRU: 55555554 +[ 9.582212] Call Trace: +[ 9.582213] +[ 9.582214] ? __pfx___pgalloc_tag_sub+0x10/0x10 +[ 9.582216] ? check_bytes_and_report+0x68/0x140 +[ 9.582219] __free_frozen_pages+0x2e4/0x1150 +[ 9.582221] ? __free_slab+0xc2/0x2b0 +[ 9.582224] qlist_free_all+0x4c/0xf0 +[ 9.582227] kasan_quarantine_reduce+0x15d/0x180 +[ 9.582229] __kasan_slab_alloc+0x69/0x90 +[ 9.582232] kmem_cache_alloc_noprof+0x14a/0x500 +[ 9.582234] do_getname+0x96/0x310 +[ 9.582237] do_readlinkat+0x91/0x2f0 +[ 9.582239] ? __pfx_do_readlinkat+0x10/0x10 +[ 9.582240] ? get_random_bytes_user+0x1df/0x2c0 +[ 9.582244] __x64_sys_readlinkat+0x96/0x100 +[ 9.582246] do_syscall_64+0xce/0x650 +[ 9.582250] ? __x64_sys_getrandom+0x13a/0x1e0 +[ 9.582252] ? __pfx___x64_sys_getrandom+0x10/0x10 +[ 9.582254] ? do_syscall_64+0x114/0x650 +[ 9.582255] ? ksys_read+0xfc/0x1d0 +[ 9.582258] ? __pfx_ksys_read+0x10/0x10 +[ 9.582260] ? do_syscall_64+0x114/0x650 +[ 9.582262] ? do_syscall_64+0x114/0x650 +[ 9.582264] ? __pfx_fput_close_sync+0x10/0x10 +[ 9.582266] ? file_close_fd_locked+0x178/0x2a0 +[ 9.582268] ? __x64_sys_faccessat2+0x96/0x100 +[ 9.582269] ? __x64_sys_close+0x7d/0xd0 +[ 9.582271] ? do_syscall_64+0x114/0x650 +[ 9.582273] ? do_syscall_64+0x114/0x650 +[ 9.582275] ? clear_bhb_loop+0x50/0xa0 +[ 9.582277] ? 
clear_bhb_loop+0x50/0xa0 +[ 9.582279] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 9.582280] RIP: 0033:0x7ffbbda345ee +[ 9.582282] Code: 0f 1f 40 00 48 8b 15 29 38 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff c3 0f 1f 40 00 f3 0f 1e fa 49 89 ca b8 0b 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fa 37 0d 00 f7 d8 64 89 01 48 +[ 9.582284] RSP: 002b:00007ffe2ad8de58 EFLAGS: 00000202 ORIG_RAX: 000000000000010b +[ 9.582286] RAX: ffffffffffffffda RBX: 000055ee3aa25570 RCX: 00007ffbbda345ee +[ 9.582287] RDX: 000055ee3aa25570 RSI: 00007ffe2ad8dee0 RDI: 00000000ffffff9c +[ 9.582288] RBP: 0000000000001000 R08: 0000000000000003 R09: 0000000000001001 +[ 9.582289] R10: 0000000000001000 R11: 0000000000000202 R12: 0000000000000033 +[ 9.582290] R13: 00007ffe2ad8dee0 R14: 00000000ffffff9c R15: 00007ffe2ad8deb0 +[ 9.582292] +[ 9.582293] ---[ end trace 0000000000000000 ]--- + +Link: https://lore.kernel.org/20260331081312.123719-1-hao.ge@linux.dev +Fixes: dcfe378c81f72 ("lib: introduce support for page allocation tagging") +Signed-off-by: Hao Ge +Suggested-by: Suren Baghdasaryan +Acked-by: Suren Baghdasaryan +Cc: Kent Overstreet +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/alloc_tag.h | 2 + include/linux/pgalloc_tag.h | 2 + lib/alloc_tag.c | 109 ++++++++++++++++++++++++++++++++++++++++++++ + mm/page_alloc.c | 10 +++- + 4 files changed, 121 insertions(+), 2 deletions(-) + +--- a/include/linux/alloc_tag.h ++++ b/include/linux/alloc_tag.h +@@ -163,9 +163,11 @@ static inline void alloc_tag_sub_check(u + { + WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n"); + } ++void alloc_tag_add_early_pfn(unsigned long pfn); + #else + static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {} + static inline void alloc_tag_sub_check(union codetag_ref *ref) {} ++static inline void alloc_tag_add_early_pfn(unsigned long pfn) {} + #endif + + /* Caller should verify both ref and tag to be valid */ +--- a/include/linux/pgalloc_tag.h ++++ b/include/linux/pgalloc_tag.h +@@ -181,7 +181,7 @@ static inline struct alloc_tag *__pgallo + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); +- if (ref.ct) ++ if (ref.ct && !is_codetag_empty(&ref)) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); + } +--- a/lib/alloc_tag.c ++++ b/lib/alloc_tag.c +@@ -6,7 +6,9 @@ + #include + #include + #include ++#include + #include ++#include + #include + #include + #include +@@ -758,8 +760,115 @@ static __init bool need_page_alloc_taggi + return mem_profiling_support; + } + ++#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG ++/* ++ * Track page allocations before page_ext is initialized. ++ * Some pages are allocated before page_ext becomes available, leaving ++ * their codetag uninitialized. Track these early PFNs so we can clear ++ * their codetag refs later to avoid warnings when they are freed. ++ * ++ * Early allocations include: ++ * - Base allocations independent of CPU count ++ * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init, ++ * such as trace ring buffers, scheduler per-cpu data) ++ * ++ * For simplicity, we fix the size to 8192. ++ * If insufficient, a warning will be triggered to alert the user. ++ * ++ * TODO: Replace fixed-size array with dynamic allocation using ++ * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion. 
++ */ ++#define EARLY_ALLOC_PFN_MAX 8192 ++ ++static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata; ++static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0); ++ ++static void __init __alloc_tag_add_early_pfn(unsigned long pfn) ++{ ++ int old_idx, new_idx; ++ ++ do { ++ old_idx = atomic_read(&early_pfn_count); ++ if (old_idx >= EARLY_ALLOC_PFN_MAX) { ++ pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n", ++ EARLY_ALLOC_PFN_MAX); ++ return; ++ } ++ new_idx = old_idx + 1; ++ } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx)); ++ ++ early_pfns[old_idx] = pfn; ++} ++ ++typedef void alloc_tag_add_func(unsigned long pfn); ++static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata = ++ RCU_INITIALIZER(__alloc_tag_add_early_pfn); ++ ++void alloc_tag_add_early_pfn(unsigned long pfn) ++{ ++ alloc_tag_add_func *alloc_tag_add; ++ ++ if (static_key_enabled(&mem_profiling_compressed)) ++ return; ++ ++ rcu_read_lock(); ++ alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr); ++ if (alloc_tag_add) ++ alloc_tag_add(pfn); ++ rcu_read_unlock(); ++} ++ ++static void __init clear_early_alloc_pfn_tag_refs(void) ++{ ++ unsigned int i; ++ ++ if (static_key_enabled(&mem_profiling_compressed)) ++ return; ++ ++ rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL); ++ /* Make sure we are not racing with __alloc_tag_add_early_pfn() */ ++ synchronize_rcu(); ++ ++ for (i = 0; i < atomic_read(&early_pfn_count); i++) { ++ unsigned long pfn = early_pfns[i]; ++ ++ if (pfn_valid(pfn)) { ++ struct page *page = pfn_to_page(pfn); ++ union pgtag_ref_handle handle; ++ union codetag_ref ref; ++ ++ if (get_page_tag_ref(page, &ref, &handle)) { ++ /* ++ * An early-allocated page could be freed and reallocated ++ * after its page_ext is initialized but before we clear it. ++ * In that case, it already has a valid tag set. ++ * We should not overwrite that valid tag with CODETAG_EMPTY. ++ * ++ * Note: there is still a small race window between checking ++ * ref.ct and calling set_codetag_empty(). We accept this ++ * race as it's unlikely and the extra complexity of atomic ++ * cmpxchg is not worth it for this debug-only code path. ++ */ ++ if (ref.ct) { ++ put_page_tag_ref(handle); ++ continue; ++ } ++ ++ set_codetag_empty(&ref); ++ update_page_tag_ref(handle, &ref); ++ put_page_tag_ref(handle); ++ } ++ } ++ ++ } ++} ++#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */ ++static inline void __init clear_early_alloc_pfn_tag_refs(void) {} ++#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ ++ + static __init void init_page_alloc_tagging(void) + { ++ clear_early_alloc_pfn_tag_refs(); + } + + struct page_ext_operations page_alloc_tagging_ops = { +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1289,10 +1289,18 @@ void __pgalloc_tag_add(struct page *page + union pgtag_ref_handle handle; + union codetag_ref ref; + +- if (get_page_tag_ref(page, &ref, &handle)) { ++ if (likely(get_page_tag_ref(page, &ref, &handle))) { + alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); ++ } else { ++ /* ++ * page_ext is not available yet, record the pfn so we can ++ * clear the tag ref later when page_ext is initialized. 
++ */ ++ alloc_tag_add_early_pfn(page_to_pfn(page)); ++ if (task->alloc_tag) ++ alloc_tag_set_inaccurate(task->alloc_tag); + } + } + diff --git a/queue-7.0/mm-damon-core-fix-damon_call-vs-kdamond_fn-exit-race.patch b/queue-7.0/mm-damon-core-fix-damon_call-vs-kdamond_fn-exit-race.patch new file mode 100644 index 0000000000..86741a0aab --- /dev/null +++ b/queue-7.0/mm-damon-core-fix-damon_call-vs-kdamond_fn-exit-race.patch @@ -0,0 +1,166 @@ +From 55da81663b9642dd046b26dd6f1baddbcf337c1e Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 27 Mar 2026 16:33:14 -0700 +Subject: mm/damon/core: fix damon_call() vs kdamond_fn() exit race + +From: SeongJae Park + +commit 55da81663b9642dd046b26dd6f1baddbcf337c1e upstream. + +Patch series "mm/damon/core: fix damon_call()/damos_walk() vs kdmond exit +race". + +damon_call() and damos_walk() can leak memory and/or deadlock when they +race with kdamond terminations. Fix those. + + +This patch (of 2); + +When kdamond_fn() main loop is finished, the function cancels all +remaining damon_call() requests and unset the damon_ctx->kdamond so that +API callers and API functions themselves can know the context is +terminated. damon_call() adds the caller's request to the queue first. +After that, it shows if the kdamond of the damon_ctx is still running +(damon_ctx->kdamond is set). Only if the kdamond is running, damon_call() +starts waiting for the kdamond's handling of the newly added request. + +The damon_call() requests registration and damon_ctx->kdamond unset are +protected by different mutexes, though. Hence, damon_call() could race +with damon_ctx->kdamond unset, and result in deadlocks. + +For example, let's suppose kdamond successfully finished the damon_call() +requests cancelling. Right after that, damon_call() is called for the +context. It registers the new request, and shows the context is still +running, because damon_ctx->kdamond unset is not yet done. Hence the +damon_call() caller starts waiting for the handling of the request. +However, the kdamond is already on the termination steps, so it never +handles the new request. As a result, the damon_call() caller threads +infinitely waits. + +Fix this by introducing another damon_ctx field, namely +call_controls_obsolete. It is protected by the +damon_ctx->call_controls_lock, which protects damon_call() requests +registration. Initialize (unset) it in kdamond_fn() before letting +damon_start() returns and set it just before the cancelling of remaining +damon_call() requests is executed. damon_call() reads the obsolete field +under the lock and avoids adding a new request. + +After this change, only requests that are guaranteed to be handled or +cancelled are registered. Hence the after-registration DAMON context +termination check is no longer needed. Remove it together. + +Note that the deadlock will not happen when damon_call() is called for +repeat mode request. In tis case, damon_call() returns instead of waiting +for the handling when the request registration succeeds and it shows the +kdamond is running. However, if the request also has dealloc_on_cancel, +the request memory would be leaked. + +The issue is found by sashiko [1]. 
+ +Link: https://lore.kernel.org/20260327233319.3528-1-sj@kernel.org +Link: https://lore.kernel.org/20260327233319.3528-2-sj@kernel.org +Link: https://lore.kernel.org/20260325141956.87144-1-sj@kernel.org [1] +Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()") +Signed-off-by: SeongJae Park +Cc: # 6.14.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/damon.h | 1 + + mm/damon/core.c | 45 ++++++++++++++------------------------------- + 2 files changed, 15 insertions(+), 31 deletions(-) + +--- a/include/linux/damon.h ++++ b/include/linux/damon.h +@@ -805,6 +805,7 @@ struct damon_ctx { + + /* lists of &struct damon_call_control */ + struct list_head call_controls; ++ bool call_controls_obsolete; + struct mutex call_controls_lock; + + struct damos_walk_control *walk_control; +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -1464,35 +1464,6 @@ int damon_kdamond_pid(struct damon_ctx * + return pid; + } + +-/* +- * damon_call_handle_inactive_ctx() - handle DAMON call request that added to +- * an inactive context. +- * @ctx: The inactive DAMON context. +- * @control: Control variable of the call request. +- * +- * This function is called in a case that @control is added to @ctx but @ctx is +- * not running (inactive). See if @ctx handled @control or not, and cleanup +- * @control if it was not handled. +- * +- * Returns 0 if @control was handled by @ctx, negative error code otherwise. +- */ +-static int damon_call_handle_inactive_ctx( +- struct damon_ctx *ctx, struct damon_call_control *control) +-{ +- struct damon_call_control *c; +- +- mutex_lock(&ctx->call_controls_lock); +- list_for_each_entry(c, &ctx->call_controls, list) { +- if (c == control) { +- list_del(&control->list); +- mutex_unlock(&ctx->call_controls_lock); +- return -EINVAL; +- } +- } +- mutex_unlock(&ctx->call_controls_lock); +- return 0; +-} +- + /** + * damon_call() - Invoke a given function on DAMON worker thread (kdamond). + * @ctx: DAMON context to call the function for. +@@ -1510,6 +1481,10 @@ static int damon_call_handle_inactive_ct + * synchronization. The return value of the function will be saved in + * &damon_call_control->return_code. + * ++ * Note that this function should be called only after damon_start() with the ++ * @ctx has succeeded. Otherwise, this function could fall into an indefinite ++ * wait. ++ * + * Return: 0 on success, negative error code otherwise. 
+ */ + int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) +@@ -1520,10 +1495,12 @@ int damon_call(struct damon_ctx *ctx, st + INIT_LIST_HEAD(&control->list); + + mutex_lock(&ctx->call_controls_lock); ++ if (ctx->call_controls_obsolete) { ++ mutex_unlock(&ctx->call_controls_lock); ++ return -ECANCELED; ++ } + list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); +- if (!damon_is_running(ctx)) +- return damon_call_handle_inactive_ctx(ctx, control); + if (control->repeat) + return 0; + wait_for_completion(&control->completion); +@@ -2751,6 +2728,9 @@ static int kdamond_fn(void *data) + + pr_debug("kdamond (%d) starts\n", current->pid); + ++ mutex_lock(&ctx->call_controls_lock); ++ ctx->call_controls_obsolete = false; ++ mutex_unlock(&ctx->call_controls_lock); + complete(&ctx->kdamond_started); + kdamond_init_ctx(ctx); + +@@ -2855,6 +2835,9 @@ done: + damon_destroy_targets(ctx); + + kfree(ctx->regions_score_histogram); ++ mutex_lock(&ctx->call_controls_lock); ++ ctx->call_controls_obsolete = true; ++ mutex_unlock(&ctx->call_controls_lock); + kdamond_call(ctx, true); + damos_walk_cancel(ctx); + diff --git a/queue-7.0/mm-damon-core-fix-damos_walk-vs-kdamond_fn-exit-race.patch b/queue-7.0/mm-damon-core-fix-damos_walk-vs-kdamond_fn-exit-race.patch new file mode 100644 index 0000000000..bb94c12aaa --- /dev/null +++ b/queue-7.0/mm-damon-core-fix-damos_walk-vs-kdamond_fn-exit-race.patch @@ -0,0 +1,124 @@ +From 33c3f6c2b48cd84b441dba1ee3e62290e53930f4 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 27 Mar 2026 16:33:15 -0700 +Subject: mm/damon/core: fix damos_walk() vs kdamond_fn() exit race + +From: SeongJae Park + +commit 33c3f6c2b48cd84b441dba1ee3e62290e53930f4 upstream. + +When kdamond_fn() main loop is finished, the function cancels remaining +damos_walk() request and unset the damon_ctx->kdamond so that API callers +and API functions themselves can show the context is terminated. +damos_walk() adds the caller's request to the queue first. After that, it +shows if the kdamond of the damon_ctx is still running (damon_ctx->kdamond +is set). Only if the kdamond is running, damos_walk() starts waiting for +the kdamond's handling of the newly added request. + +The damos_walk() requests registration and damon_ctx->kdamond unset are +protected by different mutexes, though. Hence, damos_walk() could race +with damon_ctx->kdamond unset, and result in deadlocks. + +For example, let's suppose kdamond successfully finished the damow_walk() +request cancelling. Right after that, damos_walk() is called for the +context. It registers the new request, and shows the context is still +running, because damon_ctx->kdamond unset is not yet done. Hence the +damos_walk() caller starts waiting for the handling of the request. +However, the kdamond is already on the termination steps, so it never +handles the new request. As a result, the damos_walk() caller thread +infinitely waits. + +Fix this by introducing another damon_ctx field, namely +walk_control_obsolete. It is protected by the +damon_ctx->walk_control_lock, which protects damos_walk() request +registration. Initialize (unset) it in kdamond_fn() before letting +damon_start() returns and set it just before the cancelling of the +remaining damos_walk() request is executed. damos_walk() reads the +obsolete field under the lock and avoids adding a new request. + +After this change, only requests that are guaranteed to be handled or +cancelled are registered. 
Hence the after-registration DAMON context +termination check is no longer needed. Remove it together. + +The issue is found by sashiko [1]. + + +Link: https://lore.kernel.org/20260327233319.3528-3-sj@kernel.org +Link: https://lore.kernel.org/20260325141956.87144-1-sj@kernel.org [1] +Fixes: bf0eaba0ff9c ("mm/damon/core: implement damos_walk()") +Signed-off-by: SeongJae Park +Cc: # 6.14.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/damon.h | 1 + + mm/damon/core.c | 21 ++++++++++++++------- + 2 files changed, 15 insertions(+), 7 deletions(-) + +--- a/include/linux/damon.h ++++ b/include/linux/damon.h +@@ -809,6 +809,7 @@ struct damon_ctx { + struct mutex call_controls_lock; + + struct damos_walk_control *walk_control; ++ bool walk_control_obsolete; + struct mutex walk_control_lock; + + /* +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -1528,6 +1528,10 @@ int damon_call(struct damon_ctx *ctx, st + * passed at least one &damos->apply_interval_us, kdamond marks the request as + * completed so that damos_walk() can wakeup and return. + * ++ * Note that this function should be called only after damon_start() with the ++ * @ctx has succeeded. Otherwise, this function could fall into an indefinite ++ * wait. ++ * + * Return: 0 on success, negative error code otherwise. + */ + int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) +@@ -1535,19 +1539,16 @@ int damos_walk(struct damon_ctx *ctx, st + init_completion(&control->completion); + control->canceled = false; + mutex_lock(&ctx->walk_control_lock); ++ if (ctx->walk_control_obsolete) { ++ mutex_unlock(&ctx->walk_control_lock); ++ return -ECANCELED; ++ } + if (ctx->walk_control) { + mutex_unlock(&ctx->walk_control_lock); + return -EBUSY; + } + ctx->walk_control = control; + mutex_unlock(&ctx->walk_control_lock); +- if (!damon_is_running(ctx)) { +- mutex_lock(&ctx->walk_control_lock); +- if (ctx->walk_control == control) +- ctx->walk_control = NULL; +- mutex_unlock(&ctx->walk_control_lock); +- return -EINVAL; +- } + wait_for_completion(&control->completion); + if (control->canceled) + return -ECANCELED; +@@ -2731,6 +2732,9 @@ static int kdamond_fn(void *data) + mutex_lock(&ctx->call_controls_lock); + ctx->call_controls_obsolete = false; + mutex_unlock(&ctx->call_controls_lock); ++ mutex_lock(&ctx->walk_control_lock); ++ ctx->walk_control_obsolete = false; ++ mutex_unlock(&ctx->walk_control_lock); + complete(&ctx->kdamond_started); + kdamond_init_ctx(ctx); + +@@ -2839,6 +2843,9 @@ done: + ctx->call_controls_obsolete = true; + mutex_unlock(&ctx->call_controls_lock); + kdamond_call(ctx, true); ++ mutex_lock(&ctx->walk_control_lock); ++ ctx->walk_control_obsolete = true; ++ mutex_unlock(&ctx->walk_control_lock); + damos_walk_cancel(ctx); + + pr_debug("kdamond (%d) finishes\n", current->pid); diff --git a/queue-7.0/mm-hugetlb-fix-early-boot-crash-on-parameters-without-separator.patch b/queue-7.0/mm-hugetlb-fix-early-boot-crash-on-parameters-without-separator.patch new file mode 100644 index 0000000000..6944a73330 --- /dev/null +++ b/queue-7.0/mm-hugetlb-fix-early-boot-crash-on-parameters-without-separator.patch @@ -0,0 +1,42 @@ +From c45b354911d01565156e38d7f6bc07edb51fc34c Mon Sep 17 00:00:00 2001 +From: Thorsten Blum +Date: Thu, 9 Apr 2026 12:54:40 +0200 +Subject: mm/hugetlb: fix early boot crash on parameters without '=' separator + +From: Thorsten Blum + +commit c45b354911d01565156e38d7f6bc07edb51fc34c upstream. 
+ +If hugepages, hugepagesz, or default_hugepagesz are specified on the +kernel command line without the '=' separator, early parameter parsing +passes NULL to hugetlb_add_param(), which dereferences it in strlen() and +can crash the system during early boot. + +Reject NULL values in hugetlb_add_param() and return -EINVAL instead. + +Link: https://lore.kernel.org/20260409105437.108686-4-thorsten.blum@linux.dev +Fixes: 5b47c02967ab ("mm/hugetlb: convert cmdline parameters from setup to early") +Signed-off-by: Thorsten Blum +Reviewed-by: Muchun Song +Cc: David Hildenbrand +Cc: Frank van der Linden +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4252,6 +4252,9 @@ static __init int hugetlb_add_param(char + size_t len; + char *p; + ++ if (!s) ++ return -EINVAL; ++ + if (hugetlb_param_index >= HUGE_MAX_CMDLINE_ARGS) + return -EINVAL; + diff --git a/queue-7.0/mm-zone_device-do-not-touch-device-folio-after-calling-folio_free.patch b/queue-7.0/mm-zone_device-do-not-touch-device-folio-after-calling-folio_free.patch new file mode 100644 index 0000000000..8d6233661e --- /dev/null +++ b/queue-7.0/mm-zone_device-do-not-touch-device-folio-after-calling-folio_free.patch @@ -0,0 +1,40 @@ +From 39928984956037cabd304321cb8f342e47421db5 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Fri, 10 Apr 2026 16:03:46 -0700 +Subject: mm/zone_device: do not touch device folio after calling ->folio_free() + +From: Matthew Brost + +commit 39928984956037cabd304321cb8f342e47421db5 upstream. + +The contents of a device folio can immediately change after calling +->folio_free(), as the folio may be reallocated by a driver with a +different order. Instead of touching the folio again to extract the +pgmap, use the local stack variable when calling percpu_ref_put_many(). + +Link: https://lore.kernel.org/20260410230346.4009855-1-matthew.brost@intel.com +Fixes: d245f9b4ab80 ("mm/zone_device: support large zone device private folios") +Signed-off-by: Matthew Brost +Reviewed-by: Balbir Singh +Reviewed-by: Vishal Moola +Reviewed-by: Alistair Popple +Cc: David Hildenbrand +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memremap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memremap.c ++++ b/mm/memremap.c +@@ -454,7 +454,7 @@ void free_zone_device_folio(struct folio + if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free)) + break; + pgmap->ops->folio_free(folio); +- percpu_ref_put_many(&folio->pgmap->ref, nr); ++ percpu_ref_put_many(&pgmap->ref, nr); + break; + + case MEMORY_DEVICE_GENERIC: diff --git a/queue-7.0/mtd-docg3-fix-use-after-free-in-docg3_release.patch b/queue-7.0/mtd-docg3-fix-use-after-free-in-docg3_release.patch new file mode 100644 index 0000000000..baea0a366c --- /dev/null +++ b/queue-7.0/mtd-docg3-fix-use-after-free-in-docg3_release.patch @@ -0,0 +1,48 @@ +From ca19808bc6fac7e29420d8508df569b346b3e339 Mon Sep 17 00:00:00 2001 +From: James Kim +Date: Mon, 9 Mar 2026 15:05:12 +0900 +Subject: mtd: docg3: fix use-after-free in docg3_release() + +From: James Kim + +commit ca19808bc6fac7e29420d8508df569b346b3e339 upstream. + +In docg3_release(), the docg3 pointer is obtained from +cascade->floors[0]->priv before the loop that calls +doc_release_device() on each floor. doc_release_device() frees the +docg3 struct via kfree(docg3) at line 1881. 
After the loop, +docg3->cascade->bch dereferences the already-freed pointer. + +Fix this by accessing cascade->bch directly, which is equivalent +since docg3->cascade points back to the same cascade struct, and +is already available as a local variable. This also removes the +now-unused docg3 local variable. + +Fixes: c8ae3f744ddc ("lib/bch: Rework a little bit the exported function names") +Cc: stable@vger.kernel.org +Signed-off-by: James Kim +Signed-off-by: Miquel Raynal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mtd/devices/docg3.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/mtd/devices/docg3.c ++++ b/drivers/mtd/devices/docg3.c +@@ -2049,7 +2049,6 @@ err_probe: + static void docg3_release(struct platform_device *pdev) + { + struct docg3_cascade *cascade = platform_get_drvdata(pdev); +- struct docg3 *docg3 = cascade->floors[0]->priv; + int floor; + + doc_unregister_sysfs(pdev, cascade); +@@ -2057,7 +2056,7 @@ static void docg3_release(struct platfor + if (cascade->floors[floor]) + doc_release_device(cascade->floors[floor]); + +- bch_free(docg3->cascade->bch); ++ bch_free(cascade->bch); + } + + #ifdef CONFIG_OF diff --git a/queue-7.0/nvme-pci-add-nvme_quirk_disable_write_zeroes-for-kingston-om3sgp4.patch b/queue-7.0/nvme-pci-add-nvme_quirk_disable_write_zeroes-for-kingston-om3sgp4.patch new file mode 100644 index 0000000000..b2af365b63 --- /dev/null +++ b/queue-7.0/nvme-pci-add-nvme_quirk_disable_write_zeroes-for-kingston-om3sgp4.patch @@ -0,0 +1,41 @@ +From a8eebf9699d69987cc49cec4e4fdb4111ab32423 Mon Sep 17 00:00:00 2001 +From: Robert Beckett +Date: Fri, 20 Mar 2026 19:22:09 +0000 +Subject: nvme-pci: add NVME_QUIRK_DISABLE_WRITE_ZEROES for Kingston OM3SGP4 + +From: Robert Beckett + +commit a8eebf9699d69987cc49cec4e4fdb4111ab32423 upstream. + +The Kingston OM3SGP42048K2-A00 (PCI ID 2646:502f) firmware has a race +condition when processing concurrent write zeroes and DSM (discard) +commands, causing spurious "LBA Out of Range" errors and IOMMU page +faults at address 0x0. + +The issue is reliably triggered by running two concurrent mkfs commands +on different partitions of the same drive, which generates interleaved +write zeroes and discard operations. + +Disable write zeroes for this device, matching the pattern used for +other Kingston OM* drives that have similar firmware issues. + +Cc: stable@vger.kernel.org +Signed-off-by: Robert Beckett +Assisted-by: claude-opus-4-6-v1 +Signed-off-by: Keith Busch +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/pci.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -4178,6 +4178,8 @@ static const struct pci_device_id nvme_i + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x2646, 0x502F), /* KINGSTON OM3SGP4xxxxK NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1f40, 0x1202), /* Netac Technologies Co. NV3000 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. 
NV7000 NVMe SSD */ diff --git a/queue-7.0/nvme-respect-nvme_quirk_disable_write_zeroes-when-wzsl-is-set.patch b/queue-7.0/nvme-respect-nvme_quirk_disable_write_zeroes-when-wzsl-is-set.patch new file mode 100644 index 0000000000..1ca46d0769 --- /dev/null +++ b/queue-7.0/nvme-respect-nvme_quirk_disable_write_zeroes-when-wzsl-is-set.patch @@ -0,0 +1,41 @@ +From 40f0496b617b431f8d2dd94d7f785c1121f8a68a Mon Sep 17 00:00:00 2001 +From: Robert Beckett +Date: Fri, 20 Mar 2026 19:22:08 +0000 +Subject: nvme: respect NVME_QUIRK_DISABLE_WRITE_ZEROES when wzsl is set + +From: Robert Beckett + +commit 40f0496b617b431f8d2dd94d7f785c1121f8a68a upstream. + +The NVM Command Set Identify Controller data may report a non-zero +Write Zeroes Size Limit (wzsl). When present, nvme_init_non_mdts_limits() +unconditionally overrides max_zeroes_sectors from wzsl, even if +NVME_QUIRK_DISABLE_WRITE_ZEROES previously set it to zero. + +This effectively re-enables write zeroes for devices that need it +disabled, defeating the quirk. Several Kingston OM* drives rely on +this quirk to avoid firmware issues with write zeroes commands. + +Check for the quirk before applying the wzsl override. + +Fixes: 5befc7c26e5a ("nvme: implement non-mdts command limits") +Cc: stable@vger.kernel.org +Signed-off-by: Robert Beckett +Assisted-by: claude-opus-4-6-v1 +Signed-off-by: Keith Busch +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -3388,7 +3388,7 @@ static int nvme_init_non_mdts_limits(str + + ctrl->dmrl = id->dmrl; + ctrl->dmrsl = le32_to_cpu(id->dmrsl); +- if (id->wzsl) ++ if (id->wzsl && !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); + + free_data: diff --git a/queue-7.0/parisc-_llseek-syscall-is-only-available-for-32-bit-userspace.patch b/queue-7.0/parisc-_llseek-syscall-is-only-available-for-32-bit-userspace.patch new file mode 100644 index 0000000000..d36e6e5a28 --- /dev/null +++ b/queue-7.0/parisc-_llseek-syscall-is-only-available-for-32-bit-userspace.patch @@ -0,0 +1,27 @@ +From da3680f564bd787ce974f9931e6e924d908b3b2a Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Tue, 7 Apr 2026 23:56:28 +0200 +Subject: parisc: _llseek syscall is only available for 32-bit userspace + +From: Helge Deller + +commit da3680f564bd787ce974f9931e6e924d908b3b2a upstream. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/kernel/syscalls/syscall.tbl | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/parisc/kernel/syscalls/syscall.tbl ++++ b/arch/parisc/kernel/syscalls/syscall.tbl +@@ -154,7 +154,7 @@ + # 137 was afs_syscall + 138 common setfsuid sys_setfsuid + 139 common setfsgid sys_setfsgid +-140 common _llseek sys_llseek ++140 32 _llseek sys_llseek + 141 common getdents sys_getdents compat_sys_getdents + 142 common _newselect sys_select compat_sys_select + 143 common flock sys_flock diff --git a/queue-7.0/parisc-drop-ip_fast_csum-inline-assembly-implementation.patch b/queue-7.0/parisc-drop-ip_fast_csum-inline-assembly-implementation.patch new file mode 100644 index 0000000000..634ab17dc2 --- /dev/null +++ b/queue-7.0/parisc-drop-ip_fast_csum-inline-assembly-implementation.patch @@ -0,0 +1,266 @@ +From 3dd31a370c1dccb580f729af7c580ccb1ae3c0c9 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Fri, 10 Apr 2026 16:12:31 +0200 +Subject: parisc: Drop ip_fast_csum() inline assembly implementation + +From: Helge Deller + +commit 3dd31a370c1dccb580f729af7c580ccb1ae3c0c9 upstream. + +The assembly code of ip_fast_csum() triggers unaligned access warnings +if the IP header isn't correctly aligned: + + Kernel: unaligned access to 0x173d22e76 in inet_gro_receive+0xbc/0x2e8 (iir 0x0e8810b6) + Kernel: unaligned access to 0x173d22e7e in inet_gro_receive+0xc4/0x2e8 (iir 0x0e88109a) + Kernel: unaligned access to 0x173d22e82 in inet_gro_receive+0xc8/0x2e8 (iir 0x0e90109d) + Kernel: unaligned access to 0x173d22e7a in inet_gro_receive+0xd0/0x2e8 (iir 0x0e9810b8) + Kernel: unaligned access to 0x173d22e86 in inet_gro_receive+0xdc/0x2e8 (iir 0x0e8810b8) + +We have the option to a) ignore the warnings, b) work around it by +adding more code to check for alignment, or c) to switch to the generic +implementation and rely on the compiler to optimize the code. + +Let's go with c), because a) isn't nice, and b) would effectively lead +to an implementation which is basically equal to c). + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v7.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/Kconfig | 3 + + arch/parisc/include/asm/checksum.h | 89 --------------------------------- + arch/parisc/lib/Makefile | 2 + arch/parisc/lib/checksum.c | 99 ------------------------------------- + 4 files changed, 6 insertions(+), 187 deletions(-) + delete mode 100644 arch/parisc/lib/checksum.c + +--- a/arch/parisc/Kconfig ++++ b/arch/parisc/Kconfig +@@ -130,6 +130,9 @@ config GENERIC_BUG + config GENERIC_BUG_RELATIVE_POINTERS + bool + ++config GENERIC_CSUM ++ def_bool y ++ + config GENERIC_HWEIGHT + bool + default y +--- a/arch/parisc/include/asm/checksum.h ++++ b/arch/parisc/include/asm/checksum.h +@@ -4,73 +4,7 @@ + + #include + +-/* +- * computes the checksum of a memory block at buff, length len, +- * and adds in "sum" (32-bit) +- * +- * returns a 32-bit number suitable for feeding into itself +- * or csum_tcpudp_magic +- * +- * this function must be called with even lengths, except +- * for the last fragment, which may be odd +- * +- * it's best to have buff aligned on a 32-bit boundary +- */ +-extern __wsum csum_partial(const void *, int, __wsum); +- +-/* +- * Optimized for IP headers, which always checksum on 4 octet boundaries. 
+- * +- * Written by Randolph Chung , and then mucked with by +- * LaMont Jones +- */ +-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +-{ +- unsigned int sum; +- unsigned long t0, t1, t2; +- +- __asm__ __volatile__ ( +-" ldws,ma 4(%1), %0\n" +-" addib,<= -4, %2, 2f\n" +-"\n" +-" ldws 4(%1), %4\n" +-" ldws 8(%1), %5\n" +-" add %0, %4, %0\n" +-" ldws,ma 12(%1), %3\n" +-" addc %0, %5, %0\n" +-" addc %0, %3, %0\n" +-"1: ldws,ma 4(%1), %3\n" +-" addib,> -1, %2, 1b\n" +-" addc %0, %3, %0\n" +-"\n" +-" extru %0, 31, 16, %4\n" +-" extru %0, 15, 16, %5\n" +-" addc %4, %5, %0\n" +-" extru %0, 15, 16, %5\n" +-" add %0, %5, %0\n" +-" subi -1, %0, %0\n" +-"2:\n" +- : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (t0), "=r" (t1), "=r" (t2) +- : "1" (iph), "2" (ihl) +- : "memory"); +- +- return (__force __sum16)sum; +-} +- +-/* +- * Fold a partial checksum +- */ +-static inline __sum16 csum_fold(__wsum csum) +-{ +- u32 sum = (__force u32)csum; +- /* add the swapped two 16-bit halves of sum, +- a possible carry from adding the two 16-bit halves, +- will carry from the lower half into the upper half, +- giving us the correct sum in the upper half. */ +- sum += (sum << 16) + (sum >> 16); +- return (__force __sum16)(~sum >> 16); +-} +- ++#define csum_tcpudp_nofold csum_tcpudp_nofold + static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum sum) +@@ -85,26 +19,7 @@ static inline __wsum csum_tcpudp_nofold( + return sum; + } + +-/* +- * computes the checksum of the TCP/UDP pseudo-header +- * returns a 16-bit checksum, already complemented +- */ +-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, +- __u32 len, __u8 proto, +- __wsum sum) +-{ +- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +-} +- +-/* +- * this routine is used for miscellaneous IP-like checksums, mainly +- * in icmp.c +- */ +-static inline __sum16 ip_compute_csum(const void *buf, int len) +-{ +- return csum_fold (csum_partial(buf, len, 0)); +-} +- ++#include + + #define _HAVE_ARCH_IPV6_CSUM + static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, +--- a/arch/parisc/lib/Makefile ++++ b/arch/parisc/lib/Makefile +@@ -3,7 +3,7 @@ + # Makefile for parisc-specific library files + # + +-lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ ++lib-y := lusercopy.o bitops.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o + + obj-y := iomap.o +--- a/arch/parisc/lib/checksum.c ++++ /dev/null +@@ -1,99 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-or-later +-/* +- * INET An implementation of the TCP/IP protocol suite for the LINUX +- * operating system. INET is implemented using the BSD Socket +- * interface as the means of communication with the user level. +- * +- * MIPS specific IP/TCP/UDP checksumming routines +- * +- * Authors: Ralf Baechle, +- * Lots of code moved from tcp.c and ip.c; see those files +- * for more names. +- */ +-#include +-#include +- +-#include +-#include +-#include +-#include +- +-#define addc(_t,_r) \ +- __asm__ __volatile__ ( \ +-" add %0, %1, %0\n" \ +-" addc %0, %%r0, %0\n" \ +- : "=r"(_t) \ +- : "r"(_r), "0"(_t)); +- +-static inline unsigned int do_csum(const unsigned char * buff, int len) +-{ +- int odd, count; +- unsigned int result = 0; +- +- if (len <= 0) +- goto out; +- odd = 1 & (unsigned long) buff; +- if (odd) { +- result = be16_to_cpu(*buff); +- len--; +- buff++; +- } +- count = len >> 1; /* nr of 16-bit words.. 
*/ +- if (count) { +- if (2 & (unsigned long) buff) { +- result += *(unsigned short *) buff; +- count--; +- len -= 2; +- buff += 2; +- } +- count >>= 1; /* nr of 32-bit words.. */ +- if (count) { +- while (count >= 4) { +- unsigned int r1, r2, r3, r4; +- r1 = *(unsigned int *)(buff + 0); +- r2 = *(unsigned int *)(buff + 4); +- r3 = *(unsigned int *)(buff + 8); +- r4 = *(unsigned int *)(buff + 12); +- addc(result, r1); +- addc(result, r2); +- addc(result, r3); +- addc(result, r4); +- count -= 4; +- buff += 16; +- } +- while (count) { +- unsigned int w = *(unsigned int *) buff; +- count--; +- buff += 4; +- addc(result, w); +- } +- result = (result & 0xffff) + (result >> 16); +- } +- if (len & 2) { +- result += *(unsigned short *) buff; +- buff += 2; +- } +- } +- if (len & 1) +- result += le16_to_cpu(*buff); +- result = csum_from32to16(result); +- if (odd) +- result = swab16(result); +-out: +- return result; +-} +- +-/* +- * computes a partial checksum, e.g. for TCP/UDP fragments +- */ +-/* +- * why bother folding? +- */ +-__wsum csum_partial(const void *buff, int len, __wsum sum) +-{ +- unsigned int result = do_csum(buff, len); +- addc(result, sum); +- return (__force __wsum)csum_from32to16(result); +-} +- +-EXPORT_SYMBOL(csum_partial); diff --git a/queue-7.0/parisc-led-fix-reference-leak-on-failed-device-registration.patch b/queue-7.0/parisc-led-fix-reference-leak-on-failed-device-registration.patch new file mode 100644 index 0000000000..dcbdf3039c --- /dev/null +++ b/queue-7.0/parisc-led-fix-reference-leak-on-failed-device-registration.patch @@ -0,0 +1,50 @@ +From 707610bcccbd0327530938e33f3f33211a640a4e Mon Sep 17 00:00:00 2001 +From: Guangshuo Li +Date: Thu, 16 Apr 2026 01:05:15 +0800 +Subject: parisc: led: fix reference leak on failed device registration + +From: Guangshuo Li + +commit 707610bcccbd0327530938e33f3f33211a640a4e upstream. + +When platform_device_register() fails in startup_leds(), the embedded +struct device in platform_leds has already been initialized by +device_initialize(), but the failure path only reports the error and +does not drop the device reference for the current platform device: + + startup_leds() + -> platform_device_register(&platform_leds) + -> device_initialize(&platform_leds.dev) + -> setup_pdev_dma_masks(&platform_leds) + -> platform_device_add(&platform_leds) + +This leads to a reference leak when platform_device_register() fails. +Fix this by calling platform_device_put() after reporting the error. + +The issue was identified by a static analysis tool I developed and +confirmed by manual review. 
+ +Fixes: 789e527adfc33 ("parisc: led: Rewrite LED/LCD driver to utilizize Linux LED subsystem") +Cc: stable@vger.kernel.org +Signed-off-by: Guangshuo Li +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/parisc/led.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/parisc/led.c ++++ b/drivers/parisc/led.c +@@ -543,8 +543,10 @@ static void __init register_led_regions( + + static int __init startup_leds(void) + { +- if (platform_device_register(&platform_leds)) +- printk(KERN_INFO "LED: failed to register LEDs\n"); ++ if (platform_device_register(&platform_leds)) { ++ pr_info("LED: failed to register LEDs\n"); ++ platform_device_put(&platform_leds); ++ } + register_led_regions(); + return 0; + } diff --git a/queue-7.0/pci-cadence-use-cdns_pcie_read_sz-for-byte-or-word-read-access.patch b/queue-7.0/pci-cadence-use-cdns_pcie_read_sz-for-byte-or-word-read-access.patch new file mode 100644 index 0000000000..4fd1577f35 --- /dev/null +++ b/queue-7.0/pci-cadence-use-cdns_pcie_read_sz-for-byte-or-word-read-access.patch @@ -0,0 +1,104 @@ +From d9cf7154deed71a4f23e81101571c79cdc77be00 Mon Sep 17 00:00:00 2001 +From: Aksh Garg +Date: Thu, 2 Apr 2026 14:25:45 +0530 +Subject: PCI: cadence: Use cdns_pcie_read_sz() for byte or word read access + +From: Aksh Garg + +commit d9cf7154deed71a4f23e81101571c79cdc77be00 upstream. + +The commit 18ac51ae9df9 ("PCI: cadence: Implement capability search +using PCI core APIs") assumed all the platforms using Cadence PCIe +controller support byte and word register accesses. This is not true +for all platforms (e.g., TI J721E SoC, which only supports dword +register accesses). + +This causes capability searches via cdns_pcie_find_capability() to fail +on such platforms. + +Fix this by using cdns_pcie_read_sz() for config read functions, which +properly handles size-aligned accesses. Remove the now-unused byte and +word read wrapper functions (cdns_pcie_readw and cdns_pcie_readb). 
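+
+The helper derives byte and word values from a single aligned 32-bit
+access, along these lines (a simplified sketch; pcie_read_sz_sketch is
+an illustrative name and the real cdns_pcie_read_sz() in the header
+below is authoritative):
+
+    static u32 pcie_read_sz_sketch(void __iomem *addr, int size)
+    {
+        void __iomem *aligned_addr = PTR_ALIGN_DOWN(addr, 0x4);
+        unsigned int offset = (unsigned long)addr & 0x3;
+        u32 val = readl(aligned_addr);    /* always a dword access */
+
+        if (size == 0x4)
+            return val;
+        /* shift the addressed byte/word down and mask it off */
+        return (val >> (offset * 8)) & GENMASK(size * 8 - 1, 0);
+    }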
+ +Fixes: 18ac51ae9df9 ("PCI: cadence: Implement capability search using PCI core APIs") +Signed-off-by: Aksh Garg +Signed-off-by: Manivannan Sadhasivam +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260402085545.284457-1-a-garg7@ti.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/cadence/pcie-cadence.h | 56 +++++++++++--------------- + 1 file changed, 25 insertions(+), 31 deletions(-) + +--- a/drivers/pci/controller/cadence/pcie-cadence.h ++++ b/drivers/pci/controller/cadence/pcie-cadence.h +@@ -249,37 +249,6 @@ static inline u32 cdns_pcie_hpa_readl(st + return readl(pcie->reg_base + reg); + } + +-static inline u16 cdns_pcie_readw(struct cdns_pcie *pcie, u32 reg) +-{ +- return readw(pcie->reg_base + reg); +-} +- +-static inline u8 cdns_pcie_readb(struct cdns_pcie *pcie, u32 reg) +-{ +- return readb(pcie->reg_base + reg); +-} +- +-static inline int cdns_pcie_read_cfg_byte(struct cdns_pcie *pcie, int where, +- u8 *val) +-{ +- *val = cdns_pcie_readb(pcie, where); +- return PCIBIOS_SUCCESSFUL; +-} +- +-static inline int cdns_pcie_read_cfg_word(struct cdns_pcie *pcie, int where, +- u16 *val) +-{ +- *val = cdns_pcie_readw(pcie, where); +- return PCIBIOS_SUCCESSFUL; +-} +- +-static inline int cdns_pcie_read_cfg_dword(struct cdns_pcie *pcie, int where, +- u32 *val) +-{ +- *val = cdns_pcie_readl(pcie, where); +- return PCIBIOS_SUCCESSFUL; +-} +- + static inline u32 cdns_pcie_read_sz(void __iomem *addr, int size) + { + void __iomem *aligned_addr = PTR_ALIGN_DOWN(addr, 0x4); +@@ -320,6 +289,31 @@ static inline void cdns_pcie_write_sz(vo + writel(val, aligned_addr); + } + ++static inline int cdns_pcie_read_cfg_byte(struct cdns_pcie *pcie, int where, ++ u8 *val) ++{ ++ void __iomem *addr = pcie->reg_base + where; ++ ++ *val = cdns_pcie_read_sz(addr, 0x1); ++ return PCIBIOS_SUCCESSFUL; ++} ++ ++static inline int cdns_pcie_read_cfg_word(struct cdns_pcie *pcie, int where, ++ u16 *val) ++{ ++ void __iomem *addr = pcie->reg_base + where; ++ ++ *val = cdns_pcie_read_sz(addr, 0x2); ++ return PCIBIOS_SUCCESSFUL; ++} ++ ++static inline int cdns_pcie_read_cfg_dword(struct cdns_pcie *pcie, int where, ++ u32 *val) ++{ ++ *val = cdns_pcie_readl(pcie, where); ++ return PCIBIOS_SUCCESSFUL; ++} ++ + /* Root Port register access */ + static inline void cdns_pcie_rp_writeb(struct cdns_pcie *pcie, + u32 reg, u8 value) diff --git a/queue-7.0/pci-imx6-fix-reference-clock-source-selection-for-i.mx95.patch b/queue-7.0/pci-imx6-fix-reference-clock-source-selection-for-i.mx95.patch new file mode 100644 index 0000000000..4bfae4f750 --- /dev/null +++ b/queue-7.0/pci-imx6-fix-reference-clock-source-selection-for-i.mx95.patch @@ -0,0 +1,41 @@ +From 88cc4cbe08bba27bb58888d25d336774aa0ccab1 Mon Sep 17 00:00:00 2001 +From: Franz Schnyder +Date: Wed, 25 Mar 2026 10:31:16 +0100 +Subject: PCI: imx6: Fix reference clock source selection for i.MX95 + +From: Franz Schnyder + +commit 88cc4cbe08bba27bb58888d25d336774aa0ccab1 upstream. + +In the PCIe PHY init for the i.MX95, the reference clock source selection +uses a conditional instead of always passing the mask. This currently +breaks functionality if the internal refclk is used. + +To fix this issue, always pass IMX95_PCIE_REF_USE_PAD as the mask and clear +bit if external refclk is not used. This essentially swaps the parameters. 
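+
+For clarity, the prototype is regmap_update_bits(map, reg, mask, val):
+the third argument selects which bits may change and the fourth gives
+their new value, so the fixed call reads:
+
+    /* always pass the PAD bit as the mask; set or clear it via ext */
+    regmap_update_bits(imx_pcie->iomuxc_gpr, IMX95_PCIE_PHY_GEN_CTRL,
+                       IMX95_PCIE_REF_USE_PAD,
+                       ext ? IMX95_PCIE_REF_USE_PAD : 0);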
+ +Fixes: d8574ce57d76 ("PCI: imx6: Add external reference clock input mode support") +Signed-off-by: Franz Schnyder +Signed-off-by: Manivannan Sadhasivam +Signed-off-by: Bjorn Helgaas +Acked-by: Richard Zhu +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260325093118.684142-1-fra.schnyder@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/dwc/pci-imx6.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/pci/controller/dwc/pci-imx6.c ++++ b/drivers/pci/controller/dwc/pci-imx6.c +@@ -268,8 +268,8 @@ static int imx95_pcie_init_phy(struct im + IMX95_PCIE_PHY_CR_PARA_SEL); + + regmap_update_bits(imx_pcie->iomuxc_gpr, IMX95_PCIE_PHY_GEN_CTRL, +- ext ? IMX95_PCIE_REF_USE_PAD : 0, +- IMX95_PCIE_REF_USE_PAD); ++ IMX95_PCIE_REF_USE_PAD, ++ ext ? IMX95_PCIE_REF_USE_PAD : 0); + regmap_update_bits(imx_pcie->iomuxc_gpr, IMX95_PCIE_SS_RW_REG_0, + IMX95_PCIE_REF_CLKEN, + ext ? 0 : IMX95_PCIE_REF_CLKEN); diff --git a/queue-7.0/perf-annotate-use-jump__delete-when-freeing-loongarch-jumps.patch b/queue-7.0/perf-annotate-use-jump__delete-when-freeing-loongarch-jumps.patch new file mode 100644 index 0000000000..83035657c4 --- /dev/null +++ b/queue-7.0/perf-annotate-use-jump__delete-when-freeing-loongarch-jumps.patch @@ -0,0 +1,118 @@ +From a355eefc36c4481188249b067832b40a2c45fa5c Mon Sep 17 00:00:00 2001 +From: Rong Bao +Date: Mon, 13 Apr 2026 18:03:55 +0800 +Subject: perf annotate: Use jump__delete when freeing LoongArch jumps + +From: Rong Bao + +commit a355eefc36c4481188249b067832b40a2c45fa5c upstream. + +Currently, the initialization of loongarch_jump_ops does not contain an +assignment to its .free field. This causes disasm_line__free() to fall +through to ins_ops__delete() for LoongArch jump instructions. + +ins_ops__delete() will free ins_operands.source.raw and +ins_operands.source.name, and these fields overlaps with +ins_operands.jump.raw_comment and ins_operands.jump.raw_func_start. +Since in loongarch_jump__parse(), these two fields are populated by +strchr()-ing the same buffer, trying to free them will lead to undefined +behavior. + +This invalid free usually leads to crashes: + + Process 1712902 (perf) of user 1000 dumped core. 
+ Stack trace of thread 1712902: + #0 0x00007fffef155c58 n/a (libc.so.6 + 0x95c58) + #1 0x00007fffef0f7a94 raise (libc.so.6 + 0x37a94) + #2 0x00007fffef0dd6a8 abort (libc.so.6 + 0x1d6a8) + #3 0x00007fffef145490 n/a (libc.so.6 + 0x85490) + #4 0x00007fffef1646f4 n/a (libc.so.6 + 0xa46f4) + #5 0x00007fffef164718 n/a (libc.so.6 + 0xa4718) + #6 0x00005555583a6764 __zfree (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x106764) + #7 0x000055555854fb70 disasm_line__free (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x2afb70) + #8 0x000055555853d618 annotated_source__purge (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x29d618) + #9 0x000055555852300c __hist_entry__tui_annotate (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x28300c) + #10 0x0000555558526718 do_annotate (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x286718) + #11 0x000055555852ed94 evsel__hists_browse (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x28ed94) + #12 0x000055555831fdd0 cmd_report (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x7fdd0) + #13 0x000055555839b644 handle_internal_command (/home/csmantle/dist/linux-arch/tools/perf/perf + 0xfb644) + #14 0x00005555582fe6ac main (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x5e6ac) + #15 0x00007fffef0ddd90 n/a (libc.so.6 + 0x1dd90) + #16 0x00007fffef0ddf0c __libc_start_main (libc.so.6 + 0x1df0c) + #17 0x00005555582fed10 _start (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x5ed10) + ELF object binary architecture: LoongArch + +... and it can be confirmed with Valgrind: + + ==1721834== Invalid free() / delete / delete[] / realloc() + ==1721834== at 0x4EA9014: free (in /usr/lib/valgrind/vgpreload_memcheck-loongarch64-linux.so) + ==1721834== by 0x4106287: __zfree (zalloc.c:13) + ==1721834== by 0x42ADC8F: disasm_line__free (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x429B737: annotated_source__purge (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42811EB: __hist_entry__tui_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42848D7: do_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x428CF33: evsel__hists_browse (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== Address 0x7d34303 is 35 bytes inside a block of size 62 alloc'd + ==1721834== at 0x4EA59B8: malloc (in /usr/lib/valgrind/vgpreload_memcheck-loongarch64-linux.so) + ==1721834== by 0x6B80B6F: strdup (strdup.c:42) + ==1721834== by 0x42AD917: disasm_line__new (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42AE5A3: symbol__disassemble_objdump (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42AF0A7: symbol__disassemble (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x429B3CF: symbol__annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x429C233: symbol__annotate2 (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42804D3: __hist_entry__tui_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x42848D7: do_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) + ==1721834== by 0x428CF33: evsel__hists_browse (in /home/csmantle/dist/linux-arch/tools/perf/perf) + +This patch adds the missing free() specialization in loongarch_jump_ops, +which prevents disasm_line__free() from invoking the default cleanup +function. 
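+
+Schematically, the source and jump operand views share one union, and
+the jump pointers are interior pointers produced by strchr(), so the
+generic free of the source fields operates on storage that was never a
+separate allocation (field names as in the description above; the
+buffer name and delimiter here are only illustrative):
+
+    union {
+        struct { char *raw; char *name; } source;  /* freed by ins_ops__delete() */
+        struct { char *raw_comment;
+                 char *raw_func_start; } jump;     /* set by loongarch_jump__parse() */
+    } ops;
+
+    ops.jump.raw_comment = strchr(buf, '#'); /* points inside buf, not malloc'ed */
+    free(ops.source.raw);                    /* same storage: invalid free */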
+ +Fixes: fb7fd2a14a503b9a ("perf annotate: Move raw_comment and raw_func_start fields out of 'struct ins_operands'") +Cc: stable@vger.kernel.org +Cc: WANG Rui +Cc: Huacai Chen +Cc: WANG Xuerui +Cc: loongarch@lists.linux.dev +Signed-off-by: Rong Bao +Tested-by: WANG Rui +Signed-off-by: Namhyung Kim +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/annotate-arch/annotate-loongarch.c | 1 + + tools/perf/util/disasm.c | 2 +- + tools/perf/util/disasm.h | 2 ++ + 3 files changed, 4 insertions(+), 1 deletion(-) + +--- a/tools/perf/util/annotate-arch/annotate-loongarch.c ++++ b/tools/perf/util/annotate-arch/annotate-loongarch.c +@@ -110,6 +110,7 @@ static int loongarch_jump__parse(const s + } + + static const struct ins_ops loongarch_jump_ops = { ++ .free = jump__delete, + .parse = loongarch_jump__parse, + .scnprintf = jump__scnprintf, + .is_jump = true, +--- a/tools/perf/util/disasm.c ++++ b/tools/perf/util/disasm.c +@@ -451,7 +451,7 @@ int jump__scnprintf(const struct ins *in + ops->target.offset); + } + +-static void jump__delete(struct ins_operands *ops __maybe_unused) ++void jump__delete(struct ins_operands *ops __maybe_unused) + { + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the +--- a/tools/perf/util/disasm.h ++++ b/tools/perf/util/disasm.h +@@ -161,6 +161,8 @@ int jump__scnprintf(const struct ins *in + int mov__scnprintf(const struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); + ++void jump__delete(struct ins_operands *ops); ++ + int symbol__disassemble(struct symbol *sym, struct annotate_args *args); + + char *expand_tabs(char *line, char **storage, size_t *storage_len); diff --git a/queue-7.0/rbd-fix-null-ptr-deref-when-device_add_disk-fails.patch b/queue-7.0/rbd-fix-null-ptr-deref-when-device_add_disk-fails.patch new file mode 100644 index 0000000000..d4b00fa899 --- /dev/null +++ b/queue-7.0/rbd-fix-null-ptr-deref-when-device_add_disk-fails.patch @@ -0,0 +1,116 @@ +From d1fef92e414433ca7b89abf85cb0df42b8d475eb Mon Sep 17 00:00:00 2001 +From: Dawei Feng +Date: Sun, 19 Apr 2026 17:03:48 +0800 +Subject: rbd: fix null-ptr-deref when device_add_disk() fails + +From: Dawei Feng + +commit d1fef92e414433ca7b89abf85cb0df42b8d475eb upstream. + +do_rbd_add() publishes the device with device_add() before calling +device_add_disk(). If device_add_disk() fails after device_add() +succeeds, the error path calls rbd_free_disk() directly and then later +falls through to rbd_dev_device_release(), which calls rbd_free_disk() +again. This double teardown can leave blk-mq cleanup operating on +invalid state and trigger a null-ptr-deref in +__blk_mq_free_map_and_rqs(), reached from blk_mq_free_tag_set(). + +Fix this by following the normal remove ordering: call device_del() +before rbd_dev_device_release() when device_add_disk() fails after +device_add(). That keeps the teardown sequence consistent and avoids +re-entering disk cleanup through the wrong path. + +The bug was first flagged by an experimental analysis tool we are +developing for kernel memory-management bugs while analyzing +v6.13-rc1. The tool is still under development and is not yet publicly +available. + +We reproduced the bug on v7.0 with a real Ceph backend and a QEMU x86_64 +guest booted with KASAN and CONFIG_FAILSLAB enabled. The reproducer +confines failslab injections to the __add_disk() range and injects +fail-nth while mapping an RBD image through +/sys/bus/rbd/add_single_major. 
+ +On the unpatched kernel, fail-nth=4 reliably triggered the fault: + + Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI + KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] + CPU: 0 UID: 0 PID: 273 Comm: bash Not tainted 7.0.0-01247-gd60bc1401583 #6 PREEMPT(lazy) + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 + RIP: 0010:__blk_mq_free_map_and_rqs+0x8c/0x240 + Code: 00 00 48 8b 6b 60 41 89 f4 49 c1 e4 03 4c 01 e5 45 85 ed 0f 85 0a 01 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 e9 48 c1 e9 03 <80> 3c 01 00 0f 85 31 01 00 00 4c 8b 6d 00 4d 85 ed 0f 84 e2 00 00 + RSP: 0018:ff1100000ab0fac8 EFLAGS: 00000246 + RAX: dffffc0000000000 RBX: ff1100000c4806a0 RCX: 0000000000000000 + RDX: 0000000000000002 RSI: 0000000000000000 RDI: ff1100000c4806f4 + RBP: 0000000000000000 R08: 0000000000000001 R09: ffe21c000189001b + R10: ff1100000c4800df R11: ff1100006cf37be0 R12: 0000000000000000 + R13: 0000000000000000 R14: ff1100000c480700 R15: ff1100000c480004 + FS: 00007f0fbe8fe740(0000) GS:ff110000e5851000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fe53473b2e0 CR3: 0000000012eef000 CR4: 00000000007516f0 + PKRU: 55555554 + Call Trace: + + blk_mq_free_tag_set+0x77/0x460 + do_rbd_add+0x1446/0x2b80 + ? __pfx_do_rbd_add+0x10/0x10 + ? lock_acquire+0x18c/0x300 + ? find_held_lock+0x2b/0x80 + ? sysfs_file_kobj+0xb6/0x1b0 + ? __pfx_sysfs_kf_write+0x10/0x10 + kernfs_fop_write_iter+0x2f4/0x4a0 + vfs_write+0x98e/0x1000 + ? expand_files+0x51f/0x850 + ? __pfx_vfs_write+0x10/0x10 + ksys_write+0xf2/0x1d0 + ? __pfx_ksys_write+0x10/0x10 + do_syscall_64+0x115/0x690 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + RIP: 0033:0x7f0fbea15907 + Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 + RSP: 002b:00007ffe22346ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 + RAX: ffffffffffffffda RBX: 0000000000000058 RCX: 00007f0fbea15907 + RDX: 0000000000000058 RSI: 0000563ace6c0ef0 RDI: 0000000000000001 + RBP: 0000563ace6c0ef0 R08: 0000563ace6c0ef0 R09: 6b6435726d694141 + R10: 5250337279762f78 R11: 0000000000000246 R12: 0000000000000058 + R13: 00007f0fbeb1c780 R14: ff1100000c480700 R15: ff1100000c480004 + + +With this fix applied, rerunning the reproducer over fail-nth=1..256 +yields no KASAN reports. 
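+
+With the fix, the failure path after a successful device_add() mirrors
+the normal removal ordering and reaches the disk teardown only once
+(a rough sketch of the resulting flow):
+
+    rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
+    if (rc)
+        goto err_out_device;
+    /* success path continues */
+
+err_out_device:
+    device_del(&rbd_dev->dev);       /* undo device_add() first */
+err_out_image_lock:
+    rbd_dev_image_unlock(rbd_dev);
+    rbd_dev_device_release(rbd_dev); /* the single rbd_free_disk() happens here */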
+ +[ idryomov: rename err_out_device_del -> err_out_device ] + +Cc: stable@vger.kernel.org +Fixes: 27c97abc30e2 ("rbd: add add_disk() error handling") +Signed-off-by: Zilin Guan +Signed-off-by: Dawei Feng +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/rbd.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -7165,7 +7165,7 @@ static ssize_t do_rbd_add(const char *bu + + rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); + if (rc) +- goto err_out_cleanup_disk; ++ goto err_out_device; + + spin_lock(&rbd_dev_list_lock); + list_add_tail(&rbd_dev->node, &rbd_dev_list); +@@ -7179,8 +7179,8 @@ out: + module_put(THIS_MODULE); + return rc; + +-err_out_cleanup_disk: +- rbd_free_disk(rbd_dev); ++err_out_device: ++ device_del(&rbd_dev->dev); + err_out_image_lock: + rbd_dev_image_unlock(rbd_dev); + rbd_dev_device_release(rbd_dev); diff --git a/queue-7.0/rdma-mana_ib-disable-rx-steering-on-rss-qp-destroy.patch b/queue-7.0/rdma-mana_ib-disable-rx-steering-on-rss-qp-destroy.patch new file mode 100644 index 0000000000..c95cd0c18a --- /dev/null +++ b/queue-7.0/rdma-mana_ib-disable-rx-steering-on-rss-qp-destroy.patch @@ -0,0 +1,109 @@ +From dbeb256e8dd87233d891b170c0b32a6466467036 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Wed, 25 Mar 2026 12:40:57 -0700 +Subject: RDMA/mana_ib: Disable RX steering on RSS QP destroy + +From: Long Li + +commit dbeb256e8dd87233d891b170c0b32a6466467036 upstream. + +When an RSS QP is destroyed (e.g. DPDK exit), mana_ib_destroy_qp_rss() +destroys the RX WQ objects but does not disable vPort RX steering in +firmware. This leaves stale steering configuration that still points to +the destroyed RX objects. + +If traffic continues to arrive (e.g. peer VM is still transmitting) and +the VF interface is subsequently brought up (mana_open), the firmware +may deliver completions using stale CQ IDs from the old RX objects. +These CQ IDs can be reused by the ethernet driver for new TX CQs, +causing RX completions to land on TX CQs: + + WARNING: mana_poll_tx_cq+0x1b8/0x220 [mana] (is_sq == false) + WARNING: mana_gd_process_eq_events+0x209/0x290 (cq_table lookup fails) + +Fix this by disabling vPort RX steering before destroying RX WQ objects. +Note that mana_fence_rqs() cannot be used here because the fence +completion is delivered on the CQ, which is polled by user-mode (e.g. +DPDK) and not visible to the kernel driver. + +Refactor the disable logic into a shared mana_disable_vport_rx() in +mana_en, exported for use by mana_ib, replacing the duplicate code. +The ethernet driver's mana_dealloc_queues() is also updated to call +this common function. + +Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") +Cc: stable@vger.kernel.org +Signed-off-by: Long Li +Link: https://patch.msgid.link/20260325194100.1929056-1-longli@microsoft.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mana/qp.c | 15 +++++++++++++++ + drivers/net/ethernet/microsoft/mana/mana_en.c | 11 ++++++++++- + include/net/mana/mana.h | 1 + + 3 files changed, 26 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mana/qp.c ++++ b/drivers/infiniband/hw/mana/qp.c +@@ -822,6 +822,21 @@ static int mana_ib_destroy_qp_rss(struct + ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port); + mpc = netdev_priv(ndev); + ++ /* Disable vPort RX steering before destroying RX WQ objects. 
++ * Otherwise firmware still routes traffic to the destroyed queues, ++ * which can cause bogus completions on reused CQ IDs when the ++ * ethernet driver later creates new queues on mana_open(). ++ * ++ * Unlike the ethernet teardown path, mana_fence_rqs() cannot be ++ * used here because the fence completion CQE is delivered on the ++ * CQ which is polled by userspace (e.g. DPDK), so there is no way ++ * for the kernel to wait for fence completion. ++ * ++ * This is best effort — if it fails there is not much we can do, ++ * and mana_cfg_vport_steering() already logs the error. ++ */ ++ mana_disable_vport_rx(mpc); ++ + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -2889,6 +2889,13 @@ static void mana_rss_table_init(struct m + ethtool_rxfh_indir_default(i, apc->num_queues); + } + ++int mana_disable_vport_rx(struct mana_port_context *apc) ++{ ++ return mana_cfg_vport_steering(apc, TRI_STATE_FALSE, false, false, ++ false); ++} ++EXPORT_SYMBOL_NS(mana_disable_vport_rx, "NET_MANA"); ++ + int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, + bool update_hash, bool update_tab) + { +@@ -3273,10 +3280,12 @@ static int mana_dealloc_queues(struct ne + */ + + apc->rss_state = TRI_STATE_FALSE; +- err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); ++ err = mana_disable_vport_rx(apc); + if (err && mana_en_need_log(apc, err)) + netdev_err(ndev, "Failed to disable vPort: %d\n", err); + ++ mana_fence_rqs(apc); ++ + /* Even in err case, still need to cleanup the vPort */ + mana_destroy_vport(apc); + +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -568,6 +568,7 @@ struct mana_port_context { + netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); + int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); ++int mana_disable_vport_rx(struct mana_port_context *apc); + + int mana_alloc_queues(struct net_device *ndev); + int mana_attach(struct net_device *ndev); diff --git a/queue-7.0/remoteproc-xlnx-only-access-buffer-information-if-ipi-is-buffered.patch b/queue-7.0/remoteproc-xlnx-only-access-buffer-information-if-ipi-is-buffered.patch new file mode 100644 index 0000000000..61effc984e --- /dev/null +++ b/queue-7.0/remoteproc-xlnx-only-access-buffer-information-if-ipi-is-buffered.patch @@ -0,0 +1,54 @@ +From 38dd6ccfdfbbe865569a52fe1ba9fa1478f672e6 Mon Sep 17 00:00:00 2001 +From: Ben Levinsky +Date: Tue, 3 Mar 2026 15:51:27 -0800 +Subject: remoteproc: xlnx: Only access buffer information if IPI is buffered + +From: Ben Levinsky + +commit 38dd6ccfdfbbe865569a52fe1ba9fa1478f672e6 upstream. + +In the receive callback check if message is NULL to prevent +possibility of crash by NULL pointer dereferencing. 
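+
+A buffered IPI hands the callback a non-NULL zynqmp_ipi_message while a
+non-buffered one does not, so the copy into the local buffer has to be
+guarded (a condensed sketch; the hunk below keeps the dev_warn() on
+oversized messages rather than silently clamping with min_t()):
+
+    ipi_msg = (struct zynqmp_ipi_message *)msg;
+    if (ipi_msg) {    /* NULL when the IPI carries no buffer */
+        buf_msg = (struct zynqmp_ipi_message *)ipi->rx_mc_buf;
+        len = min_t(size_t, ipi_msg->len, IPI_BUF_LEN_MAX);
+        buf_msg->len = len;
+        memcpy(buf_msg->data, ipi_msg->data, len);
+    }
+    /* the interrupt is acked either way */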
+ +Signed-off-by: Ben Levinsky +Signed-off-by: Tanmay Shah +Fixes: 5dfb28c257b7 ("remoteproc: xilinx: Add mailbox channels for rpmsg") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20260303235127.2317955-3-tanmay.shah@amd.com +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/xlnx_r5_remoteproc.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/drivers/remoteproc/xlnx_r5_remoteproc.c ++++ b/drivers/remoteproc/xlnx_r5_remoteproc.c +@@ -232,17 +232,19 @@ static void zynqmp_r5_mb_rx_cb(struct mb + + ipi = container_of(cl, struct mbox_info, mbox_cl); + +- /* copy data from ipi buffer to r5_core */ ++ /* copy data from ipi buffer to r5_core if IPI is buffered. */ + ipi_msg = (struct zynqmp_ipi_message *)msg; +- buf_msg = (struct zynqmp_ipi_message *)ipi->rx_mc_buf; +- len = ipi_msg->len; +- if (len > IPI_BUF_LEN_MAX) { +- dev_warn(cl->dev, "msg size exceeded than %d\n", +- IPI_BUF_LEN_MAX); +- len = IPI_BUF_LEN_MAX; ++ if (ipi_msg) { ++ buf_msg = (struct zynqmp_ipi_message *)ipi->rx_mc_buf; ++ len = ipi_msg->len; ++ if (len > IPI_BUF_LEN_MAX) { ++ dev_warn(cl->dev, "msg size exceeded than %d\n", ++ IPI_BUF_LEN_MAX); ++ len = IPI_BUF_LEN_MAX; ++ } ++ buf_msg->len = len; ++ memcpy(buf_msg->data, ipi_msg->data, len); + } +- buf_msg->len = len; +- memcpy(buf_msg->data, ipi_msg->data, len); + + /* received and processed interrupt ack */ + if (mbox_send_message(ipi->rx_chan, NULL) < 0) diff --git a/queue-7.0/reset-rzv2h-usb2phy-keep-phy-clock-enabled-for-entire-device-lifetime.patch b/queue-7.0/reset-rzv2h-usb2phy-keep-phy-clock-enabled-for-entire-device-lifetime.patch new file mode 100644 index 0000000000..81e552f481 --- /dev/null +++ b/queue-7.0/reset-rzv2h-usb2phy-keep-phy-clock-enabled-for-entire-device-lifetime.patch @@ -0,0 +1,168 @@ +From 8889b289ce1bd11a5102b9617742a1b93bb4843e Mon Sep 17 00:00:00 2001 +From: Tommaso Merciai +Date: Thu, 12 Mar 2026 15:50:38 +0100 +Subject: reset: rzv2h-usb2phy: Keep PHY clock enabled for entire device lifetime + +From: Tommaso Merciai + +commit 8889b289ce1bd11a5102b9617742a1b93bb4843e upstream. + +The driver was disabling the USB2 PHY clock immediately after register +initialization in probe() and after each reset operation. This left the +PHY unclocked even though it must remain active for USB functionality. + +The behavior appeared to work only when another driver +(e.g., USB controller) had already enabled the clock, making operation +unreliable and hardware-dependent. In configurations where this driver +is the sole clock user, USB functionality would fail. + +Fix this by: +- Enabling the clock once in probe() via pm_runtime_resume_and_get() +- Removing all pm_runtime_put() calls from assert/deassert/status +- Registering a devm cleanup action to release the clock at removal +- Removed rzv2h_usbphy_assert_helper() and its call in + rzv2h_usb2phy_reset_probe() + +This ensures the PHY clock remains enabled for the entire device lifetime, +preventing instability and aligning with hardware requirements. 
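+
+The probe-time pairing follows the usual devm pattern: take the PM
+runtime reference once and register a cleanup action so it is only
+dropped when the device is unbound, as in the hunk below:
+
+    error = pm_runtime_resume_and_get(dev);
+    if (error)
+        return dev_err_probe(dev, error,
+                             "pm_runtime_resume_and_get failed\n");
+
+    /* releases the reference (and the PHY clock) only at unbind time */
+    error = devm_add_action_or_reset(dev, rzv2h_usb2phy_reset_pm_runtime_put,
+                                     dev);
+    if (error)
+        return dev_err_probe(dev, error,
+                             "unable to register cleanup action\n");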
+ +Cc: stable@vger.kernel.org +Fixes: e3911d7f865b ("reset: Add USB2PHY port reset driver for Renesas RZ/V2H(P)") +Signed-off-by: Tommaso Merciai +Reviewed-by: Philipp Zabel +Signed-off-by: Philipp Zabel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/reset/reset-rzv2h-usb2phy.c | 64 ++++++++++-------------------------- + 1 file changed, 18 insertions(+), 46 deletions(-) + +--- a/drivers/reset/reset-rzv2h-usb2phy.c ++++ b/drivers/reset/reset-rzv2h-usb2phy.c +@@ -49,9 +49,10 @@ static inline struct rzv2h_usb2phy_reset + return container_of(rcdev, struct rzv2h_usb2phy_reset_priv, rcdev); + } + +-/* This function must be called only after pm_runtime_resume_and_get() has been called */ +-static void rzv2h_usbphy_assert_helper(struct rzv2h_usb2phy_reset_priv *priv) ++static int rzv2h_usbphy_reset_assert(struct reset_controller_dev *rcdev, ++ unsigned long id) + { ++ struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); + const struct rzv2h_usb2phy_reset_of_data *data = priv->data; + + scoped_guard(spinlock, &priv->lock) { +@@ -60,24 +61,6 @@ static void rzv2h_usbphy_assert_helper(s + } + + usleep_range(11, 20); +-} +- +-static int rzv2h_usbphy_reset_assert(struct reset_controller_dev *rcdev, +- unsigned long id) +-{ +- struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); +- struct device *dev = priv->dev; +- int ret; +- +- ret = pm_runtime_resume_and_get(dev); +- if (ret) { +- dev_err(dev, "pm_runtime_resume_and_get failed\n"); +- return ret; +- } +- +- rzv2h_usbphy_assert_helper(priv); +- +- pm_runtime_put(dev); + + return 0; + } +@@ -87,14 +70,6 @@ static int rzv2h_usbphy_reset_deassert(s + { + struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); + const struct rzv2h_usb2phy_reset_of_data *data = priv->data; +- struct device *dev = priv->dev; +- int ret; +- +- ret = pm_runtime_resume_and_get(dev); +- if (ret) { +- dev_err(dev, "pm_runtime_resume_and_get failed\n"); +- return ret; +- } + + scoped_guard(spinlock, &priv->lock) { + writel(data->reset_deassert_val, priv->base + data->reset_reg); +@@ -102,8 +77,6 @@ static int rzv2h_usbphy_reset_deassert(s + writel(data->reset_release_val, priv->base + data->reset_reg); + } + +- pm_runtime_put(dev); +- + return 0; + } + +@@ -111,20 +84,10 @@ static int rzv2h_usbphy_reset_status(str + unsigned long id) + { + struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); +- struct device *dev = priv->dev; +- int ret; + u32 reg; + +- ret = pm_runtime_resume_and_get(dev); +- if (ret) { +- dev_err(dev, "pm_runtime_resume_and_get failed\n"); +- return ret; +- } +- + reg = readl(priv->base + priv->data->reset_reg); + +- pm_runtime_put(dev); +- + return (reg & priv->data->reset_status_bits) == priv->data->reset_status_bits; + } + +@@ -141,6 +104,11 @@ static int rzv2h_usb2phy_reset_of_xlate( + return 0; + } + ++static void rzv2h_usb2phy_reset_pm_runtime_put(void *data) ++{ ++ pm_runtime_put(data); ++} ++ + static int rzv2h_usb2phy_reset_probe(struct platform_device *pdev) + { + const struct rzv2h_usb2phy_reset_of_data *data; +@@ -175,14 +143,14 @@ static int rzv2h_usb2phy_reset_probe(str + if (error) + return dev_err_probe(dev, error, "pm_runtime_resume_and_get failed\n"); + ++ error = devm_add_action_or_reset(dev, rzv2h_usb2phy_reset_pm_runtime_put, ++ dev); ++ if (error) ++ return dev_err_probe(dev, error, "unable to register cleanup action\n"); ++ + for (unsigned int i = 0; i < data->init_val_count; i++) + writel(data->init_vals[i].val, priv->base + data->init_vals[i].reg); + 
+- /* keep usb2phy in asserted state */ +- rzv2h_usbphy_assert_helper(priv); +- +- pm_runtime_put(dev); +- + priv->rcdev.ops = &rzv2h_usbphy_reset_ops; + priv->rcdev.of_reset_n_cells = 0; + priv->rcdev.nr_resets = 1; +@@ -190,7 +158,11 @@ static int rzv2h_usb2phy_reset_probe(str + priv->rcdev.of_node = dev->of_node; + priv->rcdev.dev = dev; + +- return devm_reset_controller_register(dev, &priv->rcdev); ++ error = devm_reset_controller_register(dev, &priv->rcdev); ++ if (error) ++ return dev_err_probe(dev, error, "could not register reset controller\n"); ++ ++ return 0; + } + + /* diff --git a/queue-7.0/sched-use-u64-for-bandwidth-ratio-calculations.patch b/queue-7.0/sched-use-u64-for-bandwidth-ratio-calculations.patch new file mode 100644 index 0000000000..3bb229561b --- /dev/null +++ b/queue-7.0/sched-use-u64-for-bandwidth-ratio-calculations.patch @@ -0,0 +1,69 @@ +From c6e80201e057dfb7253385e60bf541121bf5dc33 Mon Sep 17 00:00:00 2001 +From: Joseph Salisbury +Date: Fri, 3 Apr 2026 17:00:14 -0400 +Subject: sched: Use u64 for bandwidth ratio calculations + +From: Joseph Salisbury + +commit c6e80201e057dfb7253385e60bf541121bf5dc33 upstream. + +to_ratio() computes BW_SHIFT-scaled bandwidth ratios from u64 period and +runtime values, but it returns unsigned long. tg_rt_schedulable() also +stores the current group limit and the accumulated child sum in unsigned +long. + +On 32-bit builds, large bandwidth ratios can be truncated and the RT +group sum can wrap when enough siblings are present. That can let an +overcommitted RT hierarchy pass the schedulability check, and it also +narrows the helper result for other callers. + +Return u64 from to_ratio() and use u64 for the RT group totals so +bandwidth ratios are preserved and compared at full width on both 32-bit +and 64-bit builds. 
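+
+As a quick size check: to_ratio() scales runtime/period by BW_SHIFT
+(20 bits), so a full-bandwidth entry contributes BW_UNIT = 1 << 20.
+An unsigned long on a 32-bit build holds at most 2^32 - 1, which is why
+the accumulated sum can wrap with enough siblings, while u64 keeps it
+exact:
+
+    BW_UNIT          = 1 << 20      = 1048576
+    4096 * BW_UNIT   = 4096 << 20   = 2^32     -> wraps a 32-bit unsigned long
+    u64 range        = up to 2^64-1 -> no wrap for any realistic sum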
+ +Fixes: b40b2e8eb521 ("sched: rt: multi level group constraints") +Assisted-by: Codex:GPT-5 +Signed-off-by: Joseph Salisbury +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260403210014.2713404-1-joseph.salisbury@oracle.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/core.c | 2 +- + kernel/sched/rt.c | 2 +- + kernel/sched/sched.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4738,7 +4738,7 @@ void sched_post_fork(struct task_struct + scx_post_fork(p); + } + +-unsigned long to_ratio(u64 period, u64 runtime) ++u64 to_ratio(u64 period, u64 runtime) + { + if (runtime == RUNTIME_INF) + return BW_UNIT; +--- a/kernel/sched/rt.c ++++ b/kernel/sched/rt.c +@@ -2652,7 +2652,7 @@ static int tg_rt_schedulable(struct task + { + struct rt_schedulable_data *d = data; + struct task_group *child; +- unsigned long total, sum = 0; ++ u64 total, sum = 0; + u64 period, runtime; + + period = ktime_to_ns(tg->rt_bandwidth.rt_period); +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2899,7 +2899,7 @@ extern void init_cfs_throttle_work(struc + #define MAX_BW_BITS (64 - BW_SHIFT) + #define MAX_BW ((1ULL << MAX_BW_BITS) - 1) + +-extern unsigned long to_ratio(u64 period, u64 runtime); ++extern u64 to_ratio(u64 period, u64 runtime); + + extern void init_entity_runnable_average(struct sched_entity *se); + extern void post_init_entity_util_avg(struct task_struct *p); diff --git a/queue-7.0/selftests-landlock-drain-stale-audit-records-on-init.patch b/queue-7.0/selftests-landlock-drain-stale-audit-records-on-init.patch new file mode 100644 index 0000000000..acf8422ea0 --- /dev/null +++ b/queue-7.0/selftests-landlock-drain-stale-audit-records-on-init.patch @@ -0,0 +1,124 @@ +From 3647a4977fb73da385e5a29b9775a4749733470d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Thu, 2 Apr 2026 21:26:04 +0200 +Subject: selftests/landlock: Drain stale audit records on init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit 3647a4977fb73da385e5a29b9775a4749733470d upstream. + +Non-audit Landlock tests generate audit records as side effects when +audit_enabled is non-zero (e.g. from boot configuration). These records +accumulate in the kernel audit backlog while no audit daemon socket is +open. When the next test opens a new netlink socket and registers as +the audit daemon, the stale backlog is delivered, causing baseline +record count checks to fail spuriously. + +Fix this by draining all pending records in audit_init() right after +setting the receive timeout. The 1-usec SO_RCVTIMEO causes audit_recv() +to return -EAGAIN once the backlog is empty, naturally terminating the +drain loop. + +Domain deallocation records are emitted asynchronously from a work +queue, so they may still arrive after the drain. Remove records.domain +== 0 checks that are not preceded by audit_match_record() calls, which +would otherwise consume stale records before the count. Document this +constraint above audit_count_records(). + +Increasing the drain timeout to catch in-flight deallocation records was +considered but rejected: a longer timeout adds latency to every +audit_init() call even when no stale record is pending, and any fixed +timeout is still not guaranteed to catch all records under load. +Removing the unprotected checks is simpler and avoids the spurious +failures. 
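+
+The drain itself stays trivial because the 1-usec SO_RCVTIMEO set just
+before it makes audit_recv() return -EAGAIN as soon as the backlog is
+empty, as in the hunk below:
+
+    /* consume every pending record; stops on -EAGAIN once drained */
+    while (audit_recv(fd, NULL) == 0)
+        ;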
+ +Cc: Günther Noack +Cc: stable@vger.kernel.org +Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") +Reviewed-by: Günther Noack +Link: https://lore.kernel.org/r/20260402192608.1458252-4-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/landlock/audit.h | 19 +++++++++++ + tools/testing/selftests/landlock/audit_test.c | 2 - + tools/testing/selftests/landlock/ptrace_test.c | 1 + tools/testing/selftests/landlock/scoped_abstract_unix_test.c | 1 + 4 files changed, 19 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/landlock/audit.h ++++ b/tools/testing/selftests/landlock/audit.h +@@ -338,6 +338,15 @@ struct audit_records { + size_t domain; + }; + ++/* ++ * WARNING: Do not assert records.domain == 0 without a preceding ++ * audit_match_record() call. Domain deallocation records are emitted ++ * asynchronously from kworker threads and can arrive after the drain in ++ * audit_init(), corrupting the domain count. A preceding audit_match_record() ++ * call consumes stale records while scanning, making the assertion safe in ++ * practice because stale deallocation records arrive before the expected access ++ * records. ++ */ + static int audit_count_records(int audit_fd, struct audit_records *records) + { + struct audit_message msg; +@@ -391,6 +400,16 @@ static int audit_init(void) + if (err) + return -errno; + ++ /* ++ * Drains stale audit records that accumulated in the kernel backlog ++ * while no audit daemon socket was open. This happens when non-audit ++ * Landlock tests generate records while audit_enabled is non-zero (e.g. ++ * from boot configuration), or when domain deallocation records arrive ++ * asynchronously after a previous test's socket was closed. ++ */ ++ while (audit_recv(fd, NULL) == 0) ++ ; ++ + return fd; + } + +--- a/tools/testing/selftests/landlock/audit_test.c ++++ b/tools/testing/selftests/landlock/audit_test.c +@@ -733,7 +733,6 @@ TEST_F(audit_flags, signal) + } else { + EXPECT_EQ(1, records.access); + } +- EXPECT_EQ(0, records.domain); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); +@@ -922,7 +921,6 @@ TEST_F(audit_exec, signal_and_open) + /* Tests that there was no denial until now. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); +- EXPECT_EQ(0, records.domain); + + /* + * Wait for the child to do a first denied action by layer1 and +--- a/tools/testing/selftests/landlock/ptrace_test.c ++++ b/tools/testing/selftests/landlock/ptrace_test.c +@@ -342,7 +342,6 @@ TEST_F(audit, trace) + /* Makes sure there is no superfluous logged records. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); +- EXPECT_EQ(0, records.domain); + + yama_ptrace_scope = get_yama_ptrace_scope(); + ASSERT_LE(0, yama_ptrace_scope); +--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c ++++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c +@@ -312,7 +312,6 @@ TEST_F(scoped_audit, connect_to_child) + /* Makes sure there is no superfluous logged records. 
*/ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); +- EXPECT_EQ(0, records.domain); + + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); diff --git a/queue-7.0/selftests-landlock-fix-format-warning-for-__u64-in-net_test.patch b/queue-7.0/selftests-landlock-fix-format-warning-for-__u64-in-net_test.patch new file mode 100644 index 0000000000..95e4217cd0 --- /dev/null +++ b/queue-7.0/selftests-landlock-fix-format-warning-for-__u64-in-net_test.patch @@ -0,0 +1,40 @@ +From a060ac0b8c3345639f5f4a01e2c435d34adf7e3d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Thu, 2 Apr 2026 21:26:06 +0200 +Subject: selftests/landlock: Fix format warning for __u64 in net_test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit a060ac0b8c3345639f5f4a01e2c435d34adf7e3d upstream. + +On architectures where __u64 is unsigned long (e.g. powerpc64), using +%llx to format a __u64 triggers a -Wformat warning because %llx expects +unsigned long long. Cast the argument to unsigned long long. + +Cc: Günther Noack +Cc: stable@vger.kernel.org +Fixes: a549d055a22e ("selftests/landlock: Add network tests") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/r/202604020206.62zgOTeP-lkp@intel.com/ +Reviewed-by: Günther Noack +Link: https://lore.kernel.org/r/20260402192608.1458252-6-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/landlock/net_test.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/landlock/net_test.c ++++ b/tools/testing/selftests/landlock/net_test.c +@@ -1356,7 +1356,7 @@ TEST_F(mini, network_access_rights) + &net_port, 0)) + { + TH_LOG("Failed to add rule with access 0x%llx: %s", +- access, strerror(errno)); ++ (unsigned long long)access, strerror(errno)); + } + } + EXPECT_EQ(0, close(ruleset_fd)); diff --git a/queue-7.0/selftests-landlock-fix-snprintf-truncation-checks-in-audit-helpers.patch b/queue-7.0/selftests-landlock-fix-snprintf-truncation-checks-in-audit-helpers.patch new file mode 100644 index 0000000000..84a1ae72b6 --- /dev/null +++ b/queue-7.0/selftests-landlock-fix-snprintf-truncation-checks-in-audit-helpers.patch @@ -0,0 +1,49 @@ +From b566f7a4f0e4f15f78f2e5fac273fa954991e03a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Thu, 2 Apr 2026 21:26:02 +0200 +Subject: selftests/landlock: Fix snprintf truncation checks in audit helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit b566f7a4f0e4f15f78f2e5fac273fa954991e03a upstream. + +snprintf() returns the number of characters that would have been +written, excluding the terminating NUL byte. When the output is +truncated, this return value equals or exceeds the buffer size. Fix +matches_log_domain_allocated() and matches_log_domain_deallocated() to +detect truncation with ">=" instead of ">". 
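+
+A concrete case shows the off-by-one: formatting an 8-character string
+into an 8-byte buffer returns 8 and writes only 7 characters plus the
+terminating NUL, which a strict ">" comparison misses:
+
+    char buf[8];
+    int n = snprintf(buf, sizeof(buf), "abcdefgh");
+
+    /* n == 8, buf == "abcdefg": output truncated      */
+    /* n >  sizeof(buf) -> false, truncation missed    */
+    /* n >= sizeof(buf) -> true,  truncation detected  */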
+ +Cc: Günther Noack +Cc: stable@vger.kernel.org +Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") +Reviewed-by: Günther Noack +Link: https://lore.kernel.org/r/20260402192608.1458252-2-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/landlock/audit.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/landlock/audit.h ++++ b/tools/testing/selftests/landlock/audit.h +@@ -309,7 +309,7 @@ static int __maybe_unused matches_log_do + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); +- if (log_match_len > sizeof(log_match)) ++ if (log_match_len >= sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, +@@ -326,7 +326,7 @@ static int __maybe_unused matches_log_do + + log_match_len = snprintf(log_match, sizeof(log_match), log_template, + num_denials); +- if (log_match_len > sizeof(log_match)) ++ if (log_match_len >= sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, diff --git a/queue-7.0/selftests-landlock-skip-stale-records-in-audit_match_record.patch b/queue-7.0/selftests-landlock-skip-stale-records-in-audit_match_record.patch new file mode 100644 index 0000000000..4ff7339b2b --- /dev/null +++ b/queue-7.0/selftests-landlock-skip-stale-records-in-audit_match_record.patch @@ -0,0 +1,254 @@ +From 07c2572a87573b2a2f0fd6b9f538cd1aeef2eee7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Thu, 2 Apr 2026 21:26:05 +0200 +Subject: selftests/landlock: Skip stale records in audit_match_record() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit 07c2572a87573b2a2f0fd6b9f538cd1aeef2eee7 upstream. + +Domain deallocation records are emitted asynchronously from kworker +threads (via free_ruleset_work()). Stale deallocation records from a +previous test can arrive during the current test's deallocation read +loop and be picked up by audit_match_record() instead of the expected +record, causing a domain ID mismatch. The audit.layers test (which +creates 16 nested domains) is particularly vulnerable because it reads +16 deallocation records in sequence, providing a large window for stale +records to interleave. + +The same issue affects audit_flags.signal, where deallocation records +from a previous test (audit.layers) can leak into the next test and be +picked up by audit_match_record() instead of the expected record. + +Fix this by continuing to read records when the type matches but the +content pattern does not. Stale records are silently consumed, and the +loop only stops when both type and pattern match (or the socket times +out with -EAGAIN). + +Additionally, extend matches_log_domain_deallocated() with an +expected_domain_id parameter. When set, the regex pattern includes the +specific domain ID as a literal hex value, so that deallocation records +for a different domain do not match the pattern at all. This handles +the case where the stale record has the same denial count as the +expected one (e.g. both have denials=1), which the type+pattern loop +alone cannot distinguish. Callers that already know the expected domain +ID (from a prior denial or allocation record) now pass it to filter +precisely. 
+ +When expected_domain_id is set, matches_log_domain_deallocated() also +temporarily increases the socket timeout to audit_tv_dom_drop (1 second) +to wait for the asynchronous kworker deallocation, and restores +audit_tv_default afterward. This removes the need for callers to manage +the timeout switch manually. + +Cc: Günther Noack +Cc: stable@vger.kernel.org +Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") +Link: https://lore.kernel.org/r/20260402192608.1458252-5-mic@digikod.net +Signed-off-by: Mickaël Salaün +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/landlock/audit.h | 84 +++++++++++++++++++------- + tools/testing/selftests/landlock/audit_test.c | 34 ++++------ + 2 files changed, 78 insertions(+), 40 deletions(-) + +--- a/tools/testing/selftests/landlock/audit.h ++++ b/tools/testing/selftests/landlock/audit.h +@@ -249,9 +249,9 @@ static __maybe_unused char *regex_escape + static int audit_match_record(int audit_fd, const __u16 type, + const char *const pattern, __u64 *domain_id) + { +- struct audit_message msg; ++ struct audit_message msg, last_mismatch = {}; + int ret, err = 0; +- bool matches_record = !type; ++ int num_type_match = 0; + regmatch_t matches[2]; + regex_t regex; + +@@ -259,21 +259,35 @@ static int audit_match_record(int audit_ + if (ret) + return -EINVAL; + +- do { ++ /* ++ * Reads records until one matches both the expected type and the ++ * pattern. Type-matching records with non-matching content are ++ * silently consumed, which handles stale domain deallocation records ++ * from a previous test emitted asynchronously by kworker threads. ++ */ ++ while (true) { + memset(&msg, 0, sizeof(msg)); + err = audit_recv(audit_fd, &msg); +- if (err) ++ if (err) { ++ if (num_type_match) { ++ printf("DATA: %s\n", last_mismatch.data); ++ printf("ERROR: %d record(s) matched type %u" ++ " but not pattern: %s\n", ++ num_type_match, type, pattern); ++ } + goto out; ++ } ++ ++ if (type && msg.header.nlmsg_type != type) ++ continue; + +- if (msg.header.nlmsg_type == type) +- matches_record = true; +- } while (!matches_record); +- +- ret = regexec(®ex, msg.data, ARRAY_SIZE(matches), matches, 0); +- if (ret) { +- printf("DATA: %s\n", msg.data); +- printf("ERROR: no match for pattern: %s\n", pattern); +- err = -ENOENT; ++ ret = regexec(®ex, msg.data, ARRAY_SIZE(matches), matches, ++ 0); ++ if (!ret) ++ break; ++ ++ num_type_match++; ++ last_mismatch = msg; + } + + if (domain_id) { +@@ -316,21 +330,49 @@ static int __maybe_unused matches_log_do + domain_id); + } + +-static int __maybe_unused matches_log_domain_deallocated( +- int audit_fd, unsigned int num_denials, __u64 *domain_id) ++/* ++ * Matches a domain deallocation record. When expected_domain_id is non-zero, ++ * the pattern includes the specific domain ID so that stale deallocation ++ * records from a previous test (with a different domain ID) are skipped by ++ * audit_match_record(), and the socket timeout is temporarily increased to ++ * audit_tv_dom_drop to wait for the asynchronous kworker deallocation. 
++ */ ++static int __maybe_unused ++matches_log_domain_deallocated(int audit_fd, unsigned int num_denials, ++ __u64 expected_domain_id, __u64 *domain_id) + { + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=deallocated denials=%u$"; +- char log_match[sizeof(log_template) + 10]; +- int log_match_len; ++ static const char log_template_with_id[] = ++ "^audit([0-9.:]\\+): domain=\\(%llx\\)" ++ " status=deallocated denials=%u$"; ++ char log_match[sizeof(log_template_with_id) + 32]; ++ int log_match_len, err; ++ ++ if (expected_domain_id) ++ log_match_len = snprintf(log_match, sizeof(log_match), ++ log_template_with_id, ++ (unsigned long long)expected_domain_id, ++ num_denials); ++ else ++ log_match_len = snprintf(log_match, sizeof(log_match), ++ log_template, num_denials); + +- log_match_len = snprintf(log_match, sizeof(log_match), log_template, +- num_denials); + if (log_match_len >= sizeof(log_match)) + return -E2BIG; + +- return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, +- domain_id); ++ if (expected_domain_id) ++ setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, ++ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)); ++ ++ err = audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, ++ domain_id); ++ ++ if (expected_domain_id) ++ setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, ++ sizeof(audit_tv_default)); ++ ++ return err; + } + + struct audit_records { +--- a/tools/testing/selftests/landlock/audit_test.c ++++ b/tools/testing/selftests/landlock/audit_test.c +@@ -139,23 +139,24 @@ TEST_F(audit, layers) + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->exit_code = KSFT_FAIL; + +- /* Purges log from deallocated domains. */ +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); ++ /* ++ * Purges log from deallocated domains. Records arrive in LIFO order ++ * (innermost domain first) because landlock_put_hierarchy() walks the ++ * chain sequentially in a single kworker context. ++ */ + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { + __u64 deallocated_dom = 2; + + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, ++ (*domain_stack)[i], + &deallocated_dom)); + EXPECT_EQ((*domain_stack)[i], deallocated_dom) + { + TH_LOG("Failed to match domain %llx (#%d)", +- (*domain_stack)[i], i); ++ (unsigned long long)(*domain_stack)[i], i); + } + } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_default, sizeof(audit_tv_default))); + EXPECT_EQ(0, close(ruleset_fd)); + } + +@@ -271,13 +272,9 @@ TEST_F(audit, thread) + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); +- EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, +- &deallocated_dom)); ++ EXPECT_EQ(0, matches_log_domain_deallocated( ++ self->audit_fd, 1, denial_dom, &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_default, sizeof(audit_tv_default))); + } + + /* +@@ -753,22 +750,21 @@ TEST_F(audit_flags, signal) + + if (variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) { ++ /* ++ * No deallocation record: denials=0 never matches a real ++ * record. 
++ */ + EXPECT_EQ(-EAGAIN, +- matches_log_domain_deallocated(self->audit_fd, 0, ++ matches_log_domain_deallocated(self->audit_fd, 0, 0, + &deallocated_dom)); + EXPECT_EQ(deallocated_dom, 2); + } else { +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_dom_drop, +- sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2, ++ *self->domain_id, + &deallocated_dom)); + EXPECT_NE(deallocated_dom, 2); + EXPECT_NE(deallocated_dom, 0); + EXPECT_EQ(deallocated_dom, *self->domain_id); +- EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, +- &audit_tv_default, +- sizeof(audit_tv_default))); + } + } + diff --git a/queue-7.0/selftests-mqueue-fix-incorrectly-named-file.patch b/queue-7.0/selftests-mqueue-fix-incorrectly-named-file.patch new file mode 100644 index 0000000000..95a67fdced --- /dev/null +++ b/queue-7.0/selftests-mqueue-fix-incorrectly-named-file.patch @@ -0,0 +1,41 @@ +From 64fac99037689020ad97e472ae898e96ea3616dc Mon Sep 17 00:00:00 2001 +From: Simon Liebold +Date: Thu, 12 Mar 2026 14:02:00 +0000 +Subject: selftests/mqueue: Fix incorrectly named file + +From: Simon Liebold + +commit 64fac99037689020ad97e472ae898e96ea3616dc upstream. + +Commit 85506aca2eb4 ("selftests/mqueue: Set timeout to 180 seconds") +intended to increase the timeout for mq_perf_tests from the default +kselftest limit of 45 seconds to 180 seconds. + +Unfortunately, the file storing this information was incorrectly named +`setting` instead of `settings`, causing the kselftest runner not to +pick up the limit and keep using the default 45 seconds limit. + +Fix this by renaming it to `settings` to ensure that the kselftest +runner uses the increased timeout of 180 seconds for this test. + +Fixes: 85506aca2eb4 ("selftests/mqueue: Set timeout to 180 seconds") +Cc: # 5.10.y +Signed-off-by: Simon Liebold +Link: https://lore.kernel.org/r/20260312140200.2224850-1-simonlie@amazon.de +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mqueue/{setting => settings} | 0 + tools/testing/selftests/mqueue/setting | 1 - + tools/testing/selftests/mqueue/settings | 1 + + 2 files changed, 1 insertion(+), 1 deletion(-) + rename tools/testing/selftests/mqueue/{setting => settings} (100%) + +--- a/tools/testing/selftests/mqueue/setting ++++ /dev/null +@@ -1 +0,0 @@ +-timeout=180 +--- /dev/null ++++ b/tools/testing/selftests/mqueue/settings +@@ -0,0 +1 @@ ++timeout=180 diff --git a/queue-7.0/series b/queue-7.0/series index c2795f5266..bbf6748899 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -69,3 +69,47 @@ alsa-aoa-i2sbus-fix-of-node-lifetime-handling.patch alsa-aoa-skip-devices-with-no-codecs-in-i2sbus_resume.patch alsa-ctxfi-add-fallback-to-default-rsr-for-s-pdif.patch alsa-seq_oss-return-full-count-for-successful-seq_fullsize-writes.patch +erofs-fix-the-out-of-bounds-nameoff-handling-for-trailing-dirents.patch +ipmi-ssif-clean-up-kthread-on-errors.patch +jbd2-fix-deadlock-in-jbd2_journal_cancel_revoke.patch +kvm-selftests-fix-reserved-value-wrmsr-testcase-for-multi-feature-msrs.patch +md-raid10-fix-deadlock-with-check-operation-and-nowait-requests.patch +media-rc-igorplugusb-heed-coherency-rules.patch +media-rockchip-rkcif-fix-off-by-one-bugs.patch +media-rockchip-rkcif-comply-with-minimum-number-of-buffers-requirement.patch +mfd-stpmic1-attempt-system-shutdown-twice-in-case-pmic-is-confused.patch +mm-alloc_tag-clear-codetag-for-pages-allocated-before-page_ext-initialization.patch 
+mm-damon-core-fix-damon_call-vs-kdamond_fn-exit-race.patch +mm-damon-core-fix-damos_walk-vs-kdamond_fn-exit-race.patch +mm-hugetlb-fix-early-boot-crash-on-parameters-without-separator.patch +mtd-docg3-fix-use-after-free-in-docg3_release.patch +nvme-pci-add-nvme_quirk_disable_write_zeroes-for-kingston-om3sgp4.patch +nvme-respect-nvme_quirk_disable_write_zeroes-when-wzsl-is-set.patch +parisc-_llseek-syscall-is-only-available-for-32-bit-userspace.patch +parisc-drop-ip_fast_csum-inline-assembly-implementation.patch +parisc-led-fix-reference-leak-on-failed-device-registration.patch +pci-cadence-use-cdns_pcie_read_sz-for-byte-or-word-read-access.patch +pci-imx6-fix-reference-clock-source-selection-for-i.mx95.patch +perf-annotate-use-jump__delete-when-freeing-loongarch-jumps.patch +rdma-mana_ib-disable-rx-steering-on-rss-qp-destroy.patch +remoteproc-xlnx-only-access-buffer-information-if-ipi-is-buffered.patch +reset-rzv2h-usb2phy-keep-phy-clock-enabled-for-entire-device-lifetime.patch +sched-use-u64-for-bandwidth-ratio-calculations.patch +selftests-mqueue-fix-incorrectly-named-file.patch +landlock-fix-log_subdomains_off-inheritance-across-fork.patch +landlock-allow-tsync-with-log_subdomains_off-and-fd-1.patch +selftests-landlock-drain-stale-audit-records-on-init.patch +selftests-landlock-fix-format-warning-for-__u64-in-net_test.patch +selftests-landlock-fix-snprintf-truncation-checks-in-audit-helpers.patch +selftests-landlock-skip-stale-records-in-audit_match_record.patch +rbd-fix-null-ptr-deref-when-device_add_disk-fails.patch +mm-zone_device-do-not-touch-device-folio-after-calling-folio_free.patch +block-fix-zone-write-plugs-refcount-handling-in-disk_zone_wplug_schedule_bio_work.patch +io_uring-zcrx-return-back-two-step-unregistration.patch +io_uring-timeout-check-unused-sqe-fields.patch +block-relax-pgmap-check-in-bio_add_page-for-compatible-zone-device-pages.patch +iio-adc-ti-ads7950-use-iio_push_to_buffers_with_ts_unaligned.patch +io_uring-register-fix-ring-resizing-with-mixed-large-sqes-cqes.patch +io_uring-zcrx-fix-user_struct-uaf.patch +io_uring-poll-fix-signed-comparison-in-io_poll_get_ownership.patch +io_uring-poll-ensure-epoll_oneshot-is-propagated-for-epoll_uring_wake.patch