From: Greg Kroah-Hartman Date: Mon, 26 Feb 2024 12:08:10 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.19.308~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=77de24526a9a8fb45318de8a0c5d0c6965e33570;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch dm-crypt-recheck-the-integrity-tag-after-a-failure.patch dm-integrity-recheck-the-integrity-tag-after-a-failure.patch dm-verity-recheck-the-hash-after-a-failure.patch docs-instruct-latex-to-cope-with-deeper-nesting.patch drm-amd-display-adjust-few-initialization-order-in-dm.patch drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch md-fix-missing-release-of-active_io-for-flush.patch mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch 
mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch mm-swap-fix-race-when-skipping-swapcache.patch platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch scsi-target-pscsi-fix-bio_put-for-error-case.patch selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch sparc-fix-undefined-reference-to-fb_is_primary_device.patch x86-bugs-add-asm-helpers-for-executing-verw.patch --- diff --git a/queue-6.6/accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch b/queue-6.6/accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch new file mode 100644 index 00000000000..38bef7c355f --- /dev/null +++ b/queue-6.6/accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch @@ -0,0 +1,44 @@ +From eb0d253ff9c74dee30aa92fe460b825eb28acd73 Mon Sep 17 00:00:00 2001 +From: Andrzej Kacprowski +Date: Tue, 20 Feb 2024 14:16:24 +0100 +Subject: accel/ivpu: Don't enable any tiles by default on VPU40xx + +From: Andrzej Kacprowski + +commit eb0d253ff9c74dee30aa92fe460b825eb28acd73 upstream. + +There is no point in requesting 1 tile on VPU40xx as the FW will +probably need more tiles to run workloads, so it will have to +reconfigure PLL anyway. Don't enable any tiles and allow the FW to +perform initial tile configuration. + +This improves NPU boot stability as the tiles are always enabled only +by the FW from the same initial state. 
+ +Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") +Cc: stable@vger.kernel.org +Signed-off-by: Andrzej Kacprowski +Signed-off-by: Jacek Lawrynowicz +Reviewed-by: Jeffrey Hugo +Link: https://patchwork.freedesktop.org/patch/msgid/20240220131624.1447813-1-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c +index 1c995307c113..a1523d0b1ef3 100644 +--- a/drivers/accel/ivpu/ivpu_hw_40xx.c ++++ b/drivers/accel/ivpu/ivpu_hw_40xx.c +@@ -24,7 +24,7 @@ + #define SKU_HW_ID_SHIFT 16u + #define SKU_HW_ID_MASK 0xffff0000u + +-#define PLL_CONFIG_DEFAULT 0x1 ++#define PLL_CONFIG_DEFAULT 0x0 + #define PLL_CDYN_DEFAULT 0x80 + #define PLL_EPP_DEFAULT 0x80 + #define PLL_REF_CLK_FREQ (50 * 1000000) +-- +2.44.0 + diff --git a/queue-6.6/ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch b/queue-6.6/ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch new file mode 100644 index 00000000000..d3f05e1778c --- /dev/null +++ b/queue-6.6/ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch @@ -0,0 +1,34 @@ +From 4b085736e44dbbe69b5eea1a8a294f404678a1f4 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 11 Jan 2024 20:51:22 +0900 +Subject: ata: libata-core: Do not try to set sleeping devices to standby + +From: Damien Le Moal + +commit 4b085736e44dbbe69b5eea1a8a294f404678a1f4 upstream. + +In ata ata_dev_power_set_standby(), check that the target device is not +sleeping. If it is, there is no need to do anything. 
+ +Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Signed-off-by: Niklas Cassel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/libata-core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -2034,6 +2034,10 @@ void ata_dev_power_set_active(struct ata + struct ata_taskfile tf; + unsigned int err_mask; + ++ /* If the device is already sleeping, do nothing. */ ++ if (dev->flags & ATA_DFLAG_SLEEPING) ++ return; ++ + /* + * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only + * if supported by the device. diff --git a/queue-6.6/btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch b/queue-6.6/btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch new file mode 100644 index 00000000000..5eb6a751793 --- /dev/null +++ b/queue-6.6/btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch @@ -0,0 +1,111 @@ +From e42b9d8b9ea2672811285e6a7654887ff64d23f3 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 7 Feb 2024 10:00:42 +1030 +Subject: btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size + +From: Qu Wenruo + +commit e42b9d8b9ea2672811285e6a7654887ff64d23f3 upstream. + +[BUG] +With the following file extent layout, defrag would do unnecessary IO +and result more on-disk space usage. 
+ + # mkfs.btrfs -f $dev + # mount $dev $mnt + # xfs_io -f -c "pwrite 0 40m" $mnt/foobar + # sync + # xfs_io -f -c "pwrite 40m 16k" $mnt/foobar + # sync + +Above command would lead to the following file extent layout: + + item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 + generation 7 type 1 (regular) + extent data disk byte 298844160 nr 41943040 + extent data offset 0 nr 41943040 ram 41943040 + extent compression 0 (none) + item 7 key (257 EXTENT_DATA 41943040) itemoff 15763 itemsize 53 + generation 8 type 1 (regular) + extent data disk byte 13631488 nr 16384 + extent data offset 0 nr 16384 ram 16384 + extent compression 0 (none) + +Which is mostly fine. We can allow the final 16K to be merged with the +previous 40M, but it's upon the end users' preference. + +But if we defrag the file using the default parameters, it would result +worse file layout: + + # btrfs filesystem defrag $mnt/foobar + # sync + + item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 + generation 7 type 1 (regular) + extent data disk byte 298844160 nr 41943040 + extent data offset 0 nr 8650752 ram 41943040 + extent compression 0 (none) + item 7 key (257 EXTENT_DATA 8650752) itemoff 15763 itemsize 53 + generation 9 type 1 (regular) + extent data disk byte 340787200 nr 33292288 + extent data offset 0 nr 33292288 ram 33292288 + extent compression 0 (none) + item 8 key (257 EXTENT_DATA 41943040) itemoff 15710 itemsize 53 + generation 8 type 1 (regular) + extent data disk byte 13631488 nr 16384 + extent data offset 0 nr 16384 ram 16384 + extent compression 0 (none) + +Note the original 40M extent is still there, but a new 32M extent is +created for no benefit at all. + +[CAUSE] +There is an existing check to make sure we won't defrag a large enough +extent (the threshold is by default 32M). 
+ +But the check is using the length to the end of the extent: + + range_len = em->len - (cur - em->start); + + /* Skip too large extent */ + if (range_len >= extent_thresh) + goto next; + +This means, for the first 8MiB of the extent, the range_len is always +smaller than the default threshold, and would not be defragged. +But after the first 8MiB, the remaining part would fit the requirement, +and be defragged. + +Such different behavior inside the same extent caused the above problem, +and we should avoid different defrag decision inside the same extent. + +[FIX] +Instead of using @range_len, just use @em->len, so that we have a +consistent decision among the same file extent. + +Now with this fix, we won't touch the extent, thus not making it any +worse. + +Reported-by: Filipe Manana +Fixes: 0cb5950f3f3b ("btrfs: fix deadlock when reserving space during defrag") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Boris Burkov +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/defrag.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/defrag.c ++++ b/fs/btrfs/defrag.c +@@ -903,7 +903,7 @@ static int defrag_collect_targets(struct + goto add; + + /* Skip too large extent */ +- if (range_len >= extent_thresh) ++ if (em->len >= extent_thresh) + goto next; + + /* diff --git a/queue-6.6/btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch b/queue-6.6/btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch new file mode 100644 index 00000000000..c1b13c12421 --- /dev/null +++ b/queue-6.6/btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch @@ -0,0 +1,241 @@ +From b0ad381fa7690244802aed119b478b4bdafc31dd Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 12 Feb 2024 11:56:02 -0500 +Subject: btrfs: fix deadlock with fiemap and extent locking + +From: Josef Bacik + +commit b0ad381fa7690244802aed119b478b4bdafc31dd upstream. 
+ +While working on the patchset to remove extent locking I got a lockdep +splat with fiemap and pagefaulting with my new extent lock replacement +lock. + +This deadlock exists with our normal code, we just don't have lockdep +annotations with the extent locking so we've never noticed it. + +Since we're copying the fiemap extent to user space on every iteration +we have the chance of pagefaulting. Because we hold the extent lock for +the entire range we could mkwrite into a range in the file that we have +mmap'ed. This would deadlock with the following stack trace + +[<0>] lock_extent+0x28d/0x2f0 +[<0>] btrfs_page_mkwrite+0x273/0x8a0 +[<0>] do_page_mkwrite+0x50/0xb0 +[<0>] do_fault+0xc1/0x7b0 +[<0>] __handle_mm_fault+0x2fa/0x460 +[<0>] handle_mm_fault+0xa4/0x330 +[<0>] do_user_addr_fault+0x1f4/0x800 +[<0>] exc_page_fault+0x7c/0x1e0 +[<0>] asm_exc_page_fault+0x26/0x30 +[<0>] rep_movs_alternative+0x33/0x70 +[<0>] _copy_to_user+0x49/0x70 +[<0>] fiemap_fill_next_extent+0xc8/0x120 +[<0>] emit_fiemap_extent+0x4d/0xa0 +[<0>] extent_fiemap+0x7f8/0xad0 +[<0>] btrfs_fiemap+0x49/0x80 +[<0>] __x64_sys_ioctl+0x3e1/0xb50 +[<0>] do_syscall_64+0x94/0x1a0 +[<0>] entry_SYSCALL_64_after_hwframe+0x6e/0x76 + +I wrote an fstest to reproduce this deadlock without my replacement lock +and verified that the deadlock exists with our existing locking. + +To fix this simply don't take the extent lock for the entire duration of +the fiemap. This is safe in general because we keep track of where we +are when we're searching the tree, so if an ordered extent updates in +the middle of our fiemap call we'll still emit the correct extents +because we know what offset we were on before. + +The only place we maintain the lock is searching delalloc. Since the +delalloc stuff can change during writeback we want to lock the extent +range so we have a consistent view of delalloc at the time we're +checking to see if we need to set the delalloc flag. 
+ +With this patch applied we no longer deadlock with my testcase. + +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 62 +++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 45 insertions(+), 17 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -2646,16 +2646,34 @@ static int fiemap_process_hole(struct bt + * it beyond i_size. + */ + while (cur_offset < end && cur_offset < i_size) { ++ struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; + u64 prealloc_start; ++ u64 lockstart; ++ u64 lockend; + u64 prealloc_len = 0; + bool delalloc; + ++ lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); ++ lockend = round_up(end, inode->root->fs_info->sectorsize); ++ ++ /* ++ * We are only locking for the delalloc range because that's the ++ * only thing that can change here. With fiemap we have a lock ++ * on the inode, so no buffered or direct writes can happen. ++ * ++ * However mmaps and normal page writeback will cause this to ++ * change arbitrarily. We have to lock the extent lock here to ++ * make sure that nobody messes with the tree while we're doing ++ * btrfs_find_delalloc_in_range. 
++ */ ++ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, + delalloc_cached_state, + &delalloc_start, + &delalloc_end); ++ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + break; + +@@ -2823,15 +2841,15 @@ int extent_fiemap(struct btrfs_inode *in + u64 start, u64 len) + { + const u64 ino = btrfs_ino(inode); +- struct extent_state *cached_state = NULL; + struct extent_state *delalloc_cached_state = NULL; + struct btrfs_path *path; + struct fiemap_cache cache = { 0 }; + struct btrfs_backref_share_check_ctx *backref_ctx; + u64 last_extent_end; + u64 prev_extent_end; +- u64 lockstart; +- u64 lockend; ++ u64 range_start; ++ u64 range_end; ++ const u64 sectorsize = inode->root->fs_info->sectorsize; + bool stopped = false; + int ret; + +@@ -2842,12 +2860,11 @@ int extent_fiemap(struct btrfs_inode *in + goto out; + } + +- lockstart = round_down(start, inode->root->fs_info->sectorsize); +- lockend = round_up(start + len, inode->root->fs_info->sectorsize); +- prev_extent_end = lockstart; ++ range_start = round_down(start, sectorsize); ++ range_end = round_up(start + len, sectorsize); ++ prev_extent_end = range_start; + + btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); +- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + + ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); + if (ret < 0) +@@ -2855,7 +2872,7 @@ int extent_fiemap(struct btrfs_inode *in + btrfs_release_path(path); + + path->reada = READA_FORWARD; +- ret = fiemap_search_slot(inode, path, lockstart); ++ ret = fiemap_search_slot(inode, path, range_start); + if (ret < 0) { + goto out_unlock; + } else if (ret > 0) { +@@ -2867,7 +2884,7 @@ int extent_fiemap(struct btrfs_inode *in + goto check_eof_delalloc; + } + +- while (prev_extent_end < lockend) { ++ while (prev_extent_end < range_end) { + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_file_extent_item 
*ei; + struct btrfs_key key; +@@ -2890,19 +2907,19 @@ int extent_fiemap(struct btrfs_inode *in + * The first iteration can leave us at an extent item that ends + * before our range's start. Move to the next item. + */ +- if (extent_end <= lockstart) ++ if (extent_end <= range_start) + goto next_item; + + backref_ctx->curr_leaf_bytenr = leaf->start; + + /* We have in implicit hole (NO_HOLES feature enabled). */ + if (prev_extent_end < key.offset) { +- const u64 range_end = min(key.offset, lockend) - 1; ++ const u64 hole_end = min(key.offset, range_end) - 1; + + ret = fiemap_process_hole(inode, fieinfo, &cache, + &delalloc_cached_state, + backref_ctx, 0, 0, 0, +- prev_extent_end, range_end); ++ prev_extent_end, hole_end); + if (ret < 0) { + goto out_unlock; + } else if (ret > 0) { +@@ -2912,7 +2929,7 @@ int extent_fiemap(struct btrfs_inode *in + } + + /* We've reached the end of the fiemap range, stop. */ +- if (key.offset >= lockend) { ++ if (key.offset >= range_end) { + stopped = true; + break; + } +@@ -3006,29 +3023,41 @@ check_eof_delalloc: + btrfs_free_path(path); + path = NULL; + +- if (!stopped && prev_extent_end < lockend) { ++ if (!stopped && prev_extent_end < range_end) { + ret = fiemap_process_hole(inode, fieinfo, &cache, + &delalloc_cached_state, backref_ctx, +- 0, 0, 0, prev_extent_end, lockend - 1); ++ 0, 0, 0, prev_extent_end, range_end - 1); + if (ret < 0) + goto out_unlock; +- prev_extent_end = lockend; ++ prev_extent_end = range_end; + } + + if (cache.cached && cache.offset + cache.len >= last_extent_end) { + const u64 i_size = i_size_read(&inode->vfs_inode); + + if (prev_extent_end < i_size) { ++ struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; ++ u64 lockstart; ++ u64 lockend; + bool delalloc; + ++ lockstart = round_down(prev_extent_end, sectorsize); ++ lockend = round_up(i_size, sectorsize); ++ ++ /* ++ * See the comment in fiemap_process_hole as to why ++ * we're doing the locking here. 
++ */ ++ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, + prev_extent_end, + i_size - 1, + &delalloc_cached_state, + &delalloc_start, + &delalloc_end); ++ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + cache.flags |= FIEMAP_EXTENT_LAST; + } else { +@@ -3039,7 +3068,6 @@ check_eof_delalloc: + ret = emit_last_fiemap_cache(fieinfo, &cache); + + out_unlock: +- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); + out: + free_extent_state(delalloc_cached_state); diff --git a/queue-6.6/cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch b/queue-6.6/cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch new file mode 100644 index 00000000000..824ad33cc07 --- /dev/null +++ b/queue-6.6/cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch @@ -0,0 +1,68 @@ +From e21a2f17566cbd64926fb8f16323972f7a064444 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Sat, 17 Feb 2024 16:14:31 +0800 +Subject: cachefiles: fix memory leak in cachefiles_add_cache() + +From: Baokun Li + +commit e21a2f17566cbd64926fb8f16323972f7a064444 upstream. + +The following memory leak was reported after unbinding /dev/cachefiles: + +================================================================== +unreferenced object 0xffff9b674176e3c0 (size 192): + comm "cachefilesd2", pid 680, jiffies 4294881224 + hex dump (first 32 bytes): + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace (crc ea38a44b): + [] kmem_cache_alloc+0x2d5/0x370 + [] prepare_creds+0x26/0x2e0 + [] cachefiles_determine_cache_security+0x1f/0x120 + [] cachefiles_add_cache+0x13c/0x3a0 + [] cachefiles_daemon_write+0x146/0x1c0 + [] vfs_write+0xcb/0x520 + [] ksys_write+0x69/0xf0 + [] do_syscall_64+0x72/0x140 + [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 +================================================================== + +Put the reference count of cache_cred in cachefiles_daemon_unbind() to +fix the problem. And also put cache_cred in cachefiles_add_cache() error +branch to avoid memory leaks. + +Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") +CC: stable@vger.kernel.org +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com +Acked-by: David Howells +Reviewed-by: Jingbo Xu +Reviewed-by: Jeff Layton +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/cachefiles/cache.c | 2 ++ + fs/cachefiles/daemon.c | 1 + + 2 files changed, 3 insertions(+) + +--- a/fs/cachefiles/cache.c ++++ b/fs/cachefiles/cache.c +@@ -168,6 +168,8 @@ error_unsupported: + dput(root); + error_open_root: + cachefiles_end_secure(cache, saved_cred); ++ put_cred(cache->cache_cred); ++ cache->cache_cred = NULL; + error_getsec: + fscache_relinquish_cache(cache_cookie); + cache->cache = NULL; +--- a/fs/cachefiles/daemon.c ++++ b/fs/cachefiles/daemon.c +@@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(str + cachefiles_put_directory(cache->graveyard); + cachefiles_put_directory(cache->store); + mntput(cache->mnt); ++ put_cred(cache->cache_cred); + + kfree(cache->rootdirname); + kfree(cache->secctx); diff --git a/queue-6.6/crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch b/queue-6.6/crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch new file mode 100644 index 00000000000..18bccb764ed --- /dev/null +++ b/queue-6.6/crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch @@ -0,0 
+1,52 @@ +From c0ec2a712daf133d9996a8a1b7ee2d4996080363 Mon Sep 17 00:00:00 2001 +From: zhenwei pi +Date: Tue, 30 Jan 2024 19:27:40 +0800 +Subject: crypto: virtio/akcipher - Fix stack overflow on memcpy + +From: zhenwei pi + +commit c0ec2a712daf133d9996a8a1b7ee2d4996080363 upstream. + +sizeof(struct virtio_crypto_akcipher_session_para) is less than +sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from +stack variable leads stack overflow. Clang reports this issue by +commands: +make -j CC=clang-14 mrproper >/dev/null 2>&1 +make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 +make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ + virtio_crypto_akcipher_algs.o + +Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") +Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ +Cc: +Signed-off-by: zhenwei pi +Tested-by: Nathan Chancellor # build +Acked-by: Michael S. Tsirkin +Acked-by: Jason Wang +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c ++++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +@@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher + } + + static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, +- struct virtio_crypto_ctrl_header *header, void *para, ++ struct virtio_crypto_ctrl_header *header, ++ struct virtio_crypto_akcipher_session_para *para, + const uint8_t *key, unsigned int keylen) + { + struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; +@@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_in + + ctrl = &vc_ctrl_req->ctrl; + memcpy(&ctrl->header, header, sizeof(ctrl->header)); +- memcpy(&ctrl->u, para, sizeof(ctrl->u)); ++ memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); + input = 
&vc_ctrl_req->input; + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + diff --git a/queue-6.6/cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch b/queue-6.6/cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch new file mode 100644 index 00000000000..a9af14cc0fd --- /dev/null +++ b/queue-6.6/cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch @@ -0,0 +1,139 @@ +From 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 16 Feb 2024 19:11:34 -0800 +Subject: cxl/acpi: Fix load failures due to single window creation failure + +From: Dan Williams + +commit 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 upstream. + +The expectation is that cxl_parse_cfwms() continues in the face the of +failure as evidenced by code like: + + cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); + if (IS_ERR(cxlrd)) + return 0; + +There are other error paths in that function which mistakenly follow +idiomatic expectations and return an error when they should not. Most of +those mistakes are innocuous checks that hardly ever fail in practice. +However, a recent change succeed in making the implementation more +fragile by applying an idiomatic, but still wrong "fix" [1]. In this +failure case the kernel reports: + + cxl root0: Failed to populate active decoder targets + cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200] + +...which is a real issue with that one window (to be fixed separately), +but ends up failing the entirety of cxl_acpi_probe(). + +Undo that recent breakage while also removing the confusion about +ignoring errors. Update all exits paths to return an error per typical +expectations and let an outer wrapper function handle dropping the +error. 
+ +Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1] +Cc: +Cc: Breno Leitao +Cc: Alison Schofield +Cc: Vishal Verma +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/acpi.c | 46 ++++++++++++++++++++++++++++------------------ + 1 file changed, 28 insertions(+), 18 deletions(-) + +--- a/drivers/cxl/acpi.c ++++ b/drivers/cxl/acpi.c +@@ -194,31 +194,27 @@ struct cxl_cfmws_context { + int id; + }; + +-static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, +- const unsigned long end) ++static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, ++ struct cxl_cfmws_context *ctx) + { + int target_map[CXL_DECODER_MAX_INTERLEAVE]; +- struct cxl_cfmws_context *ctx = arg; + struct cxl_port *root_port = ctx->root_port; + struct resource *cxl_res = ctx->cxl_res; + struct cxl_cxims_context cxims_ctx; + struct cxl_root_decoder *cxlrd; + struct device *dev = ctx->dev; +- struct acpi_cedt_cfmws *cfmws; + cxl_calc_hb_fn cxl_calc_hb; + struct cxl_decoder *cxld; + unsigned int ways, i, ig; + struct resource *res; + int rc; + +- cfmws = (struct acpi_cedt_cfmws *) header; +- + rc = cxl_acpi_cfmws_verify(dev, cfmws); + if (rc) { + dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", + cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1); +- return 0; ++ return rc; + } + + rc = eiw_to_ways(cfmws->interleave_ways, &ways); +@@ -254,7 +250,7 @@ static int cxl_parse_cfmws(union acpi_su + + cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); + if (IS_ERR(cxlrd)) +- return 0; ++ return PTR_ERR(cxlrd); + + cxld = &cxlrd->cxlsd.cxld; + cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); +@@ -295,16 +291,7 @@ err_xormap: + put_device(&cxld->dev); + else + rc = cxl_decoder_autoremove(dev, cxld); +- if (rc) { +- dev_err(dev, "Failed to add decode range: %pr", res); +- return rc; +- } +- dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", +- dev_name(&cxld->dev), +- 
phys_to_target_node(cxld->hpa_range.start), +- cxld->hpa_range.start, cxld->hpa_range.end); +- +- return 0; ++ return rc; + + err_insert: + kfree(res->name); +@@ -313,6 +300,29 @@ err_name: + return -ENOMEM; + } + ++static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, ++ const unsigned long end) ++{ ++ struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header; ++ struct cxl_cfmws_context *ctx = arg; ++ struct device *dev = ctx->dev; ++ int rc; ++ ++ rc = __cxl_parse_cfmws(cfmws, ctx); ++ if (rc) ++ dev_err(dev, ++ "Failed to add decode range: [%#llx - %#llx] (%d)\n", ++ cfmws->base_hpa, ++ cfmws->base_hpa + cfmws->window_size - 1, rc); ++ else ++ dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n", ++ phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa, ++ cfmws->base_hpa + cfmws->window_size - 1); ++ ++ /* never fail cxl_acpi load for a single window failure */ ++ return 0; ++} ++ + __mock struct acpi_device *to_cxl_host_bridge(struct device *host, + struct device *dev) + { diff --git a/queue-6.6/cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch b/queue-6.6/cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch new file mode 100644 index 00000000000..3f0050ed4a7 --- /dev/null +++ b/queue-6.6/cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch @@ -0,0 +1,56 @@ +From 0cab687205986491302cd2e440ef1d253031c221 Mon Sep 17 00:00:00 2001 +From: Robert Richter +Date: Fri, 16 Feb 2024 17:01:13 +0100 +Subject: cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window + +From: Robert Richter + +commit 0cab687205986491302cd2e440ef1d253031c221 upstream. + +The Linux CXL subsystem is built on the assumption that HPA == SPA. +That is, the host physical address (HPA) the HDM decoder registers are +programmed with are system physical addresses (SPA). 
+ +During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1, +8.1.3.8) are checked if the memory is enabled and the CXL range is in +a HPA window that is described in a CFMWS structure of the CXL host +bridge (cxl-3.1, 9.18.1.3). + +Now, if the HPA is not an SPA, the CXL range does not match a CFMWS +window and the CXL memory range will be disabled then. The HDM decoder +stops working which causes system memory being disabled and further a +system hang during HDM decoder initialization, typically when a CXL +enabled kernel boots. + +Prevent a system hang and do not disable the HDM decoder if the +decoder's CXL range is not found in a CFMWS window. + +Note the change only fixes a hardware hang, but does not implement +HPA/SPA translation. Support for this can be added in a follow on +patch series. + +Signed-off-by: Robert Richter +Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges") +Cc: +Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/pci.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/cxl/core/pci.c ++++ b/drivers/cxl/core/pci.c +@@ -475,9 +475,9 @@ int cxl_hdm_decode_init(struct cxl_dev_s + allowed++; + } + +- if (!allowed) { +- cxl_set_mem_enable(cxlds, 0); +- info->mem_enabled = 0; ++ if (!allowed && info->mem_enabled) { ++ dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); ++ return -ENXIO; + } + + /* diff --git a/queue-6.6/dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch b/queue-6.6/dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch new file mode 100644 index 00000000000..2b2d3e86784 --- /dev/null +++ b/queue-6.6/dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch @@ -0,0 +1,43 @@ +From 50c70240097ce41fe6bce6478b80478281e4d0f7 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: 
Mon, 19 Feb 2024 21:30:10 +0100 +Subject: dm-crypt: don't modify the data when using authenticated encryption + +From: Mikulas Patocka + +commit 50c70240097ce41fe6bce6478b80478281e4d0f7 upstream. + +It was said that authenticated encryption could produce invalid tag when +the data that is being encrypted is modified [1]. So, fix this problem by +copying the data into the clone bio first and then encrypt them inside the +clone bio. + +This may reduce performance, but it is needed to prevent the user from +corrupting the device by writing data with O_DIRECT and modifying them at +the same time. + +[1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-crypt.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -2117,6 +2117,12 @@ static void kcryptd_crypt_write_convert( + io->ctx.bio_out = clone; + io->ctx.iter_out = clone->bi_iter; + ++ if (crypt_integrity_aead(cc)) { ++ bio_copy_data(clone, io->base_bio); ++ io->ctx.bio_in = clone; ++ io->ctx.iter_in = clone->bi_iter; ++ } ++ + sector += bio_sectors(clone); + + crypt_inc_pending(io); diff --git a/queue-6.6/dm-crypt-recheck-the-integrity-tag-after-a-failure.patch b/queue-6.6/dm-crypt-recheck-the-integrity-tag-after-a-failure.patch new file mode 100644 index 00000000000..de12991f55c --- /dev/null +++ b/queue-6.6/dm-crypt-recheck-the-integrity-tag-after-a-failure.patch @@ -0,0 +1,212 @@ +From 42e15d12070b4ff9af2b980f1b65774c2dab0507 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 19 Feb 2024 21:31:11 +0100 +Subject: dm-crypt: recheck the integrity tag after a failure + +From: Mikulas Patocka + +commit 42e15d12070b4ff9af2b980f1b65774c2dab0507 upstream. + +If a userspace process reads (with O_DIRECT) multiple blocks into the same +buffer, dm-crypt reports an authentication error [1]. 
The error is +reported in a log and it may cause RAID leg being kicked out of the +array. + +This commit fixes dm-crypt, so that if integrity verification fails, the +data is read again into a kernel buffer (where userspace can't modify it) +and the integrity tag is rechecked. If the recheck succeeds, the content +of the kernel buffer is copied into the user buffer; if the recheck fails, +an integrity error is reported. + +[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 73 insertions(+), 16 deletions(-) + +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -62,6 +62,8 @@ struct convert_context { + struct skcipher_request *req; + struct aead_request *req_aead; + } r; ++ bool aead_recheck; ++ bool aead_failed; + + }; + +@@ -82,6 +84,8 @@ struct dm_crypt_io { + blk_status_t error; + sector_t sector; + ++ struct bvec_iter saved_bi_iter; ++ + struct rb_node rb_node; + } CRYPTO_MINALIGN_ATTR; + +@@ -1376,10 +1380,13 @@ static int crypt_convert_block_aead(stru + if (r == -EBADMSG) { + sector_t s = le64_to_cpu(*sector); + +- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", +- ctx->bio_in->bi_bdev, s); +- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", +- ctx->bio_in, s, 0); ++ ctx->aead_failed = true; ++ if (ctx->aead_recheck) { ++ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", ++ ctx->bio_in->bi_bdev, s); ++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", ++ ctx->bio_in, s, 0); ++ } + } + + if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) +@@ -1763,6 +1770,8 @@ static void crypt_io_init(struct dm_cryp + io->base_bio = bio; + io->sector = sector; + io->error = 0; ++ io->ctx.aead_recheck = false; ++ io->ctx.aead_failed = false; + io->ctx.r.req = NULL; + io->integrity_metadata = NULL; + 
io->integrity_metadata_from_pool = false; +@@ -1774,6 +1783,8 @@ static void crypt_inc_pending(struct dm_ + atomic_inc(&io->io_pending); + } + ++static void kcryptd_queue_read(struct dm_crypt_io *io); ++ + /* + * One of the bios was finished. Check for completion of + * the whole request and correctly clean up the buffer. +@@ -1787,6 +1798,15 @@ static void crypt_dec_pending(struct dm_ + if (!atomic_dec_and_test(&io->io_pending)) + return; + ++ if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && ++ cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { ++ io->ctx.aead_recheck = true; ++ io->ctx.aead_failed = false; ++ io->error = 0; ++ kcryptd_queue_read(io); ++ return; ++ } ++ + if (io->ctx.r.req) + crypt_free_req(cc, io->ctx.r.req, base_bio); + +@@ -1822,15 +1842,19 @@ static void crypt_endio(struct bio *clon + struct dm_crypt_io *io = clone->bi_private; + struct crypt_config *cc = io->cc; + unsigned int rw = bio_data_dir(clone); +- blk_status_t error; ++ blk_status_t error = clone->bi_status; ++ ++ if (io->ctx.aead_recheck && !error) { ++ kcryptd_queue_crypt(io); ++ return; ++ } + + /* + * free the processed pages + */ +- if (rw == WRITE) ++ if (rw == WRITE || io->ctx.aead_recheck) + crypt_free_buffer_pages(cc, clone); + +- error = clone->bi_status; + bio_put(clone); + + if (rw == READ && !error) { +@@ -1851,6 +1875,22 @@ static int kcryptd_io_read(struct dm_cry + struct crypt_config *cc = io->cc; + struct bio *clone; + ++ if (io->ctx.aead_recheck) { ++ if (!(gfp & __GFP_DIRECT_RECLAIM)) ++ return 1; ++ crypt_inc_pending(io); ++ clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); ++ if (unlikely(!clone)) { ++ crypt_dec_pending(io); ++ return 1; ++ } ++ clone->bi_iter.bi_sector = cc->start + io->sector; ++ crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); ++ io->saved_bi_iter = clone->bi_iter; ++ dm_submit_bio_remap(io->base_bio, clone); ++ return 0; ++ } ++ + /* + * We need the original biovec array in order to decrypt 
the whole bio + * data *afterwards* -- thanks to immutable biovecs we don't need to +@@ -2113,6 +2153,14 @@ dec: + + static void kcryptd_crypt_read_done(struct dm_crypt_io *io) + { ++ if (io->ctx.aead_recheck) { ++ if (!io->error) { ++ io->ctx.bio_in->bi_iter = io->saved_bi_iter; ++ bio_copy_data(io->base_bio, io->ctx.bio_in); ++ } ++ crypt_free_buffer_pages(io->cc, io->ctx.bio_in); ++ bio_put(io->ctx.bio_in); ++ } + crypt_dec_pending(io); + } + +@@ -2142,11 +2190,17 @@ static void kcryptd_crypt_read_convert(s + + crypt_inc_pending(io); + +- crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, +- io->sector); ++ if (io->ctx.aead_recheck) { ++ io->ctx.cc_sector = io->sector + cc->iv_offset; ++ r = crypt_convert(cc, &io->ctx, ++ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); ++ } else { ++ crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, ++ io->sector); + +- r = crypt_convert(cc, &io->ctx, +- test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); ++ r = crypt_convert(cc, &io->ctx, ++ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); ++ } + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue +@@ -2188,10 +2242,13 @@ static void kcryptd_async_done(void *dat + if (error == -EBADMSG) { + sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); + +- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", +- ctx->bio_in->bi_bdev, s); +- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", +- ctx->bio_in, s, 0); ++ ctx->aead_failed = true; ++ if (ctx->aead_recheck) { ++ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", ++ ctx->bio_in->bi_bdev, s); ++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", ++ ctx->bio_in, s, 0); ++ } + io->error = BLK_STS_PROTECTION; + } else if (error < 0) + io->error = BLK_STS_IOERR; +@@ -3117,7 +3174,7 @@ static int crypt_ctr_optional(struct dm_ + sval = strchr(opt_string + strlen("integrity:"), ':') + 1; + if 
(!strcasecmp(sval, "aead")) { + set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); +- } else if (strcasecmp(sval, "none")) { ++ } else if (strcasecmp(sval, "none")) { + ti->error = "Unknown integrity profile"; + return -EINVAL; + } diff --git a/queue-6.6/dm-integrity-recheck-the-integrity-tag-after-a-failure.patch b/queue-6.6/dm-integrity-recheck-the-integrity-tag-after-a-failure.patch new file mode 100644 index 00000000000..25b473f3deb --- /dev/null +++ b/queue-6.6/dm-integrity-recheck-the-integrity-tag-after-a-failure.patch @@ -0,0 +1,159 @@ +From c88f5e553fe38b2ffc4c33d08654e5281b297677 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 19 Feb 2024 21:27:39 +0100 +Subject: dm-integrity: recheck the integrity tag after a failure + +From: Mikulas Patocka + +commit c88f5e553fe38b2ffc4c33d08654e5281b297677 upstream. + +If a userspace process reads (with O_DIRECT) multiple blocks into the same +buffer, dm-integrity reports an error [1]. The error is reported in a log +and it may cause RAID leg being kicked out of the array. + +This commit fixes dm-integrity, so that if integrity verification fails, +the data is read again into a kernel buffer (where userspace can't modify +it) and the integrity tag is rechecked. If the recheck succeeds, the +content of the kernel buffer is copied into the user buffer; if the +recheck fails, an integrity error is reported. 
+ +[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 84 insertions(+), 9 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -278,6 +278,8 @@ struct dm_integrity_c { + + atomic64_t number_of_mismatches; + ++ mempool_t recheck_pool; ++ + struct notifier_block reboot_notifier; + }; + +@@ -1699,6 +1701,79 @@ failed: + get_random_bytes(result, ic->tag_size); + } + ++static void integrity_recheck(struct dm_integrity_io *dio) ++{ ++ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); ++ struct dm_integrity_c *ic = dio->ic; ++ struct bvec_iter iter; ++ struct bio_vec bv; ++ sector_t sector, logical_sector, area, offset; ++ char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; ++ struct page *page; ++ void *buffer; ++ ++ get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); ++ dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, ++ &dio->metadata_offset); ++ sector = get_data_sector(ic, area, offset); ++ logical_sector = dio->range.logical_sector; ++ ++ page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); ++ buffer = page_to_virt(page); ++ ++ __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { ++ unsigned pos = 0; ++ ++ do { ++ char *mem; ++ int r; ++ struct dm_io_request io_req; ++ struct dm_io_region io_loc; ++ io_req.bi_opf = REQ_OP_READ; ++ io_req.mem.type = DM_IO_KMEM; ++ io_req.mem.ptr.addr = buffer; ++ io_req.notify.fn = NULL; ++ io_req.client = ic->io; ++ io_loc.bdev = ic->dev->bdev; ++ io_loc.sector = sector; ++ io_loc.count = ic->sectors_per_block; ++ ++ r = dm_io(&io_req, 1, &io_loc, NULL); ++ if (unlikely(r)) { ++ dio->bi_status = errno_to_blk_status(r); ++ goto free_ret; ++ } ++ ++ 
integrity_sector_checksum(ic, logical_sector, buffer, ++ checksum_onstack); ++ r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, ++ &dio->metadata_offset, ic->tag_size, TAG_CMP); ++ if (r) { ++ if (r > 0) { ++ DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", ++ bio->bi_bdev, logical_sector); ++ atomic64_inc(&ic->number_of_mismatches); ++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", ++ bio, logical_sector, 0); ++ r = -EILSEQ; ++ } ++ dio->bi_status = errno_to_blk_status(r); ++ goto free_ret; ++ } ++ ++ mem = bvec_kmap_local(&bv); ++ memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); ++ kunmap_local(mem); ++ ++ pos += ic->sectors_per_block << SECTOR_SHIFT; ++ sector += ic->sectors_per_block; ++ logical_sector += ic->sectors_per_block; ++ } while (pos < bv.bv_len); ++ } ++free_ret: ++ mempool_free(page, &ic->recheck_pool); ++} ++ + static void integrity_metadata(struct work_struct *w) + { + struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); +@@ -1786,15 +1861,8 @@ again: + checksums_ptr - checksums, dio->op == REQ_OP_READ ? 
TAG_CMP : TAG_WRITE); + if (unlikely(r)) { + if (r > 0) { +- sector_t s; +- +- s = sector - ((r + ic->tag_size - 1) / ic->tag_size); +- DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", +- bio->bi_bdev, s); +- r = -EILSEQ; +- atomic64_inc(&ic->number_of_mismatches); +- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", +- bio, s, 0); ++ integrity_recheck(dio); ++ goto skip_io; + } + if (likely(checksums != checksums_onstack)) + kfree(checksums); +@@ -4271,6 +4339,12 @@ static int dm_integrity_ctr(struct dm_ta + goto bad; + } + ++ r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); ++ if (r) { ++ ti->error = "Cannot allocate mempool"; ++ goto bad; ++ } ++ + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", + WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->metadata_wq) { +@@ -4619,6 +4693,7 @@ static void dm_integrity_dtr(struct dm_t + kvfree(ic->bbs); + if (ic->bufio) + dm_bufio_client_destroy(ic->bufio); ++ mempool_exit(&ic->recheck_pool); + mempool_exit(&ic->journal_io_mempool); + if (ic->io) + dm_io_client_destroy(ic->io); diff --git a/queue-6.6/dm-verity-recheck-the-hash-after-a-failure.patch b/queue-6.6/dm-verity-recheck-the-hash-after-a-failure.patch new file mode 100644 index 00000000000..93c0918c8c6 --- /dev/null +++ b/queue-6.6/dm-verity-recheck-the-hash-after-a-failure.patch @@ -0,0 +1,189 @@ +From 9177f3c0dea6143d05cac1bbd28668fd0e216d11 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 19 Feb 2024 21:28:09 +0100 +Subject: dm-verity: recheck the hash after a failure + +From: Mikulas Patocka + +commit 9177f3c0dea6143d05cac1bbd28668fd0e216d11 upstream. + +If a userspace process reads (with O_DIRECT) multiple blocks into the same +buffer, dm-verity reports an error [1]. + +This commit fixes dm-verity, so that if hash verification fails, the data +is read again into a kernel buffer (where userspace can't modify it) and +the hash is rechecked. 
If the recheck succeeds, the content of the kernel +buffer is copied into the user buffer; if the recheck fails, an error is +reported. + +[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-verity-target.c | 86 +++++++++++++++++++++++++++++++++++++++--- + drivers/md/dm-verity.h | 6 ++ + 2 files changed, 86 insertions(+), 6 deletions(-) + +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity + return 0; + } + ++static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, ++ u8 *data, size_t len) ++{ ++ memcpy(data, io->recheck_buffer, len); ++ io->recheck_buffer += len; ++ ++ return 0; ++} ++ ++static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, ++ struct bvec_iter start, sector_t cur_block) ++{ ++ struct page *page; ++ void *buffer; ++ int r; ++ struct dm_io_request io_req; ++ struct dm_io_region io_loc; ++ ++ page = mempool_alloc(&v->recheck_pool, GFP_NOIO); ++ buffer = page_to_virt(page); ++ ++ io_req.bi_opf = REQ_OP_READ; ++ io_req.mem.type = DM_IO_KMEM; ++ io_req.mem.ptr.addr = buffer; ++ io_req.notify.fn = NULL; ++ io_req.client = v->io; ++ io_loc.bdev = v->data_dev->bdev; ++ io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); ++ io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); ++ r = dm_io(&io_req, 1, &io_loc, NULL); ++ if (unlikely(r)) ++ goto free_ret; ++ ++ r = verity_hash(v, verity_io_hash_req(v, io), buffer, ++ 1 << v->data_dev_block_bits, ++ verity_io_real_digest(v, io), true); ++ if (unlikely(r)) ++ goto free_ret; ++ ++ if (memcmp(verity_io_real_digest(v, io), ++ verity_io_want_digest(v, io), v->digest_size)) { ++ r = -EIO; ++ goto free_ret; ++ } ++ ++ io->recheck_buffer = buffer; ++ r = verity_for_bv_block(v, io, &start, 
verity_recheck_copy); ++ if (unlikely(r)) ++ goto free_ret; ++ ++ r = 0; ++free_ret: ++ mempool_free(page, &v->recheck_pool); ++ ++ return r; ++} ++ + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) + { +@@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_ve + { + bool is_zero; + struct dm_verity *v = io->v; +-#if defined(CONFIG_DM_VERITY_FEC) + struct bvec_iter start; +-#endif + struct bvec_iter iter_copy; + struct bvec_iter *iter; + struct crypto_wait wait; +@@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_ve + if (unlikely(r < 0)) + return r; + +-#if defined(CONFIG_DM_VERITY_FEC) +- if (verity_fec_is_enabled(v)) +- start = *iter; +-#endif ++ start = *iter; + r = verity_for_io_block(v, io, iter, &wait); + if (unlikely(r < 0)) + return r; +@@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_ve + * tasklet since it may sleep, so fallback to work-queue. + */ + return -EAGAIN; ++ } else if (verity_recheck(v, io, start, cur_block) == 0) { ++ if (v->validated_blocks) ++ set_bit(cur_block, v->validated_blocks); ++ continue; + #if defined(CONFIG_DM_VERITY_FEC) + } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, + cur_block, NULL, &start) == 0) { +@@ -941,6 +997,10 @@ static void verity_dtr(struct dm_target + if (v->verify_wq) + destroy_workqueue(v->verify_wq); + ++ mempool_exit(&v->recheck_pool); ++ if (v->io) ++ dm_io_client_destroy(v->io); ++ + if (v->bufio) + dm_bufio_client_destroy(v->bufio); + +@@ -1379,6 +1439,20 @@ static int verity_ctr(struct dm_target * + } + v->hash_blocks = hash_position; + ++ r = mempool_init_page_pool(&v->recheck_pool, 1, 0); ++ if (unlikely(r)) { ++ ti->error = "Cannot allocate mempool"; ++ goto bad; ++ } ++ ++ v->io = dm_io_client_create(); ++ if (IS_ERR(v->io)) { ++ r = PTR_ERR(v->io); ++ v->io = NULL; ++ ti->error = "Cannot allocate dm io"; ++ goto bad; ++ } ++ + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, + 1 << v->hash_dev_block_bits, 1, 
sizeof(struct buffer_aux), + dm_bufio_alloc_callback, NULL, +--- a/drivers/md/dm-verity.h ++++ b/drivers/md/dm-verity.h +@@ -11,6 +11,7 @@ + #ifndef DM_VERITY_H + #define DM_VERITY_H + ++#include + #include + #include + #include +@@ -68,6 +69,9 @@ struct dm_verity { + unsigned long *validated_blocks; /* bitset blocks validated */ + + char *signature_key_desc; /* signature keyring reference */ ++ ++ struct dm_io_client *io; ++ mempool_t recheck_pool; + }; + + struct dm_verity_io { +@@ -84,6 +88,8 @@ struct dm_verity_io { + + struct work_struct work; + ++ char *recheck_buffer; ++ + /* + * Three variably-size fields follow this struct: + * diff --git a/queue-6.6/docs-instruct-latex-to-cope-with-deeper-nesting.patch b/queue-6.6/docs-instruct-latex-to-cope-with-deeper-nesting.patch new file mode 100644 index 00000000000..af83ec7199f --- /dev/null +++ b/queue-6.6/docs-instruct-latex-to-cope-with-deeper-nesting.patch @@ -0,0 +1,43 @@ +From 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c Mon Sep 17 00:00:00 2001 +From: Jonathan Corbet +Date: Mon, 19 Feb 2024 09:05:38 -0700 +Subject: docs: Instruct LaTeX to cope with deeper nesting + +From: Jonathan Corbet + +commit 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c upstream. + +The addition of the XFS online fsck documentation starting with +commit a8f6c2e54ddc ("xfs: document the motivation for online fsck design") +added a deeper level of nesting than LaTeX is prepared to deal with. That +caused a pdfdocs build failure with the helpful "Too deeply nested" error +message buried deeply in Documentation/output/filesystems.log. + +Increase the "maxlistdepth" parameter to instruct LaTeX that it needs to +deal with the deeper nesting whether it wants to or not. 
+ +Suggested-by: Akira Yokosawa +Tested-by: Akira Yokosawa +Cc: stable@vger.kernel.org # v6.4+ +Link: https://lore.kernel.org/linux-doc/67f6ac60-7957-4b92-9d72-a08fbad0e028@gmail.com/ +Signed-off-by: Jonathan Corbet +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/conf.py | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/Documentation/conf.py ++++ b/Documentation/conf.py +@@ -383,6 +383,12 @@ latex_elements = { + verbatimhintsturnover=false, + ''', + ++ # ++ # Some of our authors are fond of deep nesting; tell latex to ++ # cope. ++ # ++ 'maxlistdepth': '10', ++ + # For CJK One-half spacing, need to be in front of hyperref + 'extrapackages': r'\usepackage{setspace}', + diff --git a/queue-6.6/drm-amd-display-adjust-few-initialization-order-in-dm.patch b/queue-6.6/drm-amd-display-adjust-few-initialization-order-in-dm.patch new file mode 100644 index 00000000000..d5180acf99e --- /dev/null +++ b/queue-6.6/drm-amd-display-adjust-few-initialization-order-in-dm.patch @@ -0,0 +1,107 @@ +From 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb Mon Sep 17 00:00:00 2001 +From: Wayne Lin +Date: Fri, 2 Feb 2024 17:34:11 +0800 +Subject: drm/amd/display: adjust few initialization order in dm + +From: Wayne Lin + +commit 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb upstream. + +[Why] +Observe error message "Can't retrieve aconnector in hpd_rx_irq_offload_work" +when boot up with a mst tbt4 dock connected. After analyzing, there are few +parts needed to be adjusted: + +1. hpd_rx_offload_wq[].aconnector is not initialzed before the dmub outbox +hpd_irq handler get registered which causes the error message. + +2. registeration of hpd and hpd_rx_irq event for usb4 dp tunneling is not +aligned with legacy interface sequence + +[How] +Put DMUB_NOTIFICATION_HPD and DMUB_NOTIFICATION_HPD_IRQ handler +registration into register_hpd_handlers() to align other interfaces and +get hpd_rx_offload_wq[].aconnector initialized earlier than that. 
+ +Leave DMUB_NOTIFICATION_AUX_REPLY registered as it was since we need that +while calling dc_link_detect(). USB4 connection status will be proactively +detected by dc_link_detect_connection_type() in amdgpu_dm_initialize_drm_device() + +Cc: Stable +Reviewed-by: Aurabindo Pillai +Acked-by: Rodrigo Siqueira +Tested-by: Daniel Wheeler +Signed-off-by: Wayne Lin +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 ++++++++++------------ + 1 file changed, 18 insertions(+), 19 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -1816,21 +1816,12 @@ static int amdgpu_dm_init(struct amdgpu_ + DRM_ERROR("amdgpu: fail to register dmub aux callback"); + goto error; + } +- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { +- DRM_ERROR("amdgpu: fail to register dmub hpd callback"); +- goto error; +- } +- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { +- DRM_ERROR("amdgpu: fail to register dmub hpd callback"); +- goto error; +- } +- } +- +- /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. +- * It is expected that DMUB will resend any pending notifications at this point, for +- * example HPD from DPIA. +- */ +- if (dc_is_dmub_outbox_supported(adev->dm.dc)) { ++ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. ++ * It is expected that DMUB will resend any pending notifications at this point. Note ++ * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to ++ * align legacy interface initialization sequence. Connection status will be proactivly ++ * detected once in the amdgpu_dm_initialize_drm_device. 
++ */ + dc_enable_dmub_outbox(adev->dm.dc); + + /* DPIA trace goes to dmesg logs only if outbox is enabled */ +@@ -3484,6 +3475,14 @@ static void register_hpd_handlers(struct + int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT; + int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT; + ++ if (dc_is_dmub_outbox_supported(adev->dm.dc)) { ++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) ++ DRM_ERROR("amdgpu: fail to register dmub hpd callback"); ++ ++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) ++ DRM_ERROR("amdgpu: fail to register dmub hpd callback"); ++ } ++ + list_for_each_entry(connector, + &dev->mode_config.connector_list, head) { + +@@ -3509,10 +3508,6 @@ static void register_hpd_handlers(struct + handle_hpd_rx_irq, + (void *) aconnector); + } +- +- if (adev->dm.hpd_rx_offload_wq) +- adev->dm.hpd_rx_offload_wq[connector->index].aconnector = +- aconnector; + } + } + +@@ -4481,6 +4476,10 @@ static int amdgpu_dm_initialize_drm_devi + + link = dc_get_link_at_index(dm->dc, i); + ++ if (dm->hpd_rx_offload_wq) ++ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector = ++ aconnector; ++ + if (!dc_link_detect_connection_type(link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + diff --git a/queue-6.6/drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch b/queue-6.6/drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch new file mode 100644 index 00000000000..fe2b7976d18 --- /dev/null +++ b/queue-6.6/drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch @@ -0,0 +1,66 @@ +From bd915ae73a2d78559b376ad2caf5e4ef51de2455 Mon Sep 17 00:00:00 2001 +From: Martin Blumenstingl +Date: Thu, 15 Feb 2024 23:04:42 +0100 +Subject: drm/meson: Don't remove bridges which are created by other drivers + +From: Martin Blumenstingl + +commit bd915ae73a2d78559b376ad2caf5e4ef51de2455 upstream. 
+ +Stop calling drm_bridge_remove() for bridges allocated/managed by other +drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}. +drm_bridge_remove() unregisters the bridge so it cannot be used +anymore. Doing so for bridges we don't own can lead to the video +pipeline not being able to come up after -EPROBE_DEFER of the VPU +because we're unregistering a bridge that's managed by another driver. +The other driver doesn't know that we have unregistered it's bridge +and on subsequent .probe() we're not able to find those bridges anymore +(since nobody re-creates them). + +This fixes probe errors on Meson8b boards with the CVBS outputs enabled. + +Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time") +Fixes: 42dcf15f901c ("drm/meson: add DSI encoder") +Cc: +Reported-by: Steve Morvai +Signed-off-by: Martin Blumenstingl +Reviewed-by: Neil Armstrong +Tested-by: Steve Morvai +Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com +Reviewed-by: Neil Armstrong +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 - + drivers/gpu/drm/meson/meson_encoder_dsi.c | 1 - + drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 - + 3 files changed, 3 deletions(-) + +--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c ++++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c +@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct me + if (priv->encoders[MESON_ENC_CVBS]) { + meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; + drm_bridge_remove(&meson_encoder_cvbs->bridge); +- drm_bridge_remove(meson_encoder_cvbs->next_bridge); + } + } +--- a/drivers/gpu/drm/meson/meson_encoder_dsi.c ++++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c +@@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct mes + if (priv->encoders[MESON_ENC_DSI]) { + 
meson_encoder_dsi = priv->encoders[MESON_ENC_DSI]; + drm_bridge_remove(&meson_encoder_dsi->bridge); +- drm_bridge_remove(meson_encoder_dsi->next_bridge); + } + } +--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c ++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c +@@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct me + if (priv->encoders[MESON_ENC_HDMI]) { + meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; + drm_bridge_remove(&meson_encoder_hdmi->bridge); +- drm_bridge_remove(meson_encoder_hdmi->next_bridge); + } + } diff --git a/queue-6.6/drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch b/queue-6.6/drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch new file mode 100644 index 00000000000..3c8f6233ce8 --- /dev/null +++ b/queue-6.6/drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch @@ -0,0 +1,49 @@ +From 40510a941d27d405a82dc3320823d875f94625df Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Wed, 21 Feb 2024 08:33:24 +0100 +Subject: drm/ttm: Fix an invalid freeing on already freed page in error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Hellström + +commit 40510a941d27d405a82dc3320823d875f94625df upstream. + +If caching mode change fails due to, for example, OOM we +free the allocated pages in a two-step process. First the pages +for which the caching change has already succeeded. Secondly +the pages for which a caching change did not succeed. + +However the second step was incorrectly freeing the pages already +freed in the first step. + +Fix. 
+ +Signed-off-by: Thomas Hellström +Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path") +Cc: Christian König +Cc: Dave Airlie +Cc: Christian Koenig +Cc: Huang Rui +Cc: dri-devel@lists.freedesktop.org +Cc: # v6.4+ +Reviewed-by: Matthew Auld +Reviewed-by: Christian König +Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/ttm/ttm_pool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/ttm/ttm_pool.c ++++ b/drivers/gpu/drm/ttm/ttm_pool.c +@@ -384,7 +384,7 @@ static void ttm_pool_free_range(struct t + enum ttm_caching caching, + pgoff_t start_page, pgoff_t end_page) + { +- struct page **pages = tt->pages; ++ struct page **pages = &tt->pages[start_page]; + unsigned int order; + pgoff_t i, nr; + diff --git a/queue-6.6/fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch b/queue-6.6/fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch new file mode 100644 index 00000000000..2c0eda06e18 --- /dev/null +++ b/queue-6.6/fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch @@ -0,0 +1,83 @@ +From b820de741ae48ccf50dd95e297889c286ff4f760 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 15 Feb 2024 12:47:38 -0800 +Subject: fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio + +From: Bart Van Assche + +commit b820de741ae48ccf50dd95e297889c286ff4f760 upstream. 
+ +If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the +following kernel warning appears: + +WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 +Call trace: + kiocb_set_cancel_fn+0x9c/0xa8 + ffs_epfile_read_iter+0x144/0x1d0 + io_read+0x19c/0x498 + io_issue_sqe+0x118/0x27c + io_submit_sqes+0x25c/0x5fc + __arm64_sys_io_uring_enter+0x104/0xab0 + invoke_syscall+0x58/0x11c + el0_svc_common+0xb4/0xf4 + do_el0_svc+0x2c/0xb0 + el0_svc+0x2c/0xa4 + el0t_64_sync_handler+0x68/0xb4 + el0t_64_sync+0x1a4/0x1a8 + +Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is +submitted by libaio. + +Suggested-by: Jens Axboe +Cc: Christoph Hellwig +Cc: Avi Kivity +Cc: Sandeep Dhavale +Cc: Jens Axboe +Cc: Greg Kroah-Hartman +Cc: Kent Overstreet +Cc: stable@vger.kernel.org +Signed-off-by: Bart Van Assche +Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/aio.c | 9 ++++++++- + include/linux/fs.h | 2 ++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -594,6 +594,13 @@ void kiocb_set_cancel_fn(struct kiocb *i + struct kioctx *ctx = req->ki_ctx; + unsigned long flags; + ++ /* ++ * kiocb didn't come from aio or is neither a read nor a write, hence ++ * ignore it. 
++ */ ++ if (!(iocb->ki_flags & IOCB_AIO_RW)) ++ return; ++ + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) + return; + +@@ -1463,7 +1470,7 @@ static int aio_prep_rw(struct kiocb *req + req->ki_complete = aio_complete_rw; + req->private = NULL; + req->ki_pos = iocb->aio_offset; +- req->ki_flags = req->ki_filp->f_iocb_flags; ++ req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; + if (iocb->aio_flags & IOCB_FLAG_RESFD) + req->ki_flags |= IOCB_EVENTFD; + if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -352,6 +352,8 @@ enum rw_hint { + * unrelated IO (like cache flushing, new IO generation, etc). + */ + #define IOCB_DIO_CALLER_COMP (1 << 22) ++/* kiocb is a read or write operation submitted by fs/aio.c. */ ++#define IOCB_AIO_RW (1 << 23) + + /* for use in trace events */ + #define TRACE_IOCB_STRINGS \ diff --git a/queue-6.6/gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch b/queue-6.6/gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch new file mode 100644 index 00000000000..f305f1ff177 --- /dev/null +++ b/queue-6.6/gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch @@ -0,0 +1,97 @@ +From 136cfaca22567a03bbb3bf53a43d8cb5748b80ec Mon Sep 17 00:00:00 2001 +From: Vasiliy Kovalev +Date: Wed, 14 Feb 2024 19:27:33 +0300 +Subject: gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() + +From: Vasiliy Kovalev + +commit 136cfaca22567a03bbb3bf53a43d8cb5748b80ec upstream. + +The gtp_net_ops pernet operations structure for the subsystem must be +registered before registering the generic netlink family. 
+ +Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: + +general protection fault, probably for non-canonical address +0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] +CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 +RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] +Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 + df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> + 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 +RSP: 0018:ffff888014107220 EFLAGS: 00010202 +RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 +RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 +FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 +PKRU: 55555554 +Call Trace: + + ? show_regs+0x90/0xa0 + ? die_addr+0x50/0xd0 + ? exc_general_protection+0x148/0x220 + ? asm_exc_general_protection+0x22/0x30 + ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] + ? __alloc_skb+0x1dd/0x350 + ? __pfx___alloc_skb+0x10/0x10 + genl_dumpit+0x11d/0x230 + netlink_dump+0x5b9/0xce0 + ? lockdep_hardirqs_on_prepare+0x253/0x430 + ? __pfx_netlink_dump+0x10/0x10 + ? kasan_save_track+0x10/0x40 + ? __kasan_kmalloc+0x9b/0xa0 + ? genl_start+0x675/0x970 + __netlink_dump_start+0x6fc/0x9f0 + genl_family_rcv_msg_dumpit+0x1bb/0x2d0 + ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 + ? genl_op_from_small+0x2a/0x440 + ? cap_capable+0x1d0/0x240 + ? __pfx_genl_start+0x10/0x10 + ? __pfx_genl_dumpit+0x10/0x10 + ? __pfx_genl_done+0x10/0x10 + ? 
security_capable+0x9d/0xe0 + +Cc: stable@vger.kernel.org +Signed-off-by: Vasiliy Kovalev +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -1907,20 +1907,20 @@ static int __init gtp_init(void) + if (err < 0) + goto error_out; + +- err = genl_register_family(&gtp_genl_family); ++ err = register_pernet_subsys(&gtp_net_ops); + if (err < 0) + goto unreg_rtnl_link; + +- err = register_pernet_subsys(&gtp_net_ops); ++ err = genl_register_family(&gtp_genl_family); + if (err < 0) +- goto unreg_genl_family; ++ goto unreg_pernet_subsys; + + pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", + sizeof(struct pdp_ctx)); + return 0; + +-unreg_genl_family: +- genl_unregister_family(&gtp_genl_family); ++unreg_pernet_subsys: ++ unregister_pernet_subsys(&gtp_net_ops); + unreg_rtnl_link: + rtnl_link_unregister(&gtp_link_ops); + error_out: diff --git a/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch b/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch new file mode 100644 index 00000000000..20f1b4aae33 --- /dev/null +++ b/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch @@ -0,0 +1,36 @@ +From 8d3a7dfb801d157ac423261d7cd62c33e95375f8 Mon Sep 17 00:00:00 2001 +From: Oliver Upton +Date: Wed, 21 Feb 2024 09:27:31 +0000 +Subject: KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() + +From: Oliver Upton + +commit 8d3a7dfb801d157ac423261d7cd62c33e95375f8 upstream. + +vgic_get_irq() may not return a valid descriptor if there is no ITS that +holds a valid translation for the specified INTID.
If that is the case, +it is safe to silently ignore it and continue processing the LPI pending +table. + +Cc: stable@vger.kernel.org +Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") +Signed-off-by: Oliver Upton +Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/vgic/vgic-its.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/arm64/kvm/vgic/vgic-its.c ++++ b/arch/arm64/kvm/vgic/vgic-its.c +@@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(st + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); ++ if (!irq) ++ continue; ++ + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); diff --git a/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch b/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch new file mode 100644 index 00000000000..e7e7b71f0ef --- /dev/null +++ b/queue-6.6/kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch @@ -0,0 +1,35 @@ +From 85a71ee9a0700f6c18862ef3b0011ed9dad99aca Mon Sep 17 00:00:00 2001 +From: Oliver Upton +Date: Wed, 21 Feb 2024 09:27:32 +0000 +Subject: KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler + +From: Oliver Upton + +commit 85a71ee9a0700f6c18862ef3b0011ed9dad99aca upstream. + +It is possible that an LPI mapped in a different ITS gets unmapped while +handling the MOVALL command. If that is the case, there is no state that +can be migrated to the destination. Silently ignore it and continue +migrating other LPIs. 
+ +Cc: stable@vger.kernel.org +Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") +Signed-off-by: Oliver Upton +Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/vgic/vgic-its.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/arm64/kvm/vgic/vgic-its.c ++++ b/arch/arm64/kvm/vgic/vgic-its.c +@@ -1427,6 +1427,8 @@ static int vgic_its_cmd_handle_movall(st + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); ++ if (!irq) ++ continue; + + update_affinity(irq, vcpu2); + diff --git a/queue-6.6/lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch b/queue-6.6/lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch new file mode 100644 index 00000000000..66dc432a67e --- /dev/null +++ b/queue-6.6/lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch @@ -0,0 +1,44 @@ +From 1eb1e984379e2da04361763f66eec90dd75cf63e Mon Sep 17 00:00:00 2001 +From: Guenter Roeck +Date: Thu, 8 Feb 2024 07:30:10 -0800 +Subject: lib/Kconfig.debug: TEST_IOV_ITER depends on MMU + +From: Guenter Roeck + +commit 1eb1e984379e2da04361763f66eec90dd75cf63e upstream. + +Trying to run the iov_iter unit test on a nommu system such as the qemu +kc705-nommu emulation results in a crash. + + KTAP version 1 + # Subtest: iov_iter + # module: kunit_iov_iter + 1..9 +BUG: failure at mm/nommu.c:318/vmap()! +Kernel panic - not syncing: BUG! + +The test calls vmap() directly, but vmap() is not supported on nommu +systems, causing the crash. TEST_IOV_ITER therefore needs to depend on +MMU. 
+ +Link: https://lkml.kernel.org/r/20240208153010.1439753-1-linux@roeck-us.net +Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator") +Signed-off-by: Guenter Roeck +Cc: David Howells +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/Kconfig.debug | 1 + + 1 file changed, 1 insertion(+) + +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -2225,6 +2225,7 @@ config TEST_DIV64 + config TEST_IOV_ITER + tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS + depends on KUNIT ++ depends on MMU + default KUNIT_ALL_TESTS + help + Enable this to turn on testing of the operation of the I/O iterator diff --git a/queue-6.6/loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch b/queue-6.6/loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch new file mode 100644 index 00000000000..286c3d2baed --- /dev/null +++ b/queue-6.6/loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch @@ -0,0 +1,49 @@ +From 9fa304b9f8ec440e614af6d35826110c633c4074 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Fri, 23 Feb 2024 14:36:31 +0800 +Subject: LoongArch: Call early_init_fdt_scan_reserved_mem() earlier + +From: Huacai Chen + +commit 9fa304b9f8ec440e614af6d35826110c633c4074 upstream. + +The unflatten_and_copy_device_tree() function contains a call to +memblock_alloc(). This means that memblock is allocating memory before +any of the reserved memory regions are set aside in the arch_mem_init() +function which calls early_init_fdt_scan_reserved_mem(). Therefore, +there is a possibility for memblock to allocate from any of the +reserved memory regions. + +Hence, move the call to early_init_fdt_scan_reserved_mem() to be earlier +in the init sequence, so that the reserved memory regions are set aside +before any allocations are done using memblock. 
+ +Cc: stable@vger.kernel.org +Fixes: 88d4d957edc707e ("LoongArch: Add FDT booting support from efi system table") +Signed-off-by: Oreoluwa Babatunde +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kernel/setup.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/loongarch/kernel/setup.c ++++ b/arch/loongarch/kernel/setup.c +@@ -367,6 +367,8 @@ void __init platform_init(void) + acpi_gbl_use_default_register_widths = false; + acpi_boot_table_init(); + #endif ++ ++ early_init_fdt_scan_reserved_mem(); + unflatten_and_copy_device_tree(); + + #ifdef CONFIG_NUMA +@@ -400,8 +402,6 @@ static void __init arch_mem_init(char ** + + check_kernel_sections_mem(); + +- early_init_fdt_scan_reserved_mem(); +- + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate diff --git a/queue-6.6/loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch b/queue-6.6/loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch new file mode 100644 index 00000000000..4906d722505 --- /dev/null +++ b/queue-6.6/loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch @@ -0,0 +1,74 @@ +From 1001db6c42e4012b55e5ee19405490f23e033b5a Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Fri, 23 Feb 2024 14:36:31 +0800 +Subject: LoongArch: Disable IRQ before init_fn() for nonboot CPUs + +From: Huacai Chen + +commit 1001db6c42e4012b55e5ee19405490f23e033b5a upstream. 
+ +Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to +silence such warnings (and also avoid potential errors due to unexpected +interrupts): + +WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 +pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 +a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 +a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c +t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e +t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 +t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 +s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 +s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 + ra: 90000000047bd56c tlb_init+0x24c/0x528 + ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 + CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) + PRMD: 00000000 (PPLV0 -PIE -PWE) + EUEN: 00000000 (-FPE -SXE -ASXE -BTE) + ECFG: 00071000 (LIE=12 VS=7) +ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) + PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 +Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 + 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 + 9000000006456888 9000000006456880 900000010039f950 0000000000000001 + 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 + 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 + 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 + 9000000006366000 900000000619a140 0000000000000000 0000000000000004 + 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 + 9000000001ca55f8 9000000006366000 90000000047a4828 
00007ffff057ded8 + 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 + ... +Call Trace: +[<90000000047a4828>] show_stack+0x48/0x1a0 +[<9000000005b61874>] dump_stack_lvl+0x84/0xcc +[<90000000047f60ac>] __warn+0x8c/0x1e0 +[<9000000005b0ab34>] report_bug+0x1b4/0x280 +[<9000000005b63110>] do_bp+0x2d0/0x480 +[<90000000047a2e20>] handle_bp+0x120/0x1c0 +[<90000000048e3334>] rcu_cpu_starting+0x214/0x280 +[<90000000047bd568>] tlb_init+0x248/0x528 +[<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 +[<90000000047a19f4>] cpu_probe+0x494/0xa00 +[<90000000047b551c>] start_secondary+0x3c/0xc0 +[<9000000005b66134>] smpboot_entry+0x50/0x58 + +Cc: stable@vger.kernel.org +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kernel/smp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/loongarch/kernel/smp.c ++++ b/arch/loongarch/kernel/smp.c +@@ -334,6 +334,7 @@ void __noreturn arch_cpu_idle_dead(void) + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + ++ local_irq_disable(); + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + diff --git a/queue-6.6/loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch b/queue-6.6/loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch new file mode 100644 index 00000000000..ebe4d2598f3 --- /dev/null +++ b/queue-6.6/loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch @@ -0,0 +1,208 @@ +From 752cd08da320a667a833803a8fd6bb266114cce5 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Fri, 23 Feb 2024 14:36:31 +0800 +Subject: LoongArch: Update cpu_sibling_map when disabling nonboot CPUs + +From: Huacai Chen + +commit 752cd08da320a667a833803a8fd6bb266114cce5 upstream. + +Update cpu_sibling_map when disabling nonboot CPUs by defining & calling +clear_cpu_sibling_map(), otherwise we get such errors on SMT systems: + +jump label: negative count! 
+WARNING: CPU: 6 PID: 45 at kernel/jump_label.c:263 __static_key_slow_dec_cpuslocked+0xec/0x100 +CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 +pc 90000000004c302c ra 90000000004c302c tp 90000001005bc000 sp 90000001005bfd20 +a0 000000000000001b a1 900000000224c278 a2 90000001005bfb58 a3 900000000224c280 +a4 900000000224c278 a5 90000001005bfb50 a6 0000000000000001 a7 0000000000000001 +t0 ce87a4763eb5234a t1 ce87a4763eb5234a t2 0000000000000000 t3 0000000000000000 +t4 0000000000000006 t5 0000000000000000 t6 0000000000000064 t7 0000000000001964 +t8 000000000009ebf6 u0 9000000001f2a068 s9 0000000000000000 s0 900000000246a2d8 +s1 ffffffffffffffff s2 ffffffffffffffff s3 90000000021518c0 s4 0000000000000040 +s5 9000000002151058 s6 9000000009828e40 s7 00000000000000b4 s8 0000000000000006 + ra: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 + ERA: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 + CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) + PRMD: 00000004 (PPLV0 +PIE -PWE) + EUEN: 00000000 (-FPE -SXE -ASXE -BTE) + ECFG: 00071c1c (LIE=2-4,10-12 VS=7) +ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) + PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) +CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 +Stack : 0000000000000000 900000000203f258 900000000179afc8 90000001005bc000 + 90000001005bf980 0000000000000000 90000001005bf988 9000000001fe0be0 + 900000000224c280 900000000224c278 90000001005bf8c0 0000000000000001 + 0000000000000001 ce87a4763eb5234a 0000000007f38000 90000001003f8cc0 + 0000000000000000 0000000000000006 0000000000000000 4c206e6f73676e6f + 6f4c203a656d616e 000000000009ec99 0000000007f38000 0000000000000000 + 900000000214b000 9000000001fe0be0 0000000000000004 0000000000000000 + 0000000000000107 0000000000000009 ffffffffffafdabe 00000000000000b4 + 0000000000000006 90000000004c302c 9000000000224528 00005555939a0c7c + 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c + ... 
+Call Trace: +[<9000000000224528>] show_stack+0x48/0x1a0 +[<900000000179afc8>] dump_stack_lvl+0x78/0xa0 +[<9000000000263ed0>] __warn+0x90/0x1a0 +[<90000000017419b8>] report_bug+0x1b8/0x280 +[<900000000179c564>] do_bp+0x264/0x420 +[<90000000004c302c>] __static_key_slow_dec_cpuslocked+0xec/0x100 +[<90000000002b4d7c>] sched_cpu_deactivate+0x2fc/0x300 +[<9000000000266498>] cpuhp_invoke_callback+0x178/0x8a0 +[<9000000000267f70>] cpuhp_thread_fun+0xf0/0x240 +[<90000000002a117c>] smpboot_thread_fn+0x1dc/0x2e0 +[<900000000029a720>] kthread+0x140/0x160 +[<9000000000222288>] ret_from_kernel_thread+0xc/0xa4 + +Cc: stable@vger.kernel.org +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kernel/smp.c | 121 ++++++++++++++++++++++++-------------------- + 1 file changed, 68 insertions(+), 53 deletions(-) + +--- a/arch/loongarch/kernel/smp.c ++++ b/arch/loongarch/kernel/smp.c +@@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, i + } + } + ++static inline void set_cpu_core_map(int cpu) ++{ ++ int i; ++ ++ cpumask_set_cpu(cpu, &cpu_core_setup_map); ++ ++ for_each_cpu(i, &cpu_core_setup_map) { ++ if (cpu_data[cpu].package == cpu_data[i].package) { ++ cpumask_set_cpu(i, &cpu_core_map[cpu]); ++ cpumask_set_cpu(cpu, &cpu_core_map[i]); ++ } ++ } ++} ++ ++static inline void set_cpu_sibling_map(int cpu) ++{ ++ int i; ++ ++ cpumask_set_cpu(cpu, &cpu_sibling_setup_map); ++ ++ for_each_cpu(i, &cpu_sibling_setup_map) { ++ if (cpus_are_siblings(cpu, i)) { ++ cpumask_set_cpu(i, &cpu_sibling_map[cpu]); ++ cpumask_set_cpu(cpu, &cpu_sibling_map[i]); ++ } ++ } ++} ++ ++static inline void clear_cpu_sibling_map(int cpu) ++{ ++ int i; ++ ++ for_each_cpu(i, &cpu_sibling_setup_map) { ++ if (cpus_are_siblings(cpu, i)) { ++ cpumask_clear_cpu(i, &cpu_sibling_map[cpu]); ++ cpumask_clear_cpu(cpu, &cpu_sibling_map[i]); ++ } ++ } ++ ++ cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); ++} ++ ++/* ++ * Calculate a new cpu_foreign_map mask whenever a ++ * new cpu appears or 
disappears. ++ */ ++void calculate_cpu_foreign_map(void) ++{ ++ int i, k, core_present; ++ cpumask_t temp_foreign_map; ++ ++ /* Re-calculate the mask */ ++ cpumask_clear(&temp_foreign_map); ++ for_each_online_cpu(i) { ++ core_present = 0; ++ for_each_cpu(k, &temp_foreign_map) ++ if (cpus_are_siblings(i, k)) ++ core_present = 1; ++ if (!core_present) ++ cpumask_set_cpu(i, &temp_foreign_map); ++ } ++ ++ for_each_online_cpu(i) ++ cpumask_andnot(&cpu_foreign_map[i], ++ &temp_foreign_map, &cpu_sibling_map[i]); ++} ++ + /* Send mailbox buffer via Mail_Send */ + static void csr_mail_send(uint64_t data, int cpu, int mailbox) + { +@@ -300,6 +367,7 @@ int loongson_cpu_disable(void) + numa_remove_cpu(cpu); + #endif + set_cpu_online(cpu, false); ++ clear_cpu_sibling_map(cpu); + calculate_cpu_foreign_map(); + local_irq_save(flags); + irq_migrate_all_off_this_cpu(); +@@ -377,59 +445,6 @@ static int __init ipi_pm_init(void) + core_initcall(ipi_pm_init); + #endif + +-static inline void set_cpu_sibling_map(int cpu) +-{ +- int i; +- +- cpumask_set_cpu(cpu, &cpu_sibling_setup_map); +- +- for_each_cpu(i, &cpu_sibling_setup_map) { +- if (cpus_are_siblings(cpu, i)) { +- cpumask_set_cpu(i, &cpu_sibling_map[cpu]); +- cpumask_set_cpu(cpu, &cpu_sibling_map[i]); +- } +- } +-} +- +-static inline void set_cpu_core_map(int cpu) +-{ +- int i; +- +- cpumask_set_cpu(cpu, &cpu_core_setup_map); +- +- for_each_cpu(i, &cpu_core_setup_map) { +- if (cpu_data[cpu].package == cpu_data[i].package) { +- cpumask_set_cpu(i, &cpu_core_map[cpu]); +- cpumask_set_cpu(cpu, &cpu_core_map[i]); +- } +- } +-} +- +-/* +- * Calculate a new cpu_foreign_map mask whenever a +- * new cpu appears or disappears. 
+- */ +-void calculate_cpu_foreign_map(void) +-{ +- int i, k, core_present; +- cpumask_t temp_foreign_map; +- +- /* Re-calculate the mask */ +- cpumask_clear(&temp_foreign_map); +- for_each_online_cpu(i) { +- core_present = 0; +- for_each_cpu(k, &temp_foreign_map) +- if (cpus_are_siblings(i, k)) +- core_present = 1; +- if (!core_present) +- cpumask_set_cpu(i, &temp_foreign_map); +- } +- +- for_each_online_cpu(i) +- cpumask_andnot(&cpu_foreign_map[i], +- &temp_foreign_map, &cpu_sibling_map[i]); +-} +- + /* Preload SMP state for boot cpu */ + void smp_prepare_boot_cpu(void) + { diff --git a/queue-6.6/md-fix-missing-release-of-active_io-for-flush.patch b/queue-6.6/md-fix-missing-release-of-active_io-for-flush.patch new file mode 100644 index 00000000000..d91d2ea9174 --- /dev/null +++ b/queue-6.6/md-fix-missing-release-of-active_io-for-flush.patch @@ -0,0 +1,58 @@ +From 855678ed8534518e2b428bcbcec695de9ba248e8 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Thu, 1 Feb 2024 17:25:51 +0800 +Subject: md: Fix missing release of 'active_io' for flush + +From: Yu Kuai + +commit 855678ed8534518e2b428bcbcec695de9ba248e8 upstream. + +submit_flushes + atomic_set(&mddev->flush_pending, 1); + rdev_for_each_rcu(rdev, mddev) + atomic_inc(&mddev->flush_pending); + bi->bi_end_io = md_end_flush + submit_bio(bi); + /* flush io is done first */ + md_end_flush + if (atomic_dec_and_test(&mddev->flush_pending)) + percpu_ref_put(&mddev->active_io) + -> active_io is not released + + if (atomic_dec_and_test(&mddev->flush_pending)) + -> missing release of active_io + +For consequence, mddev_suspend() will wait for 'active_io' to be zero +forever. + +Fix this problem by releasing 'active_io' in submit_flushes() if +'flush_pending' is decreased to zero. 
+ +Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") +Cc: stable@vger.kernel.org # v6.1+ +Reported-by: Blazej Kucman +Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/ +Signed-off-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -530,8 +530,12 @@ static void submit_flushes(struct work_s + rcu_read_lock(); + } + rcu_read_unlock(); +- if (atomic_dec_and_test(&mddev->flush_pending)) ++ if (atomic_dec_and_test(&mddev->flush_pending)) { ++ /* The pair is percpu_ref_get() from md_flush_request() */ ++ percpu_ref_put(&mddev->active_io); ++ + queue_work(md_wq, &mddev->flush_work); ++ } + } + + static void md_submit_flush_data(struct work_struct *ws) diff --git a/queue-6.6/mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch b/queue-6.6/mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch new file mode 100644 index 00000000000..248acb96b95 --- /dev/null +++ b/queue-6.6/mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch @@ -0,0 +1,101 @@ +From 13d0599ab3b2ff17f798353f24bcbef1659d3cfc Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 16 Feb 2024 11:40:25 -0800 +Subject: mm/damon/lru_sort: fix quota status loss due to online tunings + +From: SeongJae Park + +commit 13d0599ab3b2ff17f798353f24bcbef1659d3cfc upstream. + +For online parameters change, DAMON_LRU_SORT creates new schemes based on +latest values of the parameters and replaces the old schemes with the new +one. When creating it, the internal status of the quotas of the old +schemes is not preserved. As a result, charging of the quota starts from +zero after the online tuning. 
The data that collected to estimate the +throughput of the scheme's action is also reset, and therefore the +estimation should start from the scratch again. Because the throughput +estimation is being used to convert the time quota to the effective size +quota, this could result in temporal time quota inaccuracy. It would be +recovered over time, though. In short, the quota accuracy could be +temporarily degraded after online parameters update. + +Fix the problem by checking the case and copying the internal fields for +the status. + +Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org +Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") +Signed-off-by: SeongJae Park +Cc: [6.0+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++------- + 1 file changed, 36 insertions(+), 7 deletions(-) + +--- a/mm/damon/lru_sort.c ++++ b/mm/damon/lru_sort.c +@@ -183,9 +183,21 @@ static struct damos *damon_lru_sort_new_ + return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); + } + ++static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, ++ struct damos_quota *src) ++{ ++ dst->total_charged_sz = src->total_charged_sz; ++ dst->total_charged_ns = src->total_charged_ns; ++ dst->charged_sz = src->charged_sz; ++ dst->charged_from = src->charged_from; ++ dst->charge_target_from = src->charge_target_from; ++ dst->charge_addr_from = src->charge_addr_from; ++} ++ + static int damon_lru_sort_apply_parameters(void) + { +- struct damos *scheme; ++ struct damos *scheme, *hot_scheme, *cold_scheme; ++ struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; + unsigned int hot_thres, cold_thres; + int err = 0; + +@@ -193,18 +205,35 @@ static int damon_lru_sort_apply_paramete + if (err) + return err; + ++ damon_for_each_scheme(scheme, ctx) { ++ if (!old_hot_scheme) { ++ old_hot_scheme = scheme; ++ continue; ++ } ++ old_cold_scheme = scheme; ++ } ++ + 
hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * + hot_thres_access_freq / 1000; +- scheme = damon_lru_sort_new_hot_scheme(hot_thres); +- if (!scheme) ++ hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); ++ if (!hot_scheme) + return -ENOMEM; +- damon_set_schemes(ctx, &scheme, 1); ++ if (old_hot_scheme) ++ damon_lru_sort_copy_quota_status(&hot_scheme->quota, ++ &old_hot_scheme->quota); + + cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; +- scheme = damon_lru_sort_new_cold_scheme(cold_thres); +- if (!scheme) ++ cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); ++ if (!cold_scheme) { ++ damon_destroy_scheme(hot_scheme); + return -ENOMEM; +- damon_add_scheme(ctx, scheme); ++ } ++ if (old_cold_scheme) ++ damon_lru_sort_copy_quota_status(&cold_scheme->quota, ++ &old_cold_scheme->quota); ++ ++ damon_set_schemes(ctx, &hot_scheme, 1); ++ damon_add_scheme(ctx, cold_scheme); + + return damon_set_region_biggest_system_ram_default(target, + &monitor_region_start, diff --git a/queue-6.6/mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch b/queue-6.6/mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch new file mode 100644 index 00000000000..84fd9115e68 --- /dev/null +++ b/queue-6.6/mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch @@ -0,0 +1,80 @@ +From 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 16 Feb 2024 11:40:24 -0800 +Subject: mm/damon/reclaim: fix quota stauts loss due to online tunings + +From: SeongJae Park + +commit 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 upstream. + +Patch series "mm/damon: fix quota status loss due to online tunings". + +DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status +when applying new user parameters, and hence could cause temporal quota +accuracy degradation. Fix it by preserving the status. 
+ + +This patch (of 2): + +For online parameters change, DAMON_RECLAIM creates new scheme based on +latest values of the parameters and replaces the old scheme with the new +one. When creating it, the internal status of the quota of the old +scheme is not preserved. As a result, charging of the quota starts from +zero after the online tuning. The data that collected to estimate the +throughput of the scheme's action is also reset, and therefore the +estimation should start from the scratch again. Because the throughput +estimation is being used to convert the time quota to the effective size +quota, this could result in temporal time quota inaccuracy. It would be +recovered over time, though. In short, the quota accuracy could be +temporarily degraded after online parameters update. + +Fix the problem by checking the case and copying the internal fields for +the status. + +Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org +Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org +Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") +Signed-off-by: SeongJae Park +Cc: [5.19+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/reclaim.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/mm/damon/reclaim.c ++++ b/mm/damon/reclaim.c +@@ -148,9 +148,20 @@ static struct damos *damon_reclaim_new_s + &damon_reclaim_wmarks); + } + ++static void damon_reclaim_copy_quota_status(struct damos_quota *dst, ++ struct damos_quota *src) ++{ ++ dst->total_charged_sz = src->total_charged_sz; ++ dst->total_charged_ns = src->total_charged_ns; ++ dst->charged_sz = src->charged_sz; ++ dst->charged_from = src->charged_from; ++ dst->charge_target_from = src->charge_target_from; ++ dst->charge_addr_from = src->charge_addr_from; ++} ++ + static int damon_reclaim_apply_parameters(void) + { +- struct damos *scheme; ++ struct damos *scheme, *old_scheme; + struct damos_filter *filter; + int 
err = 0; + +@@ -162,6 +173,11 @@ static int damon_reclaim_apply_parameter + scheme = damon_reclaim_new_scheme(); + if (!scheme) + return -ENOMEM; ++ if (!list_empty(&ctx->schemes)) { ++ damon_for_each_scheme(old_scheme, ctx) ++ damon_reclaim_copy_quota_status(&scheme->quota, ++ &old_scheme->quota); ++ } + if (skip_anon) { + filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true); + if (!filter) { diff --git a/queue-6.6/mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch b/queue-6.6/mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch new file mode 100644 index 00000000000..44b668d9112 --- /dev/null +++ b/queue-6.6/mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch @@ -0,0 +1,59 @@ +From 118642d7f606fc9b9c92ee611275420320290ffb Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Tue, 13 Feb 2024 03:16:34 -0500 +Subject: mm: memcontrol: clarify swapaccount=0 deprecation warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johannes Weiner + +commit 118642d7f606fc9b9c92ee611275420320290ffb upstream. + +The swapaccount deprecation warning is throwing false positives. Since we +deprecated the knob and defaulted to enabling, the only reports we've been +getting are from folks that set swapaccount=1. While this is a nice +affirmation that always-enabling was the right choice, we certainly don't +want to warn when users request the supported mode. + +Only warn when disabling is requested, and clarify the warning. 
+ +[colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"] + Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com +Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org +Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode") +Signed-off-by: Colin Ian King +Reported-by: "Jonas Schäfer" +Reported-by: Narcis Garcia +Suggested-by: Yosry Ahmed +Signed-off-by: Johannes Weiner +Reviewed-by: Yosry Ahmed +Acked-by: Michal Hocko +Acked-by: Shakeel Butt +Cc: Roman Gushchin +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -7613,9 +7613,13 @@ bool mem_cgroup_swap_full(struct folio * + + static int __init setup_swap_account(char *s) + { +- pr_warn_once("The swapaccount= commandline option is deprecated. " +- "Please report your usecase to linux-mm@kvack.org if you " +- "depend on this functionality.\n"); ++ bool res; ++ ++ if (!kstrtobool(s, &res) && !res) ++ pr_warn_once("The swapaccount=0 commandline option is deprecated " ++ "in favor of configuring swap control via cgroupfs. " ++ "Please report your usecase to linux-mm@kvack.org if you " ++ "depend on this functionality.\n"); + return 1; + } + __setup("swapaccount=", setup_swap_account); diff --git a/queue-6.6/mm-swap-fix-race-when-skipping-swapcache.patch b/queue-6.6/mm-swap-fix-race-when-skipping-swapcache.patch new file mode 100644 index 00000000000..13ff005cec1 --- /dev/null +++ b/queue-6.6/mm-swap-fix-race-when-skipping-swapcache.patch @@ -0,0 +1,226 @@ +From 13ddaf26be324a7f951891ecd9ccd04466d27458 Mon Sep 17 00:00:00 2001 +From: Kairui Song +Date: Wed, 7 Feb 2024 02:25:59 +0800 +Subject: mm/swap: fix race when skipping swapcache + +From: Kairui Song + +commit 13ddaf26be324a7f951891ecd9ccd04466d27458 upstream. 
+ +When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads +swapin the same entry at the same time, they get different pages (A, B). +Before one thread (T0) finishes the swapin and installs page (A) to the +PTE, another thread (T1) could finish swapin of page (B), swap_free the +entry, then swap out the possibly modified page reusing the same entry. +It breaks the pte_same check in (T0) because PTE value is unchanged, +causing ABA problem. Thread (T0) will install a stalled page (A) into the +PTE and cause data corruption. + +One possible callstack is like this: + +CPU0 CPU1 +---- ---- +do_swap_page() do_swap_page() with same entry + + +swap_read_folio() <- read to page A swap_read_folio() <- read to page B + +... set_pte_at() + swap_free() <- entry is free + + +pte_same() <- Check pass, PTE seems + unchanged, but page A + is stalled! +swap_free() <- page B content lost! +set_pte_at() <- staled page A installed! + +And besides, for ZRAM, swap_free() allows the swap device to discard the +entry content, so even if page (B) is not modified, if swap_read_folio() +on CPU0 happens later than swap_free() on CPU1, it may also cause data +loss. + +To fix this, reuse swapcache_prepare which will pin the swap entry using +the cache flag, and allow only one thread to swap it in, also prevent any +parallel code from putting the entry in the cache. Release the pin after +PT unlocked. + +Racers just loop and wait since it's a rare and very short event. A +schedule_timeout_uninterruptible(1) call is added to avoid repeated page +faults wasting too much CPU, causing livelock or adding too much noise to +perf statistics. 
A similar livelock issue was described in commit +029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") + +Reproducer: + +This race issue can be triggered easily using a well constructed +reproducer and patched brd (with a delay in read path) [1]: + +With latest 6.8 mainline, race caused data loss can be observed easily: +$ gcc -g -lpthread test-thread-swap-race.c && ./a.out + Polulating 32MB of memory region... + Keep swapping out... + Starting round 0... + Spawning 65536 workers... + 32746 workers spawned, wait for done... + Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! + Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! + Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! + Round 0 Failed, 15 data loss! + +This reproducer spawns multiple threads sharing the same memory region +using a small swap device. Every two threads updates mapped pages one by +one in opposite direction trying to create a race, with one dedicated +thread keep swapping out the data out using madvise. + +The reproducer created a reproduce rate of about once every 5 minutes, so +the race should be totally possible in production. + +After this patch, I ran the reproducer for over a few hundred rounds and +no data loss observed. 
+ +Performance overhead is minimal, microbenchmark swapin 10G from 32G +zram: + +Before: 10934698 us +After: 11157121 us +Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) + +[kasong@tencent.com: v4] + Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com +Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com +Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") +Reported-by: "Huang, Ying" +Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ +Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] +Signed-off-by: Kairui Song +Reviewed-by: "Huang, Ying" +Acked-by: Yu Zhao +Acked-by: David Hildenbrand +Acked-by: Chris Li +Cc: Hugh Dickins +Cc: Johannes Weiner +Cc: Matthew Wilcox (Oracle) +Cc: Michal Hocko +Cc: Minchan Kim +Cc: Yosry Ahmed +Cc: Yu Zhao +Cc: Barry Song <21cnbao@gmail.com> +Cc: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/swap.h | 5 +++++ + mm/memory.c | 20 ++++++++++++++++++++ + mm/swap.h | 5 +++++ + mm/swapfile.c | 13 +++++++++++++ + 4 files changed, 43 insertions(+) + +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -552,6 +552,11 @@ static inline int swap_duplicate(swp_ent + return 0; + } + ++static inline int swapcache_prepare(swp_entry_t swp) ++{ ++ return 0; ++} ++ + static inline void swap_free(swp_entry_t swp) + { + } +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3726,6 +3726,7 @@ vm_fault_t do_swap_page(struct vm_fault + struct page *page; + struct swap_info_struct *si = NULL; + rmap_t rmap_flags = RMAP_NONE; ++ bool need_clear_cache = false; + bool exclusive = false; + swp_entry_t entry; + pte_t pte; +@@ -3794,6 +3795,20 @@ vm_fault_t do_swap_page(struct vm_fault + if (!folio) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { ++ /* ++ * Prevent parallel swapin from proceeding with ++ * the cache flag. 
Otherwise, another thread may ++ * finish swapin first, free the entry, and swapout ++ * reusing the same entry. It's undetectable as ++ * pte_same() returns true due to entry reuse. ++ */ ++ if (swapcache_prepare(entry)) { ++ /* Relax a bit to prevent rapid repeated page faults */ ++ schedule_timeout_uninterruptible(1); ++ goto out; ++ } ++ need_clear_cache = true; ++ + /* skip swapcache */ + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, + vma, vmf->address, false); +@@ -4040,6 +4055,9 @@ unlock: + if (vmf->pte) + pte_unmap_unlock(vmf->pte, vmf->ptl); + out: ++ /* Clear the swap cache pin for direct swapin after PTL unlock */ ++ if (need_clear_cache) ++ swapcache_clear(si, entry); + if (si) + put_swap_device(si); + return ret; +@@ -4054,6 +4072,8 @@ out_release: + folio_unlock(swapcache); + folio_put(swapcache); + } ++ if (need_clear_cache) ++ swapcache_clear(si, entry); + if (si) + put_swap_device(si); + return ret; +--- a/mm/swap.h ++++ b/mm/swap.h +@@ -38,6 +38,7 @@ void __delete_from_swap_cache(struct fol + void delete_from_swap_cache(struct folio *folio); + void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end); ++void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); + struct folio *swap_cache_get_folio(swp_entry_t entry, + struct vm_area_struct *vma, unsigned long addr); + struct folio *filemap_get_incore_folio(struct address_space *mapping, +@@ -96,6 +97,10 @@ static inline int swap_writepage(struct + return 0; + } + ++static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) ++{ ++} ++ + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, + struct vm_area_struct *vma, unsigned long addr) + { +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -3362,6 +3362,19 @@ int swapcache_prepare(swp_entry_t entry) + return __swap_duplicate(entry, SWAP_HAS_CACHE); + } + ++void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) ++{ ++ struct swap_cluster_info *ci; ++ 
unsigned long offset = swp_offset(entry); ++ unsigned char usage; ++ ++ ci = lock_cluster_or_swap_info(si, offset); ++ usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); ++ unlock_cluster_or_swap_info(si, ci); ++ if (!usage) ++ free_swap_slot(entry); ++} ++ + struct swap_info_struct *swp_swap_info(swp_entry_t entry) + { + return swap_type_to_swap_info(swp_type(entry)); diff --git a/queue-6.6/platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch b/queue-6.6/platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch new file mode 100644 index 00000000000..f1625e4ae81 --- /dev/null +++ b/queue-6.6/platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch @@ -0,0 +1,50 @@ +From 84c16d01ff219bc0a5dca5219db6b8b86a6854fb Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Fri, 16 Feb 2024 21:33:00 +0100 +Subject: platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler + +From: Hans de Goede + +commit 84c16d01ff219bc0a5dca5219db6b8b86a6854fb upstream. + +Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing +tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2: + +1. The ThinkPad will wake up immediately from suspend +2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second + +Both these issues are caused by the "VBDL" ACPI method call added +at the end of the notify_handler. + +And it never became entirely clear if this call is even necessary to fix +the issue of missing tablet-mode-switch events on the Dell Inspiron 7352. + +Drop the "VBDL" ACPI method call again to fix the 2 issues this is +causing on the ThinkPad X1 Tablet Gen2. 
+ +Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") +Reported-by: Alexander Kobel +Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/ +Cc: regressions@lists.linux.dev +Cc: Arnold Gozum +Cc: stable@vger.kernel.org +Signed-off-by: Hans de Goede +Tested-by: Alexander Kobel +Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/vbtn.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/platform/x86/intel/vbtn.c ++++ b/drivers/platform/x86/intel/vbtn.c +@@ -200,9 +200,6 @@ static void notify_handler(acpi_handle h + autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); + + sparse_keymap_report_event(input_dev, event, val, autorelease); +- +- /* Some devices need this to report further events */ +- acpi_evaluate_object(handle, "VBDL", NULL, NULL); + } + + /* diff --git a/queue-6.6/platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch b/queue-6.6/platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch new file mode 100644 index 00000000000..cfee8ed2469 --- /dev/null +++ b/queue-6.6/platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch @@ -0,0 +1,58 @@ +From dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Mon, 12 Feb 2024 13:06:07 +0100 +Subject: platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names + +From: Hans de Goede + +commit dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 upstream. + +On some devices the ACPI name of the touchscreen is e.g. either +MSSL1680:00 or MSSL1680:01 depending on the BIOS version. + +This happens for example on the "Chuwi Hi8 Air" tablet where the initial +commit's ts_data uses "MSSL1680:00" but the tablets from the github issue +and linux-hardware.org probe linked below both use "MSSL1680:01". 
+ +Replace the strcmp() match on ts_data->acpi_name with a strstarts() +check to allow using a partial match on just the ACPI HID of "MSSL1680" +and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly +to fix the touchscreen not working on models where it is "MSSL1680:01". + +Note this drops the length check for I2C_NAME_SIZE. This never was +necessary since the ACPI names used are never more then 11 chars and +I2C_NAME_SIZE is 20 so the replaced strncmp() would always stop long +before reaching I2C_NAME_SIZE. + +Link: https://linux-hardware.org/?computer=AC4301C0542A +Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet") +Closes: https://github.com/onitake/gsl-firmware/issues/91 +Cc: stable@vger.kernel.org +Reviewed-by: Kuppuswamy Sathyanarayanan +Signed-off-by: Hans de Goede +Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/touchscreen_dmi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/platform/x86/touchscreen_dmi.c ++++ b/drivers/platform/x86/touchscreen_dmi.c +@@ -81,7 +81,7 @@ static const struct property_entry chuwi + }; + + static const struct ts_dmi_data chuwi_hi8_air_data = { +- .acpi_name = "MSSL1680:00", ++ .acpi_name = "MSSL1680", + .properties = chuwi_hi8_air_props, + }; + +@@ -1821,7 +1821,7 @@ static void ts_dmi_add_props(struct i2c_ + int error; + + if (has_acpi_companion(dev) && +- !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { ++ strstarts(client->name, ts_data->acpi_name)) { + error = device_create_managed_software_node(dev, ts_data->properties, NULL); + if (error) + dev_err(dev, "failed to add properties: %d\n", error); diff --git a/queue-6.6/revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch b/queue-6.6/revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch new file mode 100644 index 00000000000..77f45b4694f --- /dev/null +++ 
b/queue-6.6/revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch @@ -0,0 +1,57 @@ +From 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 5 Feb 2024 10:39:20 +0100 +Subject: Revert "parisc: Only list existing CPUs in cpu_possible_mask" + +From: Helge Deller + +commit 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 upstream. + +This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226. + +It broke CPU hotplugging because it modifies the __cpu_possible_mask +after bootup, so that it will be different than nr_cpu_ids, which +then effictively breaks the workqueue setup code and triggers crashes +when shutting down CPUs at runtime. + +Guenter was the first who noticed the wrong values in __cpu_possible_mask, +since the cpumask Kunit tests were failig. + +Reverting this commit fixes both issues, but sadly brings back this +uncritical runtime warning: +register_cpu_capacity_sysctl: too early to get CPU4 device! + +Signed-off-by: Helge Deller +Reported-by: Guenter Roeck +Link: https://lkml.org/lkml/2024/2/4/146 +Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/ +Cc: stable@vger.kernel.org # 6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/kernel/processor.c | 8 -------- + 1 file changed, 8 deletions(-) + +--- a/arch/parisc/kernel/processor.c ++++ b/arch/parisc/kernel/processor.c +@@ -172,7 +172,6 @@ static int __init processor_probe(struct + p->cpu_num = cpu_info.cpu_num; + p->cpu_loc = cpu_info.cpu_loc; + +- set_cpu_possible(cpuid, true); + store_cpu_topology(cpuid); + + #ifdef CONFIG_SMP +@@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver _ + */ + void __init processor_init(void) + { +- unsigned int cpu; +- + reset_cpu_topology(); +- +- /* reset possible mask. We will mark those which are possible. 
*/ +- for_each_possible_cpu(cpu) +- set_cpu_possible(cpu, false); +- + register_parisc_driver(&cpu_driver); + } diff --git a/queue-6.6/s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch b/queue-6.6/s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch new file mode 100644 index 00000000000..cfc5aca0108 --- /dev/null +++ b/queue-6.6/s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch @@ -0,0 +1,99 @@ +From 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 Mon Sep 17 00:00:00 2001 +From: Peter Oberparleiter +Date: Wed, 14 Feb 2024 16:06:28 +0100 +Subject: s390/cio: fix invalid -EBUSY on ccw_device_start + +From: Peter Oberparleiter + +commit 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 upstream. + +The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code +when drivers try to start I/O while a path-verification (PV) process is +pending. This can lead to failed device initialization attempts with +symptoms like broken network connectivity after boot. + +Fix this by replacing the -EBUSY return code with a deferred condition +code 1 reply to make path-verification handling consistent from a +driver's point of view. + +The problem can be reproduced semi-regularly using the following process, +while repeating steps 2-3 as necessary (example assumes an OSA device +with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02): + +1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group +2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online +3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \ + echo on > /sys/devices/css0/chp0.02/status + +Background information: + +The common I/O layer starts path-verification I/Os when it receives +indications about changes in a device path's availability. This occurs +for example when hardware events indicate a change in channel-path +status, or when a manual operation such as a CHPID vary or configure +operation is performed. 
+ +If a driver attempts to start I/O while a PV is running, CIO reports a +successful I/O start (ccw_device_start() return code 0). Then, after +completion of PV, CIO synthesizes an interrupt response that indicates +an asynchronous status condition that prevented the start of the I/O +(deferred condition code 1). + +If a PV indication arrives while a device is busy with driver-owned I/O, +PV is delayed until after I/O completion was reported to the driver's +interrupt handler. To ensure that PV can be started eventually, CIO +reports a device busy condition (ccw_device_start() return code -EBUSY) +if a driver tries to start another I/O while PV is pending. + +In some cases this -EBUSY return code causes device drivers to consider +a device not operational, resulting in failed device initialization. + +Note: The code that introduced the problem was added in 2003. Symptoms +started appearing with the following CIO commit that causes a PV +indication when a device is removed from the cio_ignore list after the +associated parent subchannel device was probed, but before online +processing of the CCW device has started: + +2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") + +During boot, the cio_ignore list is modified by the cio_ignore dracut +module [1] as well as Linux vendor-specific systemd service scripts[2]. +When combined, this commit and boot scripts cause a frequent occurrence +of the problem during boot. 
+ +[1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore +[2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service + +Cc: stable@vger.kernel.org # v5.15+ +Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") +Tested-By: Thorsten Winkler +Reviewed-by: Thorsten Winkler +Signed-off-by: Peter Oberparleiter +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/cio/device_ops.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/s390/cio/device_ops.c ++++ b/drivers/s390/cio/device_ops.c +@@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct + return -EINVAL; + if (cdev->private->state == DEV_STATE_NOT_OPER) + return -ENODEV; +- if (cdev->private->state == DEV_STATE_VERIFY) { ++ if (cdev->private->state == DEV_STATE_VERIFY || ++ cdev->private->flags.doverify) { + /* Remember to fake irb when finished. */ + if (!cdev->private->flags.fake_irb) { + cdev->private->flags.fake_irb = FAKE_CMD_IRB; +@@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct + } + if (cdev->private->state != DEV_STATE_ONLINE || + ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && +- !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || +- cdev->private->flags.doverify) ++ !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) + return -EBUSY; + ret = cio_set_options (sch, flags); + if (ret) diff --git a/queue-6.6/scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch b/queue-6.6/scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch new file mode 100644 index 00000000000..000f83db795 --- /dev/null +++ b/queue-6.6/scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch @@ -0,0 +1,100 @@ +From b5fc07a5fb56216a49e6c1d0b172d5464d99a89b Mon Sep 17 00:00:00 2001 +From: "Martin K. Petersen" +Date: Wed, 14 Feb 2024 17:14:11 -0500 +Subject: scsi: core: Consult supported VPD page list prior to fetching page + +From: Martin K. 
Petersen + +commit b5fc07a5fb56216a49e6c1d0b172d5464d99a89b upstream. + +Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full +page") removed the logic which checks whether a VPD page is present on +the supported pages list before asking for the page itself. That was +done because SPC helpfully states "The Supported VPD Pages VPD page +list may or may not include all the VPD pages that are able to be +returned by the device server". Testing had revealed a few devices +that supported some of the 0xBn pages but didn't actually list them in +page 0. + +Julian Sikorski bisected a problem with his drive resetting during +discovery to the commit above. As it turns out, this particular drive +firmware will crash if we attempt to fetch page 0xB9. + +Various approaches were attempted to work around this. In the end, +reinstating the logic that consults VPD page 0 before fetching any +other page was the path of least resistance. A firmware update for the +devices which originally compelled us to remove the check has since +been released. + +Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com +Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") +Cc: stable@vger.kernel.org +Cc: Bart Van Assche +Reported-by: Julian Sikorski +Tested-by: Julian Sikorski +Reviewed-by: Lee Duncan +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/scsi.c | 22 ++++++++++++++++++++-- + include/scsi/scsi_device.h | 4 ---- + 2 files changed, 20 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/scsi.c ++++ b/drivers/scsi/scsi.c +@@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_ + return result + 4; + } + ++enum scsi_vpd_parameters { ++ SCSI_VPD_HEADER_SIZE = 4, ++ SCSI_VPD_LIST_SIZE = 36, ++}; ++ + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) + { +- unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); ++ unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); + int result; + + if (sdev->no_vpd_size) + return SCSI_DEFAULT_VPD_LEN; + + /* ++ * Fetch the supported pages VPD and validate that the requested page ++ * number is present. ++ */ ++ if (page != 0) { ++ result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); ++ if (result < SCSI_VPD_HEADER_SIZE) ++ return 0; ++ ++ result -= SCSI_VPD_HEADER_SIZE; ++ if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) ++ return 0; ++ } ++ /* + * Fetch the VPD page header to find out how big the page + * is. This is done to prevent problems on legacy devices + * which can not handle allocation lengths as large as + * potentially requested by the caller. 
+ */ +- result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); ++ result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); + if (result < 0) + return 0; + +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -100,10 +100,6 @@ struct scsi_vpd { + unsigned char data[]; + }; + +-enum scsi_vpd_parameters { +- SCSI_VPD_HEADER_SIZE = 4, +-}; +- + struct scsi_device { + struct Scsi_Host *host; + struct request_queue *request_queue; diff --git a/queue-6.6/scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch b/queue-6.6/scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch new file mode 100644 index 00000000000..b36790b416c --- /dev/null +++ b/queue-6.6/scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch @@ -0,0 +1,144 @@ +From 321da3dc1f3c92a12e3c5da934090d2992a8814c Mon Sep 17 00:00:00 2001 +From: "Martin K. Petersen" +Date: Tue, 13 Feb 2024 09:33:06 -0500 +Subject: scsi: sd: usb_storage: uas: Access media prior to querying device properties + +From: Martin K. Petersen + +commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream. + +It has been observed that some USB/UAS devices return generic properties +hardcoded in firmware for mode pages for a period of time after a device +has been discovered. The reported properties are either garbage or they do +not accurately reflect the characteristics of the physical storage device +attached in the case of a bridge. + +Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to +avoid calling revalidate twice") we would call revalidate several +times during device discovery. As a result, incorrect values would +eventually get replaced with ones accurately describing the attached +storage. When we did away with the redundant revalidate pass, several +cases were reported where devices reported nonsensical values or would +end up in write-protected state. 
+ +An initial attempt at addressing this issue involved introducing a +delayed second revalidate invocation. However, this approach still +left some devices reporting incorrect characteristics. + +Tasos Sahanidis debugged the problem further and identified that +introducing a READ operation prior to MODE SENSE fixed the problem and that +it wasn't a timing issue. Issuing a READ appears to cause the devices to +update their state to reflect the actual properties of the storage +media. Device properties like vendor, model, and storage capacity appear to +be correctly reported from the get-go. It is unclear why these devices +defer populating the remaining characteristics. + +Match the behavior of a well known commercial operating system and +trigger a READ operation prior to querying device characteristics to +force the device to populate the mode pages. + +The additional READ is triggered by a flag set in the USB storage and +UAS drivers. We avoid issuing the READ for other transport classes +since some storage devices identify Linux through our particular +discovery command sequence. + +Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com +Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") +Cc: stable@vger.kernel.org +Reported-by: Tasos Sahanidis +Reviewed-by: Ewan D. Milne +Reviewed-by: Bart Van Assche +Tested-by: Tasos Sahanidis +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- + drivers/usb/storage/scsiglue.c | 7 +++++++ + drivers/usb/storage/uas.c | 7 +++++++ + include/scsi/scsi_device.h | 1 + + 4 files changed, 40 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3404,6 +3404,24 @@ static bool sd_validate_opt_xfer_size(st + return true; + } + ++static void sd_read_block_zero(struct scsi_disk *sdkp) ++{ ++ unsigned int buf_len = sdkp->device->sector_size; ++ char *buffer, cmd[10] = { }; ++ ++ buffer = kmalloc(buf_len, GFP_KERNEL); ++ if (!buffer) ++ return; ++ ++ cmd[0] = READ_10; ++ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ ++ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ ++ ++ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, ++ SD_TIMEOUT, sdkp->max_retries, NULL); ++ kfree(buffer); ++} ++ + /** + * sd_revalidate_disk - called the first time a new disk is seen, + * performs disk spin up, read_capacity, etc. +@@ -3443,7 +3461,13 @@ static int sd_revalidate_disk(struct gen + */ + if (sdkp->media_present) { + sd_read_capacity(sdkp, buffer); +- ++ /* ++ * Some USB/UAS devices return generic values for mode pages ++ * until the media has been accessed. Trigger a READ operation ++ * to force the device to populate mode pages. ++ */ ++ if (sdp->read_before_ms) ++ sd_read_block_zero(sdkp); + /* + * set the default to rotational. All non-rotational devices + * support the block characteristics VPD page, which will +--- a/drivers/usb/storage/scsiglue.c ++++ b/drivers/usb/storage/scsiglue.c +@@ -180,6 +180,13 @@ static int slave_configure(struct scsi_d + sdev->use_192_bytes_for_3f = 1; + + /* ++ * Some devices report generic values until the media has been ++ * accessed. Force a READ(10) prior to querying device ++ * characteristics. 
++ */ ++ sdev->read_before_ms = 1; ++ ++ /* + * Some devices don't like MODE SENSE with page=0x3f, + * which is the command used for checking if a device + * is write-protected. Now that we tell the sd driver +--- a/drivers/usb/storage/uas.c ++++ b/drivers/usb/storage/uas.c +@@ -879,6 +879,13 @@ static int uas_slave_configure(struct sc + sdev->guess_capacity = 1; + + /* ++ * Some devices report generic values until the media has been ++ * accessed. Force a READ(10) prior to querying device ++ * characteristics. ++ */ ++ sdev->read_before_ms = 1; ++ ++ /* + * Some devices don't like MODE SENSE with page=0x3f, + * which is the command used for checking if a device + * is write-protected. Now that we tell the sd driver +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -208,6 +208,7 @@ struct scsi_device { + unsigned use_10_for_rw:1; /* first try 10-byte read / write */ + unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ + unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ ++ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ + unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ + unsigned no_write_same:1; /* no WRITE SAME command */ + unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ diff --git a/queue-6.6/scsi-target-pscsi-fix-bio_put-for-error-case.patch b/queue-6.6/scsi-target-pscsi-fix-bio_put-for-error-case.patch new file mode 100644 index 00000000000..cdf686af6bf --- /dev/null +++ b/queue-6.6/scsi-target-pscsi-fix-bio_put-for-error-case.patch @@ -0,0 +1,47 @@ +From de959094eb2197636f7c803af0943cb9d3b35804 Mon Sep 17 00:00:00 2001 +From: Naohiro Aota +Date: Wed, 14 Feb 2024 23:43:56 +0900 +Subject: scsi: target: pscsi: Fix bio_put() for error case + +From: Naohiro Aota + +commit de959094eb2197636f7c803af0943cb9d3b35804 upstream. 
+ +As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc +wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit() +and kfree(). That is not done properly for the error case, hitting WARN and +NULL pointer dereference in bio_free(). + +Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper") +CC: stable@vger.kernel.org # 6.1+ +Signed-off-by: Naohiro Aota +Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/target/target_core_pscsi.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/target/target_core_pscsi.c ++++ b/drivers/target/target_core_pscsi.c +@@ -907,12 +907,15 @@ new_bio: + + return 0; + fail: +- if (bio) +- bio_put(bio); ++ if (bio) { ++ bio_uninit(bio); ++ kfree(bio); ++ } + while (req->bio) { + bio = req->bio; + req->bio = bio->bi_next; +- bio_put(bio); ++ bio_uninit(bio); ++ kfree(bio); + } + req->biotail = NULL; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/queue-6.6/selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch b/queue-6.6/selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch new file mode 100644 index 00000000000..f7e68bb80bc --- /dev/null +++ b/queue-6.6/selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch @@ -0,0 +1,41 @@ +From 7efa6f2c803366f84c3c362f01e822490669d72b Mon Sep 17 00:00:00 2001 +From: Terry Tritton +Date: Mon, 5 Feb 2024 14:50:56 +0000 +Subject: selftests/mm: uffd-unit-test check if huge page size is 0 + +From: Terry Tritton + +commit 7efa6f2c803366f84c3c362f01e822490669d72b upstream. + +If HUGETLBFS is not enabled then the default_huge_page_size function will +return 0 and cause a divide by 0 error. Add a check to see if the huge page +size is 0 and skip the hugetlb tests if it is. 
+ +Link: https://lkml.kernel.org/r/20240205145055.3545806-2-terry.tritton@linaro.org +Fixes: 16a45b57cbf2 ("selftests/mm: add framework for uffd-unit-test") +Signed-off-by: Terry Tritton +Cc: Peter Griffin +Cc: Shuah Khan +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/uffd-unit-tests.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/tools/testing/selftests/mm/uffd-unit-tests.c ++++ b/tools/testing/selftests/mm/uffd-unit-tests.c +@@ -1309,6 +1309,12 @@ int main(int argc, char *argv[]) + continue; + + uffd_test_start("%s on %s", test->name, mem_type->name); ++ if ((mem_type->mem_flag == MEM_HUGETLB || ++ mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && ++ (default_huge_page_size() == 0)) { ++ uffd_test_skip("huge page size is 0, feature missing?"); ++ continue; ++ } + if (!uffd_feature_supported(test)) { + uffd_test_skip("feature missing"); + continue; diff --git a/queue-6.6/series b/queue-6.6/series index e398022c667..0ee1179648a 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -131,3 +131,42 @@ xen-events-drop-xen_allocate_irqs_dynamic.patch xen-events-modify-internal-un-bind-interfaces.patch xen-events-close-evtchn-after-mapping-cleanup.patch ib-hfi1-fix-sdma.h-tx-num_descs-off-by-one-error.patch +x86-bugs-add-asm-helpers-for-executing-verw.patch +docs-instruct-latex-to-cope-with-deeper-nesting.patch +loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch +loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch +loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch +btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch +btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch +drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch +drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch +drm-amd-display-adjust-few-initialization-order-in-dm.patch +s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch 
+ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch +fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch +lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch +dm-crypt-recheck-the-integrity-tag-after-a-failure.patch +revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch +dm-integrity-recheck-the-integrity-tag-after-a-failure.patch +dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch +dm-verity-recheck-the-hash-after-a-failure.patch +cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch +cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch +scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch +scsi-target-pscsi-fix-bio_put-for-error-case.patch +scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch +selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch +mm-swap-fix-race-when-skipping-swapcache.patch +mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch +mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch +mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch +platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch +platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch +cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch +sparc-fix-undefined-reference-to-fb_is_primary_device.patch +md-fix-missing-release-of-active_io-for-flush.patch +kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch +kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch +accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch +gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch +crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch diff --git a/queue-6.6/sparc-fix-undefined-reference-to-fb_is_primary_device.patch b/queue-6.6/sparc-fix-undefined-reference-to-fb_is_primary_device.patch new file mode 
100644 index 00000000000..0ceb97acde6 --- /dev/null +++ b/queue-6.6/sparc-fix-undefined-reference-to-fb_is_primary_device.patch @@ -0,0 +1,60 @@ +From ed683b9bb91fc274383e222ba5873a9ee9033462 Mon Sep 17 00:00:00 2001 +From: Javier Martinez Canillas +Date: Tue, 20 Feb 2024 10:54:12 +0100 +Subject: sparc: Fix undefined reference to fb_is_primary_device + +From: Javier Martinez Canillas + +commit ed683b9bb91fc274383e222ba5873a9ee9033462 upstream. + +Commit 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE +symbols") added a new FB_CORE Kconfig symbol, that can be enabled to only +have fbcon/VT and DRM fbdev emulation, but without support for any legacy +fbdev driver. + +Unfortunately, it missed to change the CONFIG_FB in arch/sparc makefiles, +which leads to the following linking error in some sparc64 configurations: + + sparc64-linux-ld: drivers/video/fbdev/core/fbcon.o: in function `fbcon_fb_registered': +>> fbcon.c:(.text+0x4f60): undefined reference to `fb_is_primary_device' + +Fixes: 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/r/202401290306.IV8rhJ02-lkp@intel.com/ +Signed-off-by: Javier Martinez Canillas +Reviewed-by: Thomas Zimmermann +Acked-by: Arnd Bergmann +Cc: # v6.6+ +Signed-off-by: Thomas Zimmermann +Link: https://patchwork.freedesktop.org/patch/msgid/20240220095428.3341195-1-javierm@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/Makefile | 2 +- + arch/sparc/video/Makefile | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile +index 5f6035936131..2a03daa68f28 100644 +--- a/arch/sparc/Makefile ++++ b/arch/sparc/Makefile +@@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/ + libs-y += arch/sparc/lib/ + + drivers-$(CONFIG_PM) += arch/sparc/power/ +-drivers-$(CONFIG_FB) += arch/sparc/video/ ++drivers-$(CONFIG_FB_CORE) += arch/sparc/video/ + + boot := arch/sparc/boot + +diff 
--git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile +index 6baddbd58e4d..d4d83f1702c6 100644 +--- a/arch/sparc/video/Makefile ++++ b/arch/sparc/video/Makefile +@@ -1,3 +1,3 @@ + # SPDX-License-Identifier: GPL-2.0-only + +-obj-$(CONFIG_FB) += fbdev.o ++obj-$(CONFIG_FB_CORE) += fbdev.o +-- +2.44.0 + diff --git a/queue-6.6/x86-bugs-add-asm-helpers-for-executing-verw.patch b/queue-6.6/x86-bugs-add-asm-helpers-for-executing-verw.patch new file mode 100644 index 00000000000..c2147ea7134 --- /dev/null +++ b/queue-6.6/x86-bugs-add-asm-helpers-for-executing-verw.patch @@ -0,0 +1,120 @@ +From baf8361e54550a48a7087b603313ad013cc13386 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:21:35 -0800 +Subject: x86/bugs: Add asm helpers for executing VERW + +From: Pawan Gupta + +commit baf8361e54550a48a7087b603313ad013cc13386 upstream. + +MDS mitigation requires clearing the CPU buffers before returning to +user. This needs to be done late in the exit-to-user path. Current +location of VERW leaves a possibility of kernel data ending up in CPU +buffers for memory accesses done after VERW such as: + + 1. Kernel data accessed by an NMI between VERW and return-to-user can + remain in CPU buffers since NMI returning to kernel does not + execute VERW to clear CPU buffers. + 2. Alyssa reported that after VERW is executed, + CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system + call. Memory accesses during stack scrubbing can move kernel stack + contents into CPU buffers. + 3. When caller saved registers are restored after a return from + function executing VERW, the kernel stack accesses can remain in + CPU buffers(since they occur after VERW). + +To fix this VERW needs to be moved very late in exit-to-user path. + +In preparation for moving VERW to entry/exit asm code, create macros +that can be used in asm. Also make VERW patching depend on a new feature +flag X86_FEATURE_CLEAR_CPU_BUF. 
+ +Reported-by: Alyssa Milburn +Suggested-by: Andrew Cooper +Suggested-by: Peter Zijlstra +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ + 3 files changed, 37 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/entry.S ++++ b/arch/x86/entry/entry.S +@@ -6,6 +6,9 @@ + #include + #include + #include ++#include ++#include ++#include + + .pushsection .noinstr.text, "ax" + +@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) + EXPORT_SYMBOL_GPL(entry_ibpb); + + .popsection ++ ++/* ++ * Define the VERW operand that is disguised as entry code so that ++ * it can be referenced with KPTI enabled. This ensure VERW can be ++ * used late in exit-to-user path after page tables are switched. ++ */ ++.pushsection .entry.text, "ax" ++ ++.align L1_CACHE_BYTES, 0xcc ++SYM_CODE_START_NOALIGN(mds_verw_sel) ++ UNWIND_HINT_UNDEFINED ++ ANNOTATE_NOENDBR ++ .word __KERNEL_DS ++.align L1_CACHE_BYTES, 0xcc ++SYM_CODE_END(mds_verw_sel); ++/* For KVM */ ++EXPORT_SYMBOL_GPL(mds_verw_sel); ++ ++.popsection ++ +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -97,7 +97,7 @@ + #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ + #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ + #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ +-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ ++#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ + #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ + #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ + #define X86_FEATURE_ALWAYS ( 
3*32+21) /* "" Always-present feature */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -329,6 +329,17 @@ + #endif + .endm + ++/* ++ * Macro to execute VERW instruction that mitigate transient data sampling ++ * attacks such as MDS. On affected systems a microcode update overloaded VERW ++ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. ++ * ++ * Note: Only the memory operand variant of VERW clears the CPU buffers. ++ */ ++.macro CLEAR_CPU_BUFFERS ++ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -545,6 +556,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ + + DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + ++extern u16 mds_verw_sel; ++ + #include + + /**