--- /dev/null
+From eb0d253ff9c74dee30aa92fe460b825eb28acd73 Mon Sep 17 00:00:00 2001
+From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
+Date: Tue, 20 Feb 2024 14:16:24 +0100
+Subject: accel/ivpu: Don't enable any tiles by default on VPU40xx
+
+From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
+
+commit eb0d253ff9c74dee30aa92fe460b825eb28acd73 upstream.
+
+There is no point in requesting 1 tile on VPU40xx, as the FW will
+probably need more tiles to run workloads and would have to reconfigure
+the PLL anyway. Don't enable any tiles and allow the FW to perform the
+initial tile configuration.
+
+This improves NPU boot stability as the tiles are always enabled only
+by the FW from the same initial state.
+
+Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4")
+Cc: stable@vger.kernel.org
+Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
+Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240220131624.1447813-1-jacek.lawrynowicz@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
+index 1c995307c113..a1523d0b1ef3 100644
+--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
++++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
+@@ -24,7 +24,7 @@
+ #define SKU_HW_ID_SHIFT 16u
+ #define SKU_HW_ID_MASK 0xffff0000u
+
+-#define PLL_CONFIG_DEFAULT 0x1
++#define PLL_CONFIG_DEFAULT 0x0
+ #define PLL_CDYN_DEFAULT 0x80
+ #define PLL_EPP_DEFAULT 0x80
+ #define PLL_REF_CLK_FREQ (50 * 1000000)
+--
+2.44.0
+
--- /dev/null
+From 4b085736e44dbbe69b5eea1a8a294f404678a1f4 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Thu, 11 Jan 2024 20:51:22 +0900
+Subject: ata: libata-core: Do not try to set sleeping devices to standby
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 4b085736e44dbbe69b5eea1a8a294f404678a1f4 upstream.
+
+In ata_dev_power_set_standby(), check that the target device is not
+sleeping. If it is, there is no need to do anything.
+
+Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2034,6 +2034,10 @@ void ata_dev_power_set_active(struct ata
+ struct ata_taskfile tf;
+ unsigned int err_mask;
+
++ /* If the device is already sleeping, do nothing. */
++ if (dev->flags & ATA_DFLAG_SLEEPING)
++ return;
++
+ /*
+ * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only
+ * if supported by the device.
--- /dev/null
+From e42b9d8b9ea2672811285e6a7654887ff64d23f3 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 7 Feb 2024 10:00:42 +1030
+Subject: btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit e42b9d8b9ea2672811285e6a7654887ff64d23f3 upstream.
+
+[BUG]
+With the following file extent layout, defrag would do unnecessary IO
+and result in more on-disk space usage.
+
+ # mkfs.btrfs -f $dev
+ # mount $dev $mnt
+ # xfs_io -f -c "pwrite 0 40m" $mnt/foobar
+ # sync
+ # xfs_io -f -c "pwrite 40m 16k" $mnt/foobar
+ # sync
+
+Above command would lead to the following file extent layout:
+
+ item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53
+ generation 7 type 1 (regular)
+ extent data disk byte 298844160 nr 41943040
+ extent data offset 0 nr 41943040 ram 41943040
+ extent compression 0 (none)
+ item 7 key (257 EXTENT_DATA 41943040) itemoff 15763 itemsize 53
+ generation 8 type 1 (regular)
+ extent data disk byte 13631488 nr 16384
+ extent data offset 0 nr 16384 ram 16384
+ extent compression 0 (none)
+
+Which is mostly fine. We can allow the final 16K to be merged with the
+previous 40M, but that is up to the end user's preference.
+
+But if we defrag the file using the default parameters, it would result
+in a worse file layout:
+
+ # btrfs filesystem defrag $mnt/foobar
+ # sync
+
+ item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53
+ generation 7 type 1 (regular)
+ extent data disk byte 298844160 nr 41943040
+ extent data offset 0 nr 8650752 ram 41943040
+ extent compression 0 (none)
+ item 7 key (257 EXTENT_DATA 8650752) itemoff 15763 itemsize 53
+ generation 9 type 1 (regular)
+ extent data disk byte 340787200 nr 33292288
+ extent data offset 0 nr 33292288 ram 33292288
+ extent compression 0 (none)
+ item 8 key (257 EXTENT_DATA 41943040) itemoff 15710 itemsize 53
+ generation 8 type 1 (regular)
+ extent data disk byte 13631488 nr 16384
+ extent data offset 0 nr 16384 ram 16384
+ extent compression 0 (none)
+
+Note the original 40M extent is still there, but a new 32M extent is
+created for no benefit at all.
+
+[CAUSE]
+There is an existing check to make sure we won't defrag a large enough
+extent (the threshold is by default 32M).
+
+But the check is using the length to the end of the extent:
+
+ range_len = em->len - (cur - em->start);
+
+ /* Skip too large extent */
+ if (range_len >= extent_thresh)
+ goto next;
+
+This means that, for the first 8MiB of the extent, range_len is always
+at least the default threshold, so that part is skipped and not
+defragged. But after the first 8MiB, range_len drops below the
+threshold, so the remaining part fits the requirement and is defragged.
+
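+As a worked example with the reproducer's numbers (a 40MiB extent and
+the default 32MiB threshold):
+
+    cur = em->start          -> range_len = 40MiB >= 32MiB, skipped
+    cur = em->start + 8MiB   -> range_len = 32MiB >= 32MiB, skipped
+    cur = em->start + 9MiB   -> range_len = 31MiB <  32MiB, defragged
+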
+Such different behavior inside the same extent caused the above problem,
+and we should avoid making different defrag decisions inside the same
+extent.
+
+[FIX]
+Instead of using @range_len, just use @em->len, so that we have a
+consistent decision among the same file extent.
+
+Now with this fix, we won't touch the extent, thus not making it any
+worse.
+
+Reported-by: Filipe Manana <fdmanana@suse.com>
+Fixes: 0cb5950f3f3b ("btrfs: fix deadlock when reserving space during defrag")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/defrag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/defrag.c
++++ b/fs/btrfs/defrag.c
+@@ -903,7 +903,7 @@ static int defrag_collect_targets(struct
+ goto add;
+
+ /* Skip too large extent */
+- if (range_len >= extent_thresh)
++ if (em->len >= extent_thresh)
+ goto next;
+
+ /*
--- /dev/null
+From b0ad381fa7690244802aed119b478b4bdafc31dd Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 12 Feb 2024 11:56:02 -0500
+Subject: btrfs: fix deadlock with fiemap and extent locking
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit b0ad381fa7690244802aed119b478b4bdafc31dd upstream.
+
+While working on the patchset to remove extent locking I got a lockdep
+splat with fiemap and pagefaulting with my new extent lock replacement
+lock.
+
+This deadlock exists with our normal code; we just don't have lockdep
+annotations for the extent locking, so we've never noticed it.
+
+Since we're copying the fiemap extent to user space on every iteration,
+we have the chance of pagefaulting. Because we hold the extent lock for
+the entire range, we could mkwrite into a range in the file that we have
+mmap'ed. This would deadlock with the following stack trace:
+
+[<0>] lock_extent+0x28d/0x2f0
+[<0>] btrfs_page_mkwrite+0x273/0x8a0
+[<0>] do_page_mkwrite+0x50/0xb0
+[<0>] do_fault+0xc1/0x7b0
+[<0>] __handle_mm_fault+0x2fa/0x460
+[<0>] handle_mm_fault+0xa4/0x330
+[<0>] do_user_addr_fault+0x1f4/0x800
+[<0>] exc_page_fault+0x7c/0x1e0
+[<0>] asm_exc_page_fault+0x26/0x30
+[<0>] rep_movs_alternative+0x33/0x70
+[<0>] _copy_to_user+0x49/0x70
+[<0>] fiemap_fill_next_extent+0xc8/0x120
+[<0>] emit_fiemap_extent+0x4d/0xa0
+[<0>] extent_fiemap+0x7f8/0xad0
+[<0>] btrfs_fiemap+0x49/0x80
+[<0>] __x64_sys_ioctl+0x3e1/0xb50
+[<0>] do_syscall_64+0x94/0x1a0
+[<0>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
+
+I wrote an fstest to reproduce this deadlock without my replacement lock
+and verified that the deadlock exists with our existing locking.
+
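+The scenario can also be sketched from userspace. A minimal,
+hypothetical sketch (not the fstest; it assumes a sufficiently large
+btrfs file at /mnt/btrfs/foo) points the fiemap output buffer at a
+writable mmap of the same file, so the copy to user space faults into
+btrfs_page_mkwrite() while extent_fiemap() holds the extent lock:
+
+    #include <fcntl.h>
+    #include <linux/fiemap.h>
+    #include <linux/fs.h>
+    #include <string.h>
+    #include <sys/ioctl.h>
+    #include <sys/mman.h>
+
+    int main(void)
+    {
+            int fd = open("/mnt/btrfs/foo", O_RDWR);
+            /* Writable shared mapping of the file itself; faulting a
+             * store into it goes through btrfs_page_mkwrite(). */
+            struct fiemap *fm = mmap(NULL, 65536, PROT_READ | PROT_WRITE,
+                                     MAP_SHARED, fd, 0);
+
+            if (fd < 0 || fm == MAP_FAILED)
+                    return 1;
+            memset(fm, 0, sizeof(*fm));
+            fm->fm_length = ~0ULL;
+            fm->fm_extent_count = 64;
+            /* fiemap copies extents into fm, faulting on our mapping. */
+            return ioctl(fd, FS_IOC_FIEMAP, fm);
+    }
+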
+To fix this, simply don't take the extent lock for the entire duration
+of the fiemap. This is safe in general because we keep track of where
+we are when we're searching the tree, so if an ordered extent updates
+in the middle of our fiemap call we'll still emit the correct extents
+because we know what offset we were on before.
+
+The only place we maintain the lock is while searching delalloc. Since
+the delalloc state can change during writeback, we want to lock the
+extent range so we have a consistent view of delalloc at the time we're
+checking whether we need to set the delalloc flag.
+
+With this patch applied we no longer deadlock with my testcase.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 62 +++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 45 insertions(+), 17 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2646,16 +2646,34 @@ static int fiemap_process_hole(struct bt
+ * it beyond i_size.
+ */
+ while (cur_offset < end && cur_offset < i_size) {
++ struct extent_state *cached_state = NULL;
+ u64 delalloc_start;
+ u64 delalloc_end;
+ u64 prealloc_start;
++ u64 lockstart;
++ u64 lockend;
+ u64 prealloc_len = 0;
+ bool delalloc;
+
++ lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
++ lockend = round_up(end, inode->root->fs_info->sectorsize);
++
++ /*
++ * We are only locking for the delalloc range because that's the
++ * only thing that can change here. With fiemap we have a lock
++ * on the inode, so no buffered or direct writes can happen.
++ *
++ * However mmaps and normal page writeback will cause this to
++ * change arbitrarily. We have to lock the extent lock here to
++ * make sure that nobody messes with the tree while we're doing
++ * btrfs_find_delalloc_in_range.
++ */
++ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
+ delalloc_cached_state,
+ &delalloc_start,
+ &delalloc_end);
++ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ if (!delalloc)
+ break;
+
+@@ -2823,15 +2841,15 @@ int extent_fiemap(struct btrfs_inode *in
+ u64 start, u64 len)
+ {
+ const u64 ino = btrfs_ino(inode);
+- struct extent_state *cached_state = NULL;
+ struct extent_state *delalloc_cached_state = NULL;
+ struct btrfs_path *path;
+ struct fiemap_cache cache = { 0 };
+ struct btrfs_backref_share_check_ctx *backref_ctx;
+ u64 last_extent_end;
+ u64 prev_extent_end;
+- u64 lockstart;
+- u64 lockend;
++ u64 range_start;
++ u64 range_end;
++ const u64 sectorsize = inode->root->fs_info->sectorsize;
+ bool stopped = false;
+ int ret;
+
+@@ -2842,12 +2860,11 @@ int extent_fiemap(struct btrfs_inode *in
+ goto out;
+ }
+
+- lockstart = round_down(start, inode->root->fs_info->sectorsize);
+- lockend = round_up(start + len, inode->root->fs_info->sectorsize);
+- prev_extent_end = lockstart;
++ range_start = round_down(start, sectorsize);
++ range_end = round_up(start + len, sectorsize);
++ prev_extent_end = range_start;
+
+ btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
+- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+
+ ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
+ if (ret < 0)
+@@ -2855,7 +2872,7 @@ int extent_fiemap(struct btrfs_inode *in
+ btrfs_release_path(path);
+
+ path->reada = READA_FORWARD;
+- ret = fiemap_search_slot(inode, path, lockstart);
++ ret = fiemap_search_slot(inode, path, range_start);
+ if (ret < 0) {
+ goto out_unlock;
+ } else if (ret > 0) {
+@@ -2867,7 +2884,7 @@ int extent_fiemap(struct btrfs_inode *in
+ goto check_eof_delalloc;
+ }
+
+- while (prev_extent_end < lockend) {
++ while (prev_extent_end < range_end) {
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_file_extent_item *ei;
+ struct btrfs_key key;
+@@ -2890,19 +2907,19 @@ int extent_fiemap(struct btrfs_inode *in
+ * The first iteration can leave us at an extent item that ends
+ * before our range's start. Move to the next item.
+ */
+- if (extent_end <= lockstart)
++ if (extent_end <= range_start)
+ goto next_item;
+
+ backref_ctx->curr_leaf_bytenr = leaf->start;
+
+ /* We have in implicit hole (NO_HOLES feature enabled). */
+ if (prev_extent_end < key.offset) {
+- const u64 range_end = min(key.offset, lockend) - 1;
++ const u64 hole_end = min(key.offset, range_end) - 1;
+
+ ret = fiemap_process_hole(inode, fieinfo, &cache,
+ &delalloc_cached_state,
+ backref_ctx, 0, 0, 0,
+- prev_extent_end, range_end);
++ prev_extent_end, hole_end);
+ if (ret < 0) {
+ goto out_unlock;
+ } else if (ret > 0) {
+@@ -2912,7 +2929,7 @@ int extent_fiemap(struct btrfs_inode *in
+ }
+
+ /* We've reached the end of the fiemap range, stop. */
+- if (key.offset >= lockend) {
++ if (key.offset >= range_end) {
+ stopped = true;
+ break;
+ }
+@@ -3006,29 +3023,41 @@ check_eof_delalloc:
+ btrfs_free_path(path);
+ path = NULL;
+
+- if (!stopped && prev_extent_end < lockend) {
++ if (!stopped && prev_extent_end < range_end) {
+ ret = fiemap_process_hole(inode, fieinfo, &cache,
+ &delalloc_cached_state, backref_ctx,
+- 0, 0, 0, prev_extent_end, lockend - 1);
++ 0, 0, 0, prev_extent_end, range_end - 1);
+ if (ret < 0)
+ goto out_unlock;
+- prev_extent_end = lockend;
++ prev_extent_end = range_end;
+ }
+
+ if (cache.cached && cache.offset + cache.len >= last_extent_end) {
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+
+ if (prev_extent_end < i_size) {
++ struct extent_state *cached_state = NULL;
+ u64 delalloc_start;
+ u64 delalloc_end;
++ u64 lockstart;
++ u64 lockend;
+ bool delalloc;
+
++ lockstart = round_down(prev_extent_end, sectorsize);
++ lockend = round_up(i_size, sectorsize);
++
++ /*
++ * See the comment in fiemap_process_hole as to why
++ * we're doing the locking here.
++ */
++ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ delalloc = btrfs_find_delalloc_in_range(inode,
+ prev_extent_end,
+ i_size - 1,
+ &delalloc_cached_state,
+ &delalloc_start,
+ &delalloc_end);
++ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ if (!delalloc)
+ cache.flags |= FIEMAP_EXTENT_LAST;
+ } else {
+@@ -3039,7 +3068,6 @@ check_eof_delalloc:
+ ret = emit_last_fiemap_cache(fieinfo, &cache);
+
+ out_unlock:
+- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ out:
+ free_extent_state(delalloc_cached_state);
--- /dev/null
+From e21a2f17566cbd64926fb8f16323972f7a064444 Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Sat, 17 Feb 2024 16:14:31 +0800
+Subject: cachefiles: fix memory leak in cachefiles_add_cache()
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit e21a2f17566cbd64926fb8f16323972f7a064444 upstream.
+
+The following memory leak was reported after unbinding /dev/cachefiles:
+
+==================================================================
+unreferenced object 0xffff9b674176e3c0 (size 192):
+ comm "cachefilesd2", pid 680, jiffies 4294881224
+ hex dump (first 32 bytes):
+ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace (crc ea38a44b):
+ [<ffffffff8eb8a1a5>] kmem_cache_alloc+0x2d5/0x370
+ [<ffffffff8e917f86>] prepare_creds+0x26/0x2e0
+ [<ffffffffc002eeef>] cachefiles_determine_cache_security+0x1f/0x120
+ [<ffffffffc00243ec>] cachefiles_add_cache+0x13c/0x3a0
+ [<ffffffffc0025216>] cachefiles_daemon_write+0x146/0x1c0
+ [<ffffffff8ebc4a3b>] vfs_write+0xcb/0x520
+ [<ffffffff8ebc5069>] ksys_write+0x69/0xf0
+ [<ffffffff8f6d4662>] do_syscall_64+0x72/0x140
+ [<ffffffff8f8000aa>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
+==================================================================
+
+Put the reference on cache_cred in cachefiles_daemon_unbind() to fix
+the problem, and also put cache_cred in the cachefiles_add_cache() error
+branch to avoid memory leaks.
+
+Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem")
+CC: stable@vger.kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com
+Acked-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cachefiles/cache.c | 2 ++
+ fs/cachefiles/daemon.c | 1 +
+ 2 files changed, 3 insertions(+)
+
+--- a/fs/cachefiles/cache.c
++++ b/fs/cachefiles/cache.c
+@@ -168,6 +168,8 @@ error_unsupported:
+ dput(root);
+ error_open_root:
+ cachefiles_end_secure(cache, saved_cred);
++ put_cred(cache->cache_cred);
++ cache->cache_cred = NULL;
+ error_getsec:
+ fscache_relinquish_cache(cache_cookie);
+ cache->cache = NULL;
+--- a/fs/cachefiles/daemon.c
++++ b/fs/cachefiles/daemon.c
+@@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(str
+ cachefiles_put_directory(cache->graveyard);
+ cachefiles_put_directory(cache->store);
+ mntput(cache->mnt);
++ put_cred(cache->cache_cred);
+
+ kfree(cache->rootdirname);
+ kfree(cache->secctx);
--- /dev/null
+From c0ec2a712daf133d9996a8a1b7ee2d4996080363 Mon Sep 17 00:00:00 2001
+From: zhenwei pi <pizhenwei@bytedance.com>
+Date: Tue, 30 Jan 2024 19:27:40 +0800
+Subject: crypto: virtio/akcipher - Fix stack overflow on memcpy
+
+From: zhenwei pi <pizhenwei@bytedance.com>
+
+commit c0ec2a712daf133d9996a8a1b7ee2d4996080363 upstream.
+
+sizeof(struct virtio_crypto_akcipher_session_para) is less than
+sizeof(struct virtio_crypto_op_ctrl_req::u), so copying the size of the
+union from the smaller stack variable leads to a stack overflow. Clang
+reports this issue with the following commands:
+make -j CC=clang-14 mrproper >/dev/null 2>&1
+make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1
+make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/
+ virtio_crypto_akcipher_algs.o
+
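+The bug pattern, reduced to a standalone sketch (hypothetical types,
+not the driver's):
+
+    #include <string.h>
+
+    struct para { int algo; };                     /* small source  */
+    union ctrl { struct para p; char pad[64]; };   /* larger union  */
+
+    void init_session(union ctrl *dst, const struct para *para)
+    {
+            /* BUG: reads sizeof(*dst) bytes from a smaller object. */
+            memcpy(dst, para, sizeof(*dst));
+            /* FIX: copy only the member the caller actually filled. */
+            memcpy(&dst->p, para, sizeof(*para));
+    }
+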
+Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm")
+Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/
+Cc: <stable@vger.kernel.org>
+Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
+Tested-by: Nathan Chancellor <nathan@kernel.org> # build
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
++++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+@@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher
+ }
+
+ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx,
+- struct virtio_crypto_ctrl_header *header, void *para,
++ struct virtio_crypto_ctrl_header *header,
++ struct virtio_crypto_akcipher_session_para *para,
+ const uint8_t *key, unsigned int keylen)
+ {
+ struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3];
+@@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_in
+
+ ctrl = &vc_ctrl_req->ctrl;
+ memcpy(&ctrl->header, header, sizeof(ctrl->header));
+- memcpy(&ctrl->u, para, sizeof(ctrl->u));
++ memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para));
+ input = &vc_ctrl_req->input;
+ input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR);
+
--- /dev/null
+From 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 16 Feb 2024 19:11:34 -0800
+Subject: cxl/acpi: Fix load failures due to single window creation failure
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 upstream.
+
+The expectation is that cxl_parse_cfmws() continues in the face of
+failure, as evidenced by code like:
+
+ cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
+ if (IS_ERR(cxlrd))
+ return 0;
+
+There are other error paths in that function which mistakenly follow
+idiomatic expectations and return an error when they should not. Most of
+those mistakes are innocuous checks that hardly ever fail in practice.
+However, a recent change succeeded in making the implementation more
+fragile by applying an idiomatic, but still wrong, "fix" [1]. In this
+failure case the kernel reports:
+
+ cxl root0: Failed to populate active decoder targets
+ cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200]
+
+...which is a real issue with that one window (to be fixed separately),
+but ends up failing the entirety of cxl_acpi_probe().
+
+Undo that recent breakage while also removing the confusion about
+ignoring errors. Update all exit paths to return an error per typical
+expectations and let an outer wrapper function handle dropping the
+error.
+
+Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1]
+Cc: <stable@vger.kernel.org>
+Cc: Breno Leitao <leitao@debian.org>
+Cc: Alison Schofield <alison.schofield@intel.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/acpi.c | 46 ++++++++++++++++++++++++++++------------------
+ 1 file changed, 28 insertions(+), 18 deletions(-)
+
+--- a/drivers/cxl/acpi.c
++++ b/drivers/cxl/acpi.c
+@@ -194,31 +194,27 @@ struct cxl_cfmws_context {
+ int id;
+ };
+
+-static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
+- const unsigned long end)
++static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
++ struct cxl_cfmws_context *ctx)
+ {
+ int target_map[CXL_DECODER_MAX_INTERLEAVE];
+- struct cxl_cfmws_context *ctx = arg;
+ struct cxl_port *root_port = ctx->root_port;
+ struct resource *cxl_res = ctx->cxl_res;
+ struct cxl_cxims_context cxims_ctx;
+ struct cxl_root_decoder *cxlrd;
+ struct device *dev = ctx->dev;
+- struct acpi_cedt_cfmws *cfmws;
+ cxl_calc_hb_fn cxl_calc_hb;
+ struct cxl_decoder *cxld;
+ unsigned int ways, i, ig;
+ struct resource *res;
+ int rc;
+
+- cfmws = (struct acpi_cedt_cfmws *) header;
+-
+ rc = cxl_acpi_cfmws_verify(dev, cfmws);
+ if (rc) {
+ dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
+ cfmws->base_hpa,
+ cfmws->base_hpa + cfmws->window_size - 1);
+- return 0;
++ return rc;
+ }
+
+ rc = eiw_to_ways(cfmws->interleave_ways, &ways);
+@@ -254,7 +250,7 @@ static int cxl_parse_cfmws(union acpi_su
+
+ cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
+ if (IS_ERR(cxlrd))
+- return 0;
++ return PTR_ERR(cxlrd);
+
+ cxld = &cxlrd->cxlsd.cxld;
+ cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
+@@ -295,16 +291,7 @@ err_xormap:
+ put_device(&cxld->dev);
+ else
+ rc = cxl_decoder_autoremove(dev, cxld);
+- if (rc) {
+- dev_err(dev, "Failed to add decode range: %pr", res);
+- return rc;
+- }
+- dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
+- dev_name(&cxld->dev),
+- phys_to_target_node(cxld->hpa_range.start),
+- cxld->hpa_range.start, cxld->hpa_range.end);
+-
+- return 0;
++ return rc;
+
+ err_insert:
+ kfree(res->name);
+@@ -313,6 +300,29 @@ err_name:
+ return -ENOMEM;
+ }
+
++static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
++ const unsigned long end)
++{
++ struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
++ struct cxl_cfmws_context *ctx = arg;
++ struct device *dev = ctx->dev;
++ int rc;
++
++ rc = __cxl_parse_cfmws(cfmws, ctx);
++ if (rc)
++ dev_err(dev,
++ "Failed to add decode range: [%#llx - %#llx] (%d)\n",
++ cfmws->base_hpa,
++ cfmws->base_hpa + cfmws->window_size - 1, rc);
++ else
++ dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
++ phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
++ cfmws->base_hpa + cfmws->window_size - 1);
++
++ /* never fail cxl_acpi load for a single window failure */
++ return 0;
++}
++
+ __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
+ struct device *dev)
+ {
--- /dev/null
+From 0cab687205986491302cd2e440ef1d253031c221 Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@amd.com>
+Date: Fri, 16 Feb 2024 17:01:13 +0100
+Subject: cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window
+
+From: Robert Richter <rrichter@amd.com>
+
+commit 0cab687205986491302cd2e440ef1d253031c221 upstream.
+
+The Linux CXL subsystem is built on the assumption that HPA == SPA.
+That is, the host physical address (HPA) the HDM decoder registers are
+programmed with are system physical addresses (SPA).
+
+During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1,
+8.1.3.8) are checked if the memory is enabled and the CXL range is in
+a HPA window that is described in a CFMWS structure of the CXL host
+bridge (cxl-3.1, 9.18.1.3).
+
+Now, if the HPA is not an SPA, the CXL range does not match a CFMWS
+window and the CXL memory range will then be disabled. The HDM decoder
+stops working, which causes system memory to be disabled and, further,
+a system hang during HDM decoder initialization, typically when a
+CXL-enabled kernel boots.
+
+Prevent a system hang and do not disable the HDM decoder if the
+decoder's CXL range is not found in a CFMWS window.
+
+Note that the change only fixes a hardware hang, but does not implement
+HPA/SPA translation. Support for this can be added in a follow-on
+patch series.
+
+Signed-off-by: Robert Richter <rrichter@amd.com>
+Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges")
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/core/pci.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/cxl/core/pci.c
++++ b/drivers/cxl/core/pci.c
+@@ -475,9 +475,9 @@ int cxl_hdm_decode_init(struct cxl_dev_s
+ allowed++;
+ }
+
+- if (!allowed) {
+- cxl_set_mem_enable(cxlds, 0);
+- info->mem_enabled = 0;
++ if (!allowed && info->mem_enabled) {
++ dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
++ return -ENXIO;
+ }
+
+ /*
--- /dev/null
+From 50c70240097ce41fe6bce6478b80478281e4d0f7 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 19 Feb 2024 21:30:10 +0100
+Subject: dm-crypt: don't modify the data when using authenticated encryption
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 50c70240097ce41fe6bce6478b80478281e4d0f7 upstream.
+
+It was said that authenticated encryption could produce an invalid tag
+when the data being encrypted is modified [1]. So, fix this problem by
+copying the data into the clone bio first and then encrypting it inside
+the clone bio.
+
+This may reduce performance, but it is needed to prevent the user from
+corrupting the device by writing data with O_DIRECT and modifying it at
+the same time.
+
+[1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-crypt.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -2117,6 +2117,12 @@ static void kcryptd_crypt_write_convert(
+ io->ctx.bio_out = clone;
+ io->ctx.iter_out = clone->bi_iter;
+
++ if (crypt_integrity_aead(cc)) {
++ bio_copy_data(clone, io->base_bio);
++ io->ctx.bio_in = clone;
++ io->ctx.iter_in = clone->bi_iter;
++ }
++
+ sector += bio_sectors(clone);
+
+ crypt_inc_pending(io);
--- /dev/null
+From 42e15d12070b4ff9af2b980f1b65774c2dab0507 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 19 Feb 2024 21:31:11 +0100
+Subject: dm-crypt: recheck the integrity tag after a failure
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 42e15d12070b4ff9af2b980f1b65774c2dab0507 upstream.
+
+If a userspace process reads (with O_DIRECT) multiple blocks into the
+same buffer, dm-crypt reports an authentication error [1]. The error is
+reported in a log and it may cause a RAID leg to be kicked out of the
+array.
+
+This commit fixes dm-crypt, so that if integrity verification fails, the
+data is read again into a kernel buffer (where userspace can't modify it)
+and the integrity tag is rechecked. If the recheck succeeds, the content
+of the kernel buffer is copied into the user buffer; if the recheck fails,
+an integrity error is reported.
+
+[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c
+
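+In the spirit of the referenced reproducer (a minimal sketch, not
+read2.c itself; the mapped device path is an assumption), the trigger
+is a single O_DIRECT readv() whose iovecs all point at the same buffer:
+
+    #define _GNU_SOURCE
+    #include <fcntl.h>
+    #include <stdlib.h>
+    #include <sys/uio.h>
+
+    int main(void)
+    {
+            struct iovec iov[16];
+            void *buf;
+            int fd, i;
+
+            if (posix_memalign(&buf, 4096, 4096))
+                    return 1;
+            /* Every segment targets the same page, so the DMA for a
+             * later block rewrites data that an earlier block's tag
+             * verification may still be reading. */
+            for (i = 0; i < 16; i++) {
+                    iov[i].iov_base = buf;
+                    iov[i].iov_len = 4096;
+            }
+            fd = open("/dev/mapper/crypt", O_RDONLY | O_DIRECT);
+            if (fd < 0)
+                    return 1;
+            return readv(fd, iov, 16) < 0;
+    }
+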
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 73 insertions(+), 16 deletions(-)
+
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -62,6 +62,8 @@ struct convert_context {
+ struct skcipher_request *req;
+ struct aead_request *req_aead;
+ } r;
++ bool aead_recheck;
++ bool aead_failed;
+
+ };
+
+@@ -82,6 +84,8 @@ struct dm_crypt_io {
+ blk_status_t error;
+ sector_t sector;
+
++ struct bvec_iter saved_bi_iter;
++
+ struct rb_node rb_node;
+ } CRYPTO_MINALIGN_ATTR;
+
+@@ -1376,10 +1380,13 @@ static int crypt_convert_block_aead(stru
+ if (r == -EBADMSG) {
+ sector_t s = le64_to_cpu(*sector);
+
+- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+- ctx->bio_in->bi_bdev, s);
+- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+- ctx->bio_in, s, 0);
++ ctx->aead_failed = true;
++ if (ctx->aead_recheck) {
++ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
++ ctx->bio_in->bi_bdev, s);
++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
++ ctx->bio_in, s, 0);
++ }
+ }
+
+ if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
+@@ -1763,6 +1770,8 @@ static void crypt_io_init(struct dm_cryp
+ io->base_bio = bio;
+ io->sector = sector;
+ io->error = 0;
++ io->ctx.aead_recheck = false;
++ io->ctx.aead_failed = false;
+ io->ctx.r.req = NULL;
+ io->integrity_metadata = NULL;
+ io->integrity_metadata_from_pool = false;
+@@ -1774,6 +1783,8 @@ static void crypt_inc_pending(struct dm_
+ atomic_inc(&io->io_pending);
+ }
+
++static void kcryptd_queue_read(struct dm_crypt_io *io);
++
+ /*
+ * One of the bios was finished. Check for completion of
+ * the whole request and correctly clean up the buffer.
+@@ -1787,6 +1798,15 @@ static void crypt_dec_pending(struct dm_
+ if (!atomic_dec_and_test(&io->io_pending))
+ return;
+
++ if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) &&
++ cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) {
++ io->ctx.aead_recheck = true;
++ io->ctx.aead_failed = false;
++ io->error = 0;
++ kcryptd_queue_read(io);
++ return;
++ }
++
+ if (io->ctx.r.req)
+ crypt_free_req(cc, io->ctx.r.req, base_bio);
+
+@@ -1822,15 +1842,19 @@ static void crypt_endio(struct bio *clon
+ struct dm_crypt_io *io = clone->bi_private;
+ struct crypt_config *cc = io->cc;
+ unsigned int rw = bio_data_dir(clone);
+- blk_status_t error;
++ blk_status_t error = clone->bi_status;
++
++ if (io->ctx.aead_recheck && !error) {
++ kcryptd_queue_crypt(io);
++ return;
++ }
+
+ /*
+ * free the processed pages
+ */
+- if (rw == WRITE)
++ if (rw == WRITE || io->ctx.aead_recheck)
+ crypt_free_buffer_pages(cc, clone);
+
+- error = clone->bi_status;
+ bio_put(clone);
+
+ if (rw == READ && !error) {
+@@ -1851,6 +1875,22 @@ static int kcryptd_io_read(struct dm_cry
+ struct crypt_config *cc = io->cc;
+ struct bio *clone;
+
++ if (io->ctx.aead_recheck) {
++ if (!(gfp & __GFP_DIRECT_RECLAIM))
++ return 1;
++ crypt_inc_pending(io);
++ clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
++ if (unlikely(!clone)) {
++ crypt_dec_pending(io);
++ return 1;
++ }
++ clone->bi_iter.bi_sector = cc->start + io->sector;
++ crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
++ io->saved_bi_iter = clone->bi_iter;
++ dm_submit_bio_remap(io->base_bio, clone);
++ return 0;
++ }
++
+ /*
+ * We need the original biovec array in order to decrypt the whole bio
+ * data *afterwards* -- thanks to immutable biovecs we don't need to
+@@ -2113,6 +2153,14 @@ dec:
+
+ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
+ {
++ if (io->ctx.aead_recheck) {
++ if (!io->error) {
++ io->ctx.bio_in->bi_iter = io->saved_bi_iter;
++ bio_copy_data(io->base_bio, io->ctx.bio_in);
++ }
++ crypt_free_buffer_pages(io->cc, io->ctx.bio_in);
++ bio_put(io->ctx.bio_in);
++ }
+ crypt_dec_pending(io);
+ }
+
+@@ -2142,11 +2190,17 @@ static void kcryptd_crypt_read_convert(s
+
+ crypt_inc_pending(io);
+
+- crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
+- io->sector);
++ if (io->ctx.aead_recheck) {
++ io->ctx.cc_sector = io->sector + cc->iv_offset;
++ r = crypt_convert(cc, &io->ctx,
++ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
++ } else {
++ crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
++ io->sector);
+
+- r = crypt_convert(cc, &io->ctx,
+- test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
++ r = crypt_convert(cc, &io->ctx,
++ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
++ }
+ /*
+ * Crypto API backlogged the request, because its queue was full
+ * and we're in softirq context, so continue from a workqueue
+@@ -2188,10 +2242,13 @@ static void kcryptd_async_done(void *dat
+ if (error == -EBADMSG) {
+ sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
+
+- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+- ctx->bio_in->bi_bdev, s);
+- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+- ctx->bio_in, s, 0);
++ ctx->aead_failed = true;
++ if (ctx->aead_recheck) {
++ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
++ ctx->bio_in->bi_bdev, s);
++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
++ ctx->bio_in, s, 0);
++ }
+ io->error = BLK_STS_PROTECTION;
+ } else if (error < 0)
+ io->error = BLK_STS_IOERR;
+@@ -3117,7 +3174,7 @@ static int crypt_ctr_optional(struct dm_
+ sval = strchr(opt_string + strlen("integrity:"), ':') + 1;
+ if (!strcasecmp(sval, "aead")) {
+ set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags);
+- } else if (strcasecmp(sval, "none")) {
++ } else if (strcasecmp(sval, "none")) {
+ ti->error = "Unknown integrity profile";
+ return -EINVAL;
+ }
--- /dev/null
+From c88f5e553fe38b2ffc4c33d08654e5281b297677 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 19 Feb 2024 21:27:39 +0100
+Subject: dm-integrity: recheck the integrity tag after a failure
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit c88f5e553fe38b2ffc4c33d08654e5281b297677 upstream.
+
+If a userspace process reads (with O_DIRECT) multiple blocks into the same
+buffer, dm-integrity reports an error [1]. The error is reported in a log
+and it may cause a RAID leg to be kicked out of the array.
+
+This commit fixes dm-integrity, so that if integrity verification fails,
+the data is read again into a kernel buffer (where userspace can't modify
+it) and the integrity tag is rechecked. If the recheck succeeds, the
+content of the kernel buffer is copied into the user buffer; if the
+recheck fails, an integrity error is reported.
+
+[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 84 insertions(+), 9 deletions(-)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -278,6 +278,8 @@ struct dm_integrity_c {
+
+ atomic64_t number_of_mismatches;
+
++ mempool_t recheck_pool;
++
+ struct notifier_block reboot_notifier;
+ };
+
+@@ -1699,6 +1701,79 @@ failed:
+ get_random_bytes(result, ic->tag_size);
+ }
+
++static void integrity_recheck(struct dm_integrity_io *dio)
++{
++ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
++ struct dm_integrity_c *ic = dio->ic;
++ struct bvec_iter iter;
++ struct bio_vec bv;
++ sector_t sector, logical_sector, area, offset;
++ char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
++ struct page *page;
++ void *buffer;
++
++ get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
++ dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
++ &dio->metadata_offset);
++ sector = get_data_sector(ic, area, offset);
++ logical_sector = dio->range.logical_sector;
++
++ page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);
++ buffer = page_to_virt(page);
++
++ __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
++ unsigned pos = 0;
++
++ do {
++ char *mem;
++ int r;
++ struct dm_io_request io_req;
++ struct dm_io_region io_loc;
++ io_req.bi_opf = REQ_OP_READ;
++ io_req.mem.type = DM_IO_KMEM;
++ io_req.mem.ptr.addr = buffer;
++ io_req.notify.fn = NULL;
++ io_req.client = ic->io;
++ io_loc.bdev = ic->dev->bdev;
++ io_loc.sector = sector;
++ io_loc.count = ic->sectors_per_block;
++
++ r = dm_io(&io_req, 1, &io_loc, NULL);
++ if (unlikely(r)) {
++ dio->bi_status = errno_to_blk_status(r);
++ goto free_ret;
++ }
++
++ integrity_sector_checksum(ic, logical_sector, buffer,
++ checksum_onstack);
++ r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block,
++ &dio->metadata_offset, ic->tag_size, TAG_CMP);
++ if (r) {
++ if (r > 0) {
++ DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
++ bio->bi_bdev, logical_sector);
++ atomic64_inc(&ic->number_of_mismatches);
++ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
++ bio, logical_sector, 0);
++ r = -EILSEQ;
++ }
++ dio->bi_status = errno_to_blk_status(r);
++ goto free_ret;
++ }
++
++ mem = bvec_kmap_local(&bv);
++ memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT);
++ kunmap_local(mem);
++
++ pos += ic->sectors_per_block << SECTOR_SHIFT;
++ sector += ic->sectors_per_block;
++ logical_sector += ic->sectors_per_block;
++ } while (pos < bv.bv_len);
++ }
++free_ret:
++ mempool_free(page, &ic->recheck_pool);
++}
++
+ static void integrity_metadata(struct work_struct *w)
+ {
+ struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
+@@ -1786,15 +1861,8 @@ again:
+ checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
+ if (unlikely(r)) {
+ if (r > 0) {
+- sector_t s;
+-
+- s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
+- DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+- bio->bi_bdev, s);
+- r = -EILSEQ;
+- atomic64_inc(&ic->number_of_mismatches);
+- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
+- bio, s, 0);
++ integrity_recheck(dio);
++ goto skip_io;
+ }
+ if (likely(checksums != checksums_onstack))
+ kfree(checksums);
+@@ -4271,6 +4339,12 @@ static int dm_integrity_ctr(struct dm_ta
+ goto bad;
+ }
+
++ r = mempool_init_page_pool(&ic->recheck_pool, 1, 0);
++ if (r) {
++ ti->error = "Cannot allocate mempool";
++ goto bad;
++ }
++
+ ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
+ WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
+ if (!ic->metadata_wq) {
+@@ -4619,6 +4693,7 @@ static void dm_integrity_dtr(struct dm_t
+ kvfree(ic->bbs);
+ if (ic->bufio)
+ dm_bufio_client_destroy(ic->bufio);
++ mempool_exit(&ic->recheck_pool);
+ mempool_exit(&ic->journal_io_mempool);
+ if (ic->io)
+ dm_io_client_destroy(ic->io);
--- /dev/null
+From 9177f3c0dea6143d05cac1bbd28668fd0e216d11 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 19 Feb 2024 21:28:09 +0100
+Subject: dm-verity: recheck the hash after a failure
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 9177f3c0dea6143d05cac1bbd28668fd0e216d11 upstream.
+
+If a userspace process reads (with O_DIRECT) multiple blocks into the same
+buffer, dm-verity reports an error [1].
+
+This commit fixes dm-verity, so that if hash verification fails, the data
+is read again into a kernel buffer (where userspace can't modify it) and
+the hash is rechecked. If the recheck succeeds, the content of the kernel
+buffer is copied into the user buffer; if the recheck fails, an error is
+reported.
+
+[1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-verity-target.c | 86 +++++++++++++++++++++++++++++++++++++++---
+ drivers/md/dm-verity.h | 6 ++
+ 2 files changed, 86 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-verity-target.c
++++ b/drivers/md/dm-verity-target.c
+@@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity
+ return 0;
+ }
+
++static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io,
++ u8 *data, size_t len)
++{
++ memcpy(data, io->recheck_buffer, len);
++ io->recheck_buffer += len;
++
++ return 0;
++}
++
++static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
++ struct bvec_iter start, sector_t cur_block)
++{
++ struct page *page;
++ void *buffer;
++ int r;
++ struct dm_io_request io_req;
++ struct dm_io_region io_loc;
++
++ page = mempool_alloc(&v->recheck_pool, GFP_NOIO);
++ buffer = page_to_virt(page);
++
++ io_req.bi_opf = REQ_OP_READ;
++ io_req.mem.type = DM_IO_KMEM;
++ io_req.mem.ptr.addr = buffer;
++ io_req.notify.fn = NULL;
++ io_req.client = v->io;
++ io_loc.bdev = v->data_dev->bdev;
++ io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT);
++ io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT);
++ r = dm_io(&io_req, 1, &io_loc, NULL);
++ if (unlikely(r))
++ goto free_ret;
++
++ r = verity_hash(v, verity_io_hash_req(v, io), buffer,
++ 1 << v->data_dev_block_bits,
++ verity_io_real_digest(v, io), true);
++ if (unlikely(r))
++ goto free_ret;
++
++ if (memcmp(verity_io_real_digest(v, io),
++ verity_io_want_digest(v, io), v->digest_size)) {
++ r = -EIO;
++ goto free_ret;
++ }
++
++ io->recheck_buffer = buffer;
++ r = verity_for_bv_block(v, io, &start, verity_recheck_copy);
++ if (unlikely(r))
++ goto free_ret;
++
++ r = 0;
++free_ret:
++ mempool_free(page, &v->recheck_pool);
++
++ return r;
++}
++
+ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+ {
+@@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_ve
+ {
+ bool is_zero;
+ struct dm_verity *v = io->v;
+-#if defined(CONFIG_DM_VERITY_FEC)
+ struct bvec_iter start;
+-#endif
+ struct bvec_iter iter_copy;
+ struct bvec_iter *iter;
+ struct crypto_wait wait;
+@@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_ve
+ if (unlikely(r < 0))
+ return r;
+
+-#if defined(CONFIG_DM_VERITY_FEC)
+- if (verity_fec_is_enabled(v))
+- start = *iter;
+-#endif
++ start = *iter;
+ r = verity_for_io_block(v, io, iter, &wait);
+ if (unlikely(r < 0))
+ return r;
+@@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_ve
+ * tasklet since it may sleep, so fallback to work-queue.
+ */
+ return -EAGAIN;
++ } else if (verity_recheck(v, io, start, cur_block) == 0) {
++ if (v->validated_blocks)
++ set_bit(cur_block, v->validated_blocks);
++ continue;
+ #if defined(CONFIG_DM_VERITY_FEC)
+ } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ cur_block, NULL, &start) == 0) {
+@@ -941,6 +997,10 @@ static void verity_dtr(struct dm_target
+ if (v->verify_wq)
+ destroy_workqueue(v->verify_wq);
+
++ mempool_exit(&v->recheck_pool);
++ if (v->io)
++ dm_io_client_destroy(v->io);
++
+ if (v->bufio)
+ dm_bufio_client_destroy(v->bufio);
+
+@@ -1379,6 +1439,20 @@ static int verity_ctr(struct dm_target *
+ }
+ v->hash_blocks = hash_position;
+
++ r = mempool_init_page_pool(&v->recheck_pool, 1, 0);
++ if (unlikely(r)) {
++ ti->error = "Cannot allocate mempool";
++ goto bad;
++ }
++
++ v->io = dm_io_client_create();
++ if (IS_ERR(v->io)) {
++ r = PTR_ERR(v->io);
++ v->io = NULL;
++ ti->error = "Cannot allocate dm io";
++ goto bad;
++ }
++
+ v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
+ 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
+ dm_bufio_alloc_callback, NULL,
+--- a/drivers/md/dm-verity.h
++++ b/drivers/md/dm-verity.h
+@@ -11,6 +11,7 @@
+ #ifndef DM_VERITY_H
+ #define DM_VERITY_H
+
++#include <linux/dm-io.h>
+ #include <linux/dm-bufio.h>
+ #include <linux/device-mapper.h>
+ #include <linux/interrupt.h>
+@@ -68,6 +69,9 @@ struct dm_verity {
+ unsigned long *validated_blocks; /* bitset blocks validated */
+
+ char *signature_key_desc; /* signature keyring reference */
++
++ struct dm_io_client *io;
++ mempool_t recheck_pool;
+ };
+
+ struct dm_verity_io {
+@@ -84,6 +88,8 @@ struct dm_verity_io {
+
+ struct work_struct work;
+
++ char *recheck_buffer;
++
+ /*
+ * Three variably-size fields follow this struct:
+ *
--- /dev/null
+From 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c Mon Sep 17 00:00:00 2001
+From: Jonathan Corbet <corbet@lwn.net>
+Date: Mon, 19 Feb 2024 09:05:38 -0700
+Subject: docs: Instruct LaTeX to cope with deeper nesting
+
+From: Jonathan Corbet <corbet@lwn.net>
+
+commit 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c upstream.
+
+The addition of the XFS online fsck documentation starting with
+commit a8f6c2e54ddc ("xfs: document the motivation for online fsck design")
+added a deeper level of nesting than LaTeX is prepared to deal with. That
+caused a pdfdocs build failure with the helpful "Too deeply nested" error
+message buried deeply in Documentation/output/filesystems.log.
+
+Increase the "maxlistdepth" parameter to instruct LaTeX that it needs to
+deal with the deeper nesting whether it wants to or not.
+
+Suggested-by: Akira Yokosawa <akiyks@gmail.com>
+Tested-by: Akira Yokosawa <akiyks@gmail.com>
+Cc: stable@vger.kernel.org # v6.4+
+Link: https://lore.kernel.org/linux-doc/67f6ac60-7957-4b92-9d72-a08fbad0e028@gmail.com/
+Signed-off-by: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/conf.py | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/Documentation/conf.py
++++ b/Documentation/conf.py
+@@ -383,6 +383,12 @@ latex_elements = {
+ verbatimhintsturnover=false,
+ ''',
+
++ #
++ # Some of our authors are fond of deep nesting; tell latex to
++ # cope.
++ #
++ 'maxlistdepth': '10',
++
+ # For CJK One-half spacing, need to be in front of hyperref
+ 'extrapackages': r'\usepackage{setspace}',
+
--- /dev/null
+From 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb Mon Sep 17 00:00:00 2001
+From: Wayne Lin <wayne.lin@amd.com>
+Date: Fri, 2 Feb 2024 17:34:11 +0800
+Subject: drm/amd/display: adjust few initialization order in dm
+
+From: Wayne Lin <wayne.lin@amd.com>
+
+commit 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb upstream.
+
+[Why]
+We observe the error message "Can't retrieve aconnector in
+hpd_rx_irq_offload_work" when booting up with an MST TBT4 dock
+connected. After analyzing, a few parts need to be adjusted:
+
+1. hpd_rx_offload_wq[].aconnector is not initialized before the dmub
+outbox hpd_irq handler gets registered, which causes the error message.
+
+2. registration of the hpd and hpd_rx_irq events for USB4 DP tunneling
+is not aligned with the legacy interface sequence
+
+[How]
+Put the DMUB_NOTIFICATION_HPD and DMUB_NOTIFICATION_HPD_IRQ handler
+registration into register_hpd_handlers() to align with the other
+interfaces and get hpd_rx_offload_wq[].aconnector initialized before
+that.
+
+Leave DMUB_NOTIFICATION_AUX_REPLY registered as it was, since we need
+that while calling dc_link_detect(). USB4 connection status will be
+proactively detected by dc_link_detect_connection_type() in
+amdgpu_dm_initialize_drm_device().
+
+Cc: Stable <stable@vger.kernel.org>
+Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Wayne Lin <wayne.lin@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 ++++++++++------------
+ 1 file changed, 18 insertions(+), 19 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -1816,21 +1816,12 @@ static int amdgpu_dm_init(struct amdgpu_
+ DRM_ERROR("amdgpu: fail to register dmub aux callback");
+ goto error;
+ }
+- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
+- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+- goto error;
+- }
+- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
+- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+- goto error;
+- }
+- }
+-
+- /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+- * It is expected that DMUB will resend any pending notifications at this point, for
+- * example HPD from DPIA.
+- */
+- if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
++ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
++ * It is expected that DMUB will resend any pending notifications at this point. Note
++ * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
++ * align legacy interface initialization sequence. Connection status will be proactivly
++ * detected once in the amdgpu_dm_initialize_drm_device.
++ */
+ dc_enable_dmub_outbox(adev->dm.dc);
+
+ /* DPIA trace goes to dmesg logs only if outbox is enabled */
+@@ -3484,6 +3475,14 @@ static void register_hpd_handlers(struct
+ int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
+ int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+
++ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true))
++ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
++
++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true))
++ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
++ }
++
+ list_for_each_entry(connector,
+ &dev->mode_config.connector_list, head) {
+
+@@ -3509,10 +3508,6 @@ static void register_hpd_handlers(struct
+ handle_hpd_rx_irq,
+ (void *) aconnector);
+ }
+-
+- if (adev->dm.hpd_rx_offload_wq)
+- adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
+- aconnector;
+ }
+ }
+
+@@ -4481,6 +4476,10 @@ static int amdgpu_dm_initialize_drm_devi
+
+ link = dc_get_link_at_index(dm->dc, i);
+
++ if (dm->hpd_rx_offload_wq)
++ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
++ aconnector;
++
+ if (!dc_link_detect_connection_type(link, &new_connection_type))
+ DRM_ERROR("KMS: Failed to detect connector\n");
+
--- /dev/null
+From bd915ae73a2d78559b376ad2caf5e4ef51de2455 Mon Sep 17 00:00:00 2001
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Date: Thu, 15 Feb 2024 23:04:42 +0100
+Subject: drm/meson: Don't remove bridges which are created by other drivers
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+commit bd915ae73a2d78559b376ad2caf5e4ef51de2455 upstream.
+
+Stop calling drm_bridge_remove() for bridges allocated/managed by other
+drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}.
+drm_bridge_remove() unregisters the bridge so it cannot be used
+anymore. Doing so for bridges we don't own can lead to the video
+pipeline not being able to come up after -EPROBE_DEFER of the VPU
+because we're unregistering a bridge that's managed by another driver.
+The other driver doesn't know that we have unregistered its bridge,
+and on a subsequent .probe() we're not able to find those bridges anymore
+(since nobody re-creates them).
+
+This fixes probe errors on Meson8b boards with the CVBS outputs enabled.
+
+Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time")
+Fixes: 42dcf15f901c ("drm/meson: add DSI encoder")
+Cc: <stable@vger.kernel.org>
+Reported-by: Steve Morvai <stevemorvai@hotmail.com>
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Tested-by: Steve Morvai <stevemorvai@hotmail.com>
+Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 -
+ drivers/gpu/drm/meson/meson_encoder_dsi.c | 1 -
+ drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 -
+ 3 files changed, 3 deletions(-)
+
+--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
++++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct me
+ if (priv->encoders[MESON_ENC_CVBS]) {
+ meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
+ drm_bridge_remove(&meson_encoder_cvbs->bridge);
+- drm_bridge_remove(meson_encoder_cvbs->next_bridge);
+ }
+ }
+--- a/drivers/gpu/drm/meson/meson_encoder_dsi.c
++++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c
+@@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct mes
+ if (priv->encoders[MESON_ENC_DSI]) {
+ meson_encoder_dsi = priv->encoders[MESON_ENC_DSI];
+ drm_bridge_remove(&meson_encoder_dsi->bridge);
+- drm_bridge_remove(meson_encoder_dsi->next_bridge);
+ }
+ }
+--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+@@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct me
+ if (priv->encoders[MESON_ENC_HDMI]) {
+ meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
+ drm_bridge_remove(&meson_encoder_hdmi->bridge);
+- drm_bridge_remove(meson_encoder_hdmi->next_bridge);
+ }
+ }
--- /dev/null
+From 40510a941d27d405a82dc3320823d875f94625df Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
+Date: Wed, 21 Feb 2024 08:33:24 +0100
+Subject: drm/ttm: Fix an invalid freeing on already freed page in error path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+
+commit 40510a941d27d405a82dc3320823d875f94625df upstream.
+
+If a caching mode change fails due to, for example, OOM, we free the
+allocated pages in a two-step process: first the pages for which the
+caching change has already succeeded, and secondly the pages for which
+the caching change did not succeed.
+
+However, the second step was incorrectly freeing the pages already
+freed in the first step.
+
+Fix.
+
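+The bug class, reduced to a standalone sketch (plain C with a
+hypothetical helper, not the TTM code): when freeing the sub-range
+[start, end) of an array, the walking pointer must begin at
+&ptrs[start], not at ptrs, or entries released by an earlier pass get
+freed again.
+
+    #include <stdlib.h>
+
+    static void free_range(void **ptrs, size_t start, size_t end)
+    {
+            void **p = &ptrs[start];  /* BUG variant: void **p = ptrs; */
+            size_t i;
+
+            for (i = start; i < end; i++)
+                    free(*p++);
+    }
+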
+Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path")
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Christian Koenig <christian.koenig@amd.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v6.4+
+Reviewed-by: Matthew Auld <matthew.auld@intel.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_pool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_pool.c
++++ b/drivers/gpu/drm/ttm/ttm_pool.c
+@@ -384,7 +384,7 @@ static void ttm_pool_free_range(struct t
+ enum ttm_caching caching,
+ pgoff_t start_page, pgoff_t end_page)
+ {
+- struct page **pages = tt->pages;
++ struct page **pages = &tt->pages[start_page];
+ unsigned int order;
+ pgoff_t i, nr;
+
--- /dev/null
+From b820de741ae48ccf50dd95e297889c286ff4f760 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Thu, 15 Feb 2024 12:47:38 -0800
+Subject: fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit b820de741ae48ccf50dd95e297889c286ff4f760 upstream.
+
+If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the
+following kernel warning appears:
+
+WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8
+Call trace:
+ kiocb_set_cancel_fn+0x9c/0xa8
+ ffs_epfile_read_iter+0x144/0x1d0
+ io_read+0x19c/0x498
+ io_issue_sqe+0x118/0x27c
+ io_submit_sqes+0x25c/0x5fc
+ __arm64_sys_io_uring_enter+0x104/0xab0
+ invoke_syscall+0x58/0x11c
+ el0_svc_common+0xb4/0xf4
+ do_el0_svc+0x2c/0xb0
+ el0_svc+0x2c/0xa4
+ el0t_64_sync_handler+0x68/0xb4
+ el0t_64_sync+0x1a4/0x1a8
+
+Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is
+submitted by libaio.
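+
+For context, kiocb_set_cancel_fn() assumes its kiocb is embedded in an
+aio_kiocb (a sketch of that assumption; the function then continues as
+in the hunk below):
+
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
+	/* Only valid for kiocbs allocated by fs/aio.c. An io_uring kiocb
+	 * lives in a different containing object, so req would point at
+	 * unrelated memory; the IOCB_AIO_RW test filters those out. */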
+
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Avi Kivity <avi@scylladb.com>
+Cc: Sandeep Dhavale <dhavale@google.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: stable@vger.kernel.org
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c | 9 ++++++++-
+ include/linux/fs.h | 2 ++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -594,6 +594,13 @@ void kiocb_set_cancel_fn(struct kiocb *i
+ struct kioctx *ctx = req->ki_ctx;
+ unsigned long flags;
+
++ /*
++ * kiocb didn't come from aio or is neither a read nor a write, hence
++ * ignore it.
++ */
++ if (!(iocb->ki_flags & IOCB_AIO_RW))
++ return;
++
+ if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+ return;
+
+@@ -1463,7 +1470,7 @@ static int aio_prep_rw(struct kiocb *req
+ req->ki_complete = aio_complete_rw;
+ req->private = NULL;
+ req->ki_pos = iocb->aio_offset;
+- req->ki_flags = req->ki_filp->f_iocb_flags;
++ req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW;
+ if (iocb->aio_flags & IOCB_FLAG_RESFD)
+ req->ki_flags |= IOCB_EVENTFD;
+ if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -352,6 +352,8 @@ enum rw_hint {
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+ #define IOCB_DIO_CALLER_COMP (1 << 22)
++/* kiocb is a read or write operation submitted by fs/aio.c. */
++#define IOCB_AIO_RW (1 << 23)
+
+ /* for use in trace events */
+ #define TRACE_IOCB_STRINGS \
--- /dev/null
+From 136cfaca22567a03bbb3bf53a43d8cb5748b80ec Mon Sep 17 00:00:00 2001
+From: Vasiliy Kovalev <kovalev@altlinux.org>
+Date: Wed, 14 Feb 2024 19:27:33 +0300
+Subject: gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()
+
+From: Vasiliy Kovalev <kovalev@altlinux.org>
+
+commit 136cfaca22567a03bbb3bf53a43d8cb5748b80ec upstream.
+
+The gtp_net_ops pernet operations structure for the subsystem must be
+registered before registering the generic netlink family.
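+
+A sketch of why the order matters: a netlink dump can run as soon as
+the genl family is registered, and it dereferences pernet state
+(condensed from gtp_genl_dump_pdp(); the real body walks the pernet
+device list):
+
+	static int gtp_genl_dump_pdp(struct sk_buff *skb,
+				     struct netlink_callback *cb)
+	{
+		struct net *net = sock_net(skb->sk);
+		/* Only valid if register_pernet_subsys(&gtp_net_ops)
+		 * already ran; otherwise this reads unset pernet data. */
+		struct gtp_net *gn = net_generic(net, gtp_net_id);
+
+		/* ... iterate gn->gtp_dev_list and fill the skb ... */
+		return 0;
+	}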
+
+Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug:
+
+general protection fault, probably for non-canonical address
+0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI
+KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
+CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014
+RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp]
+Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86
+ df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80>
+ 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74
+RSP: 0018:ffff888014107220 EFLAGS: 00010202
+RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000
+FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? show_regs+0x90/0xa0
+ ? die_addr+0x50/0xd0
+ ? exc_general_protection+0x148/0x220
+ ? asm_exc_general_protection+0x22/0x30
+ ? gtp_genl_dump_pdp+0x1be/0x800 [gtp]
+ ? __alloc_skb+0x1dd/0x350
+ ? __pfx___alloc_skb+0x10/0x10
+ genl_dumpit+0x11d/0x230
+ netlink_dump+0x5b9/0xce0
+ ? lockdep_hardirqs_on_prepare+0x253/0x430
+ ? __pfx_netlink_dump+0x10/0x10
+ ? kasan_save_track+0x10/0x40
+ ? __kasan_kmalloc+0x9b/0xa0
+ ? genl_start+0x675/0x970
+ __netlink_dump_start+0x6fc/0x9f0
+ genl_family_rcv_msg_dumpit+0x1bb/0x2d0
+ ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10
+ ? genl_op_from_small+0x2a/0x440
+ ? cap_capable+0x1d0/0x240
+ ? __pfx_genl_start+0x10/0x10
+ ? __pfx_genl_dumpit+0x10/0x10
+ ? __pfx_genl_done+0x10/0x10
+ ? security_capable+0x9d/0xe0
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1907,20 +1907,20 @@ static int __init gtp_init(void)
+ if (err < 0)
+ goto error_out;
+
+- err = genl_register_family(&gtp_genl_family);
++ err = register_pernet_subsys(&gtp_net_ops);
+ if (err < 0)
+ goto unreg_rtnl_link;
+
+- err = register_pernet_subsys(&gtp_net_ops);
++ err = genl_register_family(&gtp_genl_family);
+ if (err < 0)
+- goto unreg_genl_family;
++ goto unreg_pernet_subsys;
+
+ pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
+ sizeof(struct pdp_ctx));
+ return 0;
+
+-unreg_genl_family:
+- genl_unregister_family(&gtp_genl_family);
++unreg_pernet_subsys:
++ unregister_pernet_subsys(&gtp_net_ops);
+ unreg_rtnl_link:
+ rtnl_link_unregister(&gtp_link_ops);
+ error_out:
--- /dev/null
+From 8d3a7dfb801d157ac423261d7cd62c33e95375f8 Mon Sep 17 00:00:00 2001
+From: Oliver Upton <oliver.upton@linux.dev>
+Date: Wed, 21 Feb 2024 09:27:31 +0000
+Subject: KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table()
+
+From: Oliver Upton <oliver.upton@linux.dev>
+
+commit 8d3a7dfb801d157ac423261d7cd62c33e95375f8 upstream.
+
+vgic_get_irq() may not return a valid descriptor if there is no ITS that
+holds a valid translation for the specified INTID. If that is the case,
+it is safe to silently ignore it and continue processing the LPI pending
+table.
+
+Cc: stable@vger.kernel.org
+Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table")
+Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-its.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kvm/vgic/vgic-its.c
++++ b/arch/arm64/kvm/vgic/vgic-its.c
+@@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(st
+ }
+
+ irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
++ if (!irq)
++ continue;
++
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ irq->pending_latch = pendmask & (1U << bit_nr);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
--- /dev/null
+From 85a71ee9a0700f6c18862ef3b0011ed9dad99aca Mon Sep 17 00:00:00 2001
+From: Oliver Upton <oliver.upton@linux.dev>
+Date: Wed, 21 Feb 2024 09:27:32 +0000
+Subject: KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler
+
+From: Oliver Upton <oliver.upton@linux.dev>
+
+commit 85a71ee9a0700f6c18862ef3b0011ed9dad99aca upstream.
+
+It is possible that an LPI mapped in a different ITS gets unmapped while
+handling the MOVALL command. If that is the case, there is no state that
+can be migrated to the destination. Silently ignore it and continue
+migrating other LPIs.
+
+Cc: stable@vger.kernel.org
+Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE")
+Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-its.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm64/kvm/vgic/vgic-its.c
++++ b/arch/arm64/kvm/vgic/vgic-its.c
+@@ -1427,6 +1427,8 @@ static int vgic_its_cmd_handle_movall(st
+
+ for (i = 0; i < irq_count; i++) {
+ irq = vgic_get_irq(kvm, NULL, intids[i]);
++ if (!irq)
++ continue;
+
+ update_affinity(irq, vcpu2);
+
--- /dev/null
+From 1eb1e984379e2da04361763f66eec90dd75cf63e Mon Sep 17 00:00:00 2001
+From: Guenter Roeck <linux@roeck-us.net>
+Date: Thu, 8 Feb 2024 07:30:10 -0800
+Subject: lib/Kconfig.debug: TEST_IOV_ITER depends on MMU
+
+From: Guenter Roeck <linux@roeck-us.net>
+
+commit 1eb1e984379e2da04361763f66eec90dd75cf63e upstream.
+
+Trying to run the iov_iter unit test on a nommu system such as the qemu
+kc705-nommu emulation results in a crash.
+
+ KTAP version 1
+ # Subtest: iov_iter
+ # module: kunit_iov_iter
+ 1..9
+BUG: failure at mm/nommu.c:318/vmap()!
+Kernel panic - not syncing: BUG!
+
+The test calls vmap() directly, but vmap() is not supported on nommu
+systems, causing the crash. TEST_IOV_ITER therefore needs to depend on
+MMU.
+
+Link: https://lkml.kernel.org/r/20240208153010.1439753-1-linux@roeck-us.net
+Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator")
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Cc: David Howells <dhowells@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/Kconfig.debug | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -2225,6 +2225,7 @@ config TEST_DIV64
+ config TEST_IOV_ITER
+ tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
+ depends on KUNIT
++ depends on MMU
+ default KUNIT_ALL_TESTS
+ help
+ Enable this to turn on testing of the operation of the I/O iterator
--- /dev/null
+From 9fa304b9f8ec440e614af6d35826110c633c4074 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Fri, 23 Feb 2024 14:36:31 +0800
+Subject: LoongArch: Call early_init_fdt_scan_reserved_mem() earlier
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 9fa304b9f8ec440e614af6d35826110c633c4074 upstream.
+
+The unflatten_and_copy_device_tree() function contains a call to
+memblock_alloc(). This means that memblock is allocating memory before
+any of the reserved memory regions are set aside in the arch_mem_init()
+function which calls early_init_fdt_scan_reserved_mem(). Therefore,
+there is a possibility for memblock to allocate from any of the
+reserved memory regions.
+
+Hence, move the call to early_init_fdt_scan_reserved_mem() to be earlier
+in the init sequence, so that the reserved memory regions are set aside
+before any allocations are done using memblock.
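+
+The intended ordering, in sketch form (mirroring the hunks below):
+
+	void __init platform_init(void)
+	{
+		/* Set aside the FDT /reserved-memory regions first ... */
+		early_init_fdt_scan_reserved_mem();
+		/* ... so the memblock_alloc() performed while copying
+		 * the device tree cannot land in a reserved region. */
+		unflatten_and_copy_device_tree();
+	}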
+
+Cc: stable@vger.kernel.org
+Fixes: 88d4d957edc707e ("LoongArch: Add FDT booting support from efi system table")
+Signed-off-by: Oreoluwa Babatunde <quic_obabatun@quicinc.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/setup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/kernel/setup.c
++++ b/arch/loongarch/kernel/setup.c
+@@ -367,6 +367,8 @@ void __init platform_init(void)
+ acpi_gbl_use_default_register_widths = false;
+ acpi_boot_table_init();
+ #endif
++
++ early_init_fdt_scan_reserved_mem();
+ unflatten_and_copy_device_tree();
+
+ #ifdef CONFIG_NUMA
+@@ -400,8 +402,6 @@ static void __init arch_mem_init(char **
+
+ check_kernel_sections_mem();
+
+- early_init_fdt_scan_reserved_mem();
+-
+ /*
+ * In order to reduce the possibility of kernel panic when failed to
+ * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
--- /dev/null
+From 1001db6c42e4012b55e5ee19405490f23e033b5a Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Fri, 23 Feb 2024 14:36:31 +0800
+Subject: LoongArch: Disable IRQ before init_fn() for nonboot CPUs
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 1001db6c42e4012b55e5ee19405490f23e033b5a upstream.
+
+Disable IRQs before calling init_fn() for nonboot CPUs during hotplug,
+in order to silence warnings like the one below (and also avoid
+potential errors due to unexpected interrupts):
+
+WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198
+pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0
+a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000
+a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c
+t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e
+t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000
+t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001
+s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001
+s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8
+ ra: 90000000047bd56c tlb_init+0x24c/0x528
+ ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280
+ CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
+ PRMD: 00000000 (PPLV0 -PIE -PWE)
+ EUEN: 00000000 (-FPE -SXE -ASXE -BTE)
+ ECFG: 00071000 (LIE=12 VS=7)
+ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0)
+ PRID: 0014c010 (Loongson-64bit, Loongson-3A5000)
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198
+Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000
+ 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140
+ 9000000006456888 9000000006456880 900000010039f950 0000000000000001
+ 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700
+ 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003
+ 0000000000040000 9000000007930370 0000000002b90000 0000000000000004
+ 9000000006366000 900000000619a140 0000000000000000 0000000000000004
+ 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940
+ 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8
+ 00000000000000b0 0000000000000000 0000000000000000 0000000000071000
+ ...
+Call Trace:
+[<90000000047a4828>] show_stack+0x48/0x1a0
+[<9000000005b61874>] dump_stack_lvl+0x84/0xcc
+[<90000000047f60ac>] __warn+0x8c/0x1e0
+[<9000000005b0ab34>] report_bug+0x1b4/0x280
+[<9000000005b63110>] do_bp+0x2d0/0x480
+[<90000000047a2e20>] handle_bp+0x120/0x1c0
+[<90000000048e3334>] rcu_cpu_starting+0x214/0x280
+[<90000000047bd568>] tlb_init+0x248/0x528
+[<90000000047a4c44>] per_cpu_trap_init+0x124/0x160
+[<90000000047a19f4>] cpu_probe+0x494/0xa00
+[<90000000047b551c>] start_secondary+0x3c/0xc0
+[<9000000005b66134>] smpboot_entry+0x50/0x58
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/smp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/loongarch/kernel/smp.c
++++ b/arch/loongarch/kernel/smp.c
+@@ -334,6 +334,7 @@ void __noreturn arch_cpu_idle_dead(void)
+ addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+ } while (addr == 0);
+
++ local_irq_disable();
+ init_fn = (void *)TO_CACHE(addr);
+ iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
+
--- /dev/null
+From 752cd08da320a667a833803a8fd6bb266114cce5 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Fri, 23 Feb 2024 14:36:31 +0800
+Subject: LoongArch: Update cpu_sibling_map when disabling nonboot CPUs
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 752cd08da320a667a833803a8fd6bb266114cce5 upstream.
+
+Update cpu_sibling_map when disabling nonboot CPUs by defining & calling
+clear_cpu_sibling_map(), otherwise we get such errors on SMT systems:
+
+jump label: negative count!
+WARNING: CPU: 6 PID: 45 at kernel/jump_label.c:263 __static_key_slow_dec_cpuslocked+0xec/0x100
+CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340
+pc 90000000004c302c ra 90000000004c302c tp 90000001005bc000 sp 90000001005bfd20
+a0 000000000000001b a1 900000000224c278 a2 90000001005bfb58 a3 900000000224c280
+a4 900000000224c278 a5 90000001005bfb50 a6 0000000000000001 a7 0000000000000001
+t0 ce87a4763eb5234a t1 ce87a4763eb5234a t2 0000000000000000 t3 0000000000000000
+t4 0000000000000006 t5 0000000000000000 t6 0000000000000064 t7 0000000000001964
+t8 000000000009ebf6 u0 9000000001f2a068 s9 0000000000000000 s0 900000000246a2d8
+s1 ffffffffffffffff s2 ffffffffffffffff s3 90000000021518c0 s4 0000000000000040
+s5 9000000002151058 s6 9000000009828e40 s7 00000000000000b4 s8 0000000000000006
+ ra: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100
+ ERA: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100
+ CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
+ PRMD: 00000004 (PPLV0 +PIE -PWE)
+ EUEN: 00000000 (-FPE -SXE -ASXE -BTE)
+ ECFG: 00071c1c (LIE=2-4,10-12 VS=7)
+ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0)
+ PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV)
+CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340
+Stack : 0000000000000000 900000000203f258 900000000179afc8 90000001005bc000
+ 90000001005bf980 0000000000000000 90000001005bf988 9000000001fe0be0
+ 900000000224c280 900000000224c278 90000001005bf8c0 0000000000000001
+ 0000000000000001 ce87a4763eb5234a 0000000007f38000 90000001003f8cc0
+ 0000000000000000 0000000000000006 0000000000000000 4c206e6f73676e6f
+ 6f4c203a656d616e 000000000009ec99 0000000007f38000 0000000000000000
+ 900000000214b000 9000000001fe0be0 0000000000000004 0000000000000000
+ 0000000000000107 0000000000000009 ffffffffffafdabe 00000000000000b4
+ 0000000000000006 90000000004c302c 9000000000224528 00005555939a0c7c
+ 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c
+ ...
+Call Trace:
+[<9000000000224528>] show_stack+0x48/0x1a0
+[<900000000179afc8>] dump_stack_lvl+0x78/0xa0
+[<9000000000263ed0>] __warn+0x90/0x1a0
+[<90000000017419b8>] report_bug+0x1b8/0x280
+[<900000000179c564>] do_bp+0x264/0x420
+[<90000000004c302c>] __static_key_slow_dec_cpuslocked+0xec/0x100
+[<90000000002b4d7c>] sched_cpu_deactivate+0x2fc/0x300
+[<9000000000266498>] cpuhp_invoke_callback+0x178/0x8a0
+[<9000000000267f70>] cpuhp_thread_fun+0xf0/0x240
+[<90000000002a117c>] smpboot_thread_fn+0x1dc/0x2e0
+[<900000000029a720>] kthread+0x140/0x160
+[<9000000000222288>] ret_from_kernel_thread+0xc/0xa4
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/smp.c | 121 ++++++++++++++++++++++++--------------------
+ 1 file changed, 68 insertions(+), 53 deletions(-)
+
+--- a/arch/loongarch/kernel/smp.c
++++ b/arch/loongarch/kernel/smp.c
+@@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, i
+ }
+ }
+
++static inline void set_cpu_core_map(int cpu)
++{
++ int i;
++
++ cpumask_set_cpu(cpu, &cpu_core_setup_map);
++
++ for_each_cpu(i, &cpu_core_setup_map) {
++ if (cpu_data[cpu].package == cpu_data[i].package) {
++ cpumask_set_cpu(i, &cpu_core_map[cpu]);
++ cpumask_set_cpu(cpu, &cpu_core_map[i]);
++ }
++ }
++}
++
++static inline void set_cpu_sibling_map(int cpu)
++{
++ int i;
++
++ cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
++
++ for_each_cpu(i, &cpu_sibling_setup_map) {
++ if (cpus_are_siblings(cpu, i)) {
++ cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
++ cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
++ }
++ }
++}
++
++static inline void clear_cpu_sibling_map(int cpu)
++{
++ int i;
++
++ for_each_cpu(i, &cpu_sibling_setup_map) {
++ if (cpus_are_siblings(cpu, i)) {
++ cpumask_clear_cpu(i, &cpu_sibling_map[cpu]);
++ cpumask_clear_cpu(cpu, &cpu_sibling_map[i]);
++ }
++ }
++
++ cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
++}
++
++/*
++ * Calculate a new cpu_foreign_map mask whenever a
++ * new cpu appears or disappears.
++ */
++void calculate_cpu_foreign_map(void)
++{
++ int i, k, core_present;
++ cpumask_t temp_foreign_map;
++
++ /* Re-calculate the mask */
++ cpumask_clear(&temp_foreign_map);
++ for_each_online_cpu(i) {
++ core_present = 0;
++ for_each_cpu(k, &temp_foreign_map)
++ if (cpus_are_siblings(i, k))
++ core_present = 1;
++ if (!core_present)
++ cpumask_set_cpu(i, &temp_foreign_map);
++ }
++
++ for_each_online_cpu(i)
++ cpumask_andnot(&cpu_foreign_map[i],
++ &temp_foreign_map, &cpu_sibling_map[i]);
++}
++
+ /* Send mailbox buffer via Mail_Send */
+ static void csr_mail_send(uint64_t data, int cpu, int mailbox)
+ {
+@@ -300,6 +367,7 @@ int loongson_cpu_disable(void)
+ numa_remove_cpu(cpu);
+ #endif
+ set_cpu_online(cpu, false);
++ clear_cpu_sibling_map(cpu);
+ calculate_cpu_foreign_map();
+ local_irq_save(flags);
+ irq_migrate_all_off_this_cpu();
+@@ -377,59 +445,6 @@ static int __init ipi_pm_init(void)
+ core_initcall(ipi_pm_init);
+ #endif
+
+-static inline void set_cpu_sibling_map(int cpu)
+-{
+- int i;
+-
+- cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+-
+- for_each_cpu(i, &cpu_sibling_setup_map) {
+- if (cpus_are_siblings(cpu, i)) {
+- cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+- cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
+- }
+- }
+-}
+-
+-static inline void set_cpu_core_map(int cpu)
+-{
+- int i;
+-
+- cpumask_set_cpu(cpu, &cpu_core_setup_map);
+-
+- for_each_cpu(i, &cpu_core_setup_map) {
+- if (cpu_data[cpu].package == cpu_data[i].package) {
+- cpumask_set_cpu(i, &cpu_core_map[cpu]);
+- cpumask_set_cpu(cpu, &cpu_core_map[i]);
+- }
+- }
+-}
+-
+-/*
+- * Calculate a new cpu_foreign_map mask whenever a
+- * new cpu appears or disappears.
+- */
+-void calculate_cpu_foreign_map(void)
+-{
+- int i, k, core_present;
+- cpumask_t temp_foreign_map;
+-
+- /* Re-calculate the mask */
+- cpumask_clear(&temp_foreign_map);
+- for_each_online_cpu(i) {
+- core_present = 0;
+- for_each_cpu(k, &temp_foreign_map)
+- if (cpus_are_siblings(i, k))
+- core_present = 1;
+- if (!core_present)
+- cpumask_set_cpu(i, &temp_foreign_map);
+- }
+-
+- for_each_online_cpu(i)
+- cpumask_andnot(&cpu_foreign_map[i],
+- &temp_foreign_map, &cpu_sibling_map[i]);
+-}
+-
+ /* Preload SMP state for boot cpu */
+ void smp_prepare_boot_cpu(void)
+ {
--- /dev/null
+From 855678ed8534518e2b428bcbcec695de9ba248e8 Mon Sep 17 00:00:00 2001
+From: Yu Kuai <yukuai3@huawei.com>
+Date: Thu, 1 Feb 2024 17:25:51 +0800
+Subject: md: Fix missing release of 'active_io' for flush
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+commit 855678ed8534518e2b428bcbcec695de9ba248e8 upstream.
+
+submit_flushes
+ atomic_set(&mddev->flush_pending, 1);
+ rdev_for_each_rcu(rdev, mddev)
+  atomic_inc(&mddev->flush_pending);
+  bi->bi_end_io = md_end_flush
+  submit_bio(bi);
+   /* flush io is done first */
+   md_end_flush
+    if (atomic_dec_and_test(&mddev->flush_pending))
+     percpu_ref_put(&mddev->active_io)
+     -> active_io is not released
+
+ if (atomic_dec_and_test(&mddev->flush_pending))
+  -> missing release of active_io
+
+As a consequence, mddev_suspend() will wait forever for 'active_io'
+to reach zero.
+
+Fix this problem by releasing 'active_io' in submit_flushes() if
+'flush_pending' is decreased to zero.
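+
+For reference, the counting scheme uses an initial bias of 1 held by
+the submitter (a sketch matching the trace above; error handling
+elided):
+
+	atomic_set(&mddev->flush_pending, 1);	/* submitter's bias */
+	rdev_for_each_rcu(rdev, mddev) {
+		atomic_inc(&mddev->flush_pending);
+		submit_bio(bi);		/* md_end_flush() decrements */
+	}
+	/* Drop the bias; whoever reaches zero owns the final cleanup,
+	 * which must both release active_io and queue the flush work. */
+	if (atomic_dec_and_test(&mddev->flush_pending)) {
+		percpu_ref_put(&mddev->active_io);
+		queue_work(md_wq, &mddev->flush_work);
+	}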
+
+Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration")
+Cc: stable@vger.kernel.org # v6.1+
+Reported-by: Blazej Kucman <blazej.kucman@linux.intel.com>
+Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/md.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -530,8 +530,12 @@ static void submit_flushes(struct work_s
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+- if (atomic_dec_and_test(&mddev->flush_pending))
++ if (atomic_dec_and_test(&mddev->flush_pending)) {
++ /* The pair is percpu_ref_get() from md_flush_request() */
++ percpu_ref_put(&mddev->active_io);
++
+ queue_work(md_wq, &mddev->flush_work);
++ }
+ }
+
+ static void md_submit_flush_data(struct work_struct *ws)
--- /dev/null
+From 13d0599ab3b2ff17f798353f24bcbef1659d3cfc Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 16 Feb 2024 11:40:25 -0800
+Subject: mm/damon/lru_sort: fix quota status loss due to online tunings
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 13d0599ab3b2ff17f798353f24bcbef1659d3cfc upstream.
+
+For online parameter changes, DAMON_LRU_SORT creates new schemes based
+on the latest values of the parameters and replaces the old schemes
+with the new ones. When creating them, the internal status of the
+quotas of the old schemes is not preserved. As a result, charging of
+the quota starts from zero after the online tuning. The data collected
+to estimate the throughput of each scheme's action is also reset, and
+therefore the estimation has to start from scratch again. Because the
+throughput estimation is used to convert the time quota to the
+effective size quota, this could result in temporary time quota
+inaccuracy. It would be recovered over time, though. In short, the
+quota accuracy could be temporarily degraded after an online parameter
+update.
+
+Fix the problem by checking the case and copying the internal fields for
+the status.
+
+Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org
+Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> [6.0+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 36 insertions(+), 7 deletions(-)
+
+--- a/mm/damon/lru_sort.c
++++ b/mm/damon/lru_sort.c
+@@ -183,9 +183,21 @@ static struct damos *damon_lru_sort_new_
+ return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
+ }
+
++static void damon_lru_sort_copy_quota_status(struct damos_quota *dst,
++ struct damos_quota *src)
++{
++ dst->total_charged_sz = src->total_charged_sz;
++ dst->total_charged_ns = src->total_charged_ns;
++ dst->charged_sz = src->charged_sz;
++ dst->charged_from = src->charged_from;
++ dst->charge_target_from = src->charge_target_from;
++ dst->charge_addr_from = src->charge_addr_from;
++}
++
+ static int damon_lru_sort_apply_parameters(void)
+ {
+- struct damos *scheme;
++ struct damos *scheme, *hot_scheme, *cold_scheme;
++ struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL;
+ unsigned int hot_thres, cold_thres;
+ int err = 0;
+
+@@ -193,18 +205,35 @@ static int damon_lru_sort_apply_paramete
+ if (err)
+ return err;
+
++ damon_for_each_scheme(scheme, ctx) {
++ if (!old_hot_scheme) {
++ old_hot_scheme = scheme;
++ continue;
++ }
++ old_cold_scheme = scheme;
++ }
++
+ hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
+ hot_thres_access_freq / 1000;
+- scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+- if (!scheme)
++ hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
++ if (!hot_scheme)
+ return -ENOMEM;
+- damon_set_schemes(ctx, &scheme, 1);
++ if (old_hot_scheme)
++ damon_lru_sort_copy_quota_status(&hot_scheme->quota,
++ &old_hot_scheme->quota);
+
+ cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
+- scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+- if (!scheme)
++ cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
++ if (!cold_scheme) {
++ damon_destroy_scheme(hot_scheme);
+ return -ENOMEM;
+- damon_add_scheme(ctx, scheme);
++ }
++ if (old_cold_scheme)
++ damon_lru_sort_copy_quota_status(&cold_scheme->quota,
++ &old_cold_scheme->quota);
++
++ damon_set_schemes(ctx, &hot_scheme, 1);
++ damon_add_scheme(ctx, cold_scheme);
+
+ return damon_set_region_biggest_system_ram_default(target,
+ &monitor_region_start,
--- /dev/null
+From 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 16 Feb 2024 11:40:24 -0800
+Subject: mm/damon/reclaim: fix quota status loss due to online tunings
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 upstream.
+
+Patch series "mm/damon: fix quota status loss due to online tunings".
+
+DAMON_RECLAIM and DAMON_LRU_SORT are not preserving internal quota
+status when applying new user parameters, and hence could cause
+temporary quota accuracy degradation. Fix it by preserving the status.
+
+
+This patch (of 2):
+
+For online parameter changes, DAMON_RECLAIM creates a new scheme based
+on the latest values of the parameters and replaces the old scheme with
+the new one. When creating it, the internal status of the quota of the
+old scheme is not preserved. As a result, charging of the quota starts
+from zero after the online tuning. The data collected to estimate the
+throughput of the scheme's action is also reset, and therefore the
+estimation has to start from scratch again. Because the throughput
+estimation is used to convert the time quota to the effective size
+quota, this could result in temporary time quota inaccuracy. It would
+be recovered over time, though. In short, the quota accuracy could be
+temporarily degraded after an online parameter update.
+
+Fix the problem by checking the case and copying the internal fields for
+the status.
+
+Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org
+Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org
+Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> [5.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/reclaim.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/reclaim.c
++++ b/mm/damon/reclaim.c
+@@ -148,9 +148,20 @@ static struct damos *damon_reclaim_new_s
+ &damon_reclaim_wmarks);
+ }
+
++static void damon_reclaim_copy_quota_status(struct damos_quota *dst,
++ struct damos_quota *src)
++{
++ dst->total_charged_sz = src->total_charged_sz;
++ dst->total_charged_ns = src->total_charged_ns;
++ dst->charged_sz = src->charged_sz;
++ dst->charged_from = src->charged_from;
++ dst->charge_target_from = src->charge_target_from;
++ dst->charge_addr_from = src->charge_addr_from;
++}
++
+ static int damon_reclaim_apply_parameters(void)
+ {
+- struct damos *scheme;
++ struct damos *scheme, *old_scheme;
+ struct damos_filter *filter;
+ int err = 0;
+
+@@ -162,6 +173,11 @@ static int damon_reclaim_apply_parameter
+ scheme = damon_reclaim_new_scheme();
+ if (!scheme)
+ return -ENOMEM;
++ if (!list_empty(&ctx->schemes)) {
++ damon_for_each_scheme(old_scheme, ctx)
++ damon_reclaim_copy_quota_status(&scheme->quota,
++ &old_scheme->quota);
++ }
+ if (skip_anon) {
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
+ if (!filter) {
--- /dev/null
+From 118642d7f606fc9b9c92ee611275420320290ffb Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Tue, 13 Feb 2024 03:16:34 -0500
+Subject: mm: memcontrol: clarify swapaccount=0 deprecation warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 118642d7f606fc9b9c92ee611275420320290ffb upstream.
+
+The swapaccount deprecation warning is throwing false positives. Since we
+deprecated the knob and defaulted to enabling, the only reports we've been
+getting are from folks that set swapaccount=1. While this is a nice
+affirmation that always-enabling was the right choice, we certainly don't
+want to warn when users request the supported mode.
+
+Only warn when disabling is requested, and clarify the warning.
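+
+Resulting behavior, for reference (summarizing the hunk below):
+
+	swapaccount=1	accepted silently; the only supported mode
+	swapaccount=0	warns once; the knob itself has been a no-op
+			since swap accounting became always-enabled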
+
+[colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"]
+ Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com
+Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org
+Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode")
+Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
+Reported-by: "Jonas Schäfer" <jonas@wielicki.name>
+Reported-by: Narcis Garcia <debianlists@actiu.net>
+Suggested-by: Yosry Ahmed <yosryahmed@google.com>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -7613,9 +7613,13 @@ bool mem_cgroup_swap_full(struct folio *
+
+ static int __init setup_swap_account(char *s)
+ {
+- pr_warn_once("The swapaccount= commandline option is deprecated. "
+- "Please report your usecase to linux-mm@kvack.org if you "
+- "depend on this functionality.\n");
++ bool res;
++
++ if (!kstrtobool(s, &res) && !res)
++ pr_warn_once("The swapaccount=0 commandline option is deprecated "
++ "in favor of configuring swap control via cgroupfs. "
++ "Please report your usecase to linux-mm@kvack.org if you "
++ "depend on this functionality.\n");
+ return 1;
+ }
+ __setup("swapaccount=", setup_swap_account);
--- /dev/null
+From 13ddaf26be324a7f951891ecd9ccd04466d27458 Mon Sep 17 00:00:00 2001
+From: Kairui Song <kasong@tencent.com>
+Date: Wed, 7 Feb 2024 02:25:59 +0800
+Subject: mm/swap: fix race when skipping swapcache
+
+From: Kairui Song <kasong@tencent.com>
+
+commit 13ddaf26be324a7f951891ecd9ccd04466d27458 upstream.
+
+When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads
+swapin the same entry at the same time, they get different pages (A, B).
+Before one thread (T0) finishes the swapin and installs page (A) to the
+PTE, another thread (T1) could finish swapin of page (B), swap_free the
+entry, then swap out the possibly modified page reusing the same entry.
+It breaks the pte_same check in (T0) because the PTE value is unchanged,
+causing an ABA problem. Thread (T0) will install a stale page (A) into
+the PTE and cause data corruption.
+
+One possible callstack is like this:
+
+CPU0                                 CPU1
+----                                 ----
+do_swap_page()                       do_swap_page() with same entry
+<direct swapin path>                 <direct swapin path>
+<alloc page A>                       <alloc page B>
+swap_read_folio() <- read to page A  swap_read_folio() <- read to page B
+<slow on later locks or interrupt>   <finished swapin first>
+...                                  set_pte_at()
+                                     swap_free() <- entry is free
+                                     <write to page B, now page A stale>
+                                     <swap out page B to same swap entry>
+pte_same() <- Check pass, PTE seems
+             unchanged, but page A
+             is stale!
+swap_free() <- page B content lost!
+set_pte_at() <- stale page A installed!
+
+And besides, for ZRAM, swap_free() allows the swap device to discard the
+entry content, so even if page (B) is not modified, if swap_read_folio()
+on CPU0 happens later than swap_free() on CPU1, it may also cause data
+loss.
+
+To fix this, reuse swapcache_prepare(), which will pin the swap entry
+using the cache flag and allow only one thread to swap it in, while
+also preventing any parallel code from putting the entry into the
+cache. Release the pin after the page table lock is dropped.
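+
+The resulting synchronization pattern, in sketch form (condensed from
+the do_swap_page() hunks below):
+
+	if (swapcache_prepare(entry)) {
+		/* Another thread owns this swapin, or the entry is in
+		 * the swapcache; back off and let the fault retry. */
+		schedule_timeout_uninterruptible(1);
+		goto out;
+	}
+	/* ... swapcache-skipping swapin, pte_same() check, set PTE ... */
+	swapcache_clear(si, entry);	/* drop the pin after PTL unlock */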
+
+Racers just loop and wait since it's a rare and very short event. A
+schedule_timeout_uninterruptible(1) call is added to avoid repeated page
+faults wasting too much CPU, causing livelock or adding too much noise to
+perf statistics. A similar livelock issue was described in commit
+029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead")
+
+Reproducer:
+
+This race issue can be triggered easily using a well constructed
+reproducer and patched brd (with a delay in read path) [1]:
+
+With latest 6.8 mainline, race caused data loss can be observed easily:
+$ gcc -g -lpthread test-thread-swap-race.c && ./a.out
+ Polulating 32MB of memory region...
+ Keep swapping out...
+ Starting round 0...
+ Spawning 65536 workers...
+ 32746 workers spawned, wait for done...
+ Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss!
+ Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss!
+ Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss!
+ Round 0 Failed, 15 data loss!
+
+This reproducer spawns multiple threads sharing the same memory region
+using a small swap device. Each pair of threads updates mapped pages
+one by one in opposite directions, trying to create a race, with one
+dedicated thread keeping the data swapped out using madvise.
+
+The reproducer hit the race about once every 5 minutes, so the race
+is entirely possible in production.
+
+After this patch, I ran the reproducer for over a few hundred rounds and
+no data loss observed.
+
+Performance overhead is minimal, microbenchmark swapin 10G from 32G
+zram:
+
+Before: 10934698 us
+After: 11157121 us
+Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag)
+
+[kasong@tencent.com: v4]
+ Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com
+Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com
+Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device")
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/
+Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1]
+Signed-off-by: Kairui Song <kasong@tencent.com>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Acked-by: Yu Zhao <yuzhao@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Acked-by: Chris Li <chrisl@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swap.h | 5 +++++
+ mm/memory.c | 20 ++++++++++++++++++++
+ mm/swap.h | 5 +++++
+ mm/swapfile.c | 13 +++++++++++++
+ 4 files changed, 43 insertions(+)
+
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -552,6 +552,11 @@ static inline int swap_duplicate(swp_ent
+ return 0;
+ }
+
++static inline int swapcache_prepare(swp_entry_t swp)
++{
++ return 0;
++}
++
+ static inline void swap_free(swp_entry_t swp)
+ {
+ }
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3726,6 +3726,7 @@ vm_fault_t do_swap_page(struct vm_fault
+ struct page *page;
+ struct swap_info_struct *si = NULL;
+ rmap_t rmap_flags = RMAP_NONE;
++ bool need_clear_cache = false;
+ bool exclusive = false;
+ swp_entry_t entry;
+ pte_t pte;
+@@ -3794,6 +3795,20 @@ vm_fault_t do_swap_page(struct vm_fault
+ if (!folio) {
+ if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+ __swap_count(entry) == 1) {
++ /*
++ * Prevent parallel swapin from proceeding with
++ * the cache flag. Otherwise, another thread may
++ * finish swapin first, free the entry, and swapout
++ * reusing the same entry. It's undetectable as
++ * pte_same() returns true due to entry reuse.
++ */
++ if (swapcache_prepare(entry)) {
++ /* Relax a bit to prevent rapid repeated page faults */
++ schedule_timeout_uninterruptible(1);
++ goto out;
++ }
++ need_clear_cache = true;
++
+ /* skip swapcache */
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
+ vma, vmf->address, false);
+@@ -4040,6 +4055,9 @@ unlock:
+ if (vmf->pte)
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ out:
++ /* Clear the swap cache pin for direct swapin after PTL unlock */
++ if (need_clear_cache)
++ swapcache_clear(si, entry);
+ if (si)
+ put_swap_device(si);
+ return ret;
+@@ -4054,6 +4072,8 @@ out_release:
+ folio_unlock(swapcache);
+ folio_put(swapcache);
+ }
++ if (need_clear_cache)
++ swapcache_clear(si, entry);
+ if (si)
+ put_swap_device(si);
+ return ret;
+--- a/mm/swap.h
++++ b/mm/swap.h
+@@ -38,6 +38,7 @@ void __delete_from_swap_cache(struct fol
+ void delete_from_swap_cache(struct folio *folio);
+ void clear_shadow_from_swap_cache(int type, unsigned long begin,
+ unsigned long end);
++void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry);
+ struct folio *swap_cache_get_folio(swp_entry_t entry,
+ struct vm_area_struct *vma, unsigned long addr);
+ struct folio *filemap_get_incore_folio(struct address_space *mapping,
+@@ -96,6 +97,10 @@ static inline int swap_writepage(struct
+ return 0;
+ }
+
++static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
++{
++}
++
+ static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
+ struct vm_area_struct *vma, unsigned long addr)
+ {
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -3362,6 +3362,19 @@ int swapcache_prepare(swp_entry_t entry)
+ return __swap_duplicate(entry, SWAP_HAS_CACHE);
+ }
+
++void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
++{
++ struct swap_cluster_info *ci;
++ unsigned long offset = swp_offset(entry);
++ unsigned char usage;
++
++ ci = lock_cluster_or_swap_info(si, offset);
++ usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE);
++ unlock_cluster_or_swap_info(si, ci);
++ if (!usage)
++ free_swap_slot(entry);
++}
++
+ struct swap_info_struct *swp_swap_info(swp_entry_t entry)
+ {
+ return swap_type_to_swap_info(swp_type(entry));
--- /dev/null
+From 84c16d01ff219bc0a5dca5219db6b8b86a6854fb Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Fri, 16 Feb 2024 21:33:00 +0100
+Subject: platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 84c16d01ff219bc0a5dca5219db6b8b86a6854fb upstream.
+
+Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing
+tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2:
+
+1. The ThinkPad will wake up immediately from suspend
+2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second
+
+Both these issues are caused by the "VBDL" ACPI method call added
+at the end of the notify_handler.
+
+And it never became entirely clear if this call is even necessary to fix
+the issue of missing tablet-mode-switch events on the Dell Inspiron 7352.
+
+Drop the "VBDL" ACPI method call again to fix the 2 issues this is
+causing on the ThinkPad X1 Tablet Gen2.
+
+Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events")
+Reported-by: Alexander Kobel <a-kobel@a-kobel.de>
+Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/
+Cc: regressions@lists.linux.dev
+Cc: Arnold Gozum <arngozum@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Tested-by: Alexander Kobel <a-kobel@a-kobel.de>
+Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/vbtn.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/platform/x86/intel/vbtn.c
++++ b/drivers/platform/x86/intel/vbtn.c
+@@ -200,9 +200,6 @@ static void notify_handler(acpi_handle h
+ autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE);
+
+ sparse_keymap_report_event(input_dev, event, val, autorelease);
+-
+- /* Some devices need this to report further events */
+- acpi_evaluate_object(handle, "VBDL", NULL, NULL);
+ }
+
+ /*
--- /dev/null
+From dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 12 Feb 2024 13:06:07 +0100
+Subject: platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 upstream.
+
+On some devices the ACPI name of the touchscreen is e.g. either
+MSSL1680:00 or MSSL1680:01 depending on the BIOS version.
+
+This happens for example on the "Chuwi Hi8 Air" tablet where the initial
+commit's ts_data uses "MSSL1680:00" but the tablets from the github issue
+and linux-hardware.org probe linked below both use "MSSL1680:01".
+
+Replace the strncmp() match on ts_data->acpi_name with a strstarts()
+check to allow using a partial match on just the ACPI HID of "MSSL1680"
+and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly,
+to fix the touchscreen not working on models where it is "MSSL1680:01".
+
+Note this drops the length check for I2C_NAME_SIZE. This was never
+necessary since the ACPI names used are never more than 11 chars and
+I2C_NAME_SIZE is 20, so the replaced strncmp() would always stop long
+before reaching I2C_NAME_SIZE.
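+
+strstarts() semantics, for reference (it is the simple prefix test from
+include/linux/string.h):
+
+	strstarts("MSSL1680:00", "MSSL1680")	/* true  */
+	strstarts("MSSL1680:01", "MSSL1680")	/* true  */
+	strstarts("MSSL1680:01", "MSSL1680:00")	/* false */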
+
+Link: https://linux-hardware.org/?computer=AC4301C0542A
+Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet")
+Closes: https://github.com/onitake/gsl-firmware/issues/91
+Cc: stable@vger.kernel.org
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/touchscreen_dmi.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/platform/x86/touchscreen_dmi.c
++++ b/drivers/platform/x86/touchscreen_dmi.c
+@@ -81,7 +81,7 @@ static const struct property_entry chuwi
+ };
+
+ static const struct ts_dmi_data chuwi_hi8_air_data = {
+- .acpi_name = "MSSL1680:00",
++ .acpi_name = "MSSL1680",
+ .properties = chuwi_hi8_air_props,
+ };
+
+@@ -1821,7 +1821,7 @@ static void ts_dmi_add_props(struct i2c_
+ int error;
+
+ if (has_acpi_companion(dev) &&
+- !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) {
++ strstarts(client->name, ts_data->acpi_name)) {
+ error = device_create_managed_software_node(dev, ts_data->properties, NULL);
+ if (error)
+ dev_err(dev, "failed to add properties: %d\n", error);
--- /dev/null
+From 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 5 Feb 2024 10:39:20 +0100
+Subject: Revert "parisc: Only list existing CPUs in cpu_possible_mask"
+
+From: Helge Deller <deller@gmx.de>
+
+commit 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 upstream.
+
+This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226.
+
+It broke CPU hotplugging because it modifies the __cpu_possible_mask
+after bootup, so that it will be different from nr_cpu_ids, which
+then effectively breaks the workqueue setup code and triggers crashes
+when shutting down CPUs at runtime.
+
+Guenter was the first to notice the wrong values in __cpu_possible_mask,
+since the cpumask KUnit tests were failing.
+
+Reverting this commit fixes both issues, but sadly brings back this
+non-critical runtime warning:
+register_cpu_capacity_sysctl: too early to get CPU4 device!
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lkml.org/lkml/2024/2/4/146
+Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/
+Cc: stable@vger.kernel.org # 6.0+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/parisc/kernel/processor.c | 8 --------
+ 1 file changed, 8 deletions(-)
+
+--- a/arch/parisc/kernel/processor.c
++++ b/arch/parisc/kernel/processor.c
+@@ -172,7 +172,6 @@ static int __init processor_probe(struct
+ p->cpu_num = cpu_info.cpu_num;
+ p->cpu_loc = cpu_info.cpu_loc;
+
+- set_cpu_possible(cpuid, true);
+ store_cpu_topology(cpuid);
+
+ #ifdef CONFIG_SMP
+@@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver _
+ */
+ void __init processor_init(void)
+ {
+- unsigned int cpu;
+-
+ reset_cpu_topology();
+-
+- /* reset possible mask. We will mark those which are possible. */
+- for_each_possible_cpu(cpu)
+- set_cpu_possible(cpu, false);
+-
+ register_parisc_driver(&cpu_driver);
+ }
--- /dev/null
+From 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 Mon Sep 17 00:00:00 2001
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+Date: Wed, 14 Feb 2024 16:06:28 +0100
+Subject: s390/cio: fix invalid -EBUSY on ccw_device_start
+
+From: Peter Oberparleiter <oberpar@linux.ibm.com>
+
+commit 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 upstream.
+
+The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code
+when drivers try to start I/O while a path-verification (PV) process is
+pending. This can lead to failed device initialization attempts with
+symptoms like broken network connectivity after boot.
+
+Fix this by replacing the -EBUSY return code with a deferred condition
+code 1 reply to make path-verification handling consistent from a
+driver's point of view.
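+
+From a driver's perspective, the contract then becomes (sketch):
+
+	ret = ccw_device_start(cdev, cpa, intparm, lpm, flags);
+	/* ret == 0 even while path verification is pending; if the I/O
+	 * could not be started, the failure is delivered later to the
+	 * interrupt handler as a deferred condition code 1 in the irb,
+	 * rather than as -EBUSY here. */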
+
+The problem can be reproduced semi-regularly using the following process,
+while repeating steps 2-3 as necessary (example assumes an OSA device
+with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02):
+
+1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group
+2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online
+3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \
+ echo on > /sys/devices/css0/chp0.02/status
+
+Background information:
+
+The common I/O layer starts path-verification I/Os when it receives
+indications about changes in a device path's availability. This occurs
+for example when hardware events indicate a change in channel-path
+status, or when a manual operation such as a CHPID vary or configure
+operation is performed.
+
+If a driver attempts to start I/O while a PV is running, CIO reports a
+successful I/O start (ccw_device_start() return code 0). Then, after
+completion of PV, CIO synthesizes an interrupt response that indicates
+an asynchronous status condition that prevented the start of the I/O
+(deferred condition code 1).
+
+If a PV indication arrives while a device is busy with driver-owned I/O,
+PV is delayed until after I/O completion was reported to the driver's
+interrupt handler. To ensure that PV can be started eventually, CIO
+reports a device busy condition (ccw_device_start() return code -EBUSY)
+if a driver tries to start another I/O while PV is pending.
+
+In some cases this -EBUSY return code causes device drivers to consider
+a device not operational, resulting in failed device initialization.
+
+Note: The code that introduced the problem was added in 2003. Symptoms
+started appearing with the following CIO commit that causes a PV
+indication when a device is removed from the cio_ignore list after the
+associated parent subchannel device was probed, but before online
+processing of the CCW device has started:
+
+2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers")
+
+During boot, the cio_ignore list is modified by the cio_ignore dracut
+module [1] as well as Linux vendor-specific systemd service scripts[2].
+When combined, this commit and boot scripts cause a frequent occurrence
+of the problem during boot.
+
+[1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore
+[2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service
+
+Cc: stable@vger.kernel.org # v5.15+
+Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers")
+Tested-By: Thorsten Winkler <twinkler@linux.ibm.com>
+Reviewed-by: Thorsten Winkler <twinkler@linux.ibm.com>
+Signed-off-by: Peter Oberparleiter <oberpar@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/cio/device_ops.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/s390/cio/device_ops.c
++++ b/drivers/s390/cio/device_ops.c
+@@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct
+ return -EINVAL;
+ if (cdev->private->state == DEV_STATE_NOT_OPER)
+ return -ENODEV;
+- if (cdev->private->state == DEV_STATE_VERIFY) {
++ if (cdev->private->state == DEV_STATE_VERIFY ||
++ cdev->private->flags.doverify) {
+ /* Remember to fake irb when finished. */
+ if (!cdev->private->flags.fake_irb) {
+ cdev->private->flags.fake_irb = FAKE_CMD_IRB;
+@@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct
+ }
+ if (cdev->private->state != DEV_STATE_ONLINE ||
+ ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) &&
+- !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) ||
+- cdev->private->flags.doverify)
++ !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)))
+ return -EBUSY;
+ ret = cio_set_options (sch, flags);
+ if (ret)
--- /dev/null
+From b5fc07a5fb56216a49e6c1d0b172d5464d99a89b Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Wed, 14 Feb 2024 17:14:11 -0500
+Subject: scsi: core: Consult supported VPD page list prior to fetching page
+
+From: Martin K. Petersen <martin.petersen@oracle.com>
+
+commit b5fc07a5fb56216a49e6c1d0b172d5464d99a89b upstream.
+
+Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full
+page") removed the logic which checks whether a VPD page is present on
+the supported pages list before asking for the page itself. That was
+done because SPC helpfully states "The Supported VPD Pages VPD page
+list may or may not include all the VPD pages that are able to be
+returned by the device server". Testing had revealed a few devices
+that supported some of the 0xBn pages but didn't actually list them in
+page 0.
+
+Julian Sikorski bisected a problem with his drive resetting during
+discovery to the commit above. As it turns out, this particular drive
+firmware will crash if we attempt to fetch page 0xB9.
+
+Various approaches were attempted to work around this. In the end,
+reinstating the logic that consults VPD page 0 before fetching any
+other page was the path of least resistance. A firmware update for the
+devices which originally compelled us to remove the check has since
+been released.
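+
+For reference, the layout of the Supported VPD Pages page (page 0x00)
+per SPC, which is what the new check below scans:
+
+	byte 0     peripheral qualifier / device type
+	byte 1     page code (0x00)
+	bytes 2-3  page length (n - 3)
+	bytes 4-n  supported page codes, in ascending order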
+
+Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com
+Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page")
+Cc: stable@vger.kernel.org
+Cc: Bart Van Assche <bvanassche@acm.org>
+Reported-by: Julian Sikorski <belegdol@gmail.com>
+Tested-by: Julian Sikorski <belegdol@gmail.com>
+Reviewed-by: Lee Duncan <lee.duncan@suse.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi.c | 22 ++++++++++++++++++++--
+ include/scsi/scsi_device.h | 4 ----
+ 2 files changed, 20 insertions(+), 6 deletions(-)
+
+--- a/drivers/scsi/scsi.c
++++ b/drivers/scsi/scsi.c
+@@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_
+ return result + 4;
+ }
+
++enum scsi_vpd_parameters {
++ SCSI_VPD_HEADER_SIZE = 4,
++ SCSI_VPD_LIST_SIZE = 36,
++};
++
+ static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
+ {
+- unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
++ unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4);
+ int result;
+
+ if (sdev->no_vpd_size)
+ return SCSI_DEFAULT_VPD_LEN;
+
+ /*
++ * Fetch the supported pages VPD and validate that the requested page
++ * number is present.
++ */
++ if (page != 0) {
++ result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd));
++ if (result < SCSI_VPD_HEADER_SIZE)
++ return 0;
++
++ result -= SCSI_VPD_HEADER_SIZE;
++ if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result))
++ return 0;
++ }
++ /*
+ * Fetch the VPD page header to find out how big the page
+ * is. This is done to prevent problems on legacy devices
+ * which can not handle allocation lengths as large as
+ * potentially requested by the caller.
+ */
+- result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header));
++ result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE);
+ if (result < 0)
+ return 0;
+
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -100,10 +100,6 @@ struct scsi_vpd {
+ unsigned char data[];
+ };
+
+-enum scsi_vpd_parameters {
+- SCSI_VPD_HEADER_SIZE = 4,
+-};
+-
+ struct scsi_device {
+ struct Scsi_Host *host;
+ struct request_queue *request_queue;
--- /dev/null
+From 321da3dc1f3c92a12e3c5da934090d2992a8814c Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Tue, 13 Feb 2024 09:33:06 -0500
+Subject: scsi: sd: usb_storage: uas: Access media prior to querying device properties
+
+From: Martin K. Petersen <martin.petersen@oracle.com>
+
+commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream.
+
+It has been observed that some USB/UAS devices return generic mode page
+properties, hardcoded in firmware, for a period of time after a device
+has been discovered. The reported properties are either garbage or, in
+the case of a bridge, do not accurately reflect the characteristics of
+the attached physical storage device.
+
+Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to
+avoid calling revalidate twice") we would call revalidate several
+times during device discovery. As a result, incorrect values would
+eventually get replaced with ones accurately describing the attached
+storage. When we did away with the redundant revalidate pass, several
+cases were reported where devices reported nonsensical values or would
+end up in a write-protected state.
+
+An initial attempt at addressing this issue involved introducing a
+delayed second revalidate invocation. However, this approach still
+left some devices reporting incorrect characteristics.
+
+Tasos Sahanidis debugged the problem further and identified that
+introducing a READ operation prior to MODE SENSE fixed the problem and that
+it wasn't a timing issue. Issuing a READ appears to cause the devices to
+update their state to reflect the actual properties of the storage
+media. Device properties like vendor, model, and storage capacity appear to
+be correctly reported from the get-go. It is unclear why these devices
+defer populating the remaining characteristics.
+
+Match the behavior of a well-known commercial operating system and
+trigger a READ operation prior to querying device characteristics to
+force the device to populate the mode pages.
+
+The additional READ is triggered by a flag set in the USB storage and
+UAS drivers. We avoid issuing the READ for other transport classes
+since some storage devices identify Linux through our particular
+discovery command sequence.
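+
+The helper added below simply reads one logical block at LBA 0. In
+outline, the CDB it issues looks like (an illustrative sketch):
+
+	unsigned char cmd[10] = { };
+
+	cmd[0] = READ_10;
+	put_unaligned_be32(0, &cmd[2]);	/* logical block address 0 */
+	put_unaligned_be16(1, &cmd[7]);	/* transfer one logical block */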
+
+Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com
+Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice")
+Cc: stable@vger.kernel.org
+Reported-by: Tasos Sahanidis <tasos@tasossah.com>
+Reviewed-by: Ewan D. Milne <emilne@redhat.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Tested-by: Tasos Sahanidis <tasos@tasossah.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/sd.c | 26 +++++++++++++++++++++++++-
+ drivers/usb/storage/scsiglue.c | 7 +++++++
+ drivers/usb/storage/uas.c | 7 +++++++
+ include/scsi/scsi_device.h | 1 +
+ 4 files changed, 40 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3404,6 +3404,24 @@ static bool sd_validate_opt_xfer_size(st
+ return true;
+ }
+
++static void sd_read_block_zero(struct scsi_disk *sdkp)
++{
++ unsigned int buf_len = sdkp->device->sector_size;
++ char *buffer, cmd[10] = { };
++
++ buffer = kmalloc(buf_len, GFP_KERNEL);
++ if (!buffer)
++ return;
++
++ cmd[0] = READ_10;
++ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
++ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
++
++ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
++ SD_TIMEOUT, sdkp->max_retries, NULL);
++ kfree(buffer);
++}
++
+ /**
+ * sd_revalidate_disk - called the first time a new disk is seen,
+ * performs disk spin up, read_capacity, etc.
+@@ -3443,7 +3461,13 @@ static int sd_revalidate_disk(struct gen
+ */
+ if (sdkp->media_present) {
+ sd_read_capacity(sdkp, buffer);
+-
++ /*
++ * Some USB/UAS devices return generic values for mode pages
++ * until the media has been accessed. Trigger a READ operation
++ * to force the device to populate mode pages.
++ */
++ if (sdp->read_before_ms)
++ sd_read_block_zero(sdkp);
+ /*
+ * set the default to rotational. All non-rotational devices
+ * support the block characteristics VPD page, which will
+--- a/drivers/usb/storage/scsiglue.c
++++ b/drivers/usb/storage/scsiglue.c
+@@ -180,6 +180,13 @@ static int slave_configure(struct scsi_d
+ sdev->use_192_bytes_for_3f = 1;
+
+ /*
++ * Some devices report generic values until the media has been
++ * accessed. Force a READ(10) prior to querying device
++ * characteristics.
++ */
++ sdev->read_before_ms = 1;
++
++ /*
+ * Some devices don't like MODE SENSE with page=0x3f,
+ * which is the command used for checking if a device
+ * is write-protected. Now that we tell the sd driver
+--- a/drivers/usb/storage/uas.c
++++ b/drivers/usb/storage/uas.c
+@@ -879,6 +879,13 @@ static int uas_slave_configure(struct sc
+ sdev->guess_capacity = 1;
+
+ /*
++ * Some devices report generic values until the media has been
++ * accessed. Force a READ(10) prior to querying device
++ * characteristics.
++ */
++ sdev->read_before_ms = 1;
++
++ /*
+ * Some devices don't like MODE SENSE with page=0x3f,
+ * which is the command used for checking if a device
+ * is write-protected. Now that we tell the sd driver
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -208,6 +208,7 @@ struct scsi_device {
+ unsigned use_10_for_rw:1; /* first try 10-byte read / write */
+ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
+ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
++ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */
+ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
+ unsigned no_write_same:1; /* no WRITE SAME command */
+ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
--- /dev/null
+From de959094eb2197636f7c803af0943cb9d3b35804 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Wed, 14 Feb 2024 23:43:56 +0900
+Subject: scsi: target: pscsi: Fix bio_put() for error case
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit de959094eb2197636f7c803af0943cb9d3b35804 upstream.
+
+As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc
+wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit()
+and kfree(). That is not done properly for the error case, hitting WARN and
+NULL pointer dereference in bio_free().
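+
+Since that commit the required pairing is, in sketch form (illustrative,
+not the exact driver code):
+
+	bio = bio_kmalloc(nr_vecs, GFP_KERNEL);	/* plain kmalloc() memory */
+	if (bio)
+		bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, opf);
+	...
+	bio_uninit(bio);	/* tear down bio internals */
+	kfree(bio);		/* not bio_put(), which expects a bioset-backed bio */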
+
+Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper")
+CC: stable@vger.kernel.org # 6.1+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/target/target_core_pscsi.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/target/target_core_pscsi.c
++++ b/drivers/target/target_core_pscsi.c
+@@ -907,12 +907,15 @@ new_bio:
+
+ return 0;
+ fail:
+- if (bio)
+- bio_put(bio);
++ if (bio) {
++ bio_uninit(bio);
++ kfree(bio);
++ }
+ while (req->bio) {
+ bio = req->bio;
+ req->bio = bio->bi_next;
+- bio_put(bio);
++ bio_uninit(bio);
++ kfree(bio);
+ }
+ req->biotail = NULL;
+ return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
--- /dev/null
+From 7efa6f2c803366f84c3c362f01e822490669d72b Mon Sep 17 00:00:00 2001
+From: Terry Tritton <terry.tritton@linaro.org>
+Date: Mon, 5 Feb 2024 14:50:56 +0000
+Subject: selftests/mm: uffd-unit-test check if huge page size is 0
+
+From: Terry Tritton <terry.tritton@linaro.org>
+
+commit 7efa6f2c803366f84c3c362f01e822490669d72b upstream.
+
+If HUGETLBFS is not enabled, the default_huge_page_size() function will
+return 0 and cause a divide-by-0 error. Add a check to see if the huge page
+size is 0 and skip the hugetlb tests if it is.
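+
+The failure is a plain division by zero of the form (illustrative):
+
+	/* default_huge_page_size() returns 0 without HUGETLBFS */
+	nr_pages = size / default_huge_page_size();	/* SIGFPE */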
+
+Link: https://lkml.kernel.org/r/20240205145055.3545806-2-terry.tritton@linaro.org
+Fixes: 16a45b57cbf2 ("selftests/mm: add framework for uffd-unit-test")
+Signed-off-by: Terry Tritton <terry.tritton@linaro.org>
+Cc: Peter Griffin <peter.griffin@linaro.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/uffd-unit-tests.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/tools/testing/selftests/mm/uffd-unit-tests.c
++++ b/tools/testing/selftests/mm/uffd-unit-tests.c
+@@ -1309,6 +1309,12 @@ int main(int argc, char *argv[])
+ continue;
+
+ uffd_test_start("%s on %s", test->name, mem_type->name);
++ if ((mem_type->mem_flag == MEM_HUGETLB ||
++ mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
++ (default_huge_page_size() == 0)) {
++ uffd_test_skip("huge page size is 0, feature missing?");
++ continue;
++ }
+ if (!uffd_feature_supported(test)) {
+ uffd_test_skip("feature missing");
+ continue;
xen-events-modify-internal-un-bind-interfaces.patch
xen-events-close-evtchn-after-mapping-cleanup.patch
ib-hfi1-fix-sdma.h-tx-num_descs-off-by-one-error.patch
+x86-bugs-add-asm-helpers-for-executing-verw.patch
+docs-instruct-latex-to-cope-with-deeper-nesting.patch
+loongarch-call-early_init_fdt_scan_reserved_mem-earlier.patch
+loongarch-disable-irq-before-init_fn-for-nonboot-cpus.patch
+loongarch-update-cpu_sibling_map-when-disabling-nonboot-cpus.patch
+btrfs-defrag-avoid-unnecessary-defrag-caused-by-incorrect-extent-size.patch
+btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch
+drm-ttm-fix-an-invalid-freeing-on-already-freed-page-in-error-path.patch
+drm-meson-don-t-remove-bridges-which-are-created-by-other-drivers.patch
+drm-amd-display-adjust-few-initialization-order-in-dm.patch
+s390-cio-fix-invalid-ebusy-on-ccw_device_start.patch
+ata-libata-core-do-not-try-to-set-sleeping-devices-to-standby.patch
+fs-aio-restrict-kiocb_set_cancel_fn-to-i-o-submitted-via-libaio.patch
+lib-kconfig.debug-test_iov_iter-depends-on-mmu.patch
+dm-crypt-recheck-the-integrity-tag-after-a-failure.patch
+revert-parisc-only-list-existing-cpus-in-cpu_possible_mask.patch
+dm-integrity-recheck-the-integrity-tag-after-a-failure.patch
+dm-crypt-don-t-modify-the-data-when-using-authenticated-encryption.patch
+dm-verity-recheck-the-hash-after-a-failure.patch
+cxl-acpi-fix-load-failures-due-to-single-window-creation-failure.patch
+cxl-pci-fix-disabling-memory-if-dvsec-cxl-range-does-not-match-a-cfmws-window.patch
+scsi-sd-usb_storage-uas-access-media-prior-to-querying-device-properties.patch
+scsi-target-pscsi-fix-bio_put-for-error-case.patch
+scsi-core-consult-supported-vpd-page-list-prior-to-fetching-page.patch
+selftests-mm-uffd-unit-test-check-if-huge-page-size-is-0.patch
+mm-swap-fix-race-when-skipping-swapcache.patch
+mm-damon-lru_sort-fix-quota-status-loss-due-to-online-tunings.patch
+mm-memcontrol-clarify-swapaccount-0-deprecation-warning.patch
+mm-damon-reclaim-fix-quota-stauts-loss-due-to-online-tunings.patch
+platform-x86-intel-vbtn-stop-calling-vbdl-from-notify_handler.patch
+platform-x86-touchscreen_dmi-allow-partial-prefix-matches-for-acpi-names.patch
+cachefiles-fix-memory-leak-in-cachefiles_add_cache.patch
+sparc-fix-undefined-reference-to-fb_is_primary_device.patch
+md-fix-missing-release-of-active_io-for-flush.patch
+kvm-arm64-vgic-its-test-for-valid-irq-in-movall-handler.patch
+kvm-arm64-vgic-its-test-for-valid-irq-in-its_sync_lpi_pending_table.patch
+accel-ivpu-don-t-enable-any-tiles-by-default-on-vpu40xx.patch
+gtp-fix-use-after-free-and-null-ptr-deref-in-gtp_genl_dump_pdp.patch
+crypto-virtio-akcipher-fix-stack-overflow-on-memcpy.patch
--- /dev/null
+From ed683b9bb91fc274383e222ba5873a9ee9033462 Mon Sep 17 00:00:00 2001
+From: Javier Martinez Canillas <javierm@redhat.com>
+Date: Tue, 20 Feb 2024 10:54:12 +0100
+Subject: sparc: Fix undefined reference to fb_is_primary_device
+
+From: Javier Martinez Canillas <javierm@redhat.com>
+
+commit ed683b9bb91fc274383e222ba5873a9ee9033462 upstream.
+
+Commit 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE
+symbols") added a new FB_CORE Kconfig symbol that can be enabled to have
+only fbcon/VT and DRM fbdev emulation, without support for any legacy
+fbdev driver.
+
+Unfortunately, it missed changing CONFIG_FB in the arch/sparc makefiles,
+which leads to the following linking error in some sparc64 configurations:
+
+ sparc64-linux-ld: drivers/video/fbdev/core/fbcon.o: in function `fbcon_fb_registered':
+>> fbcon.c:(.text+0x4f60): undefined reference to `fb_is_primary_device'
+
+Fixes: 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/r/202401290306.IV8rhJ02-lkp@intel.com/
+Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Acked-by: Arnd Bergmann <arnd@arndb.de>
+Cc: <stable@vger.kernel.org> # v6.6+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240220095428.3341195-1-javierm@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/Makefile | 2 +-
+ arch/sparc/video/Makefile | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
+index 5f6035936131..2a03daa68f28 100644
+--- a/arch/sparc/Makefile
++++ b/arch/sparc/Makefile
+@@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/
+ libs-y += arch/sparc/lib/
+
+ drivers-$(CONFIG_PM) += arch/sparc/power/
+-drivers-$(CONFIG_FB) += arch/sparc/video/
++drivers-$(CONFIG_FB_CORE) += arch/sparc/video/
+
+ boot := arch/sparc/boot
+
+diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile
+index 6baddbd58e4d..d4d83f1702c6 100644
+--- a/arch/sparc/video/Makefile
++++ b/arch/sparc/video/Makefile
+@@ -1,3 +1,3 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+
+-obj-$(CONFIG_FB) += fbdev.o
++obj-$(CONFIG_FB_CORE) += fbdev.o
+--
+2.44.0
+
--- /dev/null
+From baf8361e54550a48a7087b603313ad013cc13386 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 13 Feb 2024 18:21:35 -0800
+Subject: x86/bugs: Add asm helpers for executing VERW
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit baf8361e54550a48a7087b603313ad013cc13386 upstream.
+
+MDS mitigation requires clearing the CPU buffers before returning to
+user. This needs to be done late in the exit-to-user path. The current
+location of VERW leaves a possibility of kernel data ending up in CPU
+buffers for memory accesses done after VERW, such as:
+
+ 1. Kernel data accessed by an NMI between VERW and return-to-user can
+    remain in CPU buffers since an NMI returning to the kernel does not
+    execute VERW to clear CPU buffers.
+ 2. Alyssa reported that after VERW is executed,
+ CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
+ call. Memory accesses during stack scrubbing can move kernel stack
+ contents into CPU buffers.
+ 3. When caller-saved registers are restored after a return from a
+    function executing VERW, the kernel stack accesses can remain in
+    CPU buffers (since they occur after VERW).
+
+To fix this, VERW needs to be moved very late in the exit-to-user path.
+
+In preparation for moving VERW to entry/exit asm code, create macros
+that can be used in asm. Also make VERW patching depend on a new feature
+flag X86_FEATURE_CLEAR_CPU_BUF.
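+
+With the macro in place, an exit path can clear the buffers as its very
+last step, e.g. (illustrative; the actual call sites are wired up by
+later patches in this series):
+
+	/* last instruction before returning to userspace */
+	CLEAR_CPU_BUFFERS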
+
+Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry.S | 23 +++++++++++++++++++++++
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
+ 3 files changed, 37 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry.S
++++ b/arch/x86/entry/entry.S
+@@ -6,6 +6,9 @@
+ #include <linux/linkage.h>
+ #include <asm/export.h>
+ #include <asm/msr-index.h>
++#include <asm/unwind_hints.h>
++#include <asm/segment.h>
++#include <asm/cache.h>
+
+ .pushsection .noinstr.text, "ax"
+
+@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
+ EXPORT_SYMBOL_GPL(entry_ibpb);
+
+ .popsection
++
++/*
++ * Define the VERW operand that is disguised as entry code so that
++ * it can be referenced with KPTI enabled. This ensures VERW can be
++ * used late in exit-to-user path after page tables are switched.
++ */
++.pushsection .entry.text, "ax"
++
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_START_NOALIGN(mds_verw_sel)
++ UNWIND_HINT_UNDEFINED
++ ANNOTATE_NOENDBR
++ .word __KERNEL_DS
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_END(mds_verw_sel);
++/* For KVM */
++EXPORT_SYMBOL_GPL(mds_verw_sel);
++
++.popsection
++
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -97,7 +97,7 @@
+ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
+ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
+-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
++#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
+ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -329,6 +329,17 @@
+ #endif
+ .endm
+
++/*
++ * Macro to execute a VERW instruction that mitigates transient data sampling
++ * attacks such as MDS. On affected systems a microcode update overloaded the
++ * VERW instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
++ *
++ * Note: Only the memory operand variant of VERW clears the CPU buffers.
++ */
++.macro CLEAR_CPU_BUFFERS
++ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
++.endm
++
+ #else /* __ASSEMBLY__ */
+
+ #define ANNOTATE_RETPOLINE_SAFE \
+@@ -545,6 +556,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_
+
+ DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
++extern u16 mds_verw_sel;
++
+ #include <asm/segment.h>
+
+ /**