--- /dev/null
+From 6bfe3959b0e7a526f5c64747801a8613f002f05a Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Mon, 31 Jul 2023 19:16:35 +0800
+Subject: btrfs: compare the correct fsid/metadata_uuid in btrfs_validate_super
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 6bfe3959b0e7a526f5c64747801a8613f002f05a upstream.
+
+The function btrfs_validate_super() should verify the metadata_uuid in
+the provided superblock argument, because all its callers expect it to
+do that.
+
+Such as in the following stacks:
+
+ write_all_supers()
+ sb = fs_info->super_for_commit;
+ btrfs_validate_write_super(.., sb)
+ btrfs_validate_super(.., sb, ..)
+
+ scrub_one_super()
+ btrfs_validate_super(.., sb, ..)
+
+And
+ check_dev_super()
+ btrfs_validate_super(.., sb, ..)
+
+However, it currently verifies the fs_info::super_copy::metadata_uuid
+instead. Fix this using the correct metadata_uuid in the superblock
+argument.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2391,13 +2391,11 @@ int btrfs_validate_super(struct btrfs_fs
+ ret = -EINVAL;
+ }
+
+- if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
+- memcmp(fs_info->fs_devices->metadata_uuid,
+- fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
++ if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
++ BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
+- fs_info->super_copy->metadata_uuid,
+- fs_info->fs_devices->metadata_uuid);
++ btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
+ ret = -EINVAL;
+ }
+
--- /dev/null
+From 3c771c194402ffe20d4de68d9fc21e703179a9ce Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 14:33:30 +0800
+Subject: btrfs: scrub: avoid unnecessary csum tree search preparing stripes
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 3c771c194402ffe20d4de68d9fc21e703179a9ce upstream.
+
+One of the bottlenecks of the new scrub code is the extra csum tree
+search.
+
+The old code would only do the csum tree search for each scrub bio,
+which can be as large as 512KiB, thus they can afford to allocate a new
+path each time.
+
+But the new scrub code is doing csum tree search for each stripe, which
+is only 64KiB. This means we'd better re-use the same csum path for
+each search.
+
+This patch would introduce a per-sctx path for csum tree search, as we
+don't need to re-allocate the path every time we need to do a csum tree
+search.
+
+With this change we can further improve the queue depth and improve the
+scrub read performance:
+
+Before (with regression and cached extent tree path):
+
+ Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz aqu-sz %util
+ nvme0n1p3 15875.00 1013328.00 12.00 0.08 0.08 63.83 1.35 100.00
+
+After (with both cached extent/csum tree path):
+
+ nvme0n1p3 17759.00 1133280.00 10.00 0.06 0.08 63.81 1.50 100.00
+
+Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
+CC: stable@vger.kernel.org # 6.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file-item.c | 36 +++++++++++++++++++++++-------------
+ fs/btrfs/file-item.h | 6 +++---
+ fs/btrfs/raid56.c | 4 ++--
+ fs/btrfs/scrub.c | 29 +++++++++++++++++++----------
+ 4 files changed, 47 insertions(+), 28 deletions(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -597,29 +597,37 @@ fail:
+ * Each bit represents a sector. Thus caller should ensure @csum_buf passed
+ * in is large enough to contain all csums.
+ */
+-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
+- u8 *csum_buf, unsigned long *csum_bitmap,
+- bool search_commit)
++int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
++ u64 start, u64 end, u8 *csum_buf,
++ unsigned long *csum_bitmap)
+ {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_key key;
+- struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_csum_item *item;
+ const u64 orig_start = start;
++ bool free_path = false;
+ int ret;
+
+ ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
+ IS_ALIGNED(end + 1, fs_info->sectorsize));
+
+- path = btrfs_alloc_path();
+- if (!path)
+- return -ENOMEM;
+-
+- if (search_commit) {
+- path->skip_locking = 1;
+- path->reada = READA_FORWARD;
+- path->search_commit_root = 1;
++ if (!path) {
++ path = btrfs_alloc_path();
++ if (!path)
++ return -ENOMEM;
++ free_path = true;
++ }
++
++ /* Check if we can reuse the previous path. */
++ if (path->nodes[0]) {
++ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
++
++ if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
++ key.type == BTRFS_EXTENT_CSUM_KEY &&
++ key.offset <= start)
++ goto search_forward;
++ btrfs_release_path(path);
+ }
+
+ key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+@@ -656,6 +664,7 @@ int btrfs_lookup_csums_bitmap(struct btr
+ }
+ }
+
++search_forward:
+ while (start <= end) {
+ u64 csum_end;
+
+@@ -712,7 +721,8 @@ int btrfs_lookup_csums_bitmap(struct btr
+ }
+ ret = 0;
+ fail:
+- btrfs_free_path(path);
++ if (free_path)
++ btrfs_free_path(path);
+ return ret;
+ }
+
+--- a/fs/btrfs/file-item.h
++++ b/fs/btrfs/file-item.h
+@@ -57,9 +57,9 @@ int btrfs_lookup_csums_range(struct btrf
+ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
+ struct list_head *list, int search_commit,
+ bool nowait);
+-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
+- u8 *csum_buf, unsigned long *csum_bitmap,
+- bool search_commit);
++int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
++ u64 start, u64 end, u8 *csum_buf,
++ unsigned long *csum_bitmap);
+ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
+ const struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi,
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -2112,8 +2112,8 @@ static void fill_data_csums(struct btrfs
+ goto error;
+ }
+
+- ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
+- rbio->csum_buf, rbio->csum_bitmap, false);
++ ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
++ rbio->csum_buf, rbio->csum_bitmap);
+ if (ret < 0)
+ goto error;
+ if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -176,6 +176,7 @@ struct scrub_ctx {
+ struct scrub_stripe *raid56_data_stripes;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_path extent_path;
++ struct btrfs_path csum_path;
+ int first_free;
+ int cur_stripe;
+ atomic_t cancel_req;
+@@ -342,6 +343,8 @@ static noinline_for_stack struct scrub_c
+ sctx->fs_info = fs_info;
+ sctx->extent_path.search_commit_root = 1;
+ sctx->extent_path.skip_locking = 1;
++ sctx->csum_path.search_commit_root = 1;
++ sctx->csum_path.skip_locking = 1;
+ for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+ int ret;
+
+@@ -1472,6 +1475,7 @@ static void scrub_stripe_reset_bitmaps(s
+ */
+ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
+ struct btrfs_path *extent_path,
++ struct btrfs_path *csum_path,
+ struct btrfs_device *dev, u64 physical,
+ int mirror_num, u64 logical_start,
+ u32 logical_len,
+@@ -1563,9 +1567,9 @@ static int scrub_find_fill_first_stripe(
+ */
+ ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
+
+- ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical,
+- stripe_end, stripe->csums,
+- &csum_bitmap, true);
++ ret = btrfs_lookup_csums_bitmap(csum_root, csum_path,
++ stripe->logical, stripe_end,
++ stripe->csums, &csum_bitmap);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+@@ -1767,9 +1771,9 @@ static int queue_scrub_stripe(struct scr
+
+ /* We can queue one stripe using the remaining slot. */
+ scrub_reset_stripe(stripe);
+- ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, dev,
+- physical, mirror_num, logical,
+- length, stripe);
++ ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
++ &sctx->csum_path, dev, physical,
++ mirror_num, logical, length, stripe);
+ /* Either >0 as no more extents or <0 for error. */
+ if (ret)
+ return ret;
+@@ -1788,6 +1792,7 @@ static int scrub_raid56_parity_stripe(st
+ struct btrfs_raid_bio *rbio;
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_path extent_path = { 0 };
++ struct btrfs_path csum_path = { 0 };
+ struct bio *bio;
+ struct scrub_stripe *stripe;
+ bool all_empty = true;
+@@ -1799,12 +1804,14 @@ static int scrub_raid56_parity_stripe(st
+ ASSERT(sctx->raid56_data_stripes);
+
+ /*
+- * For data stripe search, we cannot re-use the same extent path, as
+- * the data stripe bytenr may be smaller than previous extent. Thus we
+- * have to use our own extent path.
++ * For data stripe search, we cannot re-use the same extent/csum paths,
++ * as the data stripe bytenr may be smaller than previous extent. Thus
++ * we have to use our own extent/csum paths.
+ */
+ extent_path.search_commit_root = 1;
+ extent_path.skip_locking = 1;
++ csum_path.search_commit_root = 1;
++ csum_path.skip_locking = 1;
+
+ for (int i = 0; i < data_stripes; i++) {
+ int stripe_index;
+@@ -1820,7 +1827,7 @@ static int scrub_raid56_parity_stripe(st
+
+ scrub_reset_stripe(stripe);
+ set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
+- ret = scrub_find_fill_first_stripe(bg, &extent_path,
++ ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path,
+ map->stripes[stripe_index].dev, physical, 1,
+ full_stripe_start + btrfs_stripe_nr_to_offset(i),
+ BTRFS_STRIPE_LEN, stripe);
+@@ -1949,6 +1956,7 @@ static int scrub_raid56_parity_stripe(st
+ btrfs_bio_counter_dec(fs_info);
+
+ btrfs_release_path(&extent_path);
++ btrfs_release_path(&csum_path);
+ out:
+ return ret;
+ }
+@@ -2243,6 +2251,7 @@ out:
+ if (!ret)
+ ret = ret2;
+ btrfs_release_path(&sctx->extent_path);
++ btrfs_release_path(&sctx->csum_path);
+
+ if (sctx->raid56_data_stripes) {
+ for (int i = 0; i < nr_data_stripes(map); i++)
--- /dev/null
+From 1dc4888e725dc748b82858984f2a5bd41efc5201 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 14:33:29 +0800
+Subject: btrfs: scrub: avoid unnecessary extent tree search preparing stripes
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 1dc4888e725dc748b82858984f2a5bd41efc5201 upstream.
+
+Since commit e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror()
+to scrub_stripe infrastructure"), scrub no longer re-uses the same path
+for extent tree search.
+
+This can lead to unnecessary extent tree search, especially for the new
+stripe based scrub, as we have way more stripes to prepare.
+
+This patch would re-introduce a shared path for extent tree search, and
+properly release it when the block group is scrubbed.
+
+This change alone can improve scrub performance slightly by reducing the
+time spent preparing the stripe, thus improving the queue depth.
+
+Before (with regression):
+
+ Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz aqu-sz %util
+ nvme0n1p3 15578.00 993616.00 5.00 0.03 0.09 63.78 1.32 100.00
+
+After (with this patch):
+
+ nvme0n1p3 15875.00 1013328.00 12.00 0.08 0.08 63.83 1.35 100.00
+
+Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
+CC: stable@vger.kernel.org # 6.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/scrub.c | 41 +++++++++++++++++++++++++++++------------
+ 1 file changed, 29 insertions(+), 12 deletions(-)
+
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -175,6 +175,7 @@ struct scrub_ctx {
+ struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
+ struct scrub_stripe *raid56_data_stripes;
+ struct btrfs_fs_info *fs_info;
++ struct btrfs_path extent_path;
+ int first_free;
+ int cur_stripe;
+ atomic_t cancel_req;
+@@ -339,6 +340,8 @@ static noinline_for_stack struct scrub_c
+ refcount_set(&sctx->refs, 1);
+ sctx->is_dev_replace = is_dev_replace;
+ sctx->fs_info = fs_info;
++ sctx->extent_path.search_commit_root = 1;
++ sctx->extent_path.skip_locking = 1;
+ for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+ int ret;
+
+@@ -1468,6 +1471,7 @@ static void scrub_stripe_reset_bitmaps(s
+ * Return <0 for error.
+ */
+ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
++ struct btrfs_path *extent_path,
+ struct btrfs_device *dev, u64 physical,
+ int mirror_num, u64 logical_start,
+ u32 logical_len,
+@@ -1477,7 +1481,6 @@ static int scrub_find_fill_first_stripe(
+ struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
+ struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
+ const u64 logical_end = logical_start + logical_len;
+- struct btrfs_path path = { 0 };
+ u64 cur_logical = logical_start;
+ u64 stripe_end;
+ u64 extent_start;
+@@ -1493,14 +1496,13 @@ static int scrub_find_fill_first_stripe(
+ /* The range must be inside the bg. */
+ ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
+
+- path.search_commit_root = 1;
+- path.skip_locking = 1;
+-
+- ret = find_first_extent_item(extent_root, &path, logical_start, logical_len);
++ ret = find_first_extent_item(extent_root, extent_path, logical_start,
++ logical_len);
+ /* Either error or not found. */
+ if (ret)
+ goto out;
+- get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
++ get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags,
++ &extent_gen);
+ if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ stripe->nr_meta_extents++;
+ if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
+@@ -1528,7 +1530,7 @@ static int scrub_find_fill_first_stripe(
+
+ /* Fill the extent info for the remaining sectors. */
+ while (cur_logical <= stripe_end) {
+- ret = find_first_extent_item(extent_root, &path, cur_logical,
++ ret = find_first_extent_item(extent_root, extent_path, cur_logical,
+ stripe_end - cur_logical + 1);
+ if (ret < 0)
+ goto out;
+@@ -1536,7 +1538,7 @@ static int scrub_find_fill_first_stripe(
+ ret = 0;
+ break;
+ }
+- get_extent_info(&path, &extent_start, &extent_len,
++ get_extent_info(extent_path, &extent_start, &extent_len,
+ &extent_flags, &extent_gen);
+ if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ stripe->nr_meta_extents++;
+@@ -1576,7 +1578,6 @@ static int scrub_find_fill_first_stripe(
+ }
+ set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
+ out:
+- btrfs_release_path(&path);
+ return ret;
+ }
+
+@@ -1766,8 +1767,9 @@ static int queue_scrub_stripe(struct scr
+
+ /* We can queue one stripe using the remaining slot. */
+ scrub_reset_stripe(stripe);
+- ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
+- logical, length, stripe);
++ ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, dev,
++ physical, mirror_num, logical,
++ length, stripe);
+ /* Either >0 as no more extents or <0 for error. */
+ if (ret)
+ return ret;
+@@ -1785,6 +1787,7 @@ static int scrub_raid56_parity_stripe(st
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct btrfs_raid_bio *rbio;
+ struct btrfs_io_context *bioc = NULL;
++ struct btrfs_path extent_path = { 0 };
+ struct bio *bio;
+ struct scrub_stripe *stripe;
+ bool all_empty = true;
+@@ -1795,6 +1798,14 @@ static int scrub_raid56_parity_stripe(st
+
+ ASSERT(sctx->raid56_data_stripes);
+
++ /*
++ * For data stripe search, we cannot re-use the same extent path, as
++ * the data stripe bytenr may be smaller than previous extent. Thus we
++ * have to use our own extent path.
++ */
++ extent_path.search_commit_root = 1;
++ extent_path.skip_locking = 1;
++
+ for (int i = 0; i < data_stripes; i++) {
+ int stripe_index;
+ int rot;
+@@ -1809,7 +1820,7 @@ static int scrub_raid56_parity_stripe(st
+
+ scrub_reset_stripe(stripe);
+ set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
+- ret = scrub_find_fill_first_stripe(bg,
++ ret = scrub_find_fill_first_stripe(bg, &extent_path,
+ map->stripes[stripe_index].dev, physical, 1,
+ full_stripe_start + btrfs_stripe_nr_to_offset(i),
+ BTRFS_STRIPE_LEN, stripe);
+@@ -1937,6 +1948,7 @@ static int scrub_raid56_parity_stripe(st
+ bio_put(bio);
+ btrfs_bio_counter_dec(fs_info);
+
++ btrfs_release_path(&extent_path);
+ out:
+ return ret;
+ }
+@@ -2109,6 +2121,9 @@ static noinline_for_stack int scrub_stri
+ u64 stripe_logical;
+ int stop_loop = 0;
+
++ /* Extent_path should be released by now. */
++ ASSERT(sctx->extent_path.nodes[0] == NULL);
++
+ scrub_blocked_if_needed(fs_info);
+
+ if (sctx->is_dev_replace &&
+@@ -2227,6 +2242,8 @@ out:
+ ret2 = flush_scrub_stripes(sctx);
+ if (!ret)
+ ret = ret2;
++ btrfs_release_path(&sctx->extent_path);
++
+ if (sctx->raid56_data_stripes) {
+ for (int i = 0; i < nr_data_stripes(map); i++)
+ release_scrub_stripe(&sctx->raid56_data_stripes[i]);
--- /dev/null
+From ae76d8e3e1351aa1ba09cc68dab6866d356f2e17 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 14:33:31 +0800
+Subject: btrfs: scrub: fix grouping of read IO
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit ae76d8e3e1351aa1ba09cc68dab6866d356f2e17 upstream.
+
+[REGRESSION]
+There are several regression reports about the scrub performance with
+v6.4 kernel.
+
+On a PCIe 3.0 device, the old v6.3 kernel can go 3GB/s scrub speed, but
+v6.4 can only go 1GB/s, an obvious 66% performance drop.
+
+[CAUSE]
+Iostat shows a very different behavior between v6.3 and v6.4 kernel:
+
+ Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz aqu-sz %util
+ nvme0n1p3 9731.00 3425544.00 17237.00 63.92 2.18 352.02 21.18 100.00
+ nvme0n1p3 15578.00 993616.00 5.00 0.03 0.09 63.78 1.32 100.00
+
+The upper one is v6.3 while the lower one is v6.4.
+
+There are several obvious differences:
+
+- Very few read merges
+ This turns out to be a behavior change that we no longer do bio
+ plug/unplug.
+
+- Very low aqu-sz
+ This is due to the submit-and-wait behavior of flush_scrub_stripes(),
+ and extra extent/csum tree search.
+
+Both behaviors are not that obvious on SATA SSDs, as SATA SSDs have NCQ
+to merge the reads, while SATA SSDs can not handle high queue depth well
+either.
+
+[FIX]
+For now this patch focuses on the read speed fix. Dev-replace replace
+speed needs more work.
+
+For the read part, we go two directions to fix the problems:
+
+- Re-introduce blk plug/unplug to merge read requests
+ This is pretty simple, and the behavior is pretty easy to observe.
+
+ This would enlarge the average read request size to 512K.
+
+- Introduce multi-group reads and no longer wait for each group
+ Instead of the old behavior, which submits 8 stripes and waits for
+ them, here we would enlarge the total number of stripes to 16 * 8.
+ Which is 8M per device, the same limit as the old scrub in-flight
+ bios size limit.
+
+ Now every time we fill a group (8 stripes), we submit them and
+ continue to next stripes.
+
+ Only when the full 16 * 8 stripes are all filled, we submit the
+ remaining ones (the last group), and wait for all groups to finish.
+ Then submit the repair writes and dev-replace writes.
+
+ This should enlarge the queue depth.
+
+This would greatly improve the merge rate (thus read block size) and
+queue depth:
+
+Before (with regression, and cached extent/csum path):
+
+ Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz aqu-sz %util
+ nvme0n1p3 20666.00 1318240.00 10.00 0.05 0.08 63.79 1.63 100.00
+
+After (with all patches applied):
+
+ nvme0n1p3 5165.00 2278304.00 30557.00 85.54 0.55 441.10 2.81 100.00
+
+i.e. 1287 to 2224 MB/s.
+
+CC: stable@vger.kernel.org # 6.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/scrub.c | 96 ++++++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 71 insertions(+), 25 deletions(-)
+
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -43,9 +43,20 @@ struct scrub_ctx;
+ /*
+ * The following value only influences the performance.
+ *
+- * This determines the batch size for stripe submitted in one go.
++ * This determines how many stripes would be submitted in one go,
++ * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
+ */
+-#define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64K stripe per-device. */
++#define SCRUB_STRIPES_PER_GROUP 8
++
++/*
++ * How many groups we have for each sctx.
++ *
++ * This would be 8M per device, the same value as the old scrub in-flight bios
++ * size limit.
++ */
++#define SCRUB_GROUPS_PER_SCTX 16
++
++#define SCRUB_TOTAL_STRIPES (SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)
+
+ /*
+ * The following value times PAGE_SIZE needs to be large enough to match the
+@@ -172,7 +183,7 @@ struct scrub_stripe {
+ };
+
+ struct scrub_ctx {
+- struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
++ struct scrub_stripe stripes[SCRUB_TOTAL_STRIPES];
+ struct scrub_stripe *raid56_data_stripes;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_path extent_path;
+@@ -317,10 +328,10 @@ static noinline_for_stack void scrub_fre
+ if (!sctx)
+ return;
+
+- for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
++ for (i = 0; i < SCRUB_TOTAL_STRIPES; i++)
+ release_scrub_stripe(&sctx->stripes[i]);
+
+- kfree(sctx);
++ kvfree(sctx);
+ }
+
+ static void scrub_put_ctx(struct scrub_ctx *sctx)
+@@ -335,7 +346,10 @@ static noinline_for_stack struct scrub_c
+ struct scrub_ctx *sctx;
+ int i;
+
+- sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
++ /* Since sctx has inline 128 stripes, it can go beyond 64K easily. Use
++ * kvzalloc().
++ */
++ sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL);
+ if (!sctx)
+ goto nomem;
+ refcount_set(&sctx->refs, 1);
+@@ -345,7 +359,7 @@ static noinline_for_stack struct scrub_c
+ sctx->extent_path.skip_locking = 1;
+ sctx->csum_path.search_commit_root = 1;
+ sctx->csum_path.skip_locking = 1;
+- for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
++ for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
+ int ret;
+
+ ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
+@@ -1659,6 +1673,28 @@ static bool stripe_has_metadata_error(st
+ return false;
+ }
+
++static void submit_initial_group_read(struct scrub_ctx *sctx,
++ unsigned int first_slot,
++ unsigned int nr_stripes)
++{
++ struct blk_plug plug;
++
++ ASSERT(first_slot < SCRUB_TOTAL_STRIPES);
++ ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES);
++
++ scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
++ btrfs_stripe_nr_to_offset(nr_stripes));
++ blk_start_plug(&plug);
++ for (int i = 0; i < nr_stripes; i++) {
++ struct scrub_stripe *stripe = &sctx->stripes[first_slot + i];
++
++ /* Those stripes should be initialized. */
++ ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
++ scrub_submit_initial_read(sctx, stripe);
++ }
++ blk_finish_plug(&plug);
++}
++
+ static int flush_scrub_stripes(struct scrub_ctx *sctx)
+ {
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+@@ -1671,11 +1707,11 @@ static int flush_scrub_stripes(struct sc
+
+ ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
+
+- scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
+- btrfs_stripe_nr_to_offset(nr_stripes));
+- for (int i = 0; i < nr_stripes; i++) {
+- stripe = &sctx->stripes[i];
+- scrub_submit_initial_read(sctx, stripe);
++ /* Submit the stripes which are populated but not submitted. */
++ if (nr_stripes % SCRUB_STRIPES_PER_GROUP) {
++ const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP);
++
++ submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot);
+ }
+
+ for (int i = 0; i < nr_stripes; i++) {
+@@ -1755,21 +1791,19 @@ static void raid56_scrub_wait_endio(stru
+
+ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
+ struct btrfs_device *dev, int mirror_num,
+- u64 logical, u32 length, u64 physical)
++ u64 logical, u32 length, u64 physical,
++ u64 *found_logical_ret)
+ {
+ struct scrub_stripe *stripe;
+ int ret;
+
+- /* No available slot, submit all stripes and wait for them. */
+- if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX) {
+- ret = flush_scrub_stripes(sctx);
+- if (ret < 0)
+- return ret;
+- }
++ /*
++ * There should always be one slot left, as caller filling the last
++ * slot should flush them all.
++ */
++ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
+
+ stripe = &sctx->stripes[sctx->cur_stripe];
+-
+- /* We can queue one stripe using the remaining slot. */
+ scrub_reset_stripe(stripe);
+ ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
+ &sctx->csum_path, dev, physical,
+@@ -1777,7 +1811,20 @@ static int queue_scrub_stripe(struct scr
+ /* Either >0 as no more extents or <0 for error. */
+ if (ret)
+ return ret;
++ if (found_logical_ret)
++ *found_logical_ret = stripe->logical;
+ sctx->cur_stripe++;
++
++ /* We filled one group, submit it. */
++ if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) {
++ const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP;
++
++ submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP);
++ }
++
++ /* Last slot used, flush them all. */
++ if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES)
++ return flush_scrub_stripes(sctx);
+ return 0;
+ }
+
+@@ -1990,6 +2037,7 @@ static int scrub_simple_mirror(struct sc
+ path.skip_locking = 1;
+ /* Go through each extent items inside the logical range */
+ while (cur_logical < logical_end) {
++ u64 found_logical;
+ u64 cur_physical = physical + cur_logical - logical_start;
+
+ /* Canceled? */
+@@ -2014,7 +2062,7 @@ static int scrub_simple_mirror(struct sc
+
+ ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
+ cur_logical, logical_end - cur_logical,
+- cur_physical);
++ cur_physical, &found_logical);
+ if (ret > 0) {
+ /* No more extent, just update the accounting */
+ sctx->stat.last_physical = physical + logical_length;
+@@ -2024,9 +2072,7 @@ static int scrub_simple_mirror(struct sc
+ if (ret < 0)
+ break;
+
+- ASSERT(sctx->cur_stripe > 0);
+- cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
+- + BTRFS_STRIPE_LEN;
++ cur_logical = found_logical + BTRFS_STRIPE_LEN;
+
+ /* Don't hold CPU for too long time */
+ cond_resched();
--- /dev/null
+From d167aa76dc0683828588c25767da07fb549e4f48 Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Mon, 31 Jul 2023 19:16:34 +0800
+Subject: btrfs: use the correct superblock to compare fsid in btrfs_validate_super
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit d167aa76dc0683828588c25767da07fb549e4f48 upstream.
+
+The function btrfs_validate_super() should verify the fsid in the provided
+superblock argument, because all its callers expect it to do that.
+
+Such as in the following stack:
+
+ write_all_supers()
+ sb = fs_info->super_for_commit;
+ btrfs_validate_write_super(.., sb)
+ btrfs_validate_super(.., sb, ..)
+
+ scrub_one_super()
+ btrfs_validate_super(.., sb, ..)
+
+And
+ check_dev_super()
+ btrfs_validate_super(.., sb, ..)
+
+However, it currently verifies the fs_info::super_copy::fsid instead,
+which is not correct. Fix this using the correct fsid in the superblock
+argument.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2384,11 +2384,10 @@ int btrfs_validate_super(struct btrfs_fs
+ ret = -EINVAL;
+ }
+
+- if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+- BTRFS_FSID_SIZE)) {
++ if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
+- fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
++ sb->fsid, fs_info->fs_devices->fsid);
+ ret = -EINVAL;
+ }
+
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- fs/btrfs/space-info.c | 6 +-----
+ fs/btrfs/space-info.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
-diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
-index 356638f54fef..d7e8cd4f140c 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
-@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs
return 0;
used = btrfs_space_info_used(space_info, true);
if (used + bytes < space_info->total_bytes + avail)
return 1;
---
-2.42.0
-
--- /dev/null
+From 49a30c3d1a2258fc93cfe6eea8e4951dabadc824 Mon Sep 17 00:00:00 2001
+From: Wenjing Liu <wenjing.liu@amd.com>
+Date: Tue, 15 Aug 2023 10:47:52 -0400
+Subject: drm/amd/display: always switch off ODM before committing more streams
+
+From: Wenjing Liu <wenjing.liu@amd.com>
+
+commit 49a30c3d1a2258fc93cfe6eea8e4951dabadc824 upstream.
+
+ODM power optimization is only supported with a single stream. When ODM
+power optimization is enabled, we might not have enough free pipes for
+enabling another stream. So when we are committing more than 1 stream we
+should first switch off ODM power optimization to make room for the new
+stream and then allocate pipe resources for the new stream.
+
+Cc: stable@vger.kernel.org
+Fixes: 59de751e3845 ("drm/amd/display: add ODM case when looking for first split pipe")
+Reviewed-by: Dillon Varone <dillon.varone@amd.com>
+Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/core/dc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
+@@ -2061,12 +2061,12 @@ enum dc_status dc_commit_streams(struct
+ }
+ }
+
+- /* Check for case where we are going from odm 2:1 to max
+- * pipe scenario. For these cases, we will call
+- * commit_minimal_transition_state() to exit out of odm 2:1
+- * first before processing new streams
++ /* ODM Combine 2:1 power optimization is only applied for single stream
++ * scenario, it uses extra pipes than needed to reduce power consumption
++ * We need to switch off this feature to make room for new streams.
+ */
+- if (stream_count == dc->res_pool->pipe_count) {
++ if (stream_count > dc->current_state->stream_count &&
++ dc->current_state->stream_count == 1) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->next_odm_pipe)
--- /dev/null
+From 57a943ebfcdb4a97fbb409640234bdb44bfa1953 Mon Sep 17 00:00:00 2001
+From: Melissa Wen <mwen@igalia.com>
+Date: Thu, 31 Aug 2023 15:12:28 -0100
+Subject: drm/amd/display: enable cursor degamma for DCN3+ DRM legacy gamma
+
+From: Melissa Wen <mwen@igalia.com>
+
+commit 57a943ebfcdb4a97fbb409640234bdb44bfa1953 upstream.
+
+For DRM legacy gamma, AMD display manager applies implicit sRGB degamma
+using a pre-defined sRGB transfer function. It works fine for DCN2
+family where degamma ROM and custom curves go to the same color block.
+But, on DCN3+, degamma is split into two blocks: degamma ROM for
+pre-defined TFs and `gamma correction` for user/custom curves, and
+degamma ROM settings don't apply to the cursor plane. To get DRM legacy
+gamma working as expected, enable cursor degamma ROM for implicit sRGB
+degamma on HW with this configuration.
+
+Cc: stable@vger.kernel.org
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2803
+Fixes: 96b020e2163f ("drm/amd/display: check attr flag before set cursor degamma on DCN3+")
+Signed-off-by: Melissa Wen <mwen@igalia.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+@@ -1260,6 +1260,13 @@ void amdgpu_dm_plane_handle_cursor_updat
+ attributes.rotation_angle = 0;
+ attributes.attribute_flags.value = 0;
+
++ /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
++ * legacy gamma setup.
++ */
++ if (crtc_state->cm_is_degamma_srgb &&
++ adev->dm.dc->caps.color.dpp.gamma_corr)
++ attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
++
+ attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
+
+ if (crtc_state->stream) {
--- /dev/null
+From 47428f4b638d3b3264a2efa1a567b0bbddbb6107 Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Thu, 31 Aug 2023 15:22:35 -0400
+Subject: drm/amd/display: limit the v_startup workaround to ASICs older than DCN3.1
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+commit 47428f4b638d3b3264a2efa1a567b0bbddbb6107 upstream.
+
+Since calling dcn20_adjust_freesync_v_startup() on DCN3.1+ ASICs
+can cause the display to flicker and underflow to occur, we shouldn't
+call it for them. So, ensure that the DCN version is less than
+DCN_VERSION_3_1 before calling dcn20_adjust_freesync_v_startup().
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Fangzhi Zuo <jerry.zuo@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+@@ -1099,7 +1099,8 @@ void dcn20_calculate_dlg_params(struct d
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
+ pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+- if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
++ if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
++ context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+ dcn20_adjust_freesync_v_startup(
+ &context->res_ctx.pipe_ctx[i].stream->timing,
+ &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
--- /dev/null
+From 07e388aab042774f284a2ad75a70a194517cdad4 Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Tue, 5 Sep 2023 13:27:22 -0400
+Subject: drm/amd/display: prevent potential division by zero errors
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+commit 07e388aab042774f284a2ad75a70a194517cdad4 upstream.
+
+There are two places in apply_below_the_range() where it's possible for
+a divide by zero error to occur. So, to fix this make sure the divisor
+is non-zero before attempting the computation in both cases.
+
+Cc: stable@vger.kernel.org
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2637
+Fixes: a463b263032f ("drm/amd/display: Fix frames_to_insert math")
+Fixes: ded6119e825a ("drm/amd/display: Reinstate LFC optimization")
+Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
++++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+@@ -338,7 +338,9 @@ static void apply_below_the_range(struct
+ * - Delta for CEIL: delta_from_mid_point_in_us_1
+ * - Delta for FLOOR: delta_from_mid_point_in_us_2
+ */
+- if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
++ if (mid_point_frames_ceil &&
++ (last_render_time_in_us / mid_point_frames_ceil) <
++ in_out_vrr->min_duration_in_us) {
+ /* Check for out of range.
+ * If using CEIL produces a value that is out of range,
+ * then we are forced to use FLOOR.
+@@ -385,8 +387,9 @@ static void apply_below_the_range(struct
+ /* Either we've calculated the number of frames to insert,
+ * or we need to insert min duration frames
+ */
+- if (last_render_time_in_us / frames_to_insert <
+- in_out_vrr->min_duration_in_us){
++ if (frames_to_insert &&
++ (last_render_time_in_us / frames_to_insert) <
++ in_out_vrr->min_duration_in_us){
+ frames_to_insert -= (frames_to_insert > 1) ?
+ 1 : 0;
+ }
--- /dev/null
+From 5a3ccb1400339268c5e3dc1fa044a7f6c7f59a02 Mon Sep 17 00:00:00 2001
+From: Gabe Teeger <gabe.teeger@amd.com>
+Date: Mon, 14 Aug 2023 16:06:18 -0400
+Subject: drm/amd/display: Remove wait while locked
+
+From: Gabe Teeger <gabe.teeger@amd.com>
+
+commit 5a3ccb1400339268c5e3dc1fa044a7f6c7f59a02 upstream.
+
+[Why]
+We wait for mpc idle while in a locked state, leading to potential
+deadlock.
+
+[What]
+Move the wait_for_idle call to outside of HW lock. This and a
+call to wait_drr_doublebuffer_pending_clear are moved to a new
+static helper function called wait_for_outstanding_hw_updates, to make
+the interface clearer.
+
+Cc: stable@vger.kernel.org
+Fixes: 8f0d304d21b3 ("drm/amd/display: Do not commit pipe when updating DRR")
+Reviewed-by: Jun Lei <jun.lei@amd.com>
+Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Gabe Teeger <gabe.teeger@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/Makefile | 1
+ drivers/gpu/drm/amd/display/dc/core/dc.c | 58 ++++++++++++++-------
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 11 ---
+ 3 files changed, 42 insertions(+), 28 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/Makefile
+@@ -78,3 +78,4 @@ DC_EDID += dc_edid_parser.o
+ AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
+ AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
+ AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
++
+--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
+@@ -3589,6 +3589,45 @@ static void commit_planes_for_stream_fas
+ top_pipe_to_program->stream->update_flags.raw = 0;
+ }
+
++static void wait_for_outstanding_hw_updates(struct dc *dc, const struct dc_state *dc_context)
++{
++/*
++ * This function calls HWSS to wait for any potentially double buffered
++ * operations to complete. It should be invoked as a pre-amble prior
++ * to full update programming before asserting any HW locks.
++ */
++ int pipe_idx;
++ int opp_inst;
++ int opp_count = dc->res_pool->pipe_count;
++ struct hubp *hubp;
++ int mpcc_inst;
++ const struct pipe_ctx *pipe_ctx;
++
++ for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
++ pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
++
++ if (!pipe_ctx->stream)
++ continue;
++
++ if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
++ pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
++
++ hubp = pipe_ctx->plane_res.hubp;
++ if (!hubp)
++ continue;
++
++ mpcc_inst = hubp->inst;
++ // MPCC inst is equal to pipe index in practice
++ for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
++ if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
++ dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
++ dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
++ break;
++ }
++ }
++ }
++}
++
+ static void commit_planes_for_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+@@ -3607,24 +3646,9 @@ static void commit_planes_for_stream(str
+ // dc->current_state anymore, so we have to cache it before we apply
+ // the new SubVP context
+ subvp_prev_use = false;
+-
+-
+ dc_z10_restore(dc);
+-
+- if (update_type == UPDATE_TYPE_FULL) {
+- /* wait for all double-buffer activity to clear on all pipes */
+- int pipe_idx;
+-
+- for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
+-
+- if (!pipe_ctx->stream)
+- continue;
+-
+- if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
+- pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
+- }
+- }
++ if (update_type == UPDATE_TYPE_FULL)
++ wait_for_outstanding_hw_updates(dc, context);
+
+ if (update_type == UPDATE_TYPE_FULL) {
+ dc_allow_idle_optimizations(dc, false);
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+@@ -1580,17 +1580,6 @@ static void dcn20_update_dchubp_dpp(
+ || plane_state->update_flags.bits.global_alpha_change
+ || plane_state->update_flags.bits.per_pixel_alpha_change) {
+ // MPCC inst is equal to pipe index in practice
+- int mpcc_inst = hubp->inst;
+- int opp_inst;
+- int opp_count = dc->res_pool->pipe_count;
+-
+- for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+- if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
+- dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+- dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+- break;
+- }
+- }
+ hws->funcs.update_mpcc(dc, pipe_ctx);
+ }
+
--- /dev/null
+From 1482650bc7ef01ebb24ec2c3a2e4d50e45da4d8c Mon Sep 17 00:00:00 2001
+From: Wenjing Liu <wenjing.liu@amd.com>
+Date: Mon, 14 Aug 2023 17:11:16 -0400
+Subject: drm/amd/display: update blank state on ODM changes
+
+From: Wenjing Liu <wenjing.liu@amd.com>
+
+commit 1482650bc7ef01ebb24ec2c3a2e4d50e45da4d8c upstream.
+
+When we dynamically add new ODM slices, we don't update the blank
+state. If the pipe used by a new ODM slice was previously blanked,
+we continue outputting blank pixel data on that slice, causing the
+right half of the screen to show a blank image.
+
+The previous fix was a temporary hack to directly update current state
+when committing new state. This could potentially cause hw and sw
+state synchronization issues and it is not permitted by dc commit
+design.
+
+Cc: stable@vger.kernel.org
+Fixes: 7fbf451e7639 ("drm/amd/display: Reinit DPG when exiting dynamic ODM")
+Reviewed-by: Dillon Varone <dillon.varone@amd.com>
+Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 36 +++++----------------
+ 1 file changed, 9 insertions(+), 27 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+@@ -1103,29 +1103,6 @@ void dcn20_blank_pixel_data(
+ 0);
+ }
+
+- if (!blank && dc->debug.enable_single_display_2to1_odm_policy) {
+- /* when exiting dynamic ODM need to reinit DPG state for unused pipes */
+- struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe;
+-
+- odm_pipe = pipe_ctx->next_odm_pipe;
+-
+- while (old_odm_pipe) {
+- if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx)
+- dc->hwss.set_disp_pattern_generator(dc,
+- old_odm_pipe,
+- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
+- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
+- COLOR_DEPTH_888,
+- NULL,
+- 0,
+- 0,
+- 0);
+- old_odm_pipe = old_odm_pipe->next_odm_pipe;
+- if (odm_pipe)
+- odm_pipe = odm_pipe->next_odm_pipe;
+- }
+- }
+-
+ if (!blank)
+ if (stream_res->abm) {
+ dc->hwss.set_pipe(pipe_ctx);
+@@ -1706,11 +1683,16 @@ static void dcn20_program_pipe(
+ struct dc_state *context)
+ {
+ struct dce_hwseq *hws = dc->hwseq;
+- /* Only need to unblank on top pipe */
+
+- if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
+- && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
+- hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
++ /* Only need to unblank on top pipe */
++ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
++ if (pipe_ctx->update_flags.bits.enable ||
++ pipe_ctx->update_flags.bits.odm ||
++ pipe_ctx->stream->update_flags.bits.abm_level)
++ hws->funcs.blank_pixel_data(dc, pipe_ctx,
++ !pipe_ctx->plane_state ||
++ !pipe_ctx->plane_state->visible);
++ }
+
+ /* Only update TG on top pipe */
+ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
--- /dev/null
+From 0a611560f53bfd489e33f4a718c915f1a6123d03 Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Tue, 15 Aug 2023 09:13:37 -0400
+Subject: drm/amdgpu: register a dirty framebuffer callback for fbcon
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+commit 0a611560f53bfd489e33f4a718c915f1a6123d03 upstream.
+
+fbcon requires that we implement &drm_framebuffer_funcs.dirty.
+Otherwise, the framebuffer might take a while to flush (which would
+manifest as noticeable lag). However, we can't enable this callback for
+non-fbcon cases since it may cause too many atomic commits to be made at
+once. So, implement amdgpu_dirtyfb() and only enable it for fbcon
+framebuffers (we can use the "struct drm_file file" parameter in the
+callback to check for this since it is only NULL when called by fbcon,
+at least in the mainline kernel) on devices that support atomic KMS.
+
+Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: stable@vger.kernel.org # 6.1+
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2519
+Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 26 +++++++++++++++++++++++++-
+ 1 file changed, 25 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+@@ -38,6 +38,8 @@
+ #include <linux/pci.h>
+ #include <linux/pm_runtime.h>
+ #include <drm/drm_crtc_helper.h>
++#include <drm/drm_damage_helper.h>
++#include <drm/drm_drv.h>
+ #include <drm/drm_edid.h>
+ #include <drm/drm_fb_helper.h>
+ #include <drm/drm_gem_framebuffer_helper.h>
+@@ -529,11 +531,29 @@ bool amdgpu_display_ddc_probe(struct amd
+ return true;
+ }
+
++static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
++ unsigned int flags, unsigned int color,
++ struct drm_clip_rect *clips, unsigned int num_clips)
++{
++
++ if (file)
++ return -ENOSYS;
++
++ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
++ num_clips);
++}
++
+ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ };
+
++static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
++ .destroy = drm_gem_fb_destroy,
++ .create_handle = drm_gem_fb_create_handle,
++ .dirty = amdgpu_dirtyfb
++};
++
+ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags)
+ {
+@@ -1136,7 +1156,11 @@ static int amdgpu_display_gem_fb_verify_
+ if (ret)
+ goto err;
+
+- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
++ if (drm_drv_uses_atomic_modeset(dev))
++ ret = drm_framebuffer_init(dev, &rfb->base,
++ &amdgpu_fb_funcs_atomic);
++ else
++ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
+ if (ret)
+ goto err;
--- /dev/null
+From e9dca969b2426702a73719ab9207e43c6d80b581 Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <jay.cornwall@amd.com>
+Date: Fri, 25 Aug 2023 12:18:41 -0400
+Subject: drm/amdkfd: Add missing gfx11 MQD manager callbacks
+
+From: Jay Cornwall <jay.cornwall@amd.com>
+
+commit e9dca969b2426702a73719ab9207e43c6d80b581 upstream.
+
+mqd_stride function was introduced in commit 2f77b9a242a2
+("drm/amdkfd: Update MQD management on multi XCC setup")
+but not assigned for gfx11. Fixes a NULL dereference in debugfs.
+
+Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.5.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+@@ -437,6 +437,7 @@ struct mqd_manager *mqd_manager_init_v11
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v11_compute_mqd);
+ mqd->get_wave_state = get_wave_state;
++ mqd->mqd_stride = kfd_mqd_stride;
+ #if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+ #endif
+@@ -452,6 +453,7 @@ struct mqd_manager *mqd_manager_init_v11
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v11_compute_mqd);
++ mqd->mqd_stride = kfd_mqd_stride;
+ #if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+ #endif
+@@ -481,6 +483,7 @@ struct mqd_manager *mqd_manager_init_v11
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v11_sdma_mqd);
++ mqd->mqd_stride = kfd_mqd_stride;
+ #if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+ #endif
--- /dev/null
+From aa656d48e871a1b062e1bbf9474d8b831c35074c Mon Sep 17 00:00:00 2001
+From: Liu Ying <victor.liu@nxp.com>
+Date: Mon, 12 Jun 2023 17:23:59 +0800
+Subject: drm/mxsfb: Disable overlay plane in mxsfb_plane_overlay_atomic_disable()
+
+From: Liu Ying <victor.liu@nxp.com>
+
+commit aa656d48e871a1b062e1bbf9474d8b831c35074c upstream.
+
+When disabling overlay plane in mxsfb_plane_overlay_atomic_update(),
+overlay plane's framebuffer pointer is NULL. So, dereferencing it would
+cause a kernel Oops (NULL pointer dereference). Fix the issue by
+disabling overlay plane in mxsfb_plane_overlay_atomic_disable() instead.
+
+Fixes: cb285a5348e7 ("drm: mxsfb: Replace mxsfb_get_fb_paddr() with drm_fb_cma_get_gem_addr()")
+Cc: stable@vger.kernel.org # 5.19+
+Signed-off-by: Liu Ying <victor.liu@nxp.com>
+Reviewed-by: Marek Vasut <marex@denx.de>
+Signed-off-by: Marek Vasut <marex@denx.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230612092359.784115-1-victor.liu@nxp.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/mxsfb/mxsfb_kms.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
++++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+@@ -611,6 +611,14 @@ static void mxsfb_plane_overlay_atomic_u
+ writel(ctrl, mxsfb->base + LCDC_AS_CTRL);
+ }
+
++static void mxsfb_plane_overlay_atomic_disable(struct drm_plane *plane,
++ struct drm_atomic_state *state)
++{
++ struct mxsfb_drm_private *mxsfb = to_mxsfb_drm_private(plane->dev);
++
++ writel(0, mxsfb->base + LCDC_AS_CTRL);
++}
++
+ static bool mxsfb_format_mod_supported(struct drm_plane *plane,
+ uint32_t format,
+ uint64_t modifier)
+@@ -626,6 +634,7 @@ static const struct drm_plane_helper_fun
+ static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = {
+ .atomic_check = mxsfb_plane_atomic_check,
+ .atomic_update = mxsfb_plane_overlay_atomic_update,
++ .atomic_disable = mxsfb_plane_overlay_atomic_disable,
+ };
+
+ static const struct drm_plane_funcs mxsfb_plane_funcs = {
--- /dev/null
+From 7cafe9b8e22bb3d77f130c461aedf6868c4aaf58 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 28 Jul 2023 18:15:48 -0700
+Subject: KVM: nSVM: Check instead of asserting on nested TSC scaling support
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 7cafe9b8e22bb3d77f130c461aedf6868c4aaf58 upstream.
+
+Check for nested TSC scaling support on nested SVM VMRUN instead of
+asserting that TSC scaling is exposed to L1 if L1's MSR_AMD64_TSC_RATIO
+has diverged from KVM's default. Userspace can trigger the WARN at will
+by writing the MSR and then updating guest CPUID to hide the feature
+(modifying guest CPUID is allowed anytime before KVM_RUN). E.g. hacking
+KVM's state_test selftest to do
+
+ vcpu_set_msr(vcpu, MSR_AMD64_TSC_RATIO, 0);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_TSCRATEMSR);
+
+after restoring state in a new VM+vCPU yields an endless supply of:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 164 PID: 62565 at arch/x86/kvm/svm/nested.c:699
+ nested_vmcb02_prepare_control+0x3d6/0x3f0 [kvm_amd]
+ Call Trace:
+ <TASK>
+ enter_svm_guest_mode+0x114/0x560 [kvm_amd]
+ nested_svm_vmrun+0x260/0x330 [kvm_amd]
+ vmrun_interception+0x29/0x30 [kvm_amd]
+ svm_invoke_exit_handler+0x35/0x100 [kvm_amd]
+ svm_handle_exit+0xe7/0x180 [kvm_amd]
+ kvm_arch_vcpu_ioctl_run+0x1eab/0x2570 [kvm]
+ kvm_vcpu_ioctl+0x4c9/0x5b0 [kvm]
+ __se_sys_ioctl+0x7a/0xc0
+ __x64_sys_ioctl+0x21/0x30
+ do_syscall_64+0x41/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+ RIP: 0033:0x45ca1b
+
+Note, the nested #VMEXIT path has the same flaw, but needs a different
+fix and will be handled separately.
+
+Fixes: 5228eb96a487 ("KVM: x86: nSVM: implement nested TSC scaling")
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230729011608.1065019-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -695,10 +695,9 @@ static void nested_vmcb02_prepare_contro
+
+ vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
+
+- if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
+- WARN_ON(!svm->tsc_scaling_enabled);
++ if (svm->tsc_scaling_enabled &&
++ svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
+ nested_svm_update_tsc_ratio_msr(vcpu);
+- }
+
+ vmcb02->control.int_ctl =
+ (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
--- /dev/null
+From 0c94e2468491cbf0754f49a5136ab51294a96b69 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 28 Jul 2023 18:15:49 -0700
+Subject: KVM: nSVM: Load L1's TSC multiplier based on L1 state, not L2 state
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0c94e2468491cbf0754f49a5136ab51294a96b69 upstream.
+
+When emulating nested VM-Exit, load L1's TSC multiplier if L1's desired
+ratio doesn't match the current ratio, not if the ratio L1 is using for
+L2 diverges from the default. Functionally, the end result is the same
+as KVM will run L2 with L1's multiplier if L2's multiplier is the default,
+i.e. checking that L1's multiplier is loaded is equivalent to checking if
+L2 has a non-default multiplier.
+
+However, the assertion that TSC scaling is exposed to L1 is flawed, as
+userspace can trigger the WARN at will by writing the MSR and then
+updating guest CPUID to hide the feature (modifying guest CPUID is
+allowed anytime before KVM_RUN). E.g. hacking KVM's state_test
+selftest to do
+
+ vcpu_set_msr(vcpu, MSR_AMD64_TSC_RATIO, 0);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_TSCRATEMSR);
+
+after restoring state in a new VM+vCPU yields an endless supply of:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 10 PID: 206939 at arch/x86/kvm/svm/nested.c:1105
+ nested_svm_vmexit+0x6af/0x720 [kvm_amd]
+ Call Trace:
+ nested_svm_exit_handled+0x102/0x1f0 [kvm_amd]
+ svm_handle_exit+0xb9/0x180 [kvm_amd]
+ kvm_arch_vcpu_ioctl_run+0x1eab/0x2570 [kvm]
+ kvm_vcpu_ioctl+0x4c9/0x5b0 [kvm]
+ ? trace_hardirqs_off+0x4d/0xa0
+ __se_sys_ioctl+0x7a/0xc0
+ __x64_sys_ioctl+0x21/0x30
+ do_syscall_64+0x41/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Unlike the nested VMRUN path, hoisting the svm->tsc_scaling_enabled check
+into the if-statement is wrong as KVM needs to ensure L1's multiplier is
+loaded in the above scenario. Alternatively, the WARN_ON() could simply
+be deleted, but that would make KVM's behavior even more subtle, e.g. it's
+not immediately obvious why it's safe to write MSR_AMD64_TSC_RATIO when
+checking only tsc_ratio_msr.
+
+Fixes: 5228eb96a487 ("KVM: x86: nSVM: implement nested TSC scaling")
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230729011608.1065019-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1100,8 +1100,8 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+ }
+
+- if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
+- WARN_ON(!svm->tsc_scaling_enabled);
++ if (kvm_caps.has_tsc_control &&
++ vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
+ vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
+ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+ }
--- /dev/null
+From cb49631ad111570f1bad37702c11c2ae07fa2e3c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 24 Aug 2023 18:36:18 -0700
+Subject: KVM: SVM: Don't inject #UD if KVM attempts to skip SEV guest insn
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit cb49631ad111570f1bad37702c11c2ae07fa2e3c upstream.
+
+Don't inject a #UD if KVM attempts to "emulate" to skip an instruction
+for an SEV guest, and instead resume the guest and hope that it can make
+forward progress. When commit 04c40f344def ("KVM: SVM: Inject #UD on
+attempted emulation for SEV guest w/o insn buffer") added the completely
+arbitrary #UD behavior, there were no known scenarios where a well-behaved
+guest would induce a VM-Exit that triggered emulation, i.e. it was thought
+that injecting #UD would be helpful.
+
+However, now that KVM (correctly) attempts to re-inject INT3/INTO, e.g. if
+a #NPF is encountered when attempting to deliver the INT3/INTO, an SEV
+guest can trigger emulation without a buffer, through no fault of its own.
+Resuming the guest and retrying the INT3/INTO is architecturally wrong,
+e.g. the vCPU will incorrectly re-hit code #DBs, but for SEV guests there
+is literally no other option that has a chance of making forward progress.
+
+Drop the #UD injection for all "skip" emulation, not just those related to
+INT3/INTO, even though that means that the guest will likely end up in an
+infinite loop instead of getting a #UD (the vCPU may also crash, e.g. if
+KVM emulated everything about an instruction except for advancing RIP).
+There's no evidence that suggests that an unexpected #UD is actually
+better than hanging the vCPU, e.g. a soft-hung vCPU can still respond to
+IRQs and NMIs to generate a backtrace.
+
+Reported-by: Wu Zongyo <wuzongyo@mail.ustc.edu.cn>
+Closes: https://lore.kernel.org/all/8eb933fd-2cf3-d7a9-32fe-2a1d82eac42a@mail.ustc.edu.cn
+Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
+Cc: stable@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20230825013621.2845700-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 35 +++++++++++++++++++++++++++--------
+ 1 file changed, 27 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -365,6 +365,8 @@ static void svm_set_interrupt_shadow(str
+ svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+ }
++static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
++ void *insn, int insn_len);
+
+ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
+ bool commit_side_effects)
+@@ -385,6 +387,14 @@ static int __svm_skip_emulated_instructi
+ }
+
+ if (!svm->next_rip) {
++ /*
++ * FIXME: Drop this when kvm_emulate_instruction() does the
++ * right thing and treats "can't emulate" as outright failure
++ * for EMULTYPE_SKIP.
++ */
++ if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
++ return 0;
++
+ if (unlikely(!commit_side_effects))
+ old_rflags = svm->vmcb->save.rflags;
+
+@@ -4651,16 +4661,25 @@ static bool svm_can_emulate_instruction(
+ * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
+ * decode garbage.
+ *
+- * Inject #UD if KVM reached this point without an instruction buffer.
+- * In practice, this path should never be hit by a well-behaved guest,
+- * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
+- * is still theoretically reachable, e.g. via unaccelerated fault-like
+- * AVIC access, and needs to be handled by KVM to avoid putting the
+- * guest into an infinite loop. Injecting #UD is somewhat arbitrary,
+- * but its the least awful option given lack of insight into the guest.
++ * If KVM is NOT trying to simply skip an instruction, inject #UD if
++ * KVM reached this point without an instruction buffer. In practice,
++ * this path should never be hit by a well-behaved guest, e.g. KVM
++ * doesn't intercept #UD or #GP for SEV guests, but this path is still
++ * theoretically reachable, e.g. via unaccelerated fault-like AVIC
++ * access, and needs to be handled by KVM to avoid putting the guest
++ * into an infinite loop. Injecting #UD is somewhat arbitrary, but
++ * its the least awful option given lack of insight into the guest.
++ *
++ * If KVM is trying to skip an instruction, simply resume the guest.
++ * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
++ * will attempt to re-inject the INT3/INTO and skip the instruction.
++ * In that scenario, retrying the INT3/INTO and hoping the guest will
++ * make forward progress is the only option that has a chance of
++ * success (and in practice it will work the vast majority of the time).
+ */
+ if (unlikely(!insn)) {
+- kvm_queue_exception(vcpu, UD_VECTOR);
++ if (!(emul_type & EMULTYPE_SKIP))
++ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+ }
+
--- /dev/null
+From f1187ef24eb8f36e8ad8106d22615ceddeea6097 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 24 Aug 2023 19:23:56 -0700
+Subject: KVM: SVM: Get source vCPUs from source VM for SEV-ES intrahost migration
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f1187ef24eb8f36e8ad8106d22615ceddeea6097 upstream.
+
+Fix a goof where KVM tries to grab source vCPUs from the destination VM
+when doing intrahost migration. Grabbing the wrong vCPU not only hoses
+the guest, it also crashes the host due to the VMSA pointer being left
+NULL.
+
+ BUG: unable to handle page fault for address: ffffe38687000000
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP NOPTI
+ CPU: 39 PID: 17143 Comm: sev_migrate_tes Tainted: GO 6.5.0-smp--fff2e47e6c3b-next #151
+ Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.28.0 07/10/2023
+ RIP: 0010:__free_pages+0x15/0xd0
+ RSP: 0018:ffff923fcf6e3c78 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: ffffe38687000000 RCX: 0000000000000100
+ RDX: 0000000000000100 RSI: 0000000000000000 RDI: ffffe38687000000
+ RBP: ffff923fcf6e3c88 R08: ffff923fcafb0000 R09: 0000000000000000
+ R10: 0000000000000000 R11: ffffffff83619b90 R12: ffff923fa9540000
+ R13: 0000000000080007 R14: ffff923f6d35d000 R15: 0000000000000000
+ FS: 0000000000000000(0000) GS:ffff929d0d7c0000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: ffffe38687000000 CR3: 0000005224c34005 CR4: 0000000000770ee0
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ sev_free_vcpu+0xcb/0x110 [kvm_amd]
+ svm_vcpu_free+0x75/0xf0 [kvm_amd]
+ kvm_arch_vcpu_destroy+0x36/0x140 [kvm]
+ kvm_destroy_vcpus+0x67/0x100 [kvm]
+ kvm_arch_destroy_vm+0x161/0x1d0 [kvm]
+ kvm_put_kvm+0x276/0x560 [kvm]
+ kvm_vm_release+0x25/0x30 [kvm]
+ __fput+0x106/0x280
+ ____fput+0x12/0x20
+ task_work_run+0x86/0xb0
+ do_exit+0x2e3/0x9c0
+ do_group_exit+0xb1/0xc0
+ __x64_sys_exit_group+0x1b/0x20
+ do_syscall_64+0x41/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+ </TASK>
+ CR2: ffffe38687000000
+
+Fixes: 6defa24d3b12 ("KVM: SEV: Init target VMCBs in sev_migrate_from")
+Cc: stable@vger.kernel.org
+Cc: Peter Gonda <pgonda@google.com>
+Reviewed-by: Peter Gonda <pgonda@google.com>
+Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
+Link: https://lore.kernel.org/r/20230825022357.2852133-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1725,7 +1725,7 @@ static void sev_migrate_from(struct kvm
+ * Note, the source is not required to have the same number of
+ * vCPUs as the destination when migrating a vanilla SEV VM.
+ */
+- src_vcpu = kvm_get_vcpu(dst_kvm, i);
++ src_vcpu = kvm_get_vcpu(src_kvm, i);
+ src_svm = to_svm(src_vcpu);
+
+ /*
--- /dev/null
+From f3cebc75e7425d6949d726bb8e937095b0aef025 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 8 Aug 2023 16:31:32 -0700
+Subject: KVM: SVM: Set target pCPU during IRTE update if target vCPU is running
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f3cebc75e7425d6949d726bb8e937095b0aef025 upstream.
+
+Update the target pCPU for IOMMU doorbells when updating IRTE routing if
+KVM is actively running the associated vCPU. KVM currently only updates
+the pCPU when loading the vCPU (via avic_vcpu_load()), and so doorbell
+events will be delayed until the vCPU goes through a put+load cycle (which
+might very well "never" happen for the lifetime of the VM).
+
+To avoid inserting a stale pCPU, e.g. due to racing between updating IRTE
+routing and vCPU load/put, get the pCPU information from the vCPU's
+Physical APIC ID table entry (a.k.a. avic_physical_id_cache in KVM) and
+update the IRTE while holding ir_list_lock. Add comments with --verbose
+enabled to explain exactly what is and isn't protected by ir_list_lock.
+
+Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt")
+Reported-by: dengqiao.joey <dengqiao.joey@bytedance.com>
+Cc: stable@vger.kernel.org
+Cc: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Cc: Joao Martins <joao.m.martins@oracle.com>
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
+Link: https://lore.kernel.org/r/20230808233132.2499764-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/avic.c | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_s
+ int ret = 0;
+ unsigned long flags;
+ struct amd_svm_iommu_ir *ir;
++ u64 entry;
+
+ /**
+ * In some cases, the existing irte is updated and re-set,
+@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_s
+ ir->data = pi->ir_data;
+
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
++
++ /*
++ * Update the target pCPU for IOMMU doorbells if the vCPU is running.
++ * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
++ * will update the pCPU info when the vCPU awkened and/or scheduled in.
++ * See also avic_vcpu_load().
++ */
++ entry = READ_ONCE(*(svm->avic_physical_id_cache));
++ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
++ amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
++ true, pi->ir_data);
++
+ list_add(&ir->node, &svm->ir_list);
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+ out:
+@@ -1031,6 +1044,13 @@ void avic_vcpu_load(struct kvm_vcpu *vcp
+ if (kvm_vcpu_is_blocking(vcpu))
+ return;
+
++ /*
++ * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
++ * _currently_ have assigned devices, as that can change. Holding
++ * ir_list_lock ensures that either svm_ir_list_add() will consume
++ * up-to-date entry information, or that this task will wait until
++ * svm_ir_list_add() completes to set the new target pCPU.
++ */
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+@@ -1067,6 +1087,14 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu
+ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+ return;
+
++ /*
++ * Take and hold the per-vCPU interrupt remapping lock while updating
++ * the Physical ID entry even though the lock doesn't protect against
++ * multiple writers (see above). Holding ir_list_lock ensures that
++ * either svm_ir_list_add() will consume up-to-date entry information,
++ * or that this task will wait until svm_ir_list_add() completes to
++ * mark the vCPU as not running.
++ */
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
--- /dev/null
+From 1952e74da96fb3e48b72a2d0ece78c688a5848c1 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 24 Aug 2023 19:23:57 -0700
+Subject: KVM: SVM: Skip VMSA init in sev_es_init_vmcb() if pointer is NULL
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 1952e74da96fb3e48b72a2d0ece78c688a5848c1 upstream.
+
+Skip initializing the VMSA physical address in the VMCB if the VMSA is
+NULL, which occurs during intrahost migration as KVM initializes the VMCB
+before copying over state from the source to the destination (including
+the VMSA and its physical address).
+
+In normal builds, __pa() is just math, so the bug isn't fatal, but with
+CONFIG_DEBUG_VIRTUAL=y, the validity of the virtual address is verified
+and passing in NULL will make the kernel unhappy.
+
+Fixes: 6defa24d3b12 ("KVM: SEV: Init target VMCBs in sev_migrate_from")
+Cc: stable@vger.kernel.org
+Cc: Peter Gonda <pgonda@google.com>
+Reviewed-by: Peter Gonda <pgonda@google.com>
+Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
+Link: https://lore.kernel.org/r/20230825022357.2852133-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2955,9 +2955,12 @@ static void sev_es_init_vmcb(struct vcpu
+ /*
+ * An SEV-ES guest requires a VMSA area that is a separate from the
+ * VMCB page. Do not include the encryption mask on the VMSA physical
+- * address since hardware will access it using the guest key.
++ * address since hardware will access it using the guest key. Note,
++ * the VMSA will be NULL if this vCPU is the destination for intrahost
++ * migration, and will be copied later.
+ */
+- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
++ if (svm->sev_es.vmsa)
++ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+
+ /* Can't intercept CR register access, HV can't modify CR registers */
+ svm_clr_intercept(svm, INTERCEPT_CR0_READ);
--- /dev/null
+From 4c08e737f056fec930b416a2bd37ed266d724f95 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 8 Aug 2023 16:31:31 -0700
+Subject: KVM: SVM: Take and hold ir_list_lock when updating vCPU's Physical ID entry
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 4c08e737f056fec930b416a2bd37ed266d724f95 upstream.
+
+Hoist the acquisition of ir_list_lock from avic_update_iommu_vcpu_affinity()
+to its two callers, avic_vcpu_load() and avic_vcpu_put(), specifically to
+encapsulate the write to the vCPU's entry in the AVIC Physical ID table.
+This will allow a future fix to pull information from the Physical ID entry
+when updating the IRTE, without potentially consuming stale information,
+i.e. without racing with the vCPU being (un)loaded.
+
+Add a comment to call out that ir_list_lock does NOT protect against
+multiple writers, specifically that reading the Physical ID entry in
+avic_vcpu_put() outside of the lock is safe.
+
+To preserve some semblance of independence from ir_list_lock, keep the
+READ_ONCE() in avic_vcpu_load() even though acquiring the spinlock
+effectively ensures the load(s) will be generated after acquiring the
+lock.
+
+Cc: stable@vger.kernel.org
+Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
+Link: https://lore.kernel.org/r/20230808233132.2499764-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/avic.c | 31 +++++++++++++++++++++++--------
+ 1 file changed, 23 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -986,10 +986,11 @@ static inline int
+ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
+ {
+ int ret = 0;
+- unsigned long flags;
+ struct amd_svm_iommu_ir *ir;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ lockdep_assert_held(&svm->ir_list_lock);
++
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
+ return 0;
+
+@@ -997,19 +998,15 @@ avic_update_iommu_vcpu_affinity(struct k
+ * Here, we go through the per-vcpu ir_list to update all existing
+ * interrupt remapping table entry targeting this vcpu.
+ */
+- spin_lock_irqsave(&svm->ir_list_lock, flags);
+-
+ if (list_empty(&svm->ir_list))
+- goto out;
++ return 0;
+
+ list_for_each_entry(ir, &svm->ir_list, node) {
+ ret = amd_iommu_update_ga(cpu, r, ir->data);
+ if (ret)
+- break;
++ return ret;
+ }
+-out:
+- spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+- return ret;
++ return 0;
+ }
+
+ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+@@ -1017,6 +1014,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcp
+ u64 entry;
+ int h_physical_id = kvm_cpu_get_apicid(cpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
++ unsigned long flags;
+
+ lockdep_assert_preemption_disabled();
+
+@@ -1033,6 +1031,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcp
+ if (kvm_vcpu_is_blocking(vcpu))
+ return;
+
++ spin_lock_irqsave(&svm->ir_list_lock, flags);
++
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+
+@@ -1042,25 +1042,40 @@ void avic_vcpu_load(struct kvm_vcpu *vcp
+
+ WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
++
++ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+ }
+
+ void avic_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+ u64 entry;
+ struct vcpu_svm *svm = to_svm(vcpu);
++ unsigned long flags;
+
+ lockdep_assert_preemption_disabled();
+
++ /*
++ * Note, reading the Physical ID entry outside of ir_list_lock is safe
++ * as only the pCPU that has loaded (or is loading) the vCPU is allowed
++ * to modify the entry, and preemption is disabled. I.e. the vCPU
++ * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
++ * recursively.
++ */
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+
+ /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
+ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+ return;
+
++ spin_lock_irqsave(&svm->ir_list_lock, flags);
++
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
++
++ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
++
+ }
+
+ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
--- /dev/null
+From 50011c2a245792993f2756e5b5b571512bfa409e Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 24 Aug 2023 18:45:32 -0700
+Subject: KVM: VMX: Refresh available regs and IDT vectoring info before NMI handling
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 50011c2a245792993f2756e5b5b571512bfa409e upstream.
+
+Reset the mask of available "registers" and refresh the IDT vectoring
+info snapshot in vmx_vcpu_enter_exit(), before KVM potentially handles
+an NMI VM-Exit. One of the "registers" that KVM VMX lazily loads is the
+vmcs.VM_EXIT_INTR_INFO field, which holds the vector+type on "exception
+or NMI" VM-Exits, i.e. is needed to identify NMIs. Clearing the available
+registers bitmask after handling NMIs results in KVM querying info from
+the last VM-Exit that read vmcs.VM_EXIT_INTR_INFO, and leads to both
+missed NMIs and spurious NMIs in the host.
+
+Opportunistically grab vmcs.IDT_VECTORING_INFO_FIELD early in the VM-Exit
+path too, e.g. to guard against similar consumption of stale data. The
+field is read on every "normal" VM-Exit, and there's no point in delaying
+the inevitable.
+
+Reported-by: Like Xu <like.xu.linux@gmail.com>
+Fixes: 11df586d774f ("KVM: VMX: Handle NMI VM-Exits in noinstr region")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230825014532.2846714-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -7243,13 +7243,20 @@ static noinstr void vmx_vcpu_enter_exit(
+ flags);
+
+ vcpu->arch.cr2 = native_read_cr2();
++ vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
++
++ vmx->idt_vectoring_info = 0;
+
+ vmx_enable_fb_clear(vmx);
+
+- if (unlikely(vmx->fail))
++ if (unlikely(vmx->fail)) {
+ vmx->exit_reason.full = 0xdead;
+- else
+- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
++ goto out;
++ }
++
++ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
++ if (likely(!vmx->exit_reason.failed_vmentry))
++ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+
+ if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu))) {
+@@ -7258,6 +7265,7 @@ static noinstr void vmx_vcpu_enter_exit(
+ kvm_after_interrupt(vcpu);
+ }
+
++out:
+ guest_state_exit_irqoff();
+ }
+
+@@ -7379,8 +7387,6 @@ static fastpath_t vmx_vcpu_run(struct kv
+ loadsegment(es, __USER_DS);
+ #endif
+
+- vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+-
+ pt_guest_exit(vmx);
+
+ kvm_load_host_xsave_state(vcpu);
+@@ -7397,17 +7403,12 @@ static fastpath_t vmx_vcpu_run(struct kv
+ vmx->nested.nested_run_pending = 0;
+ }
+
+- vmx->idt_vectoring_info = 0;
+-
+ if (unlikely(vmx->fail))
+ return EXIT_FASTPATH_NONE;
+
+ if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
+ kvm_machine_check();
+
+- if (likely(!vmx->exit_reason.failed_vmentry))
+- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+-
+ trace_kvm_exit(vcpu, KVM_ISA_VMX);
+
+ if (unlikely(vmx->exit_reason.failed_vmentry))
--- /dev/null
+From a79a404e6c2241ebc528b9ebf4c0832457b498c3 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 18 Jul 2023 15:37:18 +0100
+Subject: MIPS: Fix CONFIG_CPU_DADDI_WORKAROUNDS `modules_install' regression
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit a79a404e6c2241ebc528b9ebf4c0832457b498c3 upstream.
+
+Remove a build-time check for the presence of the GCC `-msym32' option.
+This option has been there since GCC 4.1.0, which is below the minimum
+required as at commit 805b2e1d427a ("kbuild: include Makefile.compiler
+only when compiler is needed"), when an error message:
+
+arch/mips/Makefile:306: *** CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32. Stop.
+
+started to trigger for the `modules_install' target with configurations
+such as `decstation_64_defconfig' that set CONFIG_CPU_DADDI_WORKAROUNDS,
+because said commit has made `cc-option-yn' an undefined function for
+non-build targets.
+
+Reported-by: Jan-Benedict Glaw <jbglaw@lug-owl.de>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Fixes: 805b2e1d427a ("kbuild: include Makefile.compiler only when compiler is needed")
+Cc: stable@vger.kernel.org # v5.13+
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/Makefile
++++ b/arch/mips/Makefile
+@@ -299,8 +299,8 @@ ifdef CONFIG_64BIT
+ endif
+ endif
+
+- ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
+- cflags-y += -msym32 -DKBUILD_64BIT_SYM32
++ ifeq ($(KBUILD_SYM32), y)
++ cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
+ else
+ ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
+ $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
--- /dev/null
+From 4fe4a6374c4db9ae2b849b61e84b58685dca565a Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Tue, 18 Jul 2023 15:37:23 +0100
+Subject: MIPS: Only fiddle with CHECKFLAGS if `need-compiler'
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 4fe4a6374c4db9ae2b849b61e84b58685dca565a upstream.
+
+We have originally guarded fiddling with CHECKFLAGS in our arch Makefile
+by checking for the CONFIG_MIPS variable, not set for targets such as
+`distclean', etc. that neither include `.config' nor use the compiler.
+
+Starting from commit 805b2e1d427a ("kbuild: include Makefile.compiler
+only when compiler is needed") we have had a generic `need-compiler'
+variable explicitly telling us if the compiler will be used and thus its
+capabilities need to be checked and expressed in the form of compilation
+flags. If this variable is not set, then `make' functions such as
+`cc-option' are undefined, causing all kinds of weirdness to happen if
+we expect specific results to be returned, most recently:
+
+cc1: error: '-mloongson-mmi' must be used with '-mhard-float'
+
+messages with configurations such as `fuloong2e_defconfig' and the
+`modules_install' target, which does include `.config' and yet does not
+use the compiler.
+
+Replace the check for CONFIG_MIPS with one for `need-compiler' instead,
+so as to prevent the compiler from being ever called for CHECKFLAGS when
+not needed.
+
+Reported-by: Guillaume Tucker <guillaume.tucker@collabora.com>
+Closes: https://lore.kernel.org/r/85031c0c-d981-031e-8a50-bc4fad2ddcd8@collabora.com/
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Fixes: 805b2e1d427a ("kbuild: include Makefile.compiler only when compiler is needed")
+Cc: stable@vger.kernel.org # v5.13+
+Reported-by: "kernelci.org bot" <bot@kernelci.org>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/Makefile
++++ b/arch/mips/Makefile
+@@ -341,7 +341,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin
+
+ KBUILD_LDFLAGS += -m $(ld-emul)
+
+-ifdef CONFIG_MIPS
++ifdef need-compiler
+ CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
+ grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
+ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
--- /dev/null
+From e66dd317194daae0475fe9e5577c80aa97f16cb9 Mon Sep 17 00:00:00 2001
+From: William Zhang <william.zhang@broadcom.com>
+Date: Thu, 6 Jul 2023 11:29:07 -0700
+Subject: mtd: rawnand: brcmnand: Fix crash during the panic_write
+
+From: William Zhang <william.zhang@broadcom.com>
+
+commit e66dd317194daae0475fe9e5577c80aa97f16cb9 upstream.
+
+When executing a NAND command within the panic write path, wait for any
+pending command instead of calling BUG_ON to avoid crashing while
+already crashing.
+
+Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
+Signed-off-by: William Zhang <william.zhang@broadcom.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Reviewed-by: Kursad Oney <kursad.oney@broadcom.com>
+Reviewed-by: Kamal Dasu <kamal.dasu@broadcom.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20230706182909.79151-4-william.zhang@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/brcmnand/brcmnand.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
++++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+@@ -1592,7 +1592,17 @@ static void brcmnand_send_cmd(struct brc
+
+ dev_dbg(ctrl->dev, "send native cmd %d addr 0x%llx\n", cmd, cmd_addr);
+
+- BUG_ON(ctrl->cmd_pending != 0);
++ /*
++ * If we came here through _panic_write and there is a pending
++ * command, try to wait for it. If it times out, rather than
++ * hitting BUG_ON, just return so we don't crash while crashing.
++ */
++ if (oops_in_progress) {
++ if (ctrl->cmd_pending &&
++ bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0))
++ return;
++ } else
++ BUG_ON(ctrl->cmd_pending != 0);
+ ctrl->cmd_pending = cmd;
+
+ ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0);
--- /dev/null
+From 2ec2839a9062db8a592525a3fdabd42dcd9a3a9b Mon Sep 17 00:00:00 2001
+From: William Zhang <william.zhang@broadcom.com>
+Date: Thu, 6 Jul 2023 11:29:05 -0700
+Subject: mtd: rawnand: brcmnand: Fix ECC level field setting for v7.2 controller
+
+From: William Zhang <william.zhang@broadcom.com>
+
+commit 2ec2839a9062db8a592525a3fdabd42dcd9a3a9b upstream.
+
+v7.2 controller has different ECC level field size and shift in the acc
+control register than its predecessor and successor controller. It needs
+to be set specifically.
+
+Fixes: decba6d47869 ("mtd: brcmnand: Add v7.2 controller support")
+Signed-off-by: William Zhang <william.zhang@broadcom.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20230706182909.79151-2-william.zhang@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/brcmnand/brcmnand.c | 74 +++++++++++++++++--------------
+ 1 file changed, 41 insertions(+), 33 deletions(-)
+
+--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
++++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+@@ -272,6 +272,7 @@ struct brcmnand_controller {
+ const unsigned int *page_sizes;
+ unsigned int page_size_shift;
+ unsigned int max_oob;
++ u32 ecc_level_shift;
+ u32 features;
+
+ /* for low-power standby/resume only */
+@@ -596,6 +597,34 @@ enum {
+ INTFC_CTLR_READY = BIT(31),
+ };
+
++/***********************************************************************
++ * NAND ACC CONTROL bitfield
++ *
++ * Some bits have remained constant throughout hardware revision, while
++ * others have shifted around.
++ ***********************************************************************/
++
++/* Constant for all versions (where supported) */
++enum {
++ /* See BRCMNAND_HAS_CACHE_MODE */
++ ACC_CONTROL_CACHE_MODE = BIT(22),
++
++ /* See BRCMNAND_HAS_PREFETCH */
++ ACC_CONTROL_PREFETCH = BIT(23),
++
++ ACC_CONTROL_PAGE_HIT = BIT(24),
++ ACC_CONTROL_WR_PREEMPT = BIT(25),
++ ACC_CONTROL_PARTIAL_PAGE = BIT(26),
++ ACC_CONTROL_RD_ERASED = BIT(27),
++ ACC_CONTROL_FAST_PGM_RDIN = BIT(28),
++ ACC_CONTROL_WR_ECC = BIT(30),
++ ACC_CONTROL_RD_ECC = BIT(31),
++};
++
++#define ACC_CONTROL_ECC_SHIFT 16
++/* Only for v7.2 */
++#define ACC_CONTROL_ECC_EXT_SHIFT 13
++
+ static inline bool brcmnand_non_mmio_ops(struct brcmnand_controller *ctrl)
+ {
+ #if IS_ENABLED(CONFIG_MTD_NAND_BRCMNAND_BCMA)
+@@ -737,6 +766,12 @@ static int brcmnand_revision_init(struct
+ else if (of_property_read_bool(ctrl->dev->of_node, "brcm,nand-has-wp"))
+ ctrl->features |= BRCMNAND_HAS_WP;
+
++ /* v7.2 has different ecc level shift in the acc register */
++ if (ctrl->nand_version == 0x0702)
++ ctrl->ecc_level_shift = ACC_CONTROL_ECC_EXT_SHIFT;
++ else
++ ctrl->ecc_level_shift = ACC_CONTROL_ECC_SHIFT;
++
+ return 0;
+ }
+
+@@ -931,30 +966,6 @@ static inline int brcmnand_cmd_shift(str
+ return 0;
+ }
+
+-/***********************************************************************
+- * NAND ACC CONTROL bitfield
+- *
+- * Some bits have remained constant throughout hardware revision, while
+- * others have shifted around.
+- ***********************************************************************/
+-
+-/* Constant for all versions (where supported) */
+-enum {
+- /* See BRCMNAND_HAS_CACHE_MODE */
+- ACC_CONTROL_CACHE_MODE = BIT(22),
+-
+- /* See BRCMNAND_HAS_PREFETCH */
+- ACC_CONTROL_PREFETCH = BIT(23),
+-
+- ACC_CONTROL_PAGE_HIT = BIT(24),
+- ACC_CONTROL_WR_PREEMPT = BIT(25),
+- ACC_CONTROL_PARTIAL_PAGE = BIT(26),
+- ACC_CONTROL_RD_ERASED = BIT(27),
+- ACC_CONTROL_FAST_PGM_RDIN = BIT(28),
+- ACC_CONTROL_WR_ECC = BIT(30),
+- ACC_CONTROL_RD_ECC = BIT(31),
+-};
+-
+ static inline u32 brcmnand_spare_area_mask(struct brcmnand_controller *ctrl)
+ {
+ if (ctrl->nand_version == 0x0702)
+@@ -967,18 +978,15 @@ static inline u32 brcmnand_spare_area_ma
+ return GENMASK(4, 0);
+ }
+
+-#define NAND_ACC_CONTROL_ECC_SHIFT 16
+-#define NAND_ACC_CONTROL_ECC_EXT_SHIFT 13
+-
+ static inline u32 brcmnand_ecc_level_mask(struct brcmnand_controller *ctrl)
+ {
+ u32 mask = (ctrl->nand_version >= 0x0600) ? 0x1f : 0x0f;
+
+- mask <<= NAND_ACC_CONTROL_ECC_SHIFT;
++ mask <<= ACC_CONTROL_ECC_SHIFT;
+
+ /* v7.2 includes additional ECC levels */
+- if (ctrl->nand_version >= 0x0702)
+- mask |= 0x7 << NAND_ACC_CONTROL_ECC_EXT_SHIFT;
++ if (ctrl->nand_version == 0x0702)
++ mask |= 0x7 << ACC_CONTROL_ECC_EXT_SHIFT;
+
+ return mask;
+ }
+@@ -992,8 +1000,8 @@ static void brcmnand_set_ecc_enabled(str
+
+ if (en) {
+ acc_control |= ecc_flags; /* enable RD/WR ECC */
+- acc_control |= host->hwcfg.ecc_level
+- << NAND_ACC_CONTROL_ECC_SHIFT;
++ acc_control &= ~brcmnand_ecc_level_mask(ctrl);
++ acc_control |= host->hwcfg.ecc_level << ctrl->ecc_level_shift;
+ } else {
+ acc_control &= ~ecc_flags; /* disable RD/WR ECC */
+ acc_control &= ~brcmnand_ecc_level_mask(ctrl);
+@@ -2593,7 +2601,7 @@ static int brcmnand_set_cfg(struct brcmn
+ tmp &= ~brcmnand_ecc_level_mask(ctrl);
+ tmp &= ~brcmnand_spare_area_mask(ctrl);
+ if (ctrl->nand_version >= 0x0302) {
+- tmp |= cfg->ecc_level << NAND_ACC_CONTROL_ECC_SHIFT;
++ tmp |= cfg->ecc_level << ctrl->ecc_level_shift;
+ tmp |= cfg->spare_area_size;
+ }
+ nand_writereg(ctrl, acc_control_offs, tmp);
--- /dev/null
+From 9cc0a598b944816f2968baf2631757f22721b996 Mon Sep 17 00:00:00 2001
+From: William Zhang <william.zhang@broadcom.com>
+Date: Thu, 6 Jul 2023 11:29:06 -0700
+Subject: mtd: rawnand: brcmnand: Fix potential false time out warning
+
+From: William Zhang <william.zhang@broadcom.com>
+
+commit 9cc0a598b944816f2968baf2631757f22721b996 upstream.
+
+If system is busy during the command status polling function, the driver
+may not get the chance to poll the status register till the end of time
+out and return the premature status. Do a final check after time out
+happens to ensure reading the correct status.
+
+Fixes: 9d2ee0a60b8b ("mtd: nand: brcmnand: Check flash #WP pin status before nand erase/program")
+Signed-off-by: William Zhang <william.zhang@broadcom.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20230706182909.79151-3-william.zhang@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/brcmnand/brcmnand.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
++++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+@@ -1072,6 +1072,14 @@ static int bcmnand_ctrl_poll_status(stru
+ cpu_relax();
+ } while (time_after(limit, jiffies));
+
++ /*
++ * do a final check after time out in case the CPU was busy and the driver
++ * did not get enough time to perform the polling to avoid false alarms
++ */
++ val = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
++ if ((val & mask) == expected_val)
++ return 0;
++
+ dev_warn(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
+ expected_val, val & mask);
+
--- /dev/null
+From 5d53244186c9ac58cb88d76a0958ca55b83a15cd Mon Sep 17 00:00:00 2001
+From: William Zhang <william.zhang@broadcom.com>
+Date: Thu, 6 Jul 2023 11:29:08 -0700
+Subject: mtd: rawnand: brcmnand: Fix potential out-of-bounds access in oob write
+
+From: William Zhang <william.zhang@broadcom.com>
+
+commit 5d53244186c9ac58cb88d76a0958ca55b83a15cd upstream.
+
+When the oob buffer length is not a multiple of the word size, the oob
+write function does an out-of-bounds read on the oob source buffer in the
+last iteration. Fix that by always checking the length limit when reading
+the oob buffer, and pad the final word written to the oob registers with
+0xff once the end of the buffer is reached.
+
+Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
+Signed-off-by: William Zhang <william.zhang@broadcom.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20230706182909.79151-5-william.zhang@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/brcmnand/brcmnand.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
++++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+@@ -1461,19 +1461,33 @@ static int write_oob_to_regs(struct brcm
+ const u8 *oob, int sas, int sector_1k)
+ {
+ int tbytes = sas << sector_1k;
+- int j;
++ int j, k = 0;
++ u32 last = 0xffffffff;
++ u8 *plast = (u8 *)&last;
+
+ /* Adjust OOB values for 1K sector size */
+ if (sector_1k && (i & 0x01))
+ tbytes = max(0, tbytes - (int)ctrl->max_oob);
+ tbytes = min_t(int, tbytes, ctrl->max_oob);
+
+- for (j = 0; j < tbytes; j += 4)
++ /*
++ * tbytes may not be multiple of words. Make sure we don't read out of
++ * the boundary and stop at last word.
++ */
++ for (j = 0; (j + 3) < tbytes; j += 4)
+ oob_reg_write(ctrl, j,
+ (oob[j + 0] << 24) |
+ (oob[j + 1] << 16) |
+ (oob[j + 2] << 8) |
+ (oob[j + 3] << 0));
++
++ /* handle the remaing bytes */
++ while (j < tbytes)
++ plast[k++] = oob[j++];
++
++ if (tbytes & 0x3)
++ oob_reg_write(ctrl, (tbytes & ~0x3), (__force u32)cpu_to_be32(last));
++
+ return tbytes;
+ }
+
--- /dev/null
+From 83e824a4a595132f9bd7ac4f5afff857bfc5991e Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Tue, 18 Jul 2023 13:56:11 +0200
+Subject: mtd: spi-nor: Correct flags for Winbond w25q128
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit 83e824a4a595132f9bd7ac4f5afff857bfc5991e upstream.
+
+The Winbond "w25q128" (actual vendor name W25Q128JV) has
+exactly the same flags as the sibling device "w25q128jv".
+The devices both require unlocking to enable write access.
+
+The actual product naming between devices vs the Linux
+strings in winbond.c:
+
+0xef4018: "w25q128" W25Q128JV-IN/IQ/JQ
+0xef7018: "w25q128jv" W25Q128JV-IM/JM
+
+The latter device, "w25q128jv" supports features named DTQ
+and QPI, otherwise it is the same.
+
+Not having the right flags has the annoying side effect
+that write access does not work.
+
+After this patch I can write to the flash on the Inteno
+XG6846 router.
+
+The flash memory also supports dual and quad SPI modes.
+This does not currently manifest, but by turning on SFDP
+parsing, the right SPI modes are emitted in
+/sys/kernel/debug/spi-nor/spi1.0/capabilities
+for this chip, so we also turn on this.
+
+Since we now have determined that SFDP parsing works on
+the device, we also detect the geometry using SFDP.
+
+After this dmesg and sysfs says:
+[ 1.062401] spi-nor spi1.0: w25q128 (16384 Kbytes)
+cat erasesize
+65536
+(16384*1024)/65536 = 256 sectors
+
+spi-nor sysfs:
+cat jedec_id
+ef4018
+cat manufacturer
+winbond
+cat partname
+w25q128
+hexdump -v -C sfdp
+00000000 53 46 44 50 05 01 00 ff 00 05 01 10 80 00 00 ff
+00000010 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000020 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000030 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000040 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000050 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000060 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000070 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+00000080 e5 20 f9 ff ff ff ff 07 44 eb 08 6b 08 3b 42 bb
+00000090 fe ff ff ff ff ff 00 00 ff ff 40 eb 0c 20 0f 52
+000000a0 10 d8 00 00 36 02 a6 00 82 ea 14 c9 e9 63 76 33
+000000b0 7a 75 7a 75 f7 a2 d5 5c 19 f7 4d ff e9 30 f8 80
+
+Cc: stable@vger.kernel.org
+Suggested-by: Michael Walle <michael@walle.cc>
+Reviewed-by: Michael Walle <michael@walle.cc>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Link: https://lore.kernel.org/r/20230718-spi-nor-winbond-w25q128-v5-1-a73653ee46c3@linaro.org
+Signed-off-by: Tudor Ambarus <tudor.ambarus@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/spi-nor/winbond.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/mtd/spi-nor/winbond.c
++++ b/drivers/mtd/spi-nor/winbond.c
+@@ -120,8 +120,9 @@ static const struct flash_info winbond_n
+ NO_SFDP_FLAGS(SECT_4K) },
+ { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16)
+ NO_SFDP_FLAGS(SECT_4K) },
+- { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256)
+- NO_SFDP_FLAGS(SECT_4K) },
++ { "w25q128", INFO(0xef4018, 0, 0, 0)
++ PARSE_SFDP
++ FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
+ { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512)
+ NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)
+ .fixups = &w25q256_fixups },
--- /dev/null
+From c7e97f215a4ad634b746804679f5937d25f77e29 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Thu, 27 Jul 2023 19:24:47 -0700
+Subject: perf build: Include generated header files properly
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit c7e97f215a4ad634b746804679f5937d25f77e29 upstream.
+
+Flex and bison generate header files from the source. When the user
+specifies a build directory with the O= option, they are generated under
+that directory. The build command has a -I option to specify the header
+include directory.
+
+But the -I option only affects the files included like <...>. Let's
+change the flex and bison headers to use it instead of "...".
+
+Fixes: 80eeb67fe577aa76 ("perf jevents: Program to convert JSON file")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Anup Sharma <anupnewsmail@gmail.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230728022447.1323563-2-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/pmu-events/jevents.py | 2 +-
+ tools/perf/util/bpf-filter.c | 4 ++--
+ tools/perf/util/expr.c | 4 ++--
+ tools/perf/util/parse-events.c | 4 ++--
+ tools/perf/util/pmu.c | 4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/tools/perf/pmu-events/jevents.py
++++ b/tools/perf/pmu-events/jevents.py
+@@ -999,7 +999,7 @@ such as "arm/cortex-a34".''',
+ _args = ap.parse_args()
+
+ _args.output_file.write("""
+-#include "pmu-events/pmu-events.h"
++#include <pmu-events/pmu-events.h>
+ #include "util/header.h"
+ #include "util/pmu.h"
+ #include <string.h>
+--- a/tools/perf/util/bpf-filter.c
++++ b/tools/perf/util/bpf-filter.c
+@@ -9,8 +9,8 @@
+ #include "util/evsel.h"
+
+ #include "util/bpf-filter.h"
+-#include "util/bpf-filter-flex.h"
+-#include "util/bpf-filter-bison.h"
++#include <util/bpf-filter-flex.h>
++#include <util/bpf-filter-bison.h>
+
+ #include "bpf_skel/sample-filter.h"
+ #include "bpf_skel/sample_filter.skel.h"
+--- a/tools/perf/util/expr.c
++++ b/tools/perf/util/expr.c
+@@ -10,8 +10,8 @@
+ #include "debug.h"
+ #include "evlist.h"
+ #include "expr.h"
+-#include "expr-bison.h"
+-#include "expr-flex.h"
++#include <util/expr-bison.h>
++#include <util/expr-flex.h>
+ #include "util/hashmap.h"
+ #include "smt.h"
+ #include "tsc.h"
+--- a/tools/perf/util/parse-events.c
++++ b/tools/perf/util/parse-events.c
+@@ -18,8 +18,8 @@
+ #include "debug.h"
+ #include <api/fs/tracing_path.h>
+ #include <perf/cpumap.h>
+-#include "parse-events-bison.h"
+-#include "parse-events-flex.h"
++#include <util/parse-events-bison.h>
++#include <util/parse-events-flex.h>
+ #include "pmu.h"
+ #include "pmus.h"
+ #include "asm/bug.h"
+--- a/tools/perf/util/pmu.c
++++ b/tools/perf/util/pmu.c
+@@ -19,8 +19,8 @@
+ #include "evsel.h"
+ #include "pmu.h"
+ #include "pmus.h"
+-#include "pmu-bison.h"
+-#include "pmu-flex.h"
++#include <util/pmu-bison.h>
++#include <util/pmu-flex.h>
+ #include "parse-events.h"
+ #include "print-events.h"
+ #include "header.h"
--- /dev/null
+From 7822a8913f4c51c7d1aff793b525d60c3384fb5b Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Thu, 27 Jul 2023 19:24:46 -0700
+Subject: perf build: Update build rule for generated files
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit 7822a8913f4c51c7d1aff793b525d60c3384fb5b upstream.
+
+Bison and flex generate C files from the source (.y and .l)
+files. When the O= option is used, they are saved in a separate directory
+but the default build rule assumes the .c files are in the source
+directory. So it might read an invalid file if there are generated files
+from an old version. The same is true for the pmu-events files.
+
+For example, the following command would cause a build failure:
+
+ $ git checkout v6.3
+ $ make -C tools/perf # build in the same directory
+
+ $ git checkout v6.5-rc2
+ $ mkdir build # create a build directory
+ $ make -C tools/perf O=build # build in a different directory but it
+ # refers files in the source directory
+
+Let's update the build rule to specify those cases explicitly to depend
+on the files in the output directory.
+
+Note that it's not a complete fix and it needs the next patch for the
+include path too.
+
+Fixes: 80eeb67fe577aa76 ("perf jevents: Program to convert JSON file")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Anup Sharma <anupnewsmail@gmail.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230728022447.1323563-1-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/build/Makefile.build | 10 ++++++++++
+ tools/perf/pmu-events/Build | 6 ++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/tools/build/Makefile.build
++++ b/tools/build/Makefile.build
+@@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_s_c)
+
++# bison and flex files are generated in the OUTPUT directory
++# so it needs a separate rule to depend on them properly
++$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE
++ $(call rule_mkdir)
++ $(call if_changed_dep,$(host)cc_o_c)
++
++$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE
++ $(call rule_mkdir)
++ $(call if_changed_dep,$(host)cc_o_c)
++
+ # Gather build data:
+ # obj-y - list of build objects
+ # subdir-y - list of directories to nest
+--- a/tools/perf/pmu-events/Build
++++ b/tools/perf/pmu-events/Build
+@@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
+ endif
++
++# pmu-events.c file is generated in the OUTPUT directory so it needs a
++# separate rule to depend on it properly
++$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C)
++ $(call rule_mkdir)
++ $(call if_changed_dep,cc_o_c)
--- /dev/null
+From e2cabf2a44791f01c21f8d5189b946926e34142e Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Mon, 31 Jul 2023 02:49:32 -0700
+Subject: perf hists browser: Fix hierarchy mode header
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit e2cabf2a44791f01c21f8d5189b946926e34142e upstream.
+
+The commit ef9ff6017e3c4593 ("perf ui browser: Move the extra title
+lines from the hists browser") introduced ui_browser__gotorc_title() to
+help move non-title lines easily. But it failed to update the title
+for the hierarchy mode so it won't print the header line on TUI at all.
+
+ $ perf report --hierarchy
+
+Fixes: ef9ff6017e3c4593 ("perf ui browser: Move the extra title lines from the hists browser")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230731094934.1616495-1-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/ui/browsers/hists.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/perf/ui/browsers/hists.c
++++ b/tools/perf/ui/browsers/hists.c
+@@ -1779,7 +1779,7 @@ static void hists_browser__hierarchy_hea
+ hists_browser__scnprintf_hierarchy_headers(browser, headers,
+ sizeof(headers));
+
+- ui_browser__gotorc(&browser->b, 0, 0);
++ ui_browser__gotorc_title(&browser->b, 0, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+ }
--- /dev/null
+From f6b8436bede3e80226e8b2100279c4450c73806a Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Mon, 31 Jul 2023 02:49:33 -0700
+Subject: perf hists browser: Fix the number of entries for 'e' key
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit f6b8436bede3e80226e8b2100279c4450c73806a upstream.
+
+The 'e' key is to toggle expand/collapse the selected entry only. But
+the current code has a bug that it only increases the number of entries
+by 1 in the hierarchy mode so users cannot move under the current entry
+after the key stroke. This is due to a wrong assumption in the
+hist_entry__set_folding().
+
+The commit b33f922651011eff ("perf hists browser: Put hist_entry folding
+logic into single function") factored out the code, but actually it
+should be handled separately. The hist_browser__set_folding() is to
+update fold state for each entry so it needs to traverse all (child)
+entries regardless of the current fold state. So it increases the
+number of entries by 1.
+
+But hist_entry__set_folding() only cares about the currently selected
+entry and all its children. So it should count all unfolded child
+entries. This code is implemented in hist_browser__toggle_fold()
+already so we can just call it.
+
+Fixes: b33f922651011eff ("perf hists browser: Put hist_entry folding logic into single function")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230731094934.1616495-2-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/ui/browsers/hists.c | 58 ++++++++++++++++-------------------------
+ 1 file changed, 24 insertions(+), 34 deletions(-)
+
+--- a/tools/perf/ui/browsers/hists.c
++++ b/tools/perf/ui/browsers/hists.c
+@@ -407,11 +407,6 @@ static bool hist_browser__selection_has_
+ return container_of(ms, struct callchain_list, ms)->has_children;
+ }
+
+-static bool hist_browser__he_selection_unfolded(struct hist_browser *browser)
+-{
+- return browser->he_selection ? browser->he_selection->unfolded : false;
+-}
+-
+ static bool hist_browser__selection_unfolded(struct hist_browser *browser)
+ {
+ struct hist_entry *he = browser->he_selection;
+@@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct
+ return n;
+ }
+
+-static void __hist_entry__set_folding(struct hist_entry *he,
+- struct hist_browser *hb, bool unfold)
++static void hist_entry__set_folding(struct hist_entry *he,
++ struct hist_browser *hb, bool unfold)
+ {
+ hist_entry__init_have_children(he);
+ he->unfolded = unfold ? he->has_children : false;
+@@ -603,34 +598,12 @@ static void __hist_entry__set_folding(st
+ he->nr_rows = 0;
+ }
+
+-static void hist_entry__set_folding(struct hist_entry *he,
+- struct hist_browser *browser, bool unfold)
+-{
+- double percent;
+-
+- percent = hist_entry__get_percent_limit(he);
+- if (he->filtered || percent < browser->min_pcnt)
+- return;
+-
+- __hist_entry__set_folding(he, browser, unfold);
+-
+- if (!he->depth || unfold)
+- browser->nr_hierarchy_entries++;
+- if (he->leaf)
+- browser->nr_callchain_rows += he->nr_rows;
+- else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+- browser->nr_hierarchy_entries++;
+- he->has_no_entry = true;
+- he->nr_rows = 1;
+- } else
+- he->has_no_entry = false;
+-}
+-
+ static void
+ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+ {
+ struct rb_node *nd;
+ struct hist_entry *he;
++ double percent;
+
+ nd = rb_first_cached(&browser->hists->entries);
+ while (nd) {
+@@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_
+ nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+ hist_entry__set_folding(he, browser, unfold);
++
++ percent = hist_entry__get_percent_limit(he);
++ if (he->filtered || percent < browser->min_pcnt)
++ continue;
++
++ if (!he->depth || unfold)
++ browser->nr_hierarchy_entries++;
++ if (he->leaf)
++ browser->nr_callchain_rows += he->nr_rows;
++ else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
++ browser->nr_hierarchy_entries++;
++ he->has_no_entry = true;
++ he->nr_rows = 1;
++ } else
++ he->has_no_entry = false;
+ }
+ }
+
+@@ -659,8 +647,10 @@ static void hist_browser__set_folding_se
+ if (!browser->he_selection)
+ return;
+
+- hist_entry__set_folding(browser->he_selection, browser, unfold);
+- browser->b.nr_entries = hist_browser__nr_entries(browser);
++ if (unfold == browser->he_selection->unfolded)
++ return;
++
++ hist_browser__toggle_fold(browser);
+ }
+
+ static void ui_browser__warn_lost_events(struct ui_browser *browser)
+@@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(s
+ hist_browser__set_folding(browser, true);
+ break;
+ case 'e':
+- /* Expand the selected entry. */
+- hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
++ /* Toggle expand/collapse the selected entry. */
++ hist_browser__toggle_fold(browser);
+ break;
+ case 'H':
+ browser->show_headers = !browser->show_headers;
--- /dev/null
+From 68ca249c964f520af7f8763e22f12bd26b57b870 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Fri, 25 Aug 2023 09:41:51 -0700
+Subject: perf test shell stat_bpf_counters: Fix test on Intel
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit 68ca249c964f520af7f8763e22f12bd26b57b870 upstream.
+
+As of now, bpf counters (bperf) don't support event groups. But the
+default perf stat includes topdown metrics if supported (on recent Intel
+machines) which require groups. That makes perf stat exit.
+
+ $ sudo perf stat --bpf-counter true
+ bpf managed perf events do not yet support groups.
+
+Actually the test explicitly uses the cycles event only, but it failed to
+pass the option when it checks the availability of the command.
+
+Fixes: 2c0cb9f56020d2ea ("perf test: Add a shell test for 'perf stat --bpf-counters' new option")
+Reviewed-by: Song Liu <song@kernel.org>
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230825164152.165610-2-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/tests/shell/stat_bpf_counters.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/perf/tests/shell/stat_bpf_counters.sh
++++ b/tools/perf/tests/shell/stat_bpf_counters.sh
+@@ -22,10 +22,10 @@ compare_number()
+ }
+
+ # skip if --bpf-counters is not supported
+-if ! perf stat --bpf-counters true > /dev/null 2>&1; then
++if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
+ if [ "$1" = "-v" ]; then
+ echo "Skipping: --bpf-counters not supported"
+- perf --no-pager stat --bpf-counters true || true
++ perf --no-pager stat -e cycles --bpf-counters true || true
+ fi
+ exit 2
+ fi
--- /dev/null
+From 9bf63282ea77a531ea58acb42fb3f40d2d1e4497 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@kernel.org>
+Date: Fri, 25 Aug 2023 08:25:49 -0700
+Subject: perf tools: Handle old data in PERF_RECORD_ATTR
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+commit 9bf63282ea77a531ea58acb42fb3f40d2d1e4497 upstream.
+
+PERF_RECORD_ATTR is used in pipe mode to describe an event with its
+attribute and IDs. The ID table comes after the attr, and the size of
+the table is calculated using the total record size and the attr size.
+
+ n_ids = (total_record_size - end_of_the_attr_field) / sizeof(u64)
+
+This is fine for most use cases, but sometimes the pipe output is saved
+in a file and then processed later. And it becomes a problem if there
+is a change in attr size between the record and report.
+
+ $ perf record -o- > perf-pipe.data # old version
+ $ perf report -i- < perf-pipe.data # new version
+
+For example, if the attr size is 128 and it has 4 IDs, then it would
+save them in 168 bytes like below:
+
+ 8 byte: perf event header { .type = PERF_RECORD_ATTR, .size = 168 },
+ 128 byte: perf event attr { .size = 128, ... },
+ 32 byte: event IDs [] = { 1234, 1235, 1236, 1237 },
+
+But when reporting later, it thinks the attr size is 136, so it only reads
+the last 3 entries as IDs.
+
+ 8 byte: perf event header { .type = PERF_RECORD_ATTR, .size = 168 },
+ 136 byte: perf event attr { .size = 136, ... },
+ 24 byte: event IDs [] = { 1235, 1236, 1237 }, // 1234 is missing
+
+So it should use the recorded version of the attr. The attr already has
+a size field, so it should honor that size when reading data.
+
+Fixes: 2c46dbb517a10b18 ("perf: Convert perf header attrs into attr events")
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tom Zanussi <zanussi@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230825152552.112913-1-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/perf/util/header.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/tools/perf/util/header.c
++++ b/tools/perf/util/header.c
+@@ -4382,7 +4382,8 @@ int perf_event__process_attr(struct perf
+ union perf_event *event,
+ struct evlist **pevlist)
+ {
+- u32 i, ids, n_ids;
++ u32 i, n_ids;
++ u64 *ids;
+ struct evsel *evsel;
+ struct evlist *evlist = *pevlist;
+
+@@ -4398,9 +4399,8 @@ int perf_event__process_attr(struct perf
+
+ evlist__add(evlist, evsel);
+
+- ids = event->header.size;
+- ids -= (void *)&event->attr.id - (void *)event;
+- n_ids = ids / sizeof(u64);
++ n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
++ n_ids = n_ids / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+@@ -4409,8 +4409,9 @@ int perf_event__process_attr(struct perf
+ if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
+ return -ENOMEM;
+
++ ids = (void *)&event->attr.attr + event->attr.attr.size;
+ for (i = 0; i < n_ids; i++) {
+- perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
++ perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
+ }
+
+ return 0;
--- /dev/null
+From a81de4a22bbe3183b7f0d6f13f592b8f5b5a3c18 Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Thu, 31 Aug 2023 15:17:14 -0400
+Subject: Revert "drm/amd/display: Remove v_startup workaround for dcn3+"
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+commit a81de4a22bbe3183b7f0d6f13f592b8f5b5a3c18 upstream.
+
+This reverts commit 3a31e8b89b7240d9a17ace8a1ed050bdcb560f9e.
+
+We still need to call dcn20_adjust_freesync_v_startup() for older DCN3+
+ASICs. Otherwise, it can cause DP to HDMI 2.1 PCONs to fail to light up.
+
+Cc: stable@vger.kernel.org
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2809
+Reviewed-by: Fangzhi Zuo <jerry.zuo@amd.com>
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 24 ++++---------------
+ 1 file changed, 4 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+index 8afda5ecc0cd..d01bc2dff49b 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+@@ -1099,6 +1099,10 @@ void dcn20_calculate_dlg_params(struct dc *dc,
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
+ pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
++ if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
++ dcn20_adjust_freesync_v_startup(
++ &context->res_ctx.pipe_ctx[i].stream->timing,
++ &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
+
+ pipe_idx++;
+ }
+@@ -1927,7 +1931,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+- int i = 0;
+ display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+@@ -1951,15 +1954,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
+ dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+- for (i = 0; i < dc->res_pool->pipe_count; i++) {
+- if (!context->res_ctx.pipe_ctx[i].stream)
+- continue;
+- if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+- dcn20_adjust_freesync_v_startup(
+- &context->res_ctx.pipe_ctx[i].stream->timing,
+- &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
+- }
+-
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+@@ -2232,7 +2226,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+- int i = 0;
+ display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+@@ -2261,15 +2254,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
+ dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+- for (i = 0; i < dc->res_pool->pipe_count; i++) {
+- if (!context->res_ctx.pipe_ctx[i].stream)
+- continue;
+- if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+- dcn20_adjust_freesync_v_startup(
+- &context->res_ctx.pipe_ctx[i].stream->timing,
+- &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
+- }
+-
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+--
+2.42.0
+
btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch
btrfs-set-page-extent-mapped-after-read_folio-in-relocate_one_page.patch
btrfs-zoned-re-enable-metadata-over-commit-for-zoned-mode.patch
+btrfs-use-the-correct-superblock-to-compare-fsid-in-btrfs_validate_super.patch
+btrfs-compare-the-correct-fsid-metadata_uuid-in-btrfs_validate_super.patch
+btrfs-scrub-avoid-unnecessary-extent-tree-search-preparing-stripes.patch
+btrfs-scrub-avoid-unnecessary-csum-tree-search-preparing-stripes.patch
+btrfs-scrub-fix-grouping-of-read-io.patch
+drm-mxsfb-disable-overlay-plane-in-mxsfb_plane_overlay_atomic_disable.patch
+mtd-rawnand-brcmnand-fix-crash-during-the-panic_write.patch
+mtd-rawnand-brcmnand-fix-potential-out-of-bounds-access-in-oob-write.patch
+mtd-spi-nor-correct-flags-for-winbond-w25q128.patch
+mtd-rawnand-brcmnand-fix-potential-false-time-out-warning.patch
+mtd-rawnand-brcmnand-fix-ecc-level-field-setting-for-v7.2-controller.patch
+revert-drm-amd-display-remove-v_startup-workaround-for-dcn3.patch
+drm-amd-display-enable-cursor-degamma-for-dcn3-drm-legacy-gamma.patch
+drm-amd-display-limit-the-v_startup-workaround-to-asics-older-than-dcn3.1.patch
+drm-amd-display-prevent-potential-division-by-zero-errors.patch
+kvm-vmx-refresh-available-regs-and-idt-vectoring-info-before-nmi-handling.patch
+kvm-svm-take-and-hold-ir_list_lock-when-updating-vcpu-s-physical-id-entry.patch
+kvm-svm-don-t-inject-ud-if-kvm-attempts-to-skip-sev-guest-insn.patch
+kvm-svm-get-source-vcpus-from-source-vm-for-sev-es-intrahost-migration.patch
+kvm-nsvm-check-instead-of-asserting-on-nested-tsc-scaling-support.patch
+kvm-nsvm-load-l1-s-tsc-multiplier-based-on-l1-state-not-l2-state.patch
+kvm-svm-set-target-pcpu-during-irte-update-if-target-vcpu-is-running.patch
+kvm-svm-skip-vmsa-init-in-sev_es_init_vmcb-if-pointer-is-null.patch
+mips-only-fiddle-with-checkflags-if-need-compiler.patch
+mips-fix-config_cpu_daddi_workarounds-modules_install-regression.patch
+perf-hists-browser-fix-hierarchy-mode-header.patch
+perf-build-update-build-rule-for-generated-files.patch
+perf-test-shell-stat_bpf_counters-fix-test-on-intel.patch
+perf-tools-handle-old-data-in-perf_record_attr.patch
+perf-build-include-generated-header-files-properly.patch
+perf-hists-browser-fix-the-number-of-entries-for-e-key.patch
+drm-amd-display-always-switch-off-odm-before-committing-more-streams.patch
+drm-amd-display-remove-wait-while-locked.patch
+drm-amdkfd-add-missing-gfx11-mqd-manager-callbacks.patch
+drm-amdgpu-register-a-dirty-framebuffer-callback-for-fbcon.patch
+drm-amd-display-update-blank-state-on-odm-changes.patch