--- /dev/null
+From e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:39 -0500
+Subject: block: add bio_start_io_acct_time() to control start_time
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 upstream.
+
+The bio_start_io_acct_time() interface is like bio_start_io_acct(), but
+allows a start_time to be passed in. This gives drivers the ability to
+defer starting accounting until after IO is issued (but possibly not
+entirely, due to bio splitting).
+
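+As an illustration (mirroring the dm conversion later in this series;
+the io structure and its field names are a driver's own), a bio-based
+driver records the start time up front and accounts the IO only after
+submitting it:
+
+  io->start_time = jiffies;                    /* when the io is allocated */
+  ...
+  ret = submit_bio_noacct(bio);
+  bio_start_io_acct_time(bio, io->start_time); /* account after submission */
+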
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-2-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-core.c | 25 +++++++++++++++++++------
+ include/linux/blkdev.h | 1 +
+ 2 files changed, 20 insertions(+), 6 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -1293,22 +1293,34 @@ void blk_account_io_start(struct request
+ }
+
+ static unsigned long __part_start_io_acct(struct block_device *part,
+- unsigned int sectors, unsigned int op)
++ unsigned int sectors, unsigned int op,
++ unsigned long start_time)
+ {
+ const int sgrp = op_stat_group(op);
+- unsigned long now = READ_ONCE(jiffies);
+
+ part_stat_lock();
+- update_io_ticks(part, now, false);
++ update_io_ticks(part, start_time, false);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
+ part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ part_stat_unlock();
+
+- return now;
++ return start_time;
+ }
+
+ /**
++ * bio_start_io_acct_time - start I/O accounting for bio based drivers
++ * @bio: bio to start account for
++ * @start_time: start time that should be passed back to bio_end_io_acct().
++ */
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
++{
++ __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++ bio_op(bio), start_time);
++}
++EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
++
++/**
+ * bio_start_io_acct - start I/O accounting for bio based drivers
+ * @bio: bio to start account for
+ *
+@@ -1316,14 +1328,15 @@ static unsigned long __part_start_io_acc
+ */
+ unsigned long bio_start_io_acct(struct bio *bio)
+ {
+- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
++ return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++ bio_op(bio), jiffies);
+ }
+ EXPORT_SYMBOL_GPL(bio_start_io_acct);
+
+ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op)
+ {
+- return __part_start_io_acct(disk->part0, sectors, op);
++ return __part_start_io_acct(disk->part0, sectors, op, jiffies);
+ }
+ EXPORT_SYMBOL(disk_start_io_acct);
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1947,6 +1947,7 @@ unsigned long disk_start_io_acct(struct
+ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time);
+
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
+ unsigned long bio_start_io_acct(struct bio *bio);
+ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
+ struct block_device *orig_bdev);
--- /dev/null
+From 932a9b5870d38b87ba0a9923c804b1af7d3605b9 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 25 Jan 2022 15:39:16 -0500
+Subject: ceph: properly put ceph_string reference after async create attempt
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 932a9b5870d38b87ba0a9923c804b1af7d3605b9 upstream.
+
+The reference acquired by try_prep_async_create is currently leaked.
+Ensure we put it.
+
+Cc: stable@vger.kernel.org
+Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible")
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -744,8 +744,10 @@ retry:
+ restore_deleg_ino(dir, req->r_deleg_ino);
+ ceph_mdsc_put_request(req);
+ try_async = false;
++ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
+ goto retry;
+ }
++ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
+ goto out_req;
+ }
+ }
--- /dev/null
+From 4584a768f22b7669cdebabc911543621ac661341 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Wed, 26 Jan 2022 12:36:49 -0500
+Subject: ceph: set pool_ns in new inode layout for async creates
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 4584a768f22b7669cdebabc911543621ac661341 upstream.
+
+Dan reported that he was unable to write to files that had been
+asynchronously created when the client's OSD caps are restricted to a
+particular namespace.
+
+The issue is that the layout for the new inode is only partially
+filled. Ensure that we populate the pool_ns_data and pool_ns_len in the
+iinfo before calling ceph_fill_inode.
+
+Cc: stable@vger.kernel.org
+URL: https://tracker.ceph.com/issues/54013
+Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible")
+Reported-by: Dan van der Ster <dan@vanderster.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -577,6 +577,7 @@ static int ceph_finish_async_create(stru
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct inode *inode;
+ struct timespec64 now;
++ struct ceph_string *pool_ns;
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
+ struct ceph_vino vino = { .ino = req->r_deleg_ino,
+ .snap = CEPH_NOSNAP };
+@@ -626,6 +627,12 @@ static int ceph_finish_async_create(stru
+ in.max_size = cpu_to_le64(lo->stripe_unit);
+
+ ceph_file_layout_to_legacy(lo, &in.layout);
++ /* lo is private, so pool_ns can't change */
++ pool_ns = rcu_dereference_raw(lo->pool_ns);
++ if (pool_ns) {
++ iinfo.pool_ns_len = pool_ns->len;
++ iinfo.pool_ns_data = pool_ns->str;
++ }
+
+ down_read(&mdsc->snap_rwsem);
+ ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
--- /dev/null
+From b879f915bc48a18d4f4462729192435bb0f17052 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:41 -0500
+Subject: dm: properly fix redundant bio-based IO accounting
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit b879f915bc48a18d4f4462729192435bb0f17052 upstream.
+
+Record the start_time for a bio, but defer starting the block core's IO
+accounting until after IO is submitted, using bio_start_io_acct_time().
+
+This approach avoids the need to mess around with any of the
+individual IO stats in response to a bio_split() that follows bio
+submission.
+
+Reported-by: Bud Brown <bubrown@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org
+Depends-on: e45c47d1f94e ("block: add bio_start_io_acct_time() to control start_time")
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-4-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *
+ struct mapped_device *md = io->md;
+ struct bio *bio = io->orig_bio;
+
+- io->start_time = bio_start_io_acct(bio);
++ bio_start_io_acct_time(bio, io->start_time);
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+@@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct map
+ io->md = md;
+ spin_lock_init(&io->endio_lock);
+
+- start_io_acct(io);
++ io->start_time = jiffies;
+
+ return io;
+ }
+@@ -1555,6 +1555,7 @@ static blk_qc_t __split_and_process_bio(
+ ret = submit_bio_noacct(bio);
+ }
+ }
++ start_io_acct(ci.io);
+
+ /* drop the extra reference count */
+ dm_io_dec_pending(ci.io, errno_to_blk_status(error));
--- /dev/null
+From f524d9c95fab54783d0038f7a3e8c014d5b56857 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:40 -0500
+Subject: dm: revert partial fix for redundant bio-based IO accounting
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit f524d9c95fab54783d0038f7a3e8c014d5b56857 upstream.
+
+Reverts a1e1cb72d9649 ("dm: fix redundant IO accounting for bios that
+need splitting") because it was too narrow in scope (only addressed
+redundant 'sectors[]' accounting and not ios, nsecs[], etc).
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-3-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm.c | 15 ---------------
+ 1 file changed, 15 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1514,9 +1514,6 @@ static void init_clone_info(struct clone
+ ci->sector = bio->bi_iter.bi_sector;
+ }
+
+-#define __dm_part_stat_sub(part, field, subnd) \
+- (part_stat_get(part, field) -= (subnd))
+-
+ /*
+ * Entry point to split a bio into clones and submit them to the targets.
+ */
+@@ -1553,18 +1550,6 @@ static blk_qc_t __split_and_process_bio(
+ GFP_NOIO, &md->queue->bio_split);
+ ci.io->orig_bio = b;
+
+- /*
+- * Adjust IO stats for each split, otherwise upon queue
+- * reentry there will be redundant IO accounting.
+- * NOTE: this is a stop-gap fix, a proper fix involves
+- * significant refactoring of DM core's bio splitting
+- * (by eliminating DM's splitting and just using bio_split)
+- */
+- part_stat_lock();
+- __dm_part_stat_sub(dm_disk(md)->part0,
+- sectors[op_stat_group(bio_op(bio))], ci.sector_count);
+- part_stat_unlock();
+-
+ bio_chain(b, bio);
+ trace_block_split(b, bio->bi_iter.bi_sector);
+ ret = submit_bio_noacct(bio);
--- /dev/null
+From 72a8d87b87270bff0c0b2fed4d59c48d0dd840d7 Mon Sep 17 00:00:00 2001
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Date: Mon, 24 Jan 2022 01:23:35 +0100
+Subject: drm/amd/display: Fix FP start/end for dcn30_internal_validate_bw.
+
+From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+
+commit 72a8d87b87270bff0c0b2fed4d59c48d0dd840d7 upstream.
+
+dcn30_internal_validate_bw() calls populate_dml_pipes, which uses
+doubles to initialize the scale_ratio_depth params, so the call must be
+wrapped in DC_FP_START()/DC_FP_END(). This mirrors the dcn20 logic.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+@@ -1879,7 +1879,6 @@ static noinline bool dcn30_internal_vali
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+
+- DC_FP_START();
+ if (!pipe_cnt) {
+ out = true;
+ goto validate_out;
+@@ -2103,7 +2102,6 @@ validate_fail:
+ out = false;
+
+ validate_out:
+- DC_FP_END();
+ return out;
+ }
+
+@@ -2304,7 +2302,9 @@ bool dcn30_validate_bandwidth(struct dc
+
+ BW_VAL_TRACE_COUNT();
+
++ DC_FP_START();
+ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
++ DC_FP_END();
+
+ if (pipe_cnt == 0)
+ goto validate_out;
--- /dev/null
+From 5ec1cebd59300ddd26dbaa96c17c508764eef911 Mon Sep 17 00:00:00 2001
+From: Manasi Navare <manasi.d.navare@intel.com>
+Date: Mon, 4 Oct 2021 04:59:13 -0700
+Subject: drm/atomic: Add the crtc to affected crtc only if uapi.enable = true
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Manasi Navare <manasi.d.navare@intel.com>
+
+commit 5ec1cebd59300ddd26dbaa96c17c508764eef911 upstream.
+
+In case of a modeset where a mode gets split across multiple CRTCs in
+the driver-specific implementation (bigjoiner in i915), we wrongly count
+the affected CRTCs based on the drm_crtc_mask and indicate the stolen
+CRTC as an affected CRTC in atomic_check_only(). This triggers a warning
+since the affected CRTCs don't match the requested CRTCs.
+
+To fix this in such bigjoiner configurations, we should only count a
+CRTC as affected if it is enabled in the UAPI, not if it is just used
+internally in the driver to split the mode.
+
+v3: Add the same uapi crtc_state->enable check in requested
+crtc calc (Ville)
+
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: Simon Ser <contact@emersion.fr>
+Cc: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
+Cc: Daniel Stone <daniels@collabora.com>
+Cc: Daniel Vetter <daniel.vetter@intel.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v5.11+
+Fixes: 919c2299a893 ("drm/i915: Enable bigjoiner")
+Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
+Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211004115913.23889-1-manasi.d.navare@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_atomic.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -1310,8 +1310,10 @@ int drm_atomic_check_only(struct drm_ato
+
+ DRM_DEBUG_ATOMIC("checking %p\n", state);
+
+- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
+- requested_crtc |= drm_crtc_mask(crtc);
++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
++ if (new_crtc_state->enable)
++ requested_crtc |= drm_crtc_mask(crtc);
++ }
+
+ for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
+ ret = drm_atomic_plane_check(old_plane_state, new_plane_state);
+@@ -1360,8 +1362,10 @@ int drm_atomic_check_only(struct drm_ato
+ }
+ }
+
+- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
+- affected_crtc |= drm_crtc_mask(crtc);
++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
++ if (new_crtc_state->enable)
++ affected_crtc |= drm_crtc_mask(crtc);
++ }
+
+ /*
+ * For commits that allow modesets drivers can add other CRTCs to the
--- /dev/null
+From e3d26528e083e612314d4dcd713f3d5a26143ddc Mon Sep 17 00:00:00 2001
+From: Lucas Stach <l.stach@pengutronix.de>
+Date: Thu, 6 Jan 2022 19:10:21 +0100
+Subject: drm/etnaviv: relax submit size limits
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+commit e3d26528e083e612314d4dcd713f3d5a26143ddc upstream.
+
+While all userspace tried to limit command streams to 64K in size, a
+bug in the Mesa driver led to command streams of up to 128K being
+submitted. Allow those, to avoid breaking existing userspace.
+
+Fixes: 6dfa2fab8ddd ("drm/etnaviv: limit submit sizes")
+Cc: stable@vger.kernel.org
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+@@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_
+ return -EINVAL;
+ }
+
+- if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K ||
+- args->nr_bos > SZ_64K || args->nr_pmrs > 128) {
++ if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K ||
++ args->nr_bos > SZ_128K || args->nr_pmrs > 128) {
+ DRM_ERROR("submit arguments out of size limits\n");
+ return -EINVAL;
+ }
--- /dev/null
+From f5390cd0b43c2e54c7cf5506c7da4a37c5cef746 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Wed, 12 Jan 2022 11:14:13 +0100
+Subject: efi: runtime: avoid EFIv2 runtime services on Apple x86 machines
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit f5390cd0b43c2e54c7cf5506c7da4a37c5cef746 upstream.
+
+Aditya reports [0] that his recent MacbookPro crashes in the firmware
+when using the variable services at runtime. The culprit appears to be a
+call to QueryVariableInfo(), which we did not use to call on Apple x86
+machines in the past as they only upgraded from EFI v1.10 to EFI v2.40
+firmware fairly recently, and QueryVariableInfo() (along with
+UpdateCapsule() et al) was added in EFI v2.00.
+
+The only runtime service introduced in EFI v2.00 that we actually use in
+Linux is QueryVariableInfo(), as the capsule based ones are optional,
+generally not used at runtime (all the LVFS/fwupd firmware update
+infrastructure uses helper EFI programs that invoke capsule update at
+boot time, not runtime), and not implemented by Apple machines in the
+first place. QueryVariableInfo() is used to 'safely' set variables,
+i.e., only when there is enough space. This prevents machines with buggy
+firmwares from corrupting their NVRAMs when they run out of space.
+
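+As a sketch (not part of this patch; the exact check in the kernel's
+variable code differs), the 'safe' write looks roughly like:
+
+  status = efi.query_variable_info(attr, &storage_size,
+                                   &remaining_size, &max_variable_size);
+  if (status == EFI_SUCCESS && remaining_size < size)
+          return EFI_OUT_OF_RESOURCES;  /* refuse rather than corrupt NVRAM */
+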
+Given that Apple machines have been using EFI v1.10 services only for
+the longest time (the EFI v2.0 spec was released in 2006, and Linux
+support for the newly introduced runtime services was added in 2011, but
+the MacbookPro12,1 released in 2015 still claims to be EFI v1.10 only),
+let's avoid the EFI v2.0 ones on all Apple x86 machines.
+
+[0] https://lore.kernel.org/all/6D757C75-65B1-468B-842D-10410081A8E4@live.com/
+
+Cc: <stable@vger.kernel.org>
+Cc: Jeremy Kerr <jk@ozlabs.org>
+Cc: Matthew Garrett <mjg59@srcf.ucam.org>
+Reported-by: Aditya Garg <gargaditya08@live.com>
+Tested-by: Orlando Chamberlain <redecorating@protonmail.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Tested-by: Aditya Garg <gargaditya08@live.com>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215277
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/efi.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -719,6 +719,13 @@ void __init efi_systab_report_header(con
+ systab_hdr->revision >> 16,
+ systab_hdr->revision & 0xffff,
+ vendor);
++
++ if (IS_ENABLED(CONFIG_X86_64) &&
++ systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION &&
++ !strcmp(vendor, "Apple")) {
++ pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n");
++ efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION;
++ }
+ }
+
+ static __initdata char memory_type_name[][13] = {
--- /dev/null
+From 29044dae2e746949ad4b9cbdbfb248994d1dcdb4 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Thu, 20 Jan 2022 23:53:05 +0200
+Subject: fsnotify: fix fsnotify hooks in pseudo filesystems
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 29044dae2e746949ad4b9cbdbfb248994d1dcdb4 upstream.
+
+Commit 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of
+d_delete()") moved the fsnotify delete hook before d_delete() so fsnotify
+will have access to a positive dentry.
+
+This allowed a race where opening the deleted file via cached dentry
+is now possible after receiving the IN_DELETE event.
+
+To fix the regression in pseudo filesystems, convert d_delete() calls
+to d_drop() (see commit 46c46f8df9aa ("devpts_pty_kill(): don't bother
+with d_delete()")) and move the fsnotify hook after d_drop().
+
+Add a missing fsnotify_unlink() hook in nfsdfs that was found during
+the audit of fsnotify hooks in pseudo filesystems.
+
+Note that the fsnotify hooks in simple_recursive_removal() follow
+d_invalidate(), so they require no change.
+
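+The resulting pattern in a pseudo filesystem unhashes the dentry before
+generating the event (a sketch of the hunks below):
+
+  d_drop(dentry);
+  fsnotify_unlink(d_inode(parent), dentry);
+  dput(dentry);
+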
+Link: https://lore.kernel.org/r/20220120215305.282577-2-amir73il@gmail.com
+Reported-by: Ivan Delalande <colona@arista.com>
+Link: https://lore.kernel.org/linux-fsdevel/YeNyzoDM5hP5LtGW@visor/
+Fixes: 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()")
+Cc: stable@vger.kernel.org # v5.3+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/configfs/dir.c | 6 +++---
+ fs/devpts/inode.c | 2 +-
+ fs/nfsd/nfsctl.c | 5 +++--
+ net/sunrpc/rpc_pipe.c | 4 ++--
+ 4 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/fs/configfs/dir.c
++++ b/fs/configfs/dir.c
+@@ -1780,8 +1780,8 @@ void configfs_unregister_group(struct co
+ configfs_detach_group(&group->cg_item);
+ d_inode(dentry)->i_flags |= S_DEAD;
+ dont_mount(dentry);
++ d_drop(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
+- d_delete(dentry);
+ inode_unlock(d_inode(parent));
+
+ dput(dentry);
+@@ -1922,10 +1922,10 @@ void configfs_unregister_subsystem(struc
+ configfs_detach_group(&group->cg_item);
+ d_inode(dentry)->i_flags |= S_DEAD;
+ dont_mount(dentry);
+- fsnotify_rmdir(d_inode(root), dentry);
+ inode_unlock(d_inode(dentry));
+
+- d_delete(dentry);
++ d_drop(dentry);
++ fsnotify_rmdir(d_inode(root), dentry);
+
+ inode_unlock(d_inode(root));
+
+--- a/fs/devpts/inode.c
++++ b/fs/devpts/inode.c
+@@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dent
+
+ dentry->d_fsdata = NULL;
+ drop_nlink(dentry->d_inode);
+- fsnotify_unlink(d_inode(dentry->d_parent), dentry);
+ d_drop(dentry);
++ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
+ dput(dentry); /* d_alloc_name() in devpts_pty_new() */
+ }
+
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1249,7 +1249,8 @@ static void nfsdfs_remove_file(struct in
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+- d_delete(dentry);
++ d_drop(dentry);
++ fsnotify_unlink(dir, dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+ }
+@@ -1340,8 +1341,8 @@ void nfsd_client_rmdir(struct dentry *de
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
++ d_drop(dentry);
+ fsnotify_rmdir(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ inode_unlock(dir);
+ }
+--- a/net/sunrpc/rpc_pipe.c
++++ b/net/sunrpc/rpc_pipe.c
+@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir
+
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
++ d_drop(dentry);
+ if (!ret)
+ fsnotify_rmdir(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ return ret;
+ }
+@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *di
+
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
++ d_drop(dentry);
+ if (!ret)
+ fsnotify_unlink(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ return ret;
+ }
--- /dev/null
+From 35fe7cfbab2e81f1afb23fc4212210b1de6d9633 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpengli@tencent.com>
+Date: Tue, 25 Jan 2022 01:17:00 -0800
+Subject: KVM: LAPIC: Also cancel preemption timer during SET_LAPIC
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+commit 35fe7cfbab2e81f1afb23fc4212210b1de6d9633 upstream.
+
+The below warning is splatting during guest reboot.
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 0 PID: 1931 at arch/x86/kvm/x86.c:10322 kvm_arch_vcpu_ioctl_run+0x874/0x880 [kvm]
+ CPU: 0 PID: 1931 Comm: qemu-system-x86 Tainted: G I 5.17.0-rc1+ #5
+ RIP: 0010:kvm_arch_vcpu_ioctl_run+0x874/0x880 [kvm]
+ Call Trace:
+ <TASK>
+ kvm_vcpu_ioctl+0x279/0x710 [kvm]
+ __x64_sys_ioctl+0x83/0xb0
+ do_syscall_64+0x3b/0xc0
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ RIP: 0033:0x7fd39797350b
+
+This can be triggered by not exposing tsc-deadline mode and doing a
+reboot in the guest. The lapic_shutdown() function, which is called in
+the sys_reboot path, does not disarm the in-flight timer; it just masks
+LVTT. lapic_shutdown() clears the APIC state with LVT_MASKED set and the
+timer-mode bit 0, which can trigger a timer-mode switch between
+tsc-deadline and oneshot/periodic and, in turn, cancel the preemption
+timer in apic_update_lvtt(). However, we can't depend on this when
+tsc-deadline mode is not exposed and the oneshot/periodic modes are
+emulated by the preemption timer. Qemu synchronises state around reset,
+so cancel the preemption timer under KVM_SET_LAPIC as well.
+
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Message-Id: <1643102220-35667-1-git-send-email-wanpengli@tencent.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2623,7 +2623,7 @@ int kvm_apic_set_state(struct kvm_vcpu *
+ kvm_apic_set_version(vcpu);
+
+ apic_update_ppr(apic);
+- hrtimer_cancel(&apic->lapic_timer.timer);
++ cancel_apic_timer(apic);
+ apic->lapic_timer.expired_tscdeadline = 0;
+ apic_update_lvtt(apic);
+ apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
--- /dev/null
+From 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Sat, 22 Jan 2022 20:55:30 +1000
+Subject: KVM: PPC: Book3S HV Nested: Fix nested HFSCR being clobbered with multiple vCPUs
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 upstream.
+
+The L0 is storing the HFSCR requested by the L1 for the L2 in struct
+kvm_nested_guest when the L1 requests a vCPU enter L2. kvm_nested_guest
+is not a per-vCPU structure. Hilarity ensues.
+
+Fix it by moving the nested hfscr into the vCPU structure together with
+the other per-vCPU nested fields.
+
+Fixes: 8b210a880b35 ("KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible")
+Cc: stable@vger.kernel.org # v5.15+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220122105530.3477250-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/kvm_book3s_64.h | 1 -
+ arch/powerpc/include/asm/kvm_host.h | 1 +
+ arch/powerpc/kvm/book3s_hv.c | 3 +--
+ arch/powerpc/kvm/book3s_hv_nested.c | 2 +-
+ 4 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -39,7 +39,6 @@ struct kvm_nested_guest {
+ pgd_t *shadow_pgtable; /* our page table for this guest */
+ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
+ u64 process_table; /* process table entry for this guest */
+- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */
+ long refcnt; /* number of pointers to this struct */
+ struct mutex tlb_lock; /* serialize page faults and tlbies */
+ struct kvm_nested_guest *next;
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -814,6 +814,7 @@ struct kvm_vcpu_arch {
+
+ /* For support of nested guests */
+ struct kvm_nested_guest *nested;
++ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
+ u32 nested_vcpu_id;
+ gpa_t nested_io_gpr;
+ #endif
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1731,7 +1731,6 @@ static int kvmppc_handle_exit_hv(struct
+
+ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_nested_guest *nested = vcpu->arch.nested;
+ int r;
+ int srcu_idx;
+
+@@ -1831,7 +1830,7 @@ static int kvmppc_handle_nested_exit(str
+ * it into a HEAI.
+ */
+ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+- (nested->hfscr & (1UL << cause))) {
++ (vcpu->arch.nested_hfscr & (1UL << cause))) {
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+ /*
+--- a/arch/powerpc/kvm/book3s_hv_nested.c
++++ b/arch/powerpc/kvm/book3s_hv_nested.c
+@@ -362,7 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm
+ /* set L1 state to L2 state */
+ vcpu->arch.nested = l2;
+ vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+- l2->hfscr = l2_hv.hfscr;
++ vcpu->arch.nested_hfscr = l2_hv.hfscr;
+ vcpu->arch.regs = l2_regs;
+
+ /* Guest must always run with ME enabled, HV disabled. */
--- /dev/null
+From 0b0be065b7563ac708aaa9f69dd4941c80b3446d Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 20 Jan 2022 01:07:13 +0000
+Subject: KVM: SVM: Don't intercept #GP for SEV guests
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0b0be065b7563ac708aaa9f69dd4941c80b3446d upstream.
+
+Never intercept #GP for SEV guests, as reading SEV guest private memory
+will return ciphertext, i.e. emulating on #GP can't work as intended.
+
+Cc: stable@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Message-Id: <20220120010719.711476-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -303,7 +303,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu,
+ return ret;
+ }
+
+- if (svm_gp_erratum_intercept)
++ /*
++ * Never intercept #GP for SEV guests, KVM can't
++ * decrypt guest memory to workaround the erratum.
++ */
++ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
+ set_exception_intercept(svm, GP_VECTOR);
+ }
+ }
+@@ -1176,9 +1180,10 @@ static void init_vmcb(struct kvm_vcpu *v
+ * Guest access to VMware backdoor ports could legitimately
+ * trigger #GP because of TSS I/O permission bitmap.
+ * We intercept those #GP and allow access to them anyway
+- * as VMware does.
++ * as VMware does. Don't intercept #GP for SEV guests as KVM can't
++ * decrypt guest memory to decode the faulting instruction.
+ */
+- if (enable_vmware_backdoor)
++ if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+ set_exception_intercept(svm, GP_VECTOR);
+
+ svm_set_intercept(svm, INTERCEPT_INTR);
--- /dev/null
+From 55467fcd55b89c622e62b4afe60ac0eb2fae91f2 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 20 Jan 2022 01:07:11 +0000
+Subject: KVM: SVM: Never reject emulation due to SMAP errata for !SEV guests
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 55467fcd55b89c622e62b4afe60ac0eb2fae91f2 upstream.
+
+Always signal that emulation is possible for !SEV guests regardless of
+whether or not the CPU provided a valid instruction byte stream. KVM can
+read all guest state (memory and registers) for !SEV guests, i.e. can
+fetch the code stream from memory even if the CPU failed to do so because
+of the SMAP errata.
+
+Fixes: 05d5a4863525 ("KVM: SVM: Workaround errata#1096 (insn_len maybe zero on SMAP violation)")
+Cc: stable@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Message-Id: <20220120010719.711476-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4407,8 +4407,13 @@ static bool svm_can_emulate_instruction(
+ bool smep, smap, is_user;
+ unsigned long cr4;
+
++ /* Emulation is always possible when KVM has access to all guest state. */
++ if (!sev_guest(vcpu->kvm))
++ return true;
++
+ /*
+- * When the guest is an SEV-ES guest, emulation is not possible.
++ * Emulation is impossible for SEV-ES guests as KVM doesn't have access
++ * to guest register state.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return false;
+@@ -4461,9 +4466,6 @@ static bool svm_can_emulate_instruction(
+ smap = cr4 & X86_CR4_SMAP;
+ is_user = svm_get_cpl(vcpu) == 3;
+ if (smap && (!smep || is_user)) {
+- if (!sev_guest(vcpu->kvm))
+- return true;
+-
+ pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ }
--- /dev/null
+From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 25 Jan 2022 22:03:58 +0000
+Subject: KVM: x86: Forcibly leave nested virt when SMM state is toggled
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream.
+
+Forcibly leave nested virtualization operation if userspace toggles SMM
+state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS. If userspace
+forces the vCPU out of SMM while it's post-VMXON and then injects an SMI,
+vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both
+vmxon=false and smm.vmxon=false, but all other nVMX state allocated.
+
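+For reference, the problematic toggle looks like this from userspace (an
+illustrative sketch):
+
+  struct kvm_vcpu_events events = {};
+  events.flags = KVM_VCPUEVENT_VALID_SMM;
+  events.smi.smm = 0;   /* yank the vCPU out of SMM behind KVM's back */
+  ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
+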
+Don't attempt to gracefully handle the transition as (a) most transitions
+are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't
+sufficient information to handle all transitions, e.g. SVM wants access
+to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede
+KVM_SET_NESTED_STATE during state restore as the latter disallows putting
+the vCPU into L2 if SMM is active, and disallows tagging the vCPU as
+being post-VMXON in SMM if SMM is not active.
+
+Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX
+due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond
+just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU
+in an architecturally impossible state.
+
+ WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+ WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+ Modules linked in:
+ CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+ RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+ Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00
+ Call Trace:
+ <TASK>
+ kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123
+ kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline]
+ kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460
+ kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline]
+ kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676
+ kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline]
+ kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250
+ kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273
+ __fput+0x286/0x9f0 fs/file_table.c:311
+ task_work_run+0xdd/0x1a0 kernel/task_work.c:164
+ exit_task_work include/linux/task_work.h:32 [inline]
+ do_exit+0xb29/0x2a30 kernel/exit.c:806
+ do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+ get_signal+0x4b0/0x28c0 kernel/signal.c:2862
+ arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
+ handle_signal_work kernel/entry/common.c:148 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
+ exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
+ syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
+ do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ </TASK>
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220125220358.2091737-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/svm/nested.c | 9 +++++----
+ arch/x86/kvm/svm/svm.c | 2 +-
+ arch/x86/kvm/svm/svm.h | 2 +-
+ arch/x86/kvm/vmx/nested.c | 1 +
+ arch/x86/kvm/x86.c | 4 +++-
+ 6 files changed, 12 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1487,6 +1487,7 @@ struct kvm_x86_ops {
+ };
+
+ struct kvm_x86_nested_ops {
++ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ void (*triple_fault)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -942,9 +942,9 @@ void svm_free_nested(struct vcpu_svm *sv
+ /*
+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+-void svm_leave_nested(struct vcpu_svm *svm)
++void svm_leave_nested(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_vcpu *vcpu = &svm->vcpu;
++ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_guest_mode(vcpu)) {
+ svm->nested.nested_run_pending = 0;
+@@ -1313,7 +1313,7 @@ static int svm_set_nested_state(struct k
+ return -EINVAL;
+
+ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+ return 0;
+ }
+@@ -1378,7 +1378,7 @@ static int svm_set_nested_state(struct k
+ */
+
+ if (is_guest_mode(vcpu))
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ else
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+@@ -1432,6 +1432,7 @@ static bool svm_get_nested_state_pages(s
+ }
+
+ struct kvm_x86_nested_ops svm_nested_ops = {
++ .leave_nested = svm_leave_nested,
+ .check_events = svm_check_nested_events,
+ .triple_fault = nested_svm_triple_fault,
+ .get_nested_state_pages = svm_get_nested_state_pages,
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -281,7 +281,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu,
+
+ if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+ if (!(efer & EFER_SVME)) {
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ svm_set_gif(svm, true);
+ /* #GP intercept is still needed for vmware backdoor */
+ if (!enable_vmware_backdoor)
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -461,7 +461,7 @@ static inline bool nested_exit_on_nmi(st
+
+ int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+ u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
+-void svm_leave_nested(struct vcpu_svm *svm);
++void svm_leave_nested(struct kvm_vcpu *vcpu);
+ void svm_free_nested(struct vcpu_svm *svm);
+ int svm_allocate_nested(struct vcpu_svm *svm);
+ int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6748,6 +6748,7 @@ __init int nested_vmx_hardware_setup(int
+ }
+
+ struct kvm_x86_nested_ops vmx_nested_ops = {
++ .leave_nested = vmx_leave_nested,
+ .check_events = vmx_check_nested_events,
+ .hv_timer_pending = nested_vmx_preemption_timer_pending,
+ .triple_fault = nested_vmx_triple_fault,
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4727,8 +4727,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_e
+ vcpu->arch.apic->sipi_vector = events->sipi_vector;
+
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
++ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
++ kvm_x86_ops.nested_ops->leave_nested(vcpu);
+ kvm_smm_changed(vcpu, events->smi.smm);
++ }
+
+ vcpu->arch.smi_pending = events->smi.pending;
+
--- /dev/null
+From be4f3b3f82271c3193ce200a996dc70682c8e622 Mon Sep 17 00:00:00 2001
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+Date: Wed, 26 Jan 2022 17:22:24 +0000
+Subject: KVM: x86: Keep MSR_IA32_XSS unchanged for INIT
+
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+
+commit be4f3b3f82271c3193ce200a996dc70682c8e622 upstream.
+
+SDM version 075 corrected the architectural behaviour: MSR_IA32_XSS is
+reset to zero on power-up and RESET, but is left unchanged on INIT.
+
+Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT")
+Cc: stable@vger.kernel.org
+Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10990,6 +10990,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+ vcpu->arch.msr_misc_features_enables = 0;
+
+ vcpu->arch.xcr0 = XFEATURE_MASK_FP;
++ vcpu->arch.ia32_xss = 0;
+ }
+
+ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+@@ -11008,8 +11009,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+ eax = 0x600;
+ kvm_rdx_write(vcpu, eax);
+
+- vcpu->arch.ia32_xss = 0;
+-
+ static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
+
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
--- /dev/null
+From 47c28d436f409f5b009dc82bd82d4971088aa391 Mon Sep 17 00:00:00 2001
+From: Denis Valeev <lemniscattaden@gmail.com>
+Date: Sat, 22 Jan 2022 23:13:57 +0300
+Subject: KVM: x86: nSVM: skip eax alignment check for non-SVM instructions
+
+From: Denis Valeev <lemniscattaden@gmail.com>
+
+commit 47c28d436f409f5b009dc82bd82d4971088aa391 upstream.
+
+The bug occurs on a #GP triggered by the VMware backdoor when the eax
+value is unaligned. The eax alignment check should not be applied to
+non-SVM instructions, because it leads to the emulation of those
+instructions being incorrectly skipped.
+Apply the alignment check only to SVM instructions to fix this.
+
+Fixes: d1cba6c92237 ("KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround")
+Signed-off-by: Denis Valeev <lemniscattaden@gmail.com>
+Message-Id: <Yexlhaoe1Fscm59u@q>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2238,10 +2238,6 @@ static int gp_interception(struct kvm_vc
+ if (error_code)
+ goto reinject;
+
+- /* All SVM instructions expect page aligned RAX */
+- if (svm->vmcb->save.rax & ~PAGE_MASK)
+- goto reinject;
+-
+ /* Decode the instruction for usage later */
+ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
+ goto reinject;
+@@ -2259,8 +2255,13 @@ static int gp_interception(struct kvm_vc
+ if (!is_guest_mode(vcpu))
+ return kvm_emulate_instruction(vcpu,
+ EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
+- } else
++ } else {
++ /* All SVM instructions expect page aligned RAX */
++ if (svm->vmcb->save.rax & ~PAGE_MASK)
++ goto reinject;
++
+ return emulate_svm_instr(vcpu, opcode);
++ }
+
+ reinject:
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
--- /dev/null
+From 05a9e065059e566f218f8778c4d17ee75db56c55 Mon Sep 17 00:00:00 2001
+From: Like Xu <likexu@tencent.com>
+Date: Wed, 26 Jan 2022 17:22:26 +0000
+Subject: KVM: x86: Sync the states size with the XCR0/IA32_XSS at any time
+
+From: Like Xu <likexu@tencent.com>
+
+commit 05a9e065059e566f218f8778c4d17ee75db56c55 upstream.
+
+XCR0 is reset to 1 by RESET but not INIT, and IA32_XSS is zeroed by
+both RESET and INIT. kvm_set_msr_common()'s handling of MSR_IA32_XSS
+also needs to call kvm_update_cpuid_runtime(). In the above cases, the
+size in bytes of the XSAVE area containing all states enabled by XCR0 or
+(XCR0 | IA32_XSS) needs to be updated.
+
+For simplicity and consistency, existing helpers are used to write values
+and call kvm_update_cpuid_runtime(), and it's not exactly a fast path.
+
+Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT")
+Cc: stable@vger.kernel.org
+Signed-off-by: Like Xu <likexu@tencent.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10990,8 +10990,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+
+ vcpu->arch.msr_misc_features_enables = 0;
+
+- vcpu->arch.xcr0 = XFEATURE_MASK_FP;
+- vcpu->arch.ia32_xss = 0;
++ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
++ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+ }
+
+ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
--- /dev/null
+From 4c282e51e4450b94680d6ca3b10f830483b1f243 Mon Sep 17 00:00:00 2001
+From: Like Xu <likexu@tencent.com>
+Date: Wed, 26 Jan 2022 17:22:25 +0000
+Subject: KVM: x86: Update vCPU's runtime CPUID on write to MSR_IA32_XSS
+
+From: Like Xu <likexu@tencent.com>
+
+commit 4c282e51e4450b94680d6ca3b10f830483b1f243 upstream.
+
+Do a runtime CPUID update for a vCPU if MSR_IA32_XSS is written, as the
+size in bytes of the XSAVE area is affected by the states enabled in XSS.
+
+Fixes: 203000993de5 ("kvm: vmx: add MSR logic for XSAVES")
+Cc: stable@vger.kernel.org
+Signed-off-by: Like Xu <likexu@tencent.com>
+[sean: split out as a separate patch, adjust Fixes tag]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3453,6 +3453,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
+ if (data & ~supported_xss)
+ return 1;
+ vcpu->arch.ia32_xss = data;
++ kvm_update_cpuid_runtime(vcpu);
+ break;
+ case MSR_SMI_COUNT:
+ if (!msr_info->host_initiated)
--- /dev/null
+From 7fa981cad216e9f64f49e22112f610c0bfed91bc Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Tue, 11 Jan 2022 10:20:38 -0800
+Subject: perf/x86/intel: Add a quirk for the calculation of the number of counters on Alder Lake
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 7fa981cad216e9f64f49e22112f610c0bfed91bc upstream.
+
+For some Alder Lake machines with all E-cores disabled in a BIOS, the
+below warning may be triggered.
+
+[ 2.010766] hw perf events fixed 5 > max(4), clipping!
+
+Current perf code relies on the CPUID leaf 0xA and leaf 7.EDX[15] to
+calculate the number of counters, following the assumptions below.
+
+For a hybrid configuration, the leaf 7.EDX[15] (X86_FEATURE_HYBRID_CPU)
+is set. The leaf 0xA only enumerates the common counters. Linux perf has
+to manually add the extra GP counters and fixed counters for P-cores.
+For a non-hybrid configuration, the X86_FEATURE_HYBRID_CPU should not
+be set. The leaf 0xA enumerates all counters.
+
+However, that's not the case when all E-cores are disabled in a BIOS.
+Although there are only P-cores in the system, the leaf 7.EDX[15]
+(X86_FEATURE_HYBRID_CPU) is still set. But the leaf 0xA is updated
+to enumerate all counters of P-cores. The inconsistency triggers the
+warning.
+
+Several software ways were considered to handle the inconsistency.
+- Drop the leaf 0xA and leaf 7.EDX[15] CPUID enumeration support.
+ Hardcode the number of counters. This solution may be a problem for
+ virtualization. A hypervisor cannot control the number of counters
+ in a Linux guest by changing the guest CPUID enumeration anymore.
+- Find another CPUID bit that is also updated with E-cores disabled.
+ There may be a problem in the virtualization environment too, because
+ a hypervisor may disable the feature/CPUID bit.
+- The P-cores have a maximum of 8 GP counters and 4 fixed counters on
+ ADL. The maximum number can be used to detect the case.
+ This solution is implemented in this patch.
+
+Fixes: ee72a94ea4a6 ("perf/x86/intel: Fix fixed counter check warning for some Alder Lake")
+Reported-by: Damjan Marion (damarion) <damarion@cisco.com>
+Reported-by: Chan Edison <edison_chan_gz@hotmail.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Damjan Marion (damarion) <damarion@cisco.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1641925238-149288-1-git-send-email-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -6187,6 +6187,19 @@ __init int intel_pmu_init(void)
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
++
++ /*
++ * Quirk: For some Alder Lake machine, when all E-cores are disabled in
++ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
++ * the X86_FEATURE_HYBRID_CPU is still set. The above codes will
++ * mistakenly add extra counters for P-cores. Correct the number of
++ * counters here.
++ */
++ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
++ pmu->num_counters = x86_pmu.num_counters;
++ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
++ }
++
+ pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+ pmu->unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
--- /dev/null
+From 96fd2e89fba1aaada6f4b1e5d25a9d9ecbe1943d Mon Sep 17 00:00:00 2001
+From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
+Date: Thu, 23 Dec 2021 22:48:26 +0800
+Subject: perf/x86/intel/uncore: Fix CAS_COUNT_WRITE issue for ICX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
+
+commit 96fd2e89fba1aaada6f4b1e5d25a9d9ecbe1943d upstream.
+
+A user recently reported a perf issue on the ICX platform: when testing
+with the perf event "uncore_imc_x/cas_count_write", the write bandwidth
+is always very small (only 0.38 MB/s). It is caused by the wrong "umask"
+for the "cas_count_write" event. When double-checking, we found
+"cas_count_read" is also wrong.
+
+The public document for ICX uncore:
+
+3rd Gen Intel® Xeon® Processor Scalable Family, Codename Ice Lake, Uncore
+Performance Monitoring Reference Manual, Revision 1.00, May 2021
+
+In section 2.4.7, it defines the Unit Masks for CAS_COUNT:
+RD b00001111
+WR b00110000
+
+So correct both "cas_count_read" and "cas_count_write" for ICX.
+
+Old settings:
+ hswep_uncore_imc_events
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03")
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c")
+
+New settings:
+ snr_uncore_imc_events
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f")
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30")
+
+Fixes: 2b3b76b5ec67 ("perf/x86/intel/uncore: Add Ice Lake server uncore support")
+Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20211223144826.841267-1-zhengjun.xing@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/uncore_snbep.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/uncore_snbep.c
++++ b/arch/x86/events/intel/uncore_snbep.c
+@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_unco
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+- .event_descs = hswep_uncore_imc_events,
++ .event_descs = snr_uncore_imc_events,
+ .perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
+ .event_ctl = SNR_IMC_MMIO_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
--- /dev/null
+From c9d967b2ce40d71e968eb839f36c936b8a9cf1ea Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Thu, 13 Jan 2022 19:44:20 +0100
+Subject: PM: wakeup: simplify the output logic of pm_show_wakelocks()
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+commit c9d967b2ce40d71e968eb839f36c936b8a9cf1ea upstream.
+
+The buffer handling in pm_show_wakelocks() is tricky, and hopefully
+correct. Ensure it really is correct by using sysfs_emit_at(), which
+handles all of the tricky string handling in a PAGE_SIZE buffer for us
+automatically, as this is a sysfs file being read from.
+
+Reviewed-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/power/wakelock.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/kernel/power/wakelock.c
++++ b/kernel/power/wakelock.c
+@@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, boo
+ {
+ struct rb_node *node;
+ struct wakelock *wl;
+- char *str = buf;
+- char *end = buf + PAGE_SIZE;
++ int len = 0;
+
+ mutex_lock(&wakelocks_lock);
+
+ for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
+ wl = rb_entry(node, struct wakelock, node);
+ if (wl->ws->active == show_active)
+- str += scnprintf(str, end - str, "%s ", wl->name);
++ len += sysfs_emit_at(buf, len, "%s ", wl->name);
+ }
+- if (str > buf)
+- str--;
+
+- str += scnprintf(str, end - str, "\n");
++ len += sysfs_emit_at(buf, len, "\n");
+
+ mutex_unlock(&wakelocks_lock);
+- return (str - buf);
++ return len;
+ }
+
+ #if CONFIG_PM_WAKELOCKS_LIMIT > 0
--- /dev/null
+From 252745240ba0ae774d2f80c5e185ed59fbc4fb41 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Fri, 14 Jan 2022 11:26:25 +0000
+Subject: powerpc/audit: Fix syscall_get_arch()
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 252745240ba0ae774d2f80c5e185ed59fbc4fb41 upstream.
+
+Commit 770cec16cdc9 ("powerpc/audit: Simplify syscall_get_arch()")
+and commit 898a1ef06ad4 ("powerpc/audit: Avoid unneccessary #ifdef
+in syscall_get_arguments()")
+replaced test_tsk_thread_flag(task, TIF_32BIT) with is_32bit_task().
+
+But is_32bit_task() applies to the current task, while we want the test
+done on task 'task'.
+
+So add a new macro is_tsk_32bit_task() to check any task.
+
+Fixes: 770cec16cdc9 ("powerpc/audit: Simplify syscall_get_arch()")
+Fixes: 898a1ef06ad4 ("powerpc/audit: Avoid unneccessary #ifdef in syscall_get_arguments()")
+Cc: stable@vger.kernel.org
+Reported-by: Dmitry V. Levin <ldv@altlinux.org>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/c55cddb8f65713bf5859ed675d75a50cb37d5995.1642159570.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/syscall.h | 4 ++--
+ arch/powerpc/include/asm/thread_info.h | 2 ++
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/include/asm/syscall.h
++++ b/arch/powerpc/include/asm/syscall.h
+@@ -90,7 +90,7 @@ static inline void syscall_get_arguments
+ unsigned long val, mask = -1UL;
+ unsigned int n = 6;
+
+- if (is_32bit_task())
++ if (is_tsk_32bit_task(task))
+ mask = 0xffffffff;
+
+ while (n--) {
+@@ -115,7 +115,7 @@ static inline void syscall_set_arguments
+
+ static inline int syscall_get_arch(struct task_struct *task)
+ {
+- if (is_32bit_task())
++ if (is_tsk_32bit_task(task))
+ return AUDIT_ARCH_PPC;
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return AUDIT_ARCH_PPC64LE;
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -165,8 +165,10 @@ static inline bool test_thread_local_fla
+
+ #ifdef CONFIG_COMPAT
+ #define is_32bit_task() (test_thread_flag(TIF_32BIT))
++#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
+ #else
+ #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
++#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
+ #endif
+
+ #if defined(CONFIG_PPC64)
--- /dev/null
+From a06247c6804f1a7c86a2e5398a4c1f1db1471848 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Tue, 11 Jan 2022 15:23:09 -0800
+Subject: psi: Fix uaf issue when psi trigger is destroyed while being polled
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream.
+
+When a write operation on a psi file replaces an old trigger with a new
+one, the lifetime of the old trigger's waitqueue is totally arbitrary.
+Overwriting an existing trigger causes its waitqueue to be freed, and a
+pending poll() will stumble on trigger->event_wait, which has been
+destroyed.
+
+Fix this by disallowing redefinition of an existing psi trigger. If a
+write operation is used on a file descriptor with an already existing
+psi trigger, the operation fails with an EBUSY error.
+
+Also bypass the check for psi_disabled in psi_trigger_destroy(), as the
+flag can be flipped after the trigger is created, which would lead to a
+memory leak.
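+
+An illustrative userspace sketch (assuming the usual psi interface
+files, e.g. /proc/pressure/memory): after this change only the first
+trigger per file descriptor is accepted:
+
+    int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+
+    /* first write creates the trigger */
+    write(fd, "some 150000 1000000", 19);
+
+    /* a second write on the same fd now fails with errno == EBUSY */
+    write(fd, "full 100000 1000000", 19);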
+
+Fixes: 0e94682b73bf ("psi: introduce psi monitor")
+Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Analyzed-by: Eric Biggers <ebiggers@kernel.org>
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/accounting/psi.rst | 3 +
+ include/linux/psi.h | 2 -
+ include/linux/psi_types.h | 3 -
+ kernel/cgroup/cgroup.c | 11 ++++--
+ kernel/sched/psi.c | 66 +++++++++++++++++----------------------
+ 5 files changed, 40 insertions(+), 45 deletions(-)
+
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -92,7 +92,8 @@ Triggers can be set on more than one psi
+ for the same psi metric can be specified. However for each trigger a separate
+ file descriptor is required to be able to poll it separately from others,
+ therefore for each trigger a separate open() syscall should be made even
+-when opening the same psi interface file.
++when opening the same psi interface file. Write operations to a file descriptor
++with an already existing psi trigger will fail with EBUSY.
+
+ Monitors activate only when system enters stall state for the monitored
+ psi metric and deactivates upon exit from the stall state. While system is
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -32,7 +32,7 @@ void cgroup_move_task(struct task_struct
+
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ char *buf, size_t nbytes, enum psi_res res);
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
++void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+ poll_table *wait);
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -140,9 +140,6 @@ struct psi_trigger {
+ * events to one per window
+ */
+ u64 last_event_time;
+-
+- /* Refcounting to prevent premature destruction */
+- struct kref refcount;
+ };
+
+ struct psi_group {
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3642,6 +3642,12 @@ static ssize_t cgroup_pressure_write(str
+ cgroup_get(cgrp);
+ cgroup_kn_unlock(of->kn);
+
++ /* Allow only one trigger per file descriptor */
++ if (ctx->psi.trigger) {
++ cgroup_put(cgrp);
++ return -EBUSY;
++ }
++
+ psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ new = psi_trigger_create(psi, buf, nbytes, res);
+ if (IS_ERR(new)) {
+@@ -3649,8 +3655,7 @@ static ssize_t cgroup_pressure_write(str
+ return PTR_ERR(new);
+ }
+
+- psi_trigger_replace(&ctx->psi.trigger, new);
+-
++ smp_store_release(&ctx->psi.trigger, new);
+ cgroup_put(cgrp);
+
+ return nbytes;
+@@ -3689,7 +3694,7 @@ static void cgroup_pressure_release(stru
+ {
+ struct cgroup_file_ctx *ctx = of->priv;
+
+- psi_trigger_replace(&ctx->psi.trigger, NULL);
++ psi_trigger_destroy(ctx->psi.trigger);
+ }
+
+ bool cgroup_psi_enabled(void)
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(s
+ t->event = 0;
+ t->last_event_time = 0;
+ init_waitqueue_head(&t->event_wait);
+- kref_init(&t->refcount);
+
+ mutex_lock(&group->trigger_lock);
+
+@@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(s
+ return t;
+ }
+
+-static void psi_trigger_destroy(struct kref *ref)
++void psi_trigger_destroy(struct psi_trigger *t)
+ {
+- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
+- struct psi_group *group = t->group;
++ struct psi_group *group;
+ struct task_struct *task_to_destroy = NULL;
+
+- if (static_branch_likely(&psi_disabled))
++ /*
++ * We do not check psi_disabled since it might have been disabled after
++ * the trigger got created.
++ */
++ if (!t)
+ return;
+
++ group = t->group;
+ /*
+ * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+ * from under a polling process.
+@@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct k
+ mutex_unlock(&group->trigger_lock);
+
+ /*
+- * Wait for both *trigger_ptr from psi_trigger_replace and
+- * poll_task RCUs to complete their read-side critical sections
+- * before destroying the trigger and optionally the poll_task
++ * Wait for psi_schedule_poll_work RCU to complete its read-side
++ * critical section before destroying the trigger and optionally the
++ * poll_task.
+ */
+ synchronize_rcu();
+ /*
+@@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct k
+ kfree(t);
+ }
+
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
+-{
+- struct psi_trigger *old = *trigger_ptr;
+-
+- if (static_branch_likely(&psi_disabled))
+- return;
+-
+- rcu_assign_pointer(*trigger_ptr, new);
+- if (old)
+- kref_put(&old->refcount, psi_trigger_destroy);
+-}
+-
+ __poll_t psi_trigger_poll(void **trigger_ptr,
+ struct file *file, poll_table *wait)
+ {
+@@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger
+ if (static_branch_likely(&psi_disabled))
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+
+- rcu_read_lock();
+-
+- t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
+- if (!t) {
+- rcu_read_unlock();
++ t = smp_load_acquire(trigger_ptr);
++ if (!t)
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+- }
+- kref_get(&t->refcount);
+-
+- rcu_read_unlock();
+
+ poll_wait(file, &t->event_wait, wait);
+
+ if (cmpxchg(&t->event, 1, 0) == 1)
+ ret |= EPOLLPRI;
+
+- kref_put(&t->refcount, psi_trigger_destroy);
+-
+ return ret;
+ }
+
+@@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *fi
+
+ buf[buf_size - 1] = '\0';
+
+- new = psi_trigger_create(&psi_system, buf, nbytes, res);
+- if (IS_ERR(new))
+- return PTR_ERR(new);
+-
+ seq = file->private_data;
++
+ /* Take seq->lock to protect seq->private from concurrent writes */
+ mutex_lock(&seq->lock);
+- psi_trigger_replace(&seq->private, new);
++
++ /* Allow only one trigger per file descriptor */
++ if (seq->private) {
++ mutex_unlock(&seq->lock);
++ return -EBUSY;
++ }
++
++ new = psi_trigger_create(&psi_system, buf, nbytes, res);
++ if (IS_ERR(new)) {
++ mutex_unlock(&seq->lock);
++ return PTR_ERR(new);
++ }
++
++ smp_store_release(&seq->private, new);
+ mutex_unlock(&seq->lock);
+
+ return nbytes;
+@@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode
+ {
+ struct seq_file *seq = file->private_data;
+
+- psi_trigger_replace(&seq->private, NULL);
++ psi_trigger_destroy(seq->private);
+ return single_release(inode, file);
+ }
+
--- /dev/null
+From 31c25585695abdf03d6160aa6d829e855b256329 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 20 Jan 2022 01:07:12 +0000
+Subject: Revert "KVM: SVM: avoid infinite loop on NPF from bad address"
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 31c25585695abdf03d6160aa6d829e855b256329 upstream.
+
+Revert a completely broken check on an "invalid" RIP in SVM's workaround
+for the DecodeAssists SMAP errata. kvm_vcpu_gfn_to_memslot() obviously
+expects a gfn, i.e. it operates in the guest physical address space,
+whereas RIP is a virtual (not even linear) address. The "fix" worked for
+the problematic KVM selftest only because the test identity-mapped RIP.
+
+Fully revert the hack instead of trying to translate RIP to a GPA, as the
+non-SEV case is now handled earlier, and KVM cannot access guest page
+tables to translate RIP.
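+
+A sketch of why the reverted check was nonsensical (illustrative only):
+
+    gva_t rip = kvm_rip_read(vcpu);    /* guest *virtual* address */
+    gfn_t gfn = rip >> PAGE_SHIFT;     /* meaningful only for a GPA */
+
+    /* bogus unless the guest happens to identity map RIP */
+    kvm_vcpu_gfn_to_memslot(vcpu, gfn);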
+
+This reverts commit e72436bc3a5206f95bb384e741154166ddb3202e.
+
+Fixes: e72436bc3a52 ("KVM: SVM: avoid infinite loop on NPF from bad address")
+Reported-by: Liam Merwick <liam.merwick@oracle.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Message-Id: <20220120010719.711476-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 7 -------
+ virt/kvm/kvm_main.c | 1 -
+ 2 files changed, 8 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4456,13 +4456,6 @@ static bool svm_can_emulate_instruction(
+ if (likely(!insn || insn_len))
+ return true;
+
+- /*
+- * If RIP is invalid, go ahead with emulation which will cause an
+- * internal error exit.
+- */
+- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+- return true;
+-
+ cr4 = kvm_read_cr4(vcpu);
+ smep = cr4 & X86_CR4_SMEP;
+ smap = cr4 & X86_CR4_SMAP;
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -2104,7 +2104,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_
+
+ return NULL;
+ }
+-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
+
+ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+ {
scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch
udf-restore-i_lenalloc-when-inode-expansion-fails.patch
udf-fix-null-ptr-deref-when-converting-from-inline-format.patch
+efi-runtime-avoid-efiv2-runtime-services-on-apple-x86-machines.patch
+pm-wakeup-simplify-the-output-logic-of-pm_show_wakelocks.patch
+tracing-histogram-fix-a-potential-memory-leak-for-kstrdup.patch
+tracing-don-t-inc-err_log-entry-count-if-entry-allocation-fails.patch
+ceph-properly-put-ceph_string-reference-after-async-create-attempt.patch
+ceph-set-pool_ns-in-new-inode-layout-for-async-creates.patch
+fsnotify-fix-fsnotify-hooks-in-pseudo-filesystems.patch
+revert-kvm-svm-avoid-infinite-loop-on-npf-from-bad-address.patch
+psi-fix-uaf-issue-when-psi-trigger-is-destroyed-while-being-polled.patch
+powerpc-audit-fix-syscall_get_arch.patch
+perf-x86-intel-uncore-fix-cas_count_write-issue-for-icx.patch
+perf-x86-intel-add-a-quirk-for-the-calculation-of-the-number-of-counters-on-alder-lake.patch
+drm-etnaviv-relax-submit-size-limits.patch
+drm-atomic-add-the-crtc-to-affected-crtc-only-if-uapi.enable-true.patch
+drm-amd-display-fix-fp-start-end-for-dcn30_internal_validate_bw.patch
+kvm-lapic-also-cancel-preemption-timer-during-set_lapic.patch
+kvm-svm-never-reject-emulation-due-to-smap-errata-for-sev-guests.patch
+kvm-svm-don-t-intercept-gp-for-sev-guests.patch
+kvm-x86-nsvm-skip-eax-alignment-check-for-non-svm-instructions.patch
+kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
+kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch
+kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch
+kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch
+kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch
+dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch
+block-add-bio_start_io_acct_time-to-control-start_time.patch
+dm-properly-fix-redundant-bio-based-io-accounting.patch
--- /dev/null
+From 67ab5eb71b37b55f7c5522d080a1b42823351776 Mon Sep 17 00:00:00 2001
+From: Tom Zanussi <zanussi@kernel.org>
+Date: Thu, 27 Jan 2022 15:44:18 -0600
+Subject: tracing: Don't inc err_log entry count if entry allocation fails
+
+From: Tom Zanussi <zanussi@kernel.org>
+
+commit 67ab5eb71b37b55f7c5522d080a1b42823351776 upstream.
+
+tr->n_err_log_entries should only be incremented if entry allocation
+succeeds.
+
+Incrementing it on failure won't cause any problems other than wasting
+an entry slot, but it should be fixed anyway.
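+
+Sketch of the surrounding logic (simplified; the recycle helper is a
+stand-in for the real list handling): the counter gates whether a new
+entry is allocated or the oldest one is recycled, so a phantom increment
+permanently wastes one slot:
+
+    if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
+            err = kzalloc(sizeof(*err), GFP_KERNEL);
+            if (!err)
+                    return ERR_PTR(-ENOMEM);
+            tr->n_err_log_entries++;    /* only count real entries */
+    } else {
+            err = recycle_oldest_err(tr);    /* hypothetical helper */
+    }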
+
+Link: https://lkml.kernel.org/r/cad1ab28f75968db0f466925e7cba5970cec6c29.1643319703.git.zanussi@kernel.org
+
+Cc: stable@vger.kernel.org
+Fixes: 2f754e771b1a6 ("tracing: Don't inc err_log entry count if entry allocation fails")
+Signed-off-by: Tom Zanussi <zanussi@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7749,7 +7749,8 @@ static struct tracing_log_err *get_traci
+ err = kzalloc(sizeof(*err), GFP_KERNEL);
+ if (!err)
+ err = ERR_PTR(-ENOMEM);
+- tr->n_err_log_entries++;
++ else
++ tr->n_err_log_entries++;
+
+ return err;
+ }
--- /dev/null
+From e629e7b525a179e29d53463d992bdee759c950fb Mon Sep 17 00:00:00 2001
+From: Xiaoke Wang <xkernel.wang@foxmail.com>
+Date: Tue, 25 Jan 2022 12:07:15 +0800
+Subject: tracing/histogram: Fix a potential memory leak for kstrdup()
+
+From: Xiaoke Wang <xkernel.wang@foxmail.com>
+
+commit e629e7b525a179e29d53463d992bdee759c950fb upstream.
+
+A kfree() is missing on an error path, leaking the memory allocated by
+kstrdup():
+
+    p = param = kstrdup(data->params[i], GFP_KERNEL);
+
+So free it via kfree(p) on that path.
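+
+In sketch form, every early exit taken after the kstrdup() must release
+the duplicated string (mirroring the hunk below):
+
+    p = param = kstrdup(data->params[i], GFP_KERNEL);
+    if (!param)
+            goto err;
+
+    var_ref_idx = find_var_ref_idx(hist_data, var_ref);
+    if (WARN_ON(var_ref_idx < 0)) {
+            kfree(p);    /* was leaked before this fix */
+            ret = var_ref_idx;
+            goto err;
+    }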
+
+Link: https://lkml.kernel.org/r/tencent_C52895FD37802832A3E5B272D05008866F0A@qq.com
+
+Cc: stable@vger.kernel.org
+Fixes: d380dcde9a07c ("tracing: Fix now invalid var_ref_vals assumption in trace action")
+Signed-off-by: Xiaoke Wang <xkernel.wang@foxmail.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3581,6 +3581,7 @@ static int trace_action_create(struct hi
+
+ var_ref_idx = find_var_ref_idx(hist_data, var_ref);
+ if (WARN_ON(var_ref_idx < 0)) {
++ kfree(p);
+ ret = var_ref_idx;
+ goto err;
+ }