git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 24 Feb 2025 14:09:05 +0000 (15:09 +0100)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 24 Feb 2025 14:09:05 +0000 (15:09 +0100)
added patches:
block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch
block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch
media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch
x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch

queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch [new file with mode: 0644]
queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch [new file with mode: 0644]
queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch [new file with mode: 0644]

diff --git a/queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch b/queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch
new file mode 100644 (file)
index 0000000..134f5ce
--- /dev/null
@@ -0,0 +1,195 @@
+From e8b8344de3980709080d86c157d24e7de07d70ad Mon Sep 17 00:00:00 2001
+From: Yu Kuai <yukuai3@huawei.com>
+Date: Fri, 29 Nov 2024 17:15:09 +0800
+Subject: block, bfq: fix bfqq uaf in bfq_limit_depth()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+commit e8b8344de3980709080d86c157d24e7de07d70ad upstream.
+
+Setting a newly allocated bfqq in the bic and removing a freed bfqq from
+the bic are both protected by bfqd->lock; however, bfq_limit_depth() is
+dereferencing bfqq from the bic without the lock, which can lead to a UAF
+if the io_context is shared by multiple tasks.
+
+For example, testing bfq with io_uring can trigger the following UAF in v6.6:
+
+==================================================================
+BUG: KASAN: slab-use-after-free in bfqq_group+0x15/0x50
+
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x47/0x80
+ print_address_description.constprop.0+0x66/0x300
+ print_report+0x3e/0x70
+ kasan_report+0xb4/0xf0
+ bfqq_group+0x15/0x50
+ bfqq_request_over_limit+0x130/0x9a0
+ bfq_limit_depth+0x1b5/0x480
+ __blk_mq_alloc_requests+0x2b5/0xa00
+ blk_mq_get_new_requests+0x11d/0x1d0
+ blk_mq_submit_bio+0x286/0xb00
+ submit_bio_noacct_nocheck+0x331/0x400
+ __block_write_full_folio+0x3d0/0x640
+ writepage_cb+0x3b/0xc0
+ write_cache_pages+0x254/0x6c0
+ do_writepages+0x192/0x310
+ filemap_fdatawrite_wbc+0x95/0xc0
+ __filemap_fdatawrite_range+0x99/0xd0
+ filemap_write_and_wait_range.part.0+0x4d/0xa0
+ blkdev_read_iter+0xef/0x1e0
+ io_read+0x1b6/0x8a0
+ io_issue_sqe+0x87/0x300
+ io_wq_submit_work+0xeb/0x390
+ io_worker_handle_work+0x24d/0x550
+ io_wq_worker+0x27f/0x6c0
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+
+Allocated by task 808602:
+ kasan_save_stack+0x1e/0x40
+ kasan_set_track+0x21/0x30
+ __kasan_slab_alloc+0x83/0x90
+ kmem_cache_alloc_node+0x1b1/0x6d0
+ bfq_get_queue+0x138/0xfa0
+ bfq_get_bfqq_handle_split+0xe3/0x2c0
+ bfq_init_rq+0x196/0xbb0
+ bfq_insert_request.isra.0+0xb5/0x480
+ bfq_insert_requests+0x156/0x180
+ blk_mq_insert_request+0x15d/0x440
+ blk_mq_submit_bio+0x8a4/0xb00
+ submit_bio_noacct_nocheck+0x331/0x400
+ __blkdev_direct_IO_async+0x2dd/0x330
+ blkdev_write_iter+0x39a/0x450
+ io_write+0x22a/0x840
+ io_issue_sqe+0x87/0x300
+ io_wq_submit_work+0xeb/0x390
+ io_worker_handle_work+0x24d/0x550
+ io_wq_worker+0x27f/0x6c0
+ ret_from_fork+0x2d/0x50
+ ret_from_fork_asm+0x1b/0x30
+
+Freed by task 808589:
+ kasan_save_stack+0x1e/0x40
+ kasan_set_track+0x21/0x30
+ kasan_save_free_info+0x27/0x40
+ __kasan_slab_free+0x126/0x1b0
+ kmem_cache_free+0x10c/0x750
+ bfq_put_queue+0x2dd/0x770
+ __bfq_insert_request.isra.0+0x155/0x7a0
+ bfq_insert_request.isra.0+0x122/0x480
+ bfq_insert_requests+0x156/0x180
+ blk_mq_dispatch_plug_list+0x528/0x7e0
+ blk_mq_flush_plug_list.part.0+0xe5/0x590
+ __blk_flush_plug+0x3b/0x90
+ blk_finish_plug+0x40/0x60
+ do_writepages+0x19d/0x310
+ filemap_fdatawrite_wbc+0x95/0xc0
+ __filemap_fdatawrite_range+0x99/0xd0
+ filemap_write_and_wait_range.part.0+0x4d/0xa0
+ blkdev_read_iter+0xef/0x1e0
+ io_read+0x1b6/0x8a0
+ io_issue_sqe+0x87/0x300
+ io_wq_submit_work+0xeb/0x390
+ io_worker_handle_work+0x24d/0x550
+ io_wq_worker+0x27f/0x6c0
+ ret_from_fork+0x2d/0x50
+ ret_from_fork_asm+0x1b/0x30
+
+Fix the problem by protecting bic_to_bfqq() with bfqd->lock.
+
+CC: Jan Kara <jack@suse.cz>
+Fixes: 76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Link: https://lore.kernel.org/r/20241129091509.2227136-1-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bfq-iosched.c |   37 ++++++++++++++++++++++++-------------
+ 1 file changed, 24 insertions(+), 13 deletions(-)
+
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -581,23 +581,31 @@ static struct request *bfq_choose_req(st
+ #define BFQ_LIMIT_INLINE_DEPTH 16
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
++static bool bfqq_request_over_limit(struct bfq_data *bfqd,
++                                  struct bfq_io_cq *bic, blk_opf_t opf,
++                                  unsigned int act_idx, int limit)
+ {
+-      struct bfq_data *bfqd = bfqq->bfqd;
+-      struct bfq_entity *entity = &bfqq->entity;
+       struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
+       struct bfq_entity **entities = inline_entities;
+-      int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
+-      int class_idx = bfqq->ioprio_class - 1;
++      int alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
+       struct bfq_sched_data *sched_data;
++      struct bfq_entity *entity;
++      struct bfq_queue *bfqq;
+       unsigned long wsum;
+       bool ret = false;
+-
+-      if (!entity->on_st_or_in_serv)
+-              return false;
++      int depth;
++      int level;
+ retry:
+       spin_lock_irq(&bfqd->lock);
++      bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx);
++      if (!bfqq)
++              goto out;
++
++      entity = &bfqq->entity;
++      if (!entity->on_st_or_in_serv)
++              goto out;
++
+       /* +1 for bfqq entity, root cgroup not included */
+       depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
+       if (depth > alloc_depth) {
+@@ -642,7 +650,7 @@ retry:
+                        * class.
+                        */
+                       wsum = 0;
+-                      for (i = 0; i <= class_idx; i++) {
++                      for (i = 0; i <= bfqq->ioprio_class - 1; i++) {
+                               wsum = wsum * IOPRIO_BE_NR +
+                                       sched_data->service_tree[i].wsum;
+                       }
+@@ -665,7 +673,9 @@ out:
+       return ret;
+ }
+ #else
+-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
++static bool bfqq_request_over_limit(struct bfq_data *bfqd,
++                                  struct bfq_io_cq *bic, blk_opf_t opf,
++                                  unsigned int act_idx, int limit)
+ {
+       return false;
+ }
+@@ -703,8 +713,9 @@ static void bfq_limit_depth(blk_opf_t op
+       }
+       for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
+-              struct bfq_queue *bfqq =
+-                      bic_to_bfqq(bic, op_is_sync(opf), act_idx);
++              /* Fast path to check if bfqq is already allocated. */
++              if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx))
++                      continue;
+               /*
+                * Does queue (or any parent entity) exceed number of
+@@ -712,7 +723,7 @@ static void bfq_limit_depth(blk_opf_t op
+                * limit depth so that it cannot consume more
+                * available requests and thus starve other entities.
+                */
+-              if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
++              if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
+                       depth = 1;
+                       break;
+               }
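The essence of the fix above is that the bfqq pointer must only be looked up
and dereferenced while bfqd->lock is held, because the same lock serializes
the paths that free or replace it. For illustration, a minimal standalone
userspace sketch of that pattern follows; all types and names are invented
for the sketch and are not the kernel's.

/*
 * Minimal userspace model of the locking pattern the fix establishes:
 * the shared queue pointer is looked up and dereferenced only while
 * holding the lock that also covers the path freeing/replacing it.
 * All names here are invented for the sketch; this is not kernel code.
 */
#include <pthread.h>
#include <stdio.h>

struct queue {
	int allocated;                  /* requests currently allocated */
};

struct context {
	pthread_mutex_t lock;           /* plays the role of bfqd->lock */
	struct queue *q;                /* plays the role of bic->bfqq  */
};

/* Safe pattern: look up and use the pointer under the lock. */
static int over_limit(struct context *ctx, int limit)
{
	int ret = 0;

	pthread_mutex_lock(&ctx->lock);
	if (ctx->q)                     /* the queue may be gone already */
		ret = ctx->q->allocated > limit;
	pthread_mutex_unlock(&ctx->lock);
	return ret;
}

int main(void)
{
	struct context ctx = { .q = NULL };
	struct queue q = { .allocated = 64 };

	pthread_mutex_init(&ctx.lock, NULL);
	ctx.q = &q;
	printf("over limit: %d\n", over_limit(&ctx, 32));
	return 0;
}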
diff --git a/queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch b/queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch
new file mode 100644 (file)
index 0000000..71fffc1
--- /dev/null
@@ -0,0 +1,638 @@
+From 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Tue, 3 Jan 2023 15:54:56 +0100
+Subject: block, bfq: split sync bfq_queues on a per-actuator basis
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Valente <paolo.valente@linaro.org>
+
+commit 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d upstream.
+
+Single-LUN multi-actuator SCSI drives, as well as all multi-actuator
+SATA drives, appear as a single device to the I/O subsystem [1].  Yet
+they address commands to different actuators internally, as a function
+of Logical Block Addressing (LBAs). A given sector is reachable by
+only one of the actuators. For example, Seagate’s Serial Advanced
+Technology Attachment (SATA) version contains two actuators and maps
+the lower half of the SATA LBA space to the lower actuator and the
+upper half to the upper actuator.
+
+Evidently, to fully utilize actuators, no actuator must be left idle
+or underutilized while there is pending I/O for it. The block layer
+must somehow control the load of each actuator individually. This
+commit lays the ground for allowing BFQ to provide such a per-actuator
+control.
+
+BFQ associates an I/O-request sync bfq_queue with each process doing
+synchronous I/O, or with a group of processes, in case of queue
+merging. Then BFQ serves one bfq_queue at a time. While in service, a
+bfq_queue is emptied in request-position order. Yet the same process,
+or group of processes, may generate I/O for different actuators. In
+this case, different streams of I/O (each for a different actuator)
+get all inserted into the same sync bfq_queue. So there is basically
+no individual control on when each stream is served, i.e., on when the
+I/O requests of the stream are picked from the bfq_queue and
+dispatched to the drive.
+
+This commit enables BFQ to control the service of each actuator
+individually for synchronous I/O, by simply splitting each sync
+bfq_queue into N queues, one for each actuator. In other words, a sync
+bfq_queue is now associated to a pair (process, actuator). As a
+consequence of this split, the per-queue proportional-share policy
+implemented by BFQ will guarantee that the sync I/O generated for each
+actuator, by each process, receives its fair share of service.
+
+This is just a preparatory patch. If the I/O of the same process
+happens to be sent to different queues, then each of these queues may
+undergo queue merging. To handle this event, the bfq_io_cq data
+structure must be properly extended. In addition, stable merging must
+be disabled to avoid loss of control on individual actuators. Finally,
+also async queues must be split. These issues are described in detail
+and addressed in next commits. As for this commit, although multiple
+per-process bfq_queues are provided, the I/O of each process or group
+of processes is still sent to only one queue, regardless of the
+actuator the I/O is for. The forwarding to distinct bfq_queues will be
+enabled after addressing the above issues.
+
+[1] https://www.linaro.org/blog/budget-fair-queueing-bfq-linux-io-scheduler-optimizations-for-multi-actuator-sata-hard-drives/
+
+Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Gabriele Felici <felicigb@gmail.com>
+Signed-off-by: Carmine Zaccagnino <carmine@carminezacc.com>
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+Link: https://lore.kernel.org/r/20230103145503.71712-2-paolo.valente@linaro.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: e8b8344de398 ("block, bfq: fix bfqq uaf in bfq_limit_depth()")
+[Hagar: needed contextual fixes]
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bfq-cgroup.c  |   97 ++++++++++++++++---------------
+ block/bfq-iosched.c |  160 ++++++++++++++++++++++++++++++++++------------------
+ block/bfq-iosched.h |   51 +++++++++++++---
+ 3 files changed, 197 insertions(+), 111 deletions(-)
+
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -704,6 +704,46 @@ void bfq_bfqq_move(struct bfq_data *bfqd
+       bfq_put_queue(bfqq);
+ }
++static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
++                             struct bfq_queue *sync_bfqq,
++                             struct bfq_io_cq *bic,
++                             struct bfq_group *bfqg,
++                             unsigned int act_idx)
++{
++      struct bfq_queue *bfqq;
++
++      if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
++              /* We are the only user of this bfqq, just move it */
++              if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
++                      bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
++              return;
++      }
++
++      /*
++       * The queue was merged to a different queue. Check
++       * that the merge chain still belongs to the same
++       * cgroup.
++       */
++      for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
++              if (bfqq->entity.sched_data != &bfqg->sched_data)
++                      break;
++      if (bfqq) {
++              /*
++               * Some queue changed cgroup so the merge is not valid
++               * anymore. We cannot easily just cancel the merge (by
++               * clearing new_bfqq) as there may be other processes
++               * using this queue and holding refs to all queues
++               * below sync_bfqq->new_bfqq. Similarly if the merge
++               * already happened, we need to detach from bfqq now
++               * so that we cannot merge bio to a request from the
++               * old cgroup.
++               */
++              bfq_put_cooperator(sync_bfqq);
++              bfq_release_process_ref(bfqd, sync_bfqq);
++              bic_set_bfqq(bic, NULL, true, act_idx);
++      }
++}
++
+ /**
+  * __bfq_bic_change_cgroup - move @bic to @bfqg.
+  * @bfqd: the queue descriptor.
+@@ -714,60 +754,25 @@ void bfq_bfqq_move(struct bfq_data *bfqd
+  * sure that the reference to cgroup is valid across the call (see
+  * comments in bfq_bic_update_cgroup on this issue)
+  */
+-static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
++static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
+                                    struct bfq_io_cq *bic,
+                                    struct bfq_group *bfqg)
+ {
+-      struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
+-      struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
+-      struct bfq_entity *entity;
+-
+-      if (async_bfqq) {
+-              entity = &async_bfqq->entity;
++      unsigned int act_idx;
+-              if (entity->sched_data != &bfqg->sched_data) {
+-                      bic_set_bfqq(bic, NULL, false);
++      for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
++              struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
++              struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
++
++              if (async_bfqq &&
++                  async_bfqq->entity.sched_data != &bfqg->sched_data) {
++                      bic_set_bfqq(bic, NULL, false, act_idx);
+                       bfq_release_process_ref(bfqd, async_bfqq);
+               }
+-      }
+-      if (sync_bfqq) {
+-              if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+-                      /* We are the only user of this bfqq, just move it */
+-                      if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+-                              bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+-              } else {
+-                      struct bfq_queue *bfqq;
+-
+-                      /*
+-                       * The queue was merged to a different queue. Check
+-                       * that the merge chain still belongs to the same
+-                       * cgroup.
+-                       */
+-                      for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+-                              if (bfqq->entity.sched_data !=
+-                                  &bfqg->sched_data)
+-                                      break;
+-                      if (bfqq) {
+-                              /*
+-                               * Some queue changed cgroup so the merge is
+-                               * not valid anymore. We cannot easily just
+-                               * cancel the merge (by clearing new_bfqq) as
+-                               * there may be other processes using this
+-                               * queue and holding refs to all queues below
+-                               * sync_bfqq->new_bfqq. Similarly if the merge
+-                               * already happened, we need to detach from
+-                               * bfqq now so that we cannot merge bio to a
+-                               * request from the old cgroup.
+-                               */
+-                              bfq_put_cooperator(sync_bfqq);
+-                              bic_set_bfqq(bic, NULL, true);
+-                              bfq_release_process_ref(bfqd, sync_bfqq);
+-                      }
+-              }
++              if (sync_bfqq)
++                      bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
+       }
+-
+-      return bfqg;
+ }
+ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -377,16 +377,23 @@ static const unsigned long bfq_late_stab
+ #define RQ_BIC(rq)            ((struct bfq_io_cq *)((rq)->elv.priv[0]))
+ #define RQ_BFQQ(rq)           ((rq)->elv.priv[1])
+-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
++struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
++                            unsigned int actuator_idx)
+ {
+-      return bic->bfqq[is_sync];
++      if (is_sync)
++              return bic->bfqq[1][actuator_idx];
++
++      return bic->bfqq[0][actuator_idx];
+ }
+ static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
++void bic_set_bfqq(struct bfq_io_cq *bic,
++                struct bfq_queue *bfqq,
++                bool is_sync,
++                unsigned int actuator_idx)
+ {
+-      struct bfq_queue *old_bfqq = bic->bfqq[is_sync];
++      struct bfq_queue *old_bfqq = bic->bfqq[is_sync][actuator_idx];
+       /* Clear bic pointer if bfqq is detached from this bic */
+       if (old_bfqq && old_bfqq->bic == bic)
+@@ -405,7 +412,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic,
+        * we cancel the stable merge if
+        * bic->stable_merge_bfqq == bfqq.
+        */
+-      bic->bfqq[is_sync] = bfqq;
++      if (is_sync)
++              bic->bfqq[1][actuator_idx] = bfqq;
++      else
++              bic->bfqq[0][actuator_idx] = bfqq;
+       if (bfqq && bic->stable_merge_bfqq == bfqq) {
+               /*
+@@ -680,9 +690,9 @@ static void bfq_limit_depth(blk_opf_t op
+ {
+       struct bfq_data *bfqd = data->q->elevator->elevator_data;
+       struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
+-      struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
+       int depth;
+       unsigned limit = data->q->nr_requests;
++      unsigned int act_idx;
+       /* Sync reads have full depth available */
+       if (op_is_sync(opf) && !op_is_write(opf)) {
+@@ -692,14 +702,21 @@ static void bfq_limit_depth(blk_opf_t op
+               limit = (limit * depth) >> bfqd->full_depth_shift;
+       }
+-      /*
+-       * Does queue (or any parent entity) exceed number of requests that
+-       * should be available to it? Heavily limit depth so that it cannot
+-       * consume more available requests and thus starve other entities.
+-       */
+-      if (bfqq && bfqq_request_over_limit(bfqq, limit))
+-              depth = 1;
++      for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
++              struct bfq_queue *bfqq =
++                      bic_to_bfqq(bic, op_is_sync(opf), act_idx);
++              /*
++               * Does queue (or any parent entity) exceed number of
++               * requests that should be available to it? Heavily
++               * limit depth so that it cannot consume more
++               * available requests and thus starve other entities.
++               */
++              if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
++                      depth = 1;
++                      break;
++              }
++      }
+       bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
+               __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
+       if (depth)
+@@ -1820,6 +1837,18 @@ static bool bfq_bfqq_higher_class_or_wei
+       return bfqq_weight > in_serv_weight;
+ }
++/*
++ * Get the index of the actuator that will serve bio.
++ */
++static unsigned int bfq_actuator_index(struct bfq_data *bfqd, struct bio *bio)
++{
++      /*
++       * Multi-actuator support not complete yet, so always return 0
++       * for the moment (to keep incomplete mechanisms off).
++       */
++      return 0;
++}
++
+ static bool bfq_better_to_idle(struct bfq_queue *bfqq);
+ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
+@@ -2150,7 +2179,7 @@ static void bfq_check_waker(struct bfq_d
+        * We reset waker detection logic also if too much time has passed
+        * since the first detection. If wakeups are rare, pointless idling
+        * doesn't hurt throughput that much. The condition below makes sure
+-       * we do not uselessly idle blocking waker in more than 1/64 cases. 
++       * we do not uselessly idle blocking waker in more than 1/64 cases.
+        */
+       if (bfqd->last_completed_rq_bfqq !=
+           bfqq->tentative_waker_bfqq ||
+@@ -2486,7 +2515,8 @@ static bool bfq_bio_merge(struct request
+                */
+               bfq_bic_update_cgroup(bic, bio);
+-              bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
++              bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf),
++                                           bfq_actuator_index(bfqd, bio));
+       } else {
+               bfqd->bio_bfqq = NULL;
+       }
+@@ -3188,7 +3218,7 @@ static struct bfq_queue *bfq_merge_bfqqs
+       /*
+        * Merge queues (that is, let bic redirect its requests to new_bfqq)
+        */
+-      bic_set_bfqq(bic, new_bfqq, true);
++      bic_set_bfqq(bic, new_bfqq, true, bfqq->actuator_idx);
+       bfq_mark_bfqq_coop(new_bfqq);
+       /*
+        * new_bfqq now belongs to at least two bics (it is a shared queue):
+@@ -4818,11 +4848,8 @@ check_queue:
+        */
+       if (bfq_bfqq_wait_request(bfqq) ||
+           (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
+-              struct bfq_queue *async_bfqq =
+-                      bfqq->bic && bfqq->bic->bfqq[0] &&
+-                      bfq_bfqq_busy(bfqq->bic->bfqq[0]) &&
+-                      bfqq->bic->bfqq[0]->next_rq ?
+-                      bfqq->bic->bfqq[0] : NULL;
++              unsigned int act_idx = bfqq->actuator_idx;
++              struct bfq_queue *async_bfqq = NULL;
+               struct bfq_queue *blocked_bfqq =
+                       !hlist_empty(&bfqq->woken_list) ?
+                       container_of(bfqq->woken_list.first,
+@@ -4830,6 +4857,10 @@ check_queue:
+                                    woken_list_node)
+                       : NULL;
++              if (bfqq->bic && bfqq->bic->bfqq[0][act_idx] &&
++                  bfq_bfqq_busy(bfqq->bic->bfqq[0][act_idx]) &&
++                  bfqq->bic->bfqq[0][act_idx]->next_rq)
++                      async_bfqq = bfqq->bic->bfqq[0][act_idx];
+               /*
+                * The next four mutually-exclusive ifs decide
+                * whether to try injection, and choose the queue to
+@@ -4914,7 +4945,7 @@ check_queue:
+                   icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic &&
+                   bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
+                   bfq_bfqq_budget_left(async_bfqq))
+-                      bfqq = bfqq->bic->bfqq[0];
++                      bfqq = bfqq->bic->bfqq[0][act_idx];
+               else if (bfqq->waker_bfqq &&
+                          bfq_bfqq_busy(bfqq->waker_bfqq) &&
+                          bfqq->waker_bfqq->next_rq &&
+@@ -5375,48 +5406,54 @@ static void bfq_exit_bfqq(struct bfq_dat
+       bfq_release_process_ref(bfqd, bfqq);
+ }
+-static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
++static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
++                            unsigned int actuator_idx)
+ {
+-      struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
++      struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, actuator_idx);
+       struct bfq_data *bfqd;
+       if (bfqq)
+               bfqd = bfqq->bfqd; /* NULL if scheduler already exited */
+       if (bfqq && bfqd) {
+-              unsigned long flags;
+-
+-              spin_lock_irqsave(&bfqd->lock, flags);
+-              bic_set_bfqq(bic, NULL, is_sync);
++              bic_set_bfqq(bic, NULL, is_sync, actuator_idx);
+               bfq_exit_bfqq(bfqd, bfqq);
+-              spin_unlock_irqrestore(&bfqd->lock, flags);
+       }
+ }
+ static void bfq_exit_icq(struct io_cq *icq)
+ {
+       struct bfq_io_cq *bic = icq_to_bic(icq);
++      struct bfq_data *bfqd = bic_to_bfqd(bic);
++      unsigned long flags;
++      unsigned int act_idx;
++      /*
++       * If bfqd and thus bfqd->num_actuators is not available any
++       * longer, then cycle over all possible per-actuator bfqqs in
++       * next loop. We rely on bic being zeroed on creation, and
++       * therefore on its unused per-actuator fields being NULL.
++       */
++      unsigned int num_actuators = BFQ_MAX_ACTUATORS;
+-      if (bic->stable_merge_bfqq) {
+-              struct bfq_data *bfqd = bic->stable_merge_bfqq->bfqd;
++      /*
++       * bfqd is NULL if scheduler already exited, and in that case
++       * this is the last time these queues are accessed.
++       */
++      if (bfqd) {
++              spin_lock_irqsave(&bfqd->lock, flags);
++              num_actuators = bfqd->num_actuators;
++      }
+-              /*
+-               * bfqd is NULL if scheduler already exited, and in
+-               * that case this is the last time bfqq is accessed.
+-               */
+-              if (bfqd) {
+-                      unsigned long flags;
++      if (bic->stable_merge_bfqq)
++              bfq_put_stable_ref(bic->stable_merge_bfqq);
+-                      spin_lock_irqsave(&bfqd->lock, flags);
+-                      bfq_put_stable_ref(bic->stable_merge_bfqq);
+-                      spin_unlock_irqrestore(&bfqd->lock, flags);
+-              } else {
+-                      bfq_put_stable_ref(bic->stable_merge_bfqq);
+-              }
++      for (act_idx = 0; act_idx < num_actuators; act_idx++) {
++              bfq_exit_icq_bfqq(bic, true, act_idx);
++              bfq_exit_icq_bfqq(bic, false, act_idx);
+       }
+-      bfq_exit_icq_bfqq(bic, true);
+-      bfq_exit_icq_bfqq(bic, false);
++      if (bfqd)
++              spin_unlock_irqrestore(&bfqd->lock, flags);
+ }
+ /*
+@@ -5493,25 +5530,27 @@ static void bfq_check_ioprio_change(stru
+       bic->ioprio = ioprio;
+-      bfqq = bic_to_bfqq(bic, false);
++      bfqq = bic_to_bfqq(bic, false, bfq_actuator_index(bfqd, bio));
+       if (bfqq) {
+               struct bfq_queue *old_bfqq = bfqq;
+               bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
+-              bic_set_bfqq(bic, bfqq, false);
++              bic_set_bfqq(bic, bfqq, false, bfq_actuator_index(bfqd, bio));
+               bfq_release_process_ref(bfqd, old_bfqq);
+       }
+-      bfqq = bic_to_bfqq(bic, true);
++      bfqq = bic_to_bfqq(bic, true, bfq_actuator_index(bfqd, bio));
+       if (bfqq)
+               bfq_set_next_ioprio_data(bfqq, bic);
+ }
+ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+-                        struct bfq_io_cq *bic, pid_t pid, int is_sync)
++                        struct bfq_io_cq *bic, pid_t pid, int is_sync,
++                        unsigned int act_idx)
+ {
+       u64 now_ns = ktime_get_ns();
++      bfqq->actuator_idx = act_idx;
+       RB_CLEAR_NODE(&bfqq->entity.rb_node);
+       INIT_LIST_HEAD(&bfqq->fifo);
+       INIT_HLIST_NODE(&bfqq->burst_list_node);
+@@ -5762,7 +5801,7 @@ static struct bfq_queue *bfq_get_queue(s
+       if (bfqq) {
+               bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+-                            is_sync);
++                            is_sync, bfq_actuator_index(bfqd, bio));
+               bfq_init_entity(&bfqq->entity, bfqg);
+               bfq_log_bfqq(bfqd, bfqq, "allocated");
+       } else {
+@@ -6078,7 +6117,8 @@ static bool __bfq_insert_request(struct
+                * then complete the merge and redirect it to
+                * new_bfqq.
+                */
+-              if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) {
++              if (bic_to_bfqq(RQ_BIC(rq), true,
++                              bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+                       while (bfqq != new_bfqq)
+                               bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+               }
+@@ -6632,7 +6672,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, st
+               return bfqq;
+       }
+-      bic_set_bfqq(bic, NULL, true);
++      bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx);
+       bfq_put_cooperator(bfqq);
+@@ -6646,7 +6686,8 @@ static struct bfq_queue *bfq_get_bfqq_ha
+                                                  bool split, bool is_sync,
+                                                  bool *new_queue)
+ {
+-      struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
++      unsigned int act_idx = bfq_actuator_index(bfqd, bio);
++      struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, act_idx);
+       if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
+               return bfqq;
+@@ -6658,7 +6699,7 @@ static struct bfq_queue *bfq_get_bfqq_ha
+               bfq_put_queue(bfqq);
+       bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, split);
+-      bic_set_bfqq(bic, bfqq, is_sync);
++      bic_set_bfqq(bic, bfqq, is_sync, act_idx);
+       if (split && is_sync) {
+               if ((bic->was_in_burst_list && bfqd->large_burst) ||
+                   bic->saved_in_large_burst)
+@@ -7139,8 +7180,10 @@ static int bfq_init_queue(struct request
+        * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+        * Grab a permanent reference to it, so that the normal code flow
+        * will not attempt to free it.
++       * Set zero as actuator index: we will pretend that
++       * all I/O requests are for the same actuator.
+        */
+-      bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
++      bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0, 0);
+       bfqd->oom_bfqq.ref++;
+       bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+       bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+@@ -7159,6 +7202,13 @@ static int bfq_init_queue(struct request
+       bfqd->queue = q;
++      /*
++       * Multi-actuator support not complete yet, unconditionally
++       * set to only one actuator for the moment (to keep incomplete
++       * mechanisms off).
++       */
++      bfqd->num_actuators = 1;
++
+       INIT_LIST_HEAD(&bfqd->dispatch);
+       hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
+--- a/block/bfq-iosched.h
++++ b/block/bfq-iosched.h
+@@ -33,6 +33,14 @@
+  */
+ #define BFQ_SOFTRT_WEIGHT_FACTOR      100
++/*
++ * Maximum number of actuators supported. This constant is used simply
++ * to define the size of the static array that will contain
++ * per-actuator data. The current value is hopefully a good upper
++ * bound to the possible number of actuators of any actual drive.
++ */
++#define BFQ_MAX_ACTUATORS 8
++
+ struct bfq_entity;
+ /**
+@@ -225,12 +233,14 @@ struct bfq_ttime {
+  * struct bfq_queue - leaf schedulable entity.
+  *
+  * A bfq_queue is a leaf request queue; it can be associated with an
+- * io_context or more, if it  is  async or shared  between  cooperating
+- * processes. @cgroup holds a reference to the cgroup, to be sure that it
+- * does not disappear while a bfqq still references it (mostly to avoid
+- * races between request issuing and task migration followed by cgroup
+- * destruction).
+- * All the fields are protected by the queue lock of the containing bfqd.
++ * io_context or more, if it is async or shared between cooperating
++ * processes. Besides, it contains I/O requests for only one actuator
++ * (an io_context is associated with a different bfq_queue for each
++ * actuator it generates I/O for). @cgroup holds a reference to the
++ * cgroup, to be sure that it does not disappear while a bfqq still
++ * references it (mostly to avoid races between request issuing and
++ * task migration followed by cgroup destruction).  All the fields are
++ * protected by the queue lock of the containing bfqd.
+  */
+ struct bfq_queue {
+       /* reference counter */
+@@ -395,6 +405,9 @@ struct bfq_queue {
+        * the woken queues when this queue exits.
+        */
+       struct hlist_head woken_list;
++
++      /* index of the actuator this queue is associated with */
++      unsigned int actuator_idx;
+ };
+ /**
+@@ -403,8 +416,17 @@ struct bfq_queue {
+ struct bfq_io_cq {
+       /* associated io_cq structure */
+       struct io_cq icq; /* must be the first member */
+-      /* array of two process queues, the sync and the async */
+-      struct bfq_queue *bfqq[2];
++      /*
++       * Matrix of associated process queues: first row for async
++       * queues, second row sync queues. Each row contains one
++       * column for each actuator. An I/O request generated by the
++       * process is inserted into the queue pointed by bfqq[i][j] if
++       * the request is to be served by the j-th actuator of the
++       * drive, where i==0 or i==1, depending on whether the request
++       * is async or sync. So there is a distinct queue for each
++       * actuator.
++       */
++      struct bfq_queue *bfqq[2][BFQ_MAX_ACTUATORS];
+       /* per (request_queue, blkcg) ioprio */
+       int ioprio;
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -768,6 +790,13 @@ struct bfq_data {
+        */
+       unsigned int word_depths[2][2];
+       unsigned int full_depth_shift;
++
++      /*
++       * Number of independent actuators. This is equal to 1 in
++       * case of single-actuator drives.
++       */
++      unsigned int num_actuators;
++
+ };
+ enum bfqq_state_flags {
+@@ -964,8 +993,10 @@ struct bfq_group {
+ extern const int bfq_timeout;
+-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
+-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
++struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
++                              unsigned int actuator_idx);
++void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync,
++                              unsigned int actuator_idx);
+ struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
+ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
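The structural change above boils down to turning bic->bfqq from a
two-element array (async/sync) into a small matrix indexed by
(async/sync, actuator). A standalone sketch of that data-structure shape,
with simplified stand-in types rather than the real BFQ structures:

/*
 * Sketch of the per-actuator queue matrix: row 0 holds async queues,
 * row 1 sync queues, one column per actuator. Types and helpers are
 * simplified stand-ins for the real bfq_io_cq/bfq_queue structures.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_ACTUATORS 8                 /* mirrors BFQ_MAX_ACTUATORS */

struct queue {
	unsigned int actuator_idx;      /* actuator this queue serves */
};

struct io_ctx {
	struct queue *bfqq[2][MAX_ACTUATORS];
};

static struct queue *ctx_to_queue(struct io_ctx *ic, bool is_sync,
				  unsigned int actuator_idx)
{
	return ic->bfqq[is_sync][actuator_idx];
}

static void ctx_set_queue(struct io_ctx *ic, struct queue *q, bool is_sync,
			  unsigned int actuator_idx)
{
	ic->bfqq[is_sync][actuator_idx] = q;
}

int main(void)
{
	struct io_ctx ic = {0};
	struct queue sync_q = { .actuator_idx = 1 };

	ctx_set_queue(&ic, &sync_q, true, 1);
	printf("sync queue for actuator 1 set: %s\n",
	       ctx_to_queue(&ic, true, 1) == &sync_q ? "yes" : "no");
	return 0;
}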
diff --git a/queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch b/queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch
new file mode 100644 (file)
index 0000000..12f4447
--- /dev/null
@@ -0,0 +1,48 @@
+From 9be85491619f1953b8a29590ca630be571941ffa Mon Sep 17 00:00:00 2001
+From: Yunfei Dong <yunfei.dong@mediatek.com>
+Date: Thu, 13 Jun 2024 17:33:55 +0800
+Subject: media: mediatek: vcodec: Fix H264 multi stateless decoder smatch warning
+
+From: Yunfei Dong <yunfei.dong@mediatek.com>
+
+commit 9be85491619f1953b8a29590ca630be571941ffa upstream.
+
+Fix a smatch static checker warning in vdec_h264_req_multi_if.c,
+which leads to a kernel crash when fb is NULL.
+
+Fixes: 397edc703a10 ("media: mediatek: vcodec: add h264 decoder driver for mt8186")
+Signed-off-by: Yunfei Dong <yunfei.dong@mediatek.com>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Signed-off-by: Sebastian Fricke <sebastian.fricke@collabora.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+[ drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
+  is renamed from drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c
+  since 0934d3759615 ("media: mediatek: vcodec: separate decoder and encoder").
+  The path is changed accordingly to apply the patch on 6.1.y. ]
+Signed-off-by: Wenshan Lan <jetlan9@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c
++++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c
+@@ -729,11 +729,16 @@ static int vdec_h264_slice_single_decode
+               return vpu_dec_reset(vpu);
+       fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx);
++      if (!fb) {
++              mtk_vcodec_err(inst, "fb buffer is NULL");
++              return -ENOMEM;
++      }
++
+       src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+       dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer);
+-      y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+-      c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
++      y_fb_dma = fb->base_y.dma_addr;
++      c_fb_dma = fb->base_c.dma_addr;
+       mtk_vcodec_debug(inst, "[h264-dec] [%d] y_dma=%llx c_dma=%llx",
+                        inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma);
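The fix above replaces late, partial NULL handling ("fb ? ... : 0") with a
single early bail-out before the buffer is used. A tiny standalone sketch of
that pattern, with invented types rather than the driver's:

/*
 * Sketch of the early-return pattern from the patch: reject a NULL
 * capture buffer before deriving anything from it, instead of adding
 * "fb ? ... : 0" fallbacks at each use. Types are illustrative only.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct frame_buf {
	uint64_t y_dma;
	uint64_t c_dma;
};

static int decode_one(const struct frame_buf *fb)
{
	if (!fb) {
		fprintf(stderr, "fb buffer is NULL\n");
		return -ENOMEM;         /* fail early, as in the patch */
	}

	/* Safe to dereference unconditionally from here on. */
	printf("y_dma=%llx c_dma=%llx\n",
	       (unsigned long long)fb->y_dma,
	       (unsigned long long)fb->c_dma);
	return 0;
}

int main(void)
{
	const struct frame_buf fb = { .y_dma = 0x1000, .c_dma = 0x2000 };

	decode_one(NULL);               /* rejected with -ENOMEM */
	return decode_one(&fb);
}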
diff --git a/queue-6.1/series b/queue-6.1/series
index 575a051d859e8daf1e83fa0633979d737625be4f..a588a8e5d22e1a57a2788e20278b623f5203d749 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -87,3 +87,7 @@ smb-client-add-check-for-next_buffer-in-receive_encrypted_standard.patch
 edac-qcom-correct-interrupt-enable-register-configuration.patch
 ftrace-correct-preemption-accounting-for-function-tracing.patch
 ftrace-do-not-add-duplicate-entries-in-subops-manager-ops.patch
+x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch
+block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch
+block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch
+media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch
diff --git a/queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch b/queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch
new file mode 100644 (file)
index 0000000..10da975
--- /dev/null
@@ -0,0 +1,148 @@
+From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
+From: Patrick Bellasi <derkling@google.com>
+Date: Wed, 5 Feb 2025 14:04:41 +0000
+Subject: x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
+
+From: Patrick Bellasi <derkling@google.com>
+
+commit 318e8c339c9a0891c389298bb328ed0762a9935e upstream.
+
+In [1] the meaning of the synthetic IBPB flags has been redefined for a
+better separation of concerns:
+ - ENTRY_IBPB     -- issue IBPB on entry only
+ - IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
+and the Retbleed mitigations have been updated to match these new
+semantics.
+
+Commit [2] was merged shortly before [1], and their interaction was not
+handled properly. This resulted in IBPB not being triggered on VM-Exit
+in all SRSO mitigation configs requesting an IBPB there.
+
+Specifically, an IBPB on VM-Exit is triggered only when
+X86_FEATURE_IBPB_ON_VMEXIT is set. However:
+
+ - X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
+   because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
+   an IBPB is triggered on entry but the expected IBPB on VM-exit is
+   not.
+
+ - X86_FEATURE_IBPB_ON_VMEXIT is not set also when
+   "spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
+   already set.
+
+   That's because before [1] this was effectively redundant. Hence, e.g.
+   a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
+   reports the machine still vulnerable to SRSO, despite an IBPB being
+   triggered both on entry and VM-Exit, because of the Retbleed selected
+   mitigation config.
+
+ - UNTRAIN_RET_VM still won't actually do anything unless
+   CONFIG_MITIGATION_IBPB_ENTRY is set.
+
+For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
+and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
+X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
+option similar to the one for 'retbleed=ibpb'; thus, re-order the code
+for the RETBLEED_MITIGATION_IBPB option to be less confusing, by doing
+all the feature enabling before disabling the ones that are not needed.
+
+For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
+with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
+effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
+since none of the SRSO compile cruft is required in this configuration.
+Also, check only that the required microcode is present to effectively
+enable the IBPB on VM-Exit.
+
+Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
+to list also all SRSO config settings enabled by this guard.
+
+Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
+Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
+Reported-by: Yosry Ahmed <yosryahmed@google.com>
+Signed-off-by: Patrick Bellasi <derkling@google.com>
+Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig           |    3 ++-
+ arch/x86/kernel/cpu/bugs.c |   20 ++++++++++++++------
+ 2 files changed, 16 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -2506,7 +2506,8 @@ config CPU_IBPB_ENTRY
+       depends on CPU_SUP_AMD && X86_64
+       default y
+       help
+-        Compile the kernel with support for the retbleed=ibpb mitigation.
++        Compile the kernel with support for the retbleed=ibpb and
++        spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
+ config CPU_IBRS_ENTRY
+       bool "Enable IBRS on kernel entry"
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1092,6 +1092,8 @@ do_cmd_auto:
+       case RETBLEED_MITIGATION_IBPB:
+               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++              setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
++              mitigate_smt = true;
+               /*
+                * IBPB on entry already obviates the need for
+@@ -1101,8 +1103,6 @@ do_cmd_auto:
+               setup_clear_cpu_cap(X86_FEATURE_UNRET);
+               setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+-              mitigate_smt = true;
+-
+               /*
+                * There is no need for RSB filling: entry_ibpb() ensures
+                * all predictions, including the RSB, are invalidated,
+@@ -2607,6 +2607,7 @@ static void __init srso_select_mitigatio
+               if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       if (has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++                              setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+                               srso_mitigation = SRSO_MITIGATION_IBPB;
+                               /*
+@@ -2616,6 +2617,13 @@ static void __init srso_select_mitigatio
+                                */
+                               setup_clear_cpu_cap(X86_FEATURE_UNRET);
+                               setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
++
++                              /*
++                               * There is no need for RSB filling: entry_ibpb() ensures
++                               * all predictions, including the RSB, are invalidated,
++                               * regardless of IBPB implementation.
++                               */
++                              setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+@@ -2624,8 +2632,8 @@ static void __init srso_select_mitigatio
+               break;
+       case SRSO_CMD_IBPB_ON_VMEXIT:
+-              if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+-                      if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
++              if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++                      if (has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+                               srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+@@ -2637,9 +2645,9 @@ static void __init srso_select_mitigatio
+                               setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+                       }
+               } else {
+-                      pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
++                      pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+                       goto pred_cmd;
+-                }
++              }
+               break;
+       default:
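To summarize the behavioural change described in the changelog above:
"spec_rstack_overflow=ibpb" now sets both the entry and the VM-Exit IBPB
feature flags, and "ibpb-vmexit" is gated on the IBPB-entry build option
rather than the SRSO one. A simplified standalone model of that selection
logic follows; flag and enum names are stand-ins, not the kernel's API.

/*
 * Simplified model of the SRSO command handling after the patch: the
 * "ibpb" command forces IBPB on both entry and VM-Exit, and both IBPB
 * commands depend on the IBPB-entry build option plus microcode.
 * Names are invented for the sketch; this is not arch/x86 code.
 */
#include <stdbool.h>
#include <stdio.h>

enum srso_cmd { SRSO_CMD_IBPB, SRSO_CMD_IBPB_ON_VMEXIT };

struct features {
	bool entry_ibpb;
	bool ibpb_on_vmexit;
};

static void srso_select(enum srso_cmd cmd, bool ibpb_entry_built_in,
			bool has_microcode, struct features *f)
{
	switch (cmd) {
	case SRSO_CMD_IBPB:
		if (ibpb_entry_built_in && has_microcode) {
			f->entry_ibpb = true;
			f->ibpb_on_vmexit = true;   /* previously missing */
		}
		break;
	case SRSO_CMD_IBPB_ON_VMEXIT:
		if (ibpb_entry_built_in && has_microcode)
			f->ibpb_on_vmexit = true;
		break;
	}
}

int main(void)
{
	struct features f = { false, false };

	srso_select(SRSO_CMD_IBPB, true, true, &f);
	printf("entry_ibpb=%d ibpb_on_vmexit=%d\n",
	       f.entry_ibpb, f.ibpb_on_vmexit);
	return 0;
}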