From: Greg Kroah-Hartman Date: Mon, 24 Feb 2025 14:09:05 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.6.80~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4fbd98d0162dab48001bf0ce2484da21d4bc3644;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch --- diff --git a/queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch b/queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch new file mode 100644 index 0000000000..134f5cec84 --- /dev/null +++ b/queue-6.1/block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch @@ -0,0 +1,195 @@ +From e8b8344de3980709080d86c157d24e7de07d70ad Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Fri, 29 Nov 2024 17:15:09 +0800 +Subject: block, bfq: fix bfqq uaf in bfq_limit_depth() + +From: Yu Kuai + +commit e8b8344de3980709080d86c157d24e7de07d70ad upstream. + +Set new allocated bfqq to bic or remove freed bfqq from bic are both +protected by bfqd->lock, however bfq_limit_depth() is deferencing bfqq +from bic without the lock, this can lead to UAF if the io_context is +shared by multiple tasks. + +For example, test bfq with io_uring can trigger following UAF in v6.6: + +================================================================== +BUG: KASAN: slab-use-after-free in bfqq_group+0x15/0x50 + +Call Trace: + + dump_stack_lvl+0x47/0x80 + print_address_description.constprop.0+0x66/0x300 + print_report+0x3e/0x70 + kasan_report+0xb4/0xf0 + bfqq_group+0x15/0x50 + bfqq_request_over_limit+0x130/0x9a0 + bfq_limit_depth+0x1b5/0x480 + __blk_mq_alloc_requests+0x2b5/0xa00 + blk_mq_get_new_requests+0x11d/0x1d0 + blk_mq_submit_bio+0x286/0xb00 + submit_bio_noacct_nocheck+0x331/0x400 + __block_write_full_folio+0x3d0/0x640 + writepage_cb+0x3b/0xc0 + write_cache_pages+0x254/0x6c0 + write_cache_pages+0x254/0x6c0 + do_writepages+0x192/0x310 + filemap_fdatawrite_wbc+0x95/0xc0 + __filemap_fdatawrite_range+0x99/0xd0 + filemap_write_and_wait_range.part.0+0x4d/0xa0 + blkdev_read_iter+0xef/0x1e0 + io_read+0x1b6/0x8a0 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork_asm+0x1b/0x30 + + +Allocated by task 808602: + kasan_save_stack+0x1e/0x40 + kasan_set_track+0x21/0x30 + __kasan_slab_alloc+0x83/0x90 + kmem_cache_alloc_node+0x1b1/0x6d0 + bfq_get_queue+0x138/0xfa0 + bfq_get_bfqq_handle_split+0xe3/0x2c0 + bfq_init_rq+0x196/0xbb0 + bfq_insert_request.isra.0+0xb5/0x480 + bfq_insert_requests+0x156/0x180 + blk_mq_insert_request+0x15d/0x440 + blk_mq_submit_bio+0x8a4/0xb00 + submit_bio_noacct_nocheck+0x331/0x400 + __blkdev_direct_IO_async+0x2dd/0x330 + blkdev_write_iter+0x39a/0x450 + io_write+0x22a/0x840 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork+0x2d/0x50 + ret_from_fork_asm+0x1b/0x30 + +Freed by task 808589: + kasan_save_stack+0x1e/0x40 + kasan_set_track+0x21/0x30 + kasan_save_free_info+0x27/0x40 + __kasan_slab_free+0x126/0x1b0 + kmem_cache_free+0x10c/0x750 + bfq_put_queue+0x2dd/0x770 + __bfq_insert_request.isra.0+0x155/0x7a0 + bfq_insert_request.isra.0+0x122/0x480 + bfq_insert_requests+0x156/0x180 + blk_mq_dispatch_plug_list+0x528/0x7e0 + blk_mq_flush_plug_list.part.0+0xe5/0x590 + __blk_flush_plug+0x3b/0x90 + blk_finish_plug+0x40/0x60 + 
do_writepages+0x19d/0x310 + filemap_fdatawrite_wbc+0x95/0xc0 + __filemap_fdatawrite_range+0x99/0xd0 + filemap_write_and_wait_range.part.0+0x4d/0xa0 + blkdev_read_iter+0xef/0x1e0 + io_read+0x1b6/0x8a0 + io_issue_sqe+0x87/0x300 + io_wq_submit_work+0xeb/0x390 + io_worker_handle_work+0x24d/0x550 + io_wq_worker+0x27f/0x6c0 + ret_from_fork+0x2d/0x50 + ret_from_fork_asm+0x1b/0x30 + +Fix the problem by protecting bic_to_bfqq() with bfqd->lock. + +CC: Jan Kara +Fixes: 76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup") +Signed-off-by: Yu Kuai +Link: https://lore.kernel.org/r/20241129091509.2227136-1-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Hagar Hemdan +Signed-off-by: Greg Kroah-Hartman +--- + block/bfq-iosched.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -581,23 +581,31 @@ static struct request *bfq_choose_req(st + #define BFQ_LIMIT_INLINE_DEPTH 16 + + #ifdef CONFIG_BFQ_GROUP_IOSCHED +-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) ++static bool bfqq_request_over_limit(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, blk_opf_t opf, ++ unsigned int act_idx, int limit) + { +- struct bfq_data *bfqd = bfqq->bfqd; +- struct bfq_entity *entity = &bfqq->entity; + struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; + struct bfq_entity **entities = inline_entities; +- int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; +- int class_idx = bfqq->ioprio_class - 1; ++ int alloc_depth = BFQ_LIMIT_INLINE_DEPTH; + struct bfq_sched_data *sched_data; ++ struct bfq_entity *entity; ++ struct bfq_queue *bfqq; + unsigned long wsum; + bool ret = false; +- +- if (!entity->on_st_or_in_serv) +- return false; ++ int depth; ++ int level; + + retry: + spin_lock_irq(&bfqd->lock); ++ bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx); ++ if (!bfqq) ++ goto out; ++ ++ entity = &bfqq->entity; ++ if (!entity->on_st_or_in_serv) ++ goto out; ++ + /* +1 for bfqq entity, root cgroup not included */ + depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; + if (depth > alloc_depth) { +@@ -642,7 +650,7 @@ retry: + * class. + */ + wsum = 0; +- for (i = 0; i <= class_idx; i++) { ++ for (i = 0; i <= bfqq->ioprio_class - 1; i++) { + wsum = wsum * IOPRIO_BE_NR + + sched_data->service_tree[i].wsum; + } +@@ -665,7 +673,9 @@ out: + return ret; + } + #else +-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) ++static bool bfqq_request_over_limit(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, blk_opf_t opf, ++ unsigned int act_idx, int limit) + { + return false; + } +@@ -703,8 +713,9 @@ static void bfq_limit_depth(blk_opf_t op + } + + for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { +- struct bfq_queue *bfqq = +- bic_to_bfqq(bic, op_is_sync(opf), act_idx); ++ /* Fast path to check if bfqq is already allocated. */ ++ if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx)) ++ continue; + + /* + * Does queue (or any parent entity) exceed number of +@@ -712,7 +723,7 @@ static void bfq_limit_depth(blk_opf_t op + * limit depth so that it cannot consume more + * available requests and thus starve other entities. 
+ */ +- if (bfqq && bfqq_request_over_limit(bfqq, limit)) { ++ if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { + depth = 1; + break; + } diff --git a/queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch b/queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch new file mode 100644 index 0000000000..71fffc1aae --- /dev/null +++ b/queue-6.1/block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch @@ -0,0 +1,638 @@ +From 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Tue, 3 Jan 2023 15:54:56 +0100 +Subject: block, bfq: split sync bfq_queues on a per-actuator basis +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paolo Valente + +commit 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d upstream. + +Single-LUN multi-actuator SCSI drives, as well as all multi-actuator +SATA drives appear as a single device to the I/O subsystem [1]. Yet +they address commands to different actuators internally, as a function +of Logical Block Addressing (LBAs). A given sector is reachable by +only one of the actuators. For example, Seagate’s Serial Advanced +Technology Attachment (SATA) version contains two actuators and maps +the lower half of the SATA LBA space to the lower actuator and the +upper half to the upper actuator. + +Evidently, to fully utilize actuators, no actuator must be left idle +or underutilized while there is pending I/O for it. The block layer +must somehow control the load of each actuator individually. This +commit lays the ground for allowing BFQ to provide such a per-actuator +control. + +BFQ associates an I/O-request sync bfq_queue with each process doing +synchronous I/O, or with a group of processes, in case of queue +merging. Then BFQ serves one bfq_queue at a time. While in service, a +bfq_queue is emptied in request-position order. Yet the same process, +or group of processes, may generate I/O for different actuators. In +this case, different streams of I/O (each for a different actuator) +get all inserted into the same sync bfq_queue. So there is basically +no individual control on when each stream is served, i.e., on when the +I/O requests of the stream are picked from the bfq_queue and +dispatched to the drive. + +This commit enables BFQ to control the service of each actuator +individually for synchronous I/O, by simply splitting each sync +bfq_queue into N queues, one for each actuator. In other words, a sync +bfq_queue is now associated to a pair (process, actuator). As a +consequence of this split, the per-queue proportional-share policy +implemented by BFQ will guarantee that the sync I/O generated for each +actuator, by each process, receives its fair share of service. + +This is just a preparatory patch. If the I/O of the same process +happens to be sent to different queues, then each of these queues may +undergo queue merging. To handle this event, the bfq_io_cq data +structure must be properly extended. In addition, stable merging must +be disabled to avoid loss of control on individual actuators. Finally, +also async queues must be split. These issues are described in detail +and addressed in next commits. As for this commit, although multiple +per-process bfq_queues are provided, the I/O of each process or group +of processes is still sent to only one queue, regardless of the +actuator the I/O is for. The forwarding to distinct bfq_queues will be +enabled after addressing the above issues. 
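The LBA-to-actuator mapping described in the commit message above can be made concrete with a small stand-alone sketch. This is an editorial illustration only, not code from this patch or from the kernel: the actuator_range structure, the nr_ranges parameter and the half-split example table are all hypothetical. In the patch itself bfq_actuator_index() still returns 0 unconditionally and bfqd->num_actuators is set to 1, so the per-actuator split is wired up but not yet active.

/*
 * Illustrative sketch (hypothetical types and values): map a request's
 * starting sector to an actuator index by looking it up in a table of
 * per-actuator LBA ranges, as in the dual-actuator SATA example from
 * the commit message (lower half of the LBA space -> actuator 0,
 * upper half -> actuator 1).
 */
#include <stdio.h>

struct actuator_range {
	unsigned long long start;	/* first sector served by this actuator */
	unsigned long long end;		/* last sector served (inclusive) */
};

/* Return the index of the actuator whose range contains @sector. */
static unsigned int actuator_index(const struct actuator_range *ranges,
				   unsigned int nr_ranges,
				   unsigned long long sector)
{
	unsigned int i;

	for (i = 0; i < nr_ranges; i++)
		if (sector >= ranges[i].start && sector <= ranges[i].end)
			return i;

	return 0;	/* out-of-table sectors fall back to the first actuator */
}

int main(void)
{
	/* Hypothetical drive splitting a 2M-sector LBA space in half. */
	struct actuator_range ranges[2] = {
		{ .start = 0,       .end =  999999 },
		{ .start = 1000000, .end = 1999999 },
	};

	printf("%u\n", actuator_index(ranges, 2, 42));      /* prints 0 */
	printf("%u\n", actuator_index(ranges, 2, 1500000)); /* prints 1 */
	return 0;
}

Such a lookup is only meant to show where a per-request actuator index would come from; the diff below keeps returning index 0 until the follow-up commits mentioned in the message complete the multi-actuator machinery.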
+ +[1] https://www.linaro.org/blog/budget-fair-queueing-bfq-linux-io-scheduler-optimizations-for-multi-actuator-sata-hard-drives/ + +Reviewed-by: Damien Le Moal +Signed-off-by: Gabriele Felici +Signed-off-by: Carmine Zaccagnino +Signed-off-by: Paolo Valente +Link: https://lore.kernel.org/r/20230103145503.71712-2-paolo.valente@linaro.org +Signed-off-by: Jens Axboe +Stable-dep-of: e8b8344de398 ("block, bfq: fix bfqq uaf in bfq_limit_depth()") +[Hagar: needed contextual fixes] +Signed-off-by: Hagar Hemdan +Signed-off-by: Greg Kroah-Hartman +--- + block/bfq-cgroup.c | 97 ++++++++++++++++--------------- + block/bfq-iosched.c | 160 ++++++++++++++++++++++++++++++++++------------------ + block/bfq-iosched.h | 51 +++++++++++++--- + 3 files changed, 197 insertions(+), 111 deletions(-) + +--- a/block/bfq-cgroup.c ++++ b/block/bfq-cgroup.c +@@ -704,6 +704,46 @@ void bfq_bfqq_move(struct bfq_data *bfqd + bfq_put_queue(bfqq); + } + ++static void bfq_sync_bfqq_move(struct bfq_data *bfqd, ++ struct bfq_queue *sync_bfqq, ++ struct bfq_io_cq *bic, ++ struct bfq_group *bfqg, ++ unsigned int act_idx) ++{ ++ struct bfq_queue *bfqq; ++ ++ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { ++ /* We are the only user of this bfqq, just move it */ ++ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) ++ bfq_bfqq_move(bfqd, sync_bfqq, bfqg); ++ return; ++ } ++ ++ /* ++ * The queue was merged to a different queue. Check ++ * that the merge chain still belongs to the same ++ * cgroup. ++ */ ++ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) ++ if (bfqq->entity.sched_data != &bfqg->sched_data) ++ break; ++ if (bfqq) { ++ /* ++ * Some queue changed cgroup so the merge is not valid ++ * anymore. We cannot easily just cancel the merge (by ++ * clearing new_bfqq) as there may be other processes ++ * using this queue and holding refs to all queues ++ * below sync_bfqq->new_bfqq. Similarly if the merge ++ * already happened, we need to detach from bfqq now ++ * so that we cannot merge bio to a request from the ++ * old cgroup. ++ */ ++ bfq_put_cooperator(sync_bfqq); ++ bfq_release_process_ref(bfqd, sync_bfqq); ++ bic_set_bfqq(bic, NULL, true, act_idx); ++ } ++} ++ + /** + * __bfq_bic_change_cgroup - move @bic to @bfqg. + * @bfqd: the queue descriptor. 
+@@ -714,60 +754,25 @@ void bfq_bfqq_move(struct bfq_data *bfqd + * sure that the reference to cgroup is valid across the call (see + * comments in bfq_bic_update_cgroup on this issue) + */ +-static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd, ++static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_group *bfqg) + { +- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); +- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true); +- struct bfq_entity *entity; +- +- if (async_bfqq) { +- entity = &async_bfqq->entity; ++ unsigned int act_idx; + +- if (entity->sched_data != &bfqg->sched_data) { +- bic_set_bfqq(bic, NULL, false); ++ for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { ++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); ++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); ++ ++ if (async_bfqq && ++ async_bfqq->entity.sched_data != &bfqg->sched_data) { ++ bic_set_bfqq(bic, NULL, false, act_idx); + bfq_release_process_ref(bfqd, async_bfqq); + } +- } + +- if (sync_bfqq) { +- if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { +- /* We are the only user of this bfqq, just move it */ +- if (sync_bfqq->entity.sched_data != &bfqg->sched_data) +- bfq_bfqq_move(bfqd, sync_bfqq, bfqg); +- } else { +- struct bfq_queue *bfqq; +- +- /* +- * The queue was merged to a different queue. Check +- * that the merge chain still belongs to the same +- * cgroup. +- */ +- for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) +- if (bfqq->entity.sched_data != +- &bfqg->sched_data) +- break; +- if (bfqq) { +- /* +- * Some queue changed cgroup so the merge is +- * not valid anymore. We cannot easily just +- * cancel the merge (by clearing new_bfqq) as +- * there may be other processes using this +- * queue and holding refs to all queues below +- * sync_bfqq->new_bfqq. Similarly if the merge +- * already happened, we need to detach from +- * bfqq now so that we cannot merge bio to a +- * request from the old cgroup. +- */ +- bfq_put_cooperator(sync_bfqq); +- bic_set_bfqq(bic, NULL, true); +- bfq_release_process_ref(bfqd, sync_bfqq); +- } +- } ++ if (sync_bfqq) ++ bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); + } +- +- return bfqg; + } + + void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -377,16 +377,23 @@ static const unsigned long bfq_late_stab + #define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0])) + #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) + +-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) ++struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync, ++ unsigned int actuator_idx) + { +- return bic->bfqq[is_sync]; ++ if (is_sync) ++ return bic->bfqq[1][actuator_idx]; ++ ++ return bic->bfqq[0][actuator_idx]; + } + + static void bfq_put_stable_ref(struct bfq_queue *bfqq); + +-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) ++void bic_set_bfqq(struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq, ++ bool is_sync, ++ unsigned int actuator_idx) + { +- struct bfq_queue *old_bfqq = bic->bfqq[is_sync]; ++ struct bfq_queue *old_bfqq = bic->bfqq[is_sync][actuator_idx]; + + /* Clear bic pointer if bfqq is detached from this bic */ + if (old_bfqq && old_bfqq->bic == bic) +@@ -405,7 +412,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic, + * we cancel the stable merge if + * bic->stable_merge_bfqq == bfqq. 
+ */ +- bic->bfqq[is_sync] = bfqq; ++ if (is_sync) ++ bic->bfqq[1][actuator_idx] = bfqq; ++ else ++ bic->bfqq[0][actuator_idx] = bfqq; + + if (bfqq && bic->stable_merge_bfqq == bfqq) { + /* +@@ -680,9 +690,9 @@ static void bfq_limit_depth(blk_opf_t op + { + struct bfq_data *bfqd = data->q->elevator->elevator_data; + struct bfq_io_cq *bic = bfq_bic_lookup(data->q); +- struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL; + int depth; + unsigned limit = data->q->nr_requests; ++ unsigned int act_idx; + + /* Sync reads have full depth available */ + if (op_is_sync(opf) && !op_is_write(opf)) { +@@ -692,14 +702,21 @@ static void bfq_limit_depth(blk_opf_t op + limit = (limit * depth) >> bfqd->full_depth_shift; + } + +- /* +- * Does queue (or any parent entity) exceed number of requests that +- * should be available to it? Heavily limit depth so that it cannot +- * consume more available requests and thus starve other entities. +- */ +- if (bfqq && bfqq_request_over_limit(bfqq, limit)) +- depth = 1; ++ for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { ++ struct bfq_queue *bfqq = ++ bic_to_bfqq(bic, op_is_sync(opf), act_idx); + ++ /* ++ * Does queue (or any parent entity) exceed number of ++ * requests that should be available to it? Heavily ++ * limit depth so that it cannot consume more ++ * available requests and thus starve other entities. ++ */ ++ if (bfqq && bfqq_request_over_limit(bfqq, limit)) { ++ depth = 1; ++ break; ++ } ++ } + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", + __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); + if (depth) +@@ -1820,6 +1837,18 @@ static bool bfq_bfqq_higher_class_or_wei + return bfqq_weight > in_serv_weight; + } + ++/* ++ * Get the index of the actuator that will serve bio. ++ */ ++static unsigned int bfq_actuator_index(struct bfq_data *bfqd, struct bio *bio) ++{ ++ /* ++ * Multi-actuator support not complete yet, so always return 0 ++ * for the moment (to keep incomplete mechanisms off). ++ */ ++ return 0; ++} ++ + static bool bfq_better_to_idle(struct bfq_queue *bfqq); + + static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, +@@ -2150,7 +2179,7 @@ static void bfq_check_waker(struct bfq_d + * We reset waker detection logic also if too much time has passed + * since the first detection. If wakeups are rare, pointless idling + * doesn't hurt throughput that much. The condition below makes sure +- * we do not uselessly idle blocking waker in more than 1/64 cases. ++ * we do not uselessly idle blocking waker in more than 1/64 cases. 
+ */ + if (bfqd->last_completed_rq_bfqq != + bfqq->tentative_waker_bfqq || +@@ -2486,7 +2515,8 @@ static bool bfq_bio_merge(struct request + */ + bfq_bic_update_cgroup(bic, bio); + +- bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); ++ bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf), ++ bfq_actuator_index(bfqd, bio)); + } else { + bfqd->bio_bfqq = NULL; + } +@@ -3188,7 +3218,7 @@ static struct bfq_queue *bfq_merge_bfqqs + /* + * Merge queues (that is, let bic redirect its requests to new_bfqq) + */ +- bic_set_bfqq(bic, new_bfqq, true); ++ bic_set_bfqq(bic, new_bfqq, true, bfqq->actuator_idx); + bfq_mark_bfqq_coop(new_bfqq); + /* + * new_bfqq now belongs to at least two bics (it is a shared queue): +@@ -4818,11 +4848,8 @@ check_queue: + */ + if (bfq_bfqq_wait_request(bfqq) || + (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { +- struct bfq_queue *async_bfqq = +- bfqq->bic && bfqq->bic->bfqq[0] && +- bfq_bfqq_busy(bfqq->bic->bfqq[0]) && +- bfqq->bic->bfqq[0]->next_rq ? +- bfqq->bic->bfqq[0] : NULL; ++ unsigned int act_idx = bfqq->actuator_idx; ++ struct bfq_queue *async_bfqq = NULL; + struct bfq_queue *blocked_bfqq = + !hlist_empty(&bfqq->woken_list) ? + container_of(bfqq->woken_list.first, +@@ -4830,6 +4857,10 @@ check_queue: + woken_list_node) + : NULL; + ++ if (bfqq->bic && bfqq->bic->bfqq[0][act_idx] && ++ bfq_bfqq_busy(bfqq->bic->bfqq[0][act_idx]) && ++ bfqq->bic->bfqq[0][act_idx]->next_rq) ++ async_bfqq = bfqq->bic->bfqq[0][act_idx]; + /* + * The next four mutually-exclusive ifs decide + * whether to try injection, and choose the queue to +@@ -4914,7 +4945,7 @@ check_queue: + icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic && + bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <= + bfq_bfqq_budget_left(async_bfqq)) +- bfqq = bfqq->bic->bfqq[0]; ++ bfqq = bfqq->bic->bfqq[0][act_idx]; + else if (bfqq->waker_bfqq && + bfq_bfqq_busy(bfqq->waker_bfqq) && + bfqq->waker_bfqq->next_rq && +@@ -5375,48 +5406,54 @@ static void bfq_exit_bfqq(struct bfq_dat + bfq_release_process_ref(bfqd, bfqq); + } + +-static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) ++static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync, ++ unsigned int actuator_idx) + { +- struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync); ++ struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, actuator_idx); + struct bfq_data *bfqd; + + if (bfqq) + bfqd = bfqq->bfqd; /* NULL if scheduler already exited */ + + if (bfqq && bfqd) { +- unsigned long flags; +- +- spin_lock_irqsave(&bfqd->lock, flags); +- bic_set_bfqq(bic, NULL, is_sync); ++ bic_set_bfqq(bic, NULL, is_sync, actuator_idx); + bfq_exit_bfqq(bfqd, bfqq); +- spin_unlock_irqrestore(&bfqd->lock, flags); + } + } + + static void bfq_exit_icq(struct io_cq *icq) + { + struct bfq_io_cq *bic = icq_to_bic(icq); ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ unsigned long flags; ++ unsigned int act_idx; ++ /* ++ * If bfqd and thus bfqd->num_actuators is not available any ++ * longer, then cycle over all possible per-actuator bfqqs in ++ * next loop. We rely on bic being zeroed on creation, and ++ * therefore on its unused per-actuator fields being NULL. ++ */ ++ unsigned int num_actuators = BFQ_MAX_ACTUATORS; + +- if (bic->stable_merge_bfqq) { +- struct bfq_data *bfqd = bic->stable_merge_bfqq->bfqd; ++ /* ++ * bfqd is NULL if scheduler already exited, and in that case ++ * this is the last time these queues are accessed. 
++ */ ++ if (bfqd) { ++ spin_lock_irqsave(&bfqd->lock, flags); ++ num_actuators = bfqd->num_actuators; ++ } + +- /* +- * bfqd is NULL if scheduler already exited, and in +- * that case this is the last time bfqq is accessed. +- */ +- if (bfqd) { +- unsigned long flags; ++ if (bic->stable_merge_bfqq) ++ bfq_put_stable_ref(bic->stable_merge_bfqq); + +- spin_lock_irqsave(&bfqd->lock, flags); +- bfq_put_stable_ref(bic->stable_merge_bfqq); +- spin_unlock_irqrestore(&bfqd->lock, flags); +- } else { +- bfq_put_stable_ref(bic->stable_merge_bfqq); +- } ++ for (act_idx = 0; act_idx < num_actuators; act_idx++) { ++ bfq_exit_icq_bfqq(bic, true, act_idx); ++ bfq_exit_icq_bfqq(bic, false, act_idx); + } + +- bfq_exit_icq_bfqq(bic, true); +- bfq_exit_icq_bfqq(bic, false); ++ if (bfqd) ++ spin_unlock_irqrestore(&bfqd->lock, flags); + } + + /* +@@ -5493,25 +5530,27 @@ static void bfq_check_ioprio_change(stru + + bic->ioprio = ioprio; + +- bfqq = bic_to_bfqq(bic, false); ++ bfqq = bic_to_bfqq(bic, false, bfq_actuator_index(bfqd, bio)); + if (bfqq) { + struct bfq_queue *old_bfqq = bfqq; + + bfqq = bfq_get_queue(bfqd, bio, false, bic, true); +- bic_set_bfqq(bic, bfqq, false); ++ bic_set_bfqq(bic, bfqq, false, bfq_actuator_index(bfqd, bio)); + bfq_release_process_ref(bfqd, old_bfqq); + } + +- bfqq = bic_to_bfqq(bic, true); ++ bfqq = bic_to_bfqq(bic, true, bfq_actuator_index(bfqd, bio)); + if (bfqq) + bfq_set_next_ioprio_data(bfqq, bic); + } + + static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +- struct bfq_io_cq *bic, pid_t pid, int is_sync) ++ struct bfq_io_cq *bic, pid_t pid, int is_sync, ++ unsigned int act_idx) + { + u64 now_ns = ktime_get_ns(); + ++ bfqq->actuator_idx = act_idx; + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); + INIT_HLIST_NODE(&bfqq->burst_list_node); +@@ -5762,7 +5801,7 @@ static struct bfq_queue *bfq_get_queue(s + + if (bfqq) { + bfq_init_bfqq(bfqd, bfqq, bic, current->pid, +- is_sync); ++ is_sync, bfq_actuator_index(bfqd, bio)); + bfq_init_entity(&bfqq->entity, bfqg); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { +@@ -6078,7 +6117,8 @@ static bool __bfq_insert_request(struct + * then complete the merge and redirect it to + * new_bfqq. + */ +- if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) { ++ if (bic_to_bfqq(RQ_BIC(rq), true, ++ bfq_actuator_index(bfqd, rq->bio)) == bfqq) { + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq); + } +@@ -6632,7 +6672,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, st + return bfqq; + } + +- bic_set_bfqq(bic, NULL, true); ++ bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx); + + bfq_put_cooperator(bfqq); + +@@ -6646,7 +6686,8 @@ static struct bfq_queue *bfq_get_bfqq_ha + bool split, bool is_sync, + bool *new_queue) + { +- struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync); ++ unsigned int act_idx = bfq_actuator_index(bfqd, bio); ++ struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, act_idx); + + if (likely(bfqq && bfqq != &bfqd->oom_bfqq)) + return bfqq; +@@ -6658,7 +6699,7 @@ static struct bfq_queue *bfq_get_bfqq_ha + bfq_put_queue(bfqq); + bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, split); + +- bic_set_bfqq(bic, bfqq, is_sync); ++ bic_set_bfqq(bic, bfqq, is_sync, act_idx); + if (split && is_sync) { + if ((bic->was_in_burst_list && bfqd->large_burst) || + bic->saved_in_large_burst) +@@ -7139,8 +7180,10 @@ static int bfq_init_queue(struct request + * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. 
+ * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. ++ * Set zero as actuator index: we will pretend that ++ * all I/O requests are for the same actuator. + */ +- bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0); ++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0, 0); + bfqd->oom_bfqq.ref++; + bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO; + bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE; +@@ -7159,6 +7202,13 @@ static int bfq_init_queue(struct request + + bfqd->queue = q; + ++ /* ++ * Multi-actuator support not complete yet, unconditionally ++ * set to only one actuator for the moment (to keep incomplete ++ * mechanisms off). ++ */ ++ bfqd->num_actuators = 1; ++ + INIT_LIST_HEAD(&bfqd->dispatch); + + hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC, +--- a/block/bfq-iosched.h ++++ b/block/bfq-iosched.h +@@ -33,6 +33,14 @@ + */ + #define BFQ_SOFTRT_WEIGHT_FACTOR 100 + ++/* ++ * Maximum number of actuators supported. This constant is used simply ++ * to define the size of the static array that will contain ++ * per-actuator data. The current value is hopefully a good upper ++ * bound to the possible number of actuators of any actual drive. ++ */ ++#define BFQ_MAX_ACTUATORS 8 ++ + struct bfq_entity; + + /** +@@ -225,12 +233,14 @@ struct bfq_ttime { + * struct bfq_queue - leaf schedulable entity. + * + * A bfq_queue is a leaf request queue; it can be associated with an +- * io_context or more, if it is async or shared between cooperating +- * processes. @cgroup holds a reference to the cgroup, to be sure that it +- * does not disappear while a bfqq still references it (mostly to avoid +- * races between request issuing and task migration followed by cgroup +- * destruction). +- * All the fields are protected by the queue lock of the containing bfqd. ++ * io_context or more, if it is async or shared between cooperating ++ * processes. Besides, it contains I/O requests for only one actuator ++ * (an io_context is associated with a different bfq_queue for each ++ * actuator it generates I/O for). @cgroup holds a reference to the ++ * cgroup, to be sure that it does not disappear while a bfqq still ++ * references it (mostly to avoid races between request issuing and ++ * task migration followed by cgroup destruction). All the fields are ++ * protected by the queue lock of the containing bfqd. + */ + struct bfq_queue { + /* reference counter */ +@@ -395,6 +405,9 @@ struct bfq_queue { + * the woken queues when this queue exits. + */ + struct hlist_head woken_list; ++ ++ /* index of the actuator this queue is associated with */ ++ unsigned int actuator_idx; + }; + + /** +@@ -403,8 +416,17 @@ struct bfq_queue { + struct bfq_io_cq { + /* associated io_cq structure */ + struct io_cq icq; /* must be the first member */ +- /* array of two process queues, the sync and the async */ +- struct bfq_queue *bfqq[2]; ++ /* ++ * Matrix of associated process queues: first row for async ++ * queues, second row sync queues. Each row contains one ++ * column for each actuator. An I/O request generated by the ++ * process is inserted into the queue pointed by bfqq[i][j] if ++ * the request is to be served by the j-th actuator of the ++ * drive, where i==0 or i==1, depending on whether the request ++ * is async or sync. So there is a distinct queue for each ++ * actuator. 
++ */ ++ struct bfq_queue *bfqq[2][BFQ_MAX_ACTUATORS]; + /* per (request_queue, blkcg) ioprio */ + int ioprio; + #ifdef CONFIG_BFQ_GROUP_IOSCHED +@@ -768,6 +790,13 @@ struct bfq_data { + */ + unsigned int word_depths[2][2]; + unsigned int full_depth_shift; ++ ++ /* ++ * Number of independent actuators. This is equal to 1 in ++ * case of single-actuator drives. ++ */ ++ unsigned int num_actuators; ++ + }; + + enum bfqq_state_flags { +@@ -964,8 +993,10 @@ struct bfq_group { + + extern const int bfq_timeout; + +-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); +-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); ++struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync, ++ unsigned int actuator_idx); ++void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync, ++ unsigned int actuator_idx); + struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); + void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); + void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, diff --git a/queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch b/queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch new file mode 100644 index 0000000000..12f44477ae --- /dev/null +++ b/queue-6.1/media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch @@ -0,0 +1,48 @@ +From 9be85491619f1953b8a29590ca630be571941ffa Mon Sep 17 00:00:00 2001 +From: Yunfei Dong +Date: Thu, 13 Jun 2024 17:33:55 +0800 +Subject: media: mediatek: vcodec: Fix H264 multi stateless decoder smatch warning + +From: Yunfei Dong + +commit 9be85491619f1953b8a29590ca630be571941ffa upstream. + +Fix a smatch static checker warning on vdec_h264_req_multi_if.c. +Which leads to a kernel crash when fb is NULL. + +Fixes: 397edc703a10 ("media: mediatek: vcodec: add h264 decoder driver for mt8186") +Signed-off-by: Yunfei Dong +Reviewed-by: AngeloGioacchino Del Regno +Signed-off-by: Sebastian Fricke +Signed-off-by: Hans Verkuil +[ drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c + is renamed from drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c + since 0934d3759615 ("media: mediatek: vcodec: separate decoder and encoder"). + The path is changed accordingly to apply the patch on 6.1.y. ] +Signed-off-by: Wenshan Lan +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c ++++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c +@@ -729,11 +729,16 @@ static int vdec_h264_slice_single_decode + return vpu_dec_reset(vpu); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); ++ if (!fb) { ++ mtk_vcodec_err(inst, "fb buffer is NULL"); ++ return -ENOMEM; ++ } ++ + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + +- y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; +- c_fb_dma = fb ? 
(u64)fb->base_c.dma_addr : 0; ++ y_fb_dma = fb->base_y.dma_addr; ++ c_fb_dma = fb->base_c.dma_addr; + mtk_vcodec_debug(inst, "[h264-dec] [%d] y_dma=%llx c_dma=%llx", + inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma); + diff --git a/queue-6.1/series b/queue-6.1/series index 575a051d85..a588a8e5d2 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -87,3 +87,7 @@ smb-client-add-check-for-next_buffer-in-receive_encrypted_standard.patch edac-qcom-correct-interrupt-enable-register-configuration.patch ftrace-correct-preemption-accounting-for-function-tracing.patch ftrace-do-not-add-duplicate-entries-in-subops-manager-ops.patch +x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch +block-bfq-split-sync-bfq_queues-on-a-per-actuator-basis.patch +block-bfq-fix-bfqq-uaf-in-bfq_limit_depth.patch +media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch diff --git a/queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch b/queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch new file mode 100644 index 0000000000..10da975199 --- /dev/null +++ b/queue-6.1/x86-cpu-kvm-srso-fix-possible-missing-ibpb-on-vm-exit.patch @@ -0,0 +1,148 @@ +From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001 +From: Patrick Bellasi +Date: Wed, 5 Feb 2025 14:04:41 +0000 +Subject: x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit + +From: Patrick Bellasi + +commit 318e8c339c9a0891c389298bb328ed0762a9935e upstream. + +In [1] the meaning of the synthetic IBPB flags has been redefined for a +better separation of concerns: + - ENTRY_IBPB -- issue IBPB on entry only + - IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only +and the Retbleed mitigations have been updated to match this new +semantics. + +Commit [2] was merged shortly before [1], and their interaction was not +handled properly. This resulted in IBPB not being triggered on VM-Exit +in all SRSO mitigation configs requesting an IBPB there. + +Specifically, an IBPB on VM-Exit is triggered only when +X86_FEATURE_IBPB_ON_VMEXIT is set. However: + + - X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb", + because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence, + an IBPB is triggered on entry but the expected IBPB on VM-exit is + not. + + - X86_FEATURE_IBPB_ON_VMEXIT is not set also when + "spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is + already set. + + That's because before [1] this was effectively redundant. Hence, e.g. + a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly + reports the machine still vulnerable to SRSO, despite an IBPB being + triggered both on entry and VM-Exit, because of the Retbleed selected + mitigation config. + + - UNTRAIN_RET_VM won't still actually do anything unless + CONFIG_MITIGATION_IBPB_ENTRY is set. + +For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit +and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by +X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation +option similar to the one for 'retbleed=ibpb', thus re-order the code +for the RETBLEED_MITIGATION_IBPB option to be less confusing by having +all features enabling before the disabling of the not needed ones. + +For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting +with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is +effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard, +since none of the SRSO compile cruft is required in this configuration. 
+Also, check only that the required microcode is present to effectively +enabled the IBPB on VM-Exit. + +Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY +to list also all SRSO config settings enabled by this guard. + +Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1] +Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2] +Reported-by: Yosry Ahmed +Signed-off-by: Patrick Bellasi +Reviewed-by: Borislav Petkov (AMD) +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Kconfig | 3 ++- + arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++------ + 2 files changed, 16 insertions(+), 7 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2506,7 +2506,8 @@ config CPU_IBPB_ENTRY + depends on CPU_SUP_AMD && X86_64 + default y + help +- Compile the kernel with support for the retbleed=ibpb mitigation. ++ Compile the kernel with support for the retbleed=ibpb and ++ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations. + + config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1092,6 +1092,8 @@ do_cmd_auto: + + case RETBLEED_MITIGATION_IBPB: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); ++ mitigate_smt = true; + + /* + * IBPB on entry already obviates the need for +@@ -1101,8 +1103,6 @@ do_cmd_auto: + setup_clear_cpu_cap(X86_FEATURE_UNRET); + setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + +- mitigate_smt = true; +- + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, +@@ -2607,6 +2607,7 @@ static void __init srso_select_mitigatio + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB; + + /* +@@ -2616,6 +2617,13 @@ static void __init srso_select_mitigatio + */ + setup_clear_cpu_cap(X86_FEATURE_UNRET); + setup_clear_cpu_cap(X86_FEATURE_RETHUNK); ++ ++ /* ++ * There is no need for RSB filling: entry_ibpb() ensures ++ * all predictions, including the RSB, are invalidated, ++ * regardless of IBPB implementation. ++ */ ++ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); + } + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); +@@ -2624,8 +2632,8 @@ static void __init srso_select_mitigatio + break; + + case SRSO_CMD_IBPB_ON_VMEXIT: +- if (IS_ENABLED(CONFIG_CPU_SRSO)) { +- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { ++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + +@@ -2637,9 +2645,9 @@ static void __init srso_select_mitigatio + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); + } + } else { +- pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); ++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto pred_cmd; +- } ++ } + break; + + default: