From: Sasha Levin Date: Sun, 10 Mar 2024 02:31:47 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v6.8.1~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=80fae9c31bdb2f889902960d8d877a0623da9bd3;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch b/queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch new file mode 100644 index 00000000000..6bee69b4d90 --- /dev/null +++ b/queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch @@ -0,0 +1,42 @@ +From cb6503e46264caaec048dda33ad58268a5f559bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 10:11:30 +0100 +Subject: ASoC: codecs: wcd938x: fix headphones volume controls + +From: Johan Hovold + +[ Upstream commit 4d0e8bdfa4a57099dc7230952a460903f2e2f8de ] + +The lowest headphones volume setting does not mute so the leave the TLV +mute flag unset. + +This is specifically needed to let the sound server use the lowest gain +setting. + +Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR") +Cc: # 6.5 +Cc: Srinivas Kandagatla +Signed-off-by: Johan Hovold +Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/wcd938x.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c +index e80be4e4fa8b4..555b74e7172d8 100644 +--- a/sound/soc/codecs/wcd938x.c ++++ b/sound/soc/codecs/wcd938x.c +@@ -210,7 +210,7 @@ struct wcd938x_priv { + }; + + static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); +-static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); ++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); + static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); + + struct wcd938x_mbhc_zdet_param { +-- +2.43.0 + diff --git a/queue-6.1/blk-iocost-disable-writeback-throttling.patch b/queue-6.1/blk-iocost-disable-writeback-throttling.patch new file mode 100644 index 00000000000..72255201cd8 --- /dev/null +++ b/queue-6.1/blk-iocost-disable-writeback-throttling.patch @@ -0,0 +1,46 @@ +From c287453564ed11a8d05e35a279e773fa882d33a3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Oct 2022 17:40:32 +0800 +Subject: blk-iocost: disable writeback throttling + +From: Yu Kuai + +[ Upstream commit 8796acbc9a0eceeddd99eaef833bdda1241d39b9 ] + +Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") disable +wbt for bfq, because different write-throttling heuristics should not +work together. + +For the same reason, wbt and iocost should not work together as well, +unless admin really want to do that, dispite that performance is +affected. 
+ +Signed-off-by: Yu Kuai +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20221012094035.390056-2-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index e6557024e3da8..3788774a7b729 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -3281,9 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, + blk_stat_enable_accounting(disk->queue); + blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); + ioc->enabled = true; ++ wbt_disable_default(disk->queue); + } else { + blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); + ioc->enabled = false; ++ wbt_enable_default(disk->queue); + } + + if (user) { +-- +2.43.0 + diff --git a/queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch b/queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch new file mode 100644 index 00000000000..788ce875f10 --- /dev/null +++ b/queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch @@ -0,0 +1,138 @@ +From 4d92df6c36fe4a84ee71df9bbf00ad1bf65633f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Feb 2023 03:16:54 -0800 +Subject: blk-iocost: Pass gendisk to ioc_refresh_params + +From: Breno Leitao + +[ Upstream commit e33b93650fc5364f773985a3e961e24349330d97 ] + +Current kernel (d2980d8d826554fa6981d621e569a453787472f8) crashes +when blk_iocost_init for `nvme1` disk. + + BUG: kernel NULL pointer dereference, address: 0000000000000050 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + + blk_iocost_init (include/asm-generic/qspinlock.h:128 + include/linux/spinlock.h:203 + include/linux/spinlock_api_smp.h:158 + include/linux/spinlock.h:400 + block/blk-iocost.c:2884) + ioc_qos_write (block/blk-iocost.c:3198) + ? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566) + ? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311) + ? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491) + ? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46 + arch/x86/include/asm/uaccess_64.h:52 + lib/iov_iter.c:183 lib/iov_iter.c:628) + ? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693) + cgroup_file_write (kernel/cgroup/cgroup.c:4061) + kernfs_fop_write_iter (fs/kernfs/file.c:334) + vfs_write (include/linux/fs.h:1849 fs/read_write.c:491 + fs/read_write.c:584) + ksys_write (fs/read_write.c:637) + do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) + +This happens because ioc_refresh_params() is being called without +a properly initialized ioc->rqos, which is happening later in the callee +side. + +ioc_refresh_params() -> ioc_autop_idx() tries to access +ioc->rqos.disk->queue but ioc->rqos.disk is NULL, causing the BUG above. + +Create function, called ioc_refresh_params_disk(), that is similar to +ioc_refresh_params() but where the "struct gendisk" could be passed as +an explicit argument. This function will be called when ioc->rqos.disk +is not initialized. 
+ +Fixes: ce57b558604e ("blk-rq-qos: make rq_qos_add and rq_qos_del more useful") + +Signed-off-by: Breno Leitao +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230228111654.1778120-1-leitao@debian.org +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 26 ++++++++++++++++++++------ + 1 file changed, 20 insertions(+), 6 deletions(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index ab5830ba23e0f..0d4bc9d8f2cac 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -801,7 +801,11 @@ static void ioc_refresh_period_us(struct ioc *ioc) + ioc_refresh_margins(ioc); + } + +-static int ioc_autop_idx(struct ioc *ioc) ++/* ++ * ioc->rqos.disk isn't initialized when this function is called from ++ * the init path. ++ */ ++static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk) + { + int idx = ioc->autop_idx; + const struct ioc_params *p = &autop[idx]; +@@ -809,11 +813,11 @@ static int ioc_autop_idx(struct ioc *ioc) + u64 now_ns; + + /* rotational? */ +- if (!blk_queue_nonrot(ioc->rqos.disk->queue)) ++ if (!blk_queue_nonrot(disk->queue)) + return AUTOP_HDD; + + /* handle SATA SSDs w/ broken NCQ */ +- if (blk_queue_depth(ioc->rqos.disk->queue) == 1) ++ if (blk_queue_depth(disk->queue) == 1) + return AUTOP_SSD_QD1; + + /* use one of the normal ssd sets */ +@@ -902,14 +906,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc) + &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]); + } + +-static bool ioc_refresh_params(struct ioc *ioc, bool force) ++/* ++ * struct gendisk is required as an argument because ioc->rqos.disk ++ * is not properly initialized when called from the init path. ++ */ ++static bool ioc_refresh_params_disk(struct ioc *ioc, bool force, ++ struct gendisk *disk) + { + const struct ioc_params *p; + int idx; + + lockdep_assert_held(&ioc->lock); + +- idx = ioc_autop_idx(ioc); ++ idx = ioc_autop_idx(ioc, disk); + p = &autop[idx]; + + if (idx == ioc->autop_idx && !force) +@@ -938,6 +947,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force) + return true; + } + ++static bool ioc_refresh_params(struct ioc *ioc, bool force) ++{ ++ return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk); ++} ++ + /* + * When an iocg accumulates too much vtime or gets deactivated, we throw away + * some vtime, which lowers the overall device utilization. As the exact amount +@@ -2884,7 +2898,7 @@ static int blk_iocost_init(struct gendisk *disk) + + spin_lock_irq(&ioc->lock); + ioc->autop_idx = AUTOP_INVALID; +- ioc_refresh_params(ioc, true); ++ ioc_refresh_params_disk(ioc, true, disk); + spin_unlock_irq(&ioc->lock); + + /* +-- +2.43.0 + diff --git a/queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch b/queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch new file mode 100644 index 00000000000..594a636199c --- /dev/null +++ b/queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch @@ -0,0 +1,103 @@ +From aa235b97093a21478dc99fd9638fc62d88af5f17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:55 +0100 +Subject: blk-rq-qos: constify rq_qos_ops + +From: Christoph Hellwig + +[ Upstream commit 3963d84df7974b6687cb34bce3b9e0b2686f839c ] + +These op vectors are constant, so mark them const. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Andreas Herrmann +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-15-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 2 +- + block/blk-iolatency.c | 2 +- + block/blk-rq-qos.c | 2 +- + block/blk-rq-qos.h | 4 ++-- + block/blk-wbt.c | 2 +- + 5 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index a8a7d2ce927b9..78958c5bece08 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -2836,7 +2836,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos) + kfree(ioc); + } + +-static struct rq_qos_ops ioc_rqos_ops = { ++static const struct rq_qos_ops ioc_rqos_ops = { + .throttle = ioc_rqos_throttle, + .merge = ioc_rqos_merge, + .done_bio = ioc_rqos_done_bio, +diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c +index c64cfec34ac37..b0f8550f87cd2 100644 +--- a/block/blk-iolatency.c ++++ b/block/blk-iolatency.c +@@ -651,7 +651,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) + kfree(blkiolat); + } + +-static struct rq_qos_ops blkcg_iolatency_ops = { ++static const struct rq_qos_ops blkcg_iolatency_ops = { + .throttle = blkcg_iolatency_throttle, + .done_bio = blkcg_iolatency_done_bio, + .exit = blkcg_iolatency_exit, +diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c +index 14bee1bd76136..8e83734cfe8db 100644 +--- a/block/blk-rq-qos.c ++++ b/block/blk-rq-qos.c +@@ -296,7 +296,7 @@ void rq_qos_exit(struct request_queue *q) + } + + int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, +- struct rq_qos_ops *ops) ++ const struct rq_qos_ops *ops) + { + struct request_queue *q = disk->queue; + +diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h +index 22552785aa31e..2b7b668479f71 100644 +--- a/block/blk-rq-qos.h ++++ b/block/blk-rq-qos.h +@@ -25,7 +25,7 @@ struct rq_wait { + }; + + struct rq_qos { +- struct rq_qos_ops *ops; ++ const struct rq_qos_ops *ops; + struct request_queue *q; + enum rq_qos_id id; + struct rq_qos *next; +@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) + } + + int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, +- struct rq_qos_ops *ops); ++ const struct rq_qos_ops *ops); + void rq_qos_del(struct rq_qos *rqos); + + typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index aec4e37c89c4a..d9398347b08d8 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -808,7 +808,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = { + }; + #endif + +-static struct rq_qos_ops wbt_rqos_ops = { ++static const struct rq_qos_ops wbt_rqos_ops = { + .throttle = wbt_wait, + .issue = wbt_issue, + .track = wbt_track, +-- +2.43.0 + diff --git a/queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch b/queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch new file mode 100644 index 00000000000..348877d6433 --- /dev/null +++ b/queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch @@ -0,0 +1,192 @@ +From 581958da857b8e9faf3303ba6ebc2f7e0b7a15fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:54 +0100 +Subject: blk-rq-qos: make rq_qos_add and rq_qos_del more useful + +From: Christoph Hellwig + +[ Upstream commit ce57b558604e68277d31ca5ce49ec4579a8618c5 ] + +Switch to passing a gendisk, and make rq_qos_add 
initialize all required +fields and drop the not required q argument from rq_qos_del. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Andreas Herrmann +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-14-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 13 +++---------- + block/blk-iolatency.c | 14 ++++---------- + block/blk-rq-qos.c | 13 ++++++++++--- + block/blk-rq-qos.h | 5 +++-- + block/blk-wbt.c | 5 +---- + 5 files changed, 21 insertions(+), 29 deletions(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index 72ca07f24b3c0..a8a7d2ce927b9 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -2847,9 +2847,7 @@ static struct rq_qos_ops ioc_rqos_ops = { + + static int blk_iocost_init(struct gendisk *disk) + { +- struct request_queue *q = disk->queue; + struct ioc *ioc; +- struct rq_qos *rqos; + int i, cpu, ret; + + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); +@@ -2872,11 +2870,6 @@ static int blk_iocost_init(struct gendisk *disk) + local64_set(&ccs->rq_wait_ns, 0); + } + +- rqos = &ioc->rqos; +- rqos->id = RQ_QOS_COST; +- rqos->ops = &ioc_rqos_ops; +- rqos->q = q; +- + spin_lock_init(&ioc->lock); + timer_setup(&ioc->timer, ioc_timer_fn, 0); + INIT_LIST_HEAD(&ioc->active_iocgs); +@@ -2900,17 +2893,17 @@ static int blk_iocost_init(struct gendisk *disk) + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ +- ret = rq_qos_add(q, rqos); ++ ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops); + if (ret) + goto err_free_ioc; + +- ret = blkcg_activate_policy(q, &blkcg_policy_iocost); ++ ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost); + if (ret) + goto err_del_qos; + return 0; + + err_del_qos: +- rq_qos_del(q, rqos); ++ rq_qos_del(&ioc->rqos); + err_free_ioc: + free_percpu(ioc->pcpu_stat); + kfree(ioc); +diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c +index 571fa95aafe96..c64cfec34ac37 100644 +--- a/block/blk-iolatency.c ++++ b/block/blk-iolatency.c +@@ -758,24 +758,18 @@ static void blkiolatency_enable_work_fn(struct work_struct *work) + + int blk_iolatency_init(struct gendisk *disk) + { +- struct request_queue *q = disk->queue; + struct blk_iolatency *blkiolat; +- struct rq_qos *rqos; + int ret; + + blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL); + if (!blkiolat) + return -ENOMEM; + +- rqos = &blkiolat->rqos; +- rqos->id = RQ_QOS_LATENCY; +- rqos->ops = &blkcg_iolatency_ops; +- rqos->q = q; +- +- ret = rq_qos_add(q, rqos); ++ ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY, ++ &blkcg_iolatency_ops); + if (ret) + goto err_free; +- ret = blkcg_activate_policy(q, &blkcg_policy_iolatency); ++ ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency); + if (ret) + goto err_qos_del; + +@@ -785,7 +779,7 @@ int blk_iolatency_init(struct gendisk *disk) + return 0; + + err_qos_del: +- rq_qos_del(q, rqos); ++ rq_qos_del(&blkiolat->rqos); + err_free: + kfree(blkiolat); + return ret; +diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c +index aae98dcb01ebe..14bee1bd76136 100644 +--- a/block/blk-rq-qos.c ++++ b/block/blk-rq-qos.c +@@ -295,8 +295,15 @@ void rq_qos_exit(struct request_queue *q) + } + } + +-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) ++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, ++ struct rq_qos_ops *ops) + { ++ struct 
request_queue *q = disk->queue; ++ ++ rqos->q = q; ++ rqos->id = id; ++ rqos->ops = ops; ++ + /* + * No IO can be in-flight when adding rqos, so freeze queue, which + * is fine since we only support rq_qos for blk-mq queue. +@@ -326,11 +333,11 @@ int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) + spin_unlock_irq(&q->queue_lock); + blk_mq_unfreeze_queue(q); + return -EBUSY; +- + } + +-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) ++void rq_qos_del(struct rq_qos *rqos) + { ++ struct request_queue *q = rqos->q; + struct rq_qos **cur; + + /* +diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h +index 805eee8b031d0..22552785aa31e 100644 +--- a/block/blk-rq-qos.h ++++ b/block/blk-rq-qos.h +@@ -85,8 +85,9 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) + init_waitqueue_head(&rq_wait->wait); + } + +-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos); +-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos); ++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, ++ struct rq_qos_ops *ops); ++void rq_qos_del(struct rq_qos *rqos); + + typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); + typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data); +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index 95bec9244e9f3..aec4e37c89c4a 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -842,9 +842,6 @@ int wbt_init(struct gendisk *disk) + for (i = 0; i < WBT_NUM_RWQ; i++) + rq_wait_init(&rwb->rq_wait[i]); + +- rwb->rqos.id = RQ_QOS_WBT; +- rwb->rqos.ops = &wbt_rqos_ops; +- rwb->rqos.q = q; + rwb->last_comp = rwb->last_issue = jiffies; + rwb->win_nsec = RWB_WINDOW_NSEC; + rwb->enable_state = WBT_STATE_ON_DEFAULT; +@@ -857,7 +854,7 @@ int wbt_init(struct gendisk *disk) + /* + * Assign rwb and add the stats callback. + */ +- ret = rq_qos_add(q, &rwb->rqos); ++ ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); + if (ret) + goto err_free; + +-- +2.43.0 + diff --git a/queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch b/queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch new file mode 100644 index 00000000000..6669d217f05 --- /dev/null +++ b/queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch @@ -0,0 +1,166 @@ +From 99215e8e45084576ff46f9ed9e23f06d152f879c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:53 +0100 +Subject: blk-rq-qos: move rq_qos_add and rq_qos_del out of line + +From: Christoph Hellwig + +[ Upstream commit b494f9c566ba5fe2cc8abe67fdeb0332c6b48d4b ] + +These two functions are rather larger and not in a fast path, so move +them out of line. 
+ +Signed-off-by: Christoph Hellwig +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-13-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-rq-qos.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ + block/blk-rq-qos.h | 61 ++-------------------------------------------- + 2 files changed, 62 insertions(+), 59 deletions(-) + +diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c +index 88f0fe7dcf545..aae98dcb01ebe 100644 +--- a/block/blk-rq-qos.c ++++ b/block/blk-rq-qos.c +@@ -294,3 +294,63 @@ void rq_qos_exit(struct request_queue *q) + rqos->ops->exit(rqos); + } + } ++ ++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) ++{ ++ /* ++ * No IO can be in-flight when adding rqos, so freeze queue, which ++ * is fine since we only support rq_qos for blk-mq queue. ++ * ++ * Reuse ->queue_lock for protecting against other concurrent ++ * rq_qos adding/deleting ++ */ ++ blk_mq_freeze_queue(q); ++ ++ spin_lock_irq(&q->queue_lock); ++ if (rq_qos_id(q, rqos->id)) ++ goto ebusy; ++ rqos->next = q->rq_qos; ++ q->rq_qos = rqos; ++ spin_unlock_irq(&q->queue_lock); ++ ++ blk_mq_unfreeze_queue(q); ++ ++ if (rqos->ops->debugfs_attrs) { ++ mutex_lock(&q->debugfs_mutex); ++ blk_mq_debugfs_register_rqos(rqos); ++ mutex_unlock(&q->debugfs_mutex); ++ } ++ ++ return 0; ++ebusy: ++ spin_unlock_irq(&q->queue_lock); ++ blk_mq_unfreeze_queue(q); ++ return -EBUSY; ++ ++} ++ ++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) ++{ ++ struct rq_qos **cur; ++ ++ /* ++ * See comment in rq_qos_add() about freezing queue & using ++ * ->queue_lock. ++ */ ++ blk_mq_freeze_queue(q); ++ ++ spin_lock_irq(&q->queue_lock); ++ for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { ++ if (*cur == rqos) { ++ *cur = rqos->next; ++ break; ++ } ++ } ++ spin_unlock_irq(&q->queue_lock); ++ ++ blk_mq_unfreeze_queue(q); ++ ++ mutex_lock(&q->debugfs_mutex); ++ blk_mq_debugfs_unregister_rqos(rqos); ++ mutex_unlock(&q->debugfs_mutex); ++} +diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h +index 1ef1f7d4bc3cb..805eee8b031d0 100644 +--- a/block/blk-rq-qos.h ++++ b/block/blk-rq-qos.h +@@ -85,65 +85,8 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) + init_waitqueue_head(&rq_wait->wait); + } + +-static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +-{ +- /* +- * No IO can be in-flight when adding rqos, so freeze queue, which +- * is fine since we only support rq_qos for blk-mq queue. +- * +- * Reuse ->queue_lock for protecting against other concurrent +- * rq_qos adding/deleting +- */ +- blk_mq_freeze_queue(q); +- +- spin_lock_irq(&q->queue_lock); +- if (rq_qos_id(q, rqos->id)) +- goto ebusy; +- rqos->next = q->rq_qos; +- q->rq_qos = rqos; +- spin_unlock_irq(&q->queue_lock); +- +- blk_mq_unfreeze_queue(q); +- +- if (rqos->ops->debugfs_attrs) { +- mutex_lock(&q->debugfs_mutex); +- blk_mq_debugfs_register_rqos(rqos); +- mutex_unlock(&q->debugfs_mutex); +- } +- +- return 0; +-ebusy: +- spin_unlock_irq(&q->queue_lock); +- blk_mq_unfreeze_queue(q); +- return -EBUSY; +- +-} +- +-static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) +-{ +- struct rq_qos **cur; +- +- /* +- * See comment in rq_qos_add() about freezing queue & using +- * ->queue_lock. 
+- */ +- blk_mq_freeze_queue(q); +- +- spin_lock_irq(&q->queue_lock); +- for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { +- if (*cur == rqos) { +- *cur = rqos->next; +- break; +- } +- } +- spin_unlock_irq(&q->queue_lock); +- +- blk_mq_unfreeze_queue(q); +- +- mutex_lock(&q->debugfs_mutex); +- blk_mq_debugfs_unregister_rqos(rqos); +- mutex_unlock(&q->debugfs_mutex); +-} ++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos); ++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos); + + typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); + typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data); +-- +2.43.0 + diff --git a/queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch b/queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch new file mode 100644 index 00000000000..77370a2087f --- /dev/null +++ b/queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch @@ -0,0 +1,283 @@ +From 8d1a0d757f1cabbee1a542c21443aefc9746b42d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:56 +0100 +Subject: blk-rq-qos: store a gendisk instead of request_queue in struct rq_qos + +From: Christoph Hellwig + +[ Upstream commit ba91c849fa50dbc6519cf7808177b3a9b7f6bc97 ] + +This is what about half of the users already want, and it's only going to +grow more. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Andreas Herrmann +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-16-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 12 ++++++------ + block/blk-iolatency.c | 14 +++++++------- + block/blk-mq-debugfs.c | 10 ++++------ + block/blk-rq-qos.c | 4 ++-- + block/blk-rq-qos.h | 2 +- + block/blk-wbt.c | 16 +++++++--------- + 6 files changed, 27 insertions(+), 31 deletions(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index 78958c5bece08..ab5830ba23e0f 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -670,7 +670,7 @@ static struct ioc *q_to_ioc(struct request_queue *q) + + static const char __maybe_unused *ioc_name(struct ioc *ioc) + { +- struct gendisk *disk = ioc->rqos.q->disk; ++ struct gendisk *disk = ioc->rqos.disk; + + if (!disk) + return ""; +@@ -809,11 +809,11 @@ static int ioc_autop_idx(struct ioc *ioc) + u64 now_ns; + + /* rotational? 
*/ +- if (!blk_queue_nonrot(ioc->rqos.q)) ++ if (!blk_queue_nonrot(ioc->rqos.disk->queue)) + return AUTOP_HDD; + + /* handle SATA SSDs w/ broken NCQ */ +- if (blk_queue_depth(ioc->rqos.q) == 1) ++ if (blk_queue_depth(ioc->rqos.disk->queue) == 1) + return AUTOP_SSD_QD1; + + /* use one of the normal ssd sets */ +@@ -2653,7 +2653,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) + if (use_debt) { + iocg_incur_debt(iocg, abs_cost, &now); + if (iocg_kick_delay(iocg, &now)) +- blkcg_schedule_throttle(rqos->q->disk, ++ blkcg_schedule_throttle(rqos->disk, + (bio->bi_opf & REQ_SWAP) == REQ_SWAP); + iocg_unlock(iocg, ioc_locked, &flags); + return; +@@ -2754,7 +2754,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, + if (likely(!list_empty(&iocg->active_list))) { + iocg_incur_debt(iocg, abs_cost, &now); + if (iocg_kick_delay(iocg, &now)) +- blkcg_schedule_throttle(rqos->q->disk, ++ blkcg_schedule_throttle(rqos->disk, + (bio->bi_opf & REQ_SWAP) == REQ_SWAP); + } else { + iocg_commit_bio(iocg, bio, abs_cost, cost); +@@ -2825,7 +2825,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos) + { + struct ioc *ioc = rqos_to_ioc(rqos); + +- blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost); ++ blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost); + + spin_lock_irq(&ioc->lock); + ioc->running = IOC_STOP; +diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c +index b0f8550f87cd2..268e6653b5a62 100644 +--- a/block/blk-iolatency.c ++++ b/block/blk-iolatency.c +@@ -292,7 +292,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos, + unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay); + + if (use_delay) +- blkcg_schedule_throttle(rqos->q->disk, use_memdelay); ++ blkcg_schedule_throttle(rqos->disk, use_memdelay); + + /* + * To avoid priority inversions we want to just take a slot if we are +@@ -330,7 +330,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, + struct child_latency_info *lat_info, + bool up) + { +- unsigned long qd = blkiolat->rqos.q->nr_requests; ++ unsigned long qd = blkiolat->rqos.disk->queue->nr_requests; + unsigned long scale = scale_amount(qd, up); + unsigned long old = atomic_read(&lat_info->scale_cookie); + unsigned long max_scale = qd << 1; +@@ -370,7 +370,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, + */ + static void scale_change(struct iolatency_grp *iolat, bool up) + { +- unsigned long qd = iolat->blkiolat->rqos.q->nr_requests; ++ unsigned long qd = iolat->blkiolat->rqos.disk->queue->nr_requests; + unsigned long scale = scale_amount(qd, up); + unsigned long old = iolat->rq_depth.max_depth; + +@@ -647,7 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) + + del_timer_sync(&blkiolat->timer); + flush_work(&blkiolat->enable_work); +- blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); ++ blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency); + kfree(blkiolat); + } + +@@ -666,7 +666,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) + + rcu_read_lock(); + blkg_for_each_descendant_pre(blkg, pos_css, +- blkiolat->rqos.q->root_blkg) { ++ blkiolat->rqos.disk->queue->root_blkg) { + struct iolatency_grp *iolat; + struct child_latency_info *lat_info; + unsigned long flags; +@@ -750,9 +750,9 @@ static void blkiolatency_enable_work_fn(struct work_struct *work) + */ + enabled = atomic_read(&blkiolat->enable_cnt); + if (enabled != blkiolat->enabled) { +- blk_mq_freeze_queue(blkiolat->rqos.q); ++ 
blk_mq_freeze_queue(blkiolat->rqos.disk->queue); + blkiolat->enabled = enabled; +- blk_mq_unfreeze_queue(blkiolat->rqos.q); ++ blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue); + } + } + +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 7675e663df365..c152276736832 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -813,9 +813,9 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id) + + void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) + { +- lockdep_assert_held(&rqos->q->debugfs_mutex); ++ lockdep_assert_held(&rqos->disk->queue->debugfs_mutex); + +- if (!rqos->q->debugfs_dir) ++ if (!rqos->disk->queue->debugfs_dir) + return; + debugfs_remove_recursive(rqos->debugfs_dir); + rqos->debugfs_dir = NULL; +@@ -823,7 +823,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) + + void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) + { +- struct request_queue *q = rqos->q; ++ struct request_queue *q = rqos->disk->queue; + const char *dir_name = rq_qos_id_to_name(rqos->id); + + lockdep_assert_held(&q->debugfs_mutex); +@@ -835,9 +835,7 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) + q->rqos_debugfs_dir = debugfs_create_dir("rqos", + q->debugfs_dir); + +- rqos->debugfs_dir = debugfs_create_dir(dir_name, +- rqos->q->rqos_debugfs_dir); +- ++ rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir); + debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); + } + +diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c +index 8e83734cfe8db..d8cc820a365e3 100644 +--- a/block/blk-rq-qos.c ++++ b/block/blk-rq-qos.c +@@ -300,7 +300,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, + { + struct request_queue *q = disk->queue; + +- rqos->q = q; ++ rqos->disk = disk; + rqos->id = id; + rqos->ops = ops; + +@@ -337,7 +337,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, + + void rq_qos_del(struct rq_qos *rqos) + { +- struct request_queue *q = rqos->q; ++ struct request_queue *q = rqos->disk->queue; + struct rq_qos **cur; + + /* +diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h +index 2b7b668479f71..b02a1a3d33a89 100644 +--- a/block/blk-rq-qos.h ++++ b/block/blk-rq-qos.h +@@ -26,7 +26,7 @@ struct rq_wait { + + struct rq_qos { + const struct rq_qos_ops *ops; +- struct request_queue *q; ++ struct gendisk *disk; + enum rq_qos_id id; + struct rq_qos *next; + #ifdef CONFIG_BLK_DEBUG_FS +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index d9398347b08d8..e9206b1406e76 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -98,7 +98,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) + */ + static bool wb_recent_wait(struct rq_wb *rwb) + { +- struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb; ++ struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; + + return time_before(jiffies, wb->dirty_sleep + HZ); + } +@@ -235,7 +235,7 @@ enum { + + static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) + { +- struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi; ++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi; + struct rq_depth *rqd = &rwb->rq_depth; + u64 thislat; + +@@ -288,7 +288,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) + + static void rwb_trace_step(struct rq_wb *rwb, const char *msg) + { +- struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi; ++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi; + struct rq_depth *rqd = &rwb->rq_depth; + + trace_wbt_step(bdi, msg, 
rqd->scale_step, rwb->cur_win_nsec, +@@ -358,13 +358,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb) + unsigned int inflight = wbt_inflight(rwb); + int status; + +- if (!rwb->rqos.q->disk) ++ if (!rwb->rqos.disk) + return; + + status = latency_exceeded(rwb, cb->stat); + +- trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step, +- inflight); ++ trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight); + + /* + * If we exceeded the latency target, step down. If we did not, +@@ -689,16 +688,15 @@ static int wbt_data_dir(const struct request *rq) + + static void wbt_queue_depth_changed(struct rq_qos *rqos) + { +- RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q); ++ RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); + wbt_update_limits(RQWB(rqos)); + } + + static void wbt_exit(struct rq_qos *rqos) + { + struct rq_wb *rwb = RQWB(rqos); +- struct request_queue *q = rqos->q; + +- blk_stat_remove_callback(q, rwb->cb); ++ blk_stat_remove_callback(rqos->disk->queue, rwb->cb); + blk_stat_free_callback(rwb->cb); + kfree(rwb); + } +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch b/queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch new file mode 100644 index 00000000000..3d91bef37f7 --- /dev/null +++ b/queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch @@ -0,0 +1,114 @@ +From 51938e4e6ade6005901b700cfe6ecdd7481af216 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 20:15:18 +0800 +Subject: blk-wbt: don't enable throttling if default elevator is bfq + +From: Yu Kuai + +[ Upstream commit 671fae5e51297fc76b3758ca2edd514858734a6a ] + +Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") tries to +disable wbt for bfq, it's done by calling wbt_disable_default() in +bfq_init_queue(). However, wbt is still enabled if default elevator is +bfq: + +device_add_disk + elevator_init_mq + bfq_init_queue + wbt_disable_default -> done nothing + + blk_register_queue + wbt_enable_default -> wbt is enabled + +Fix the problem by adding a new flag ELEVATOR_FLAG_DISBALE_WBT, bfq +will set the flag in bfq_init_queue, and following wbt_enable_default() +won't enable wbt while the flag is set. 
+ +Signed-off-by: Yu Kuai +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20221019121518.3865235-7-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/bfq-iosched.c | 2 ++ + block/blk-wbt.c | 11 ++++++++--- + block/elevator.h | 3 ++- + 3 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 52eb79d60a3f3..e4699291aee23 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -7059,6 +7059,7 @@ static void bfq_exit_queue(struct elevator_queue *e) + #endif + + blk_stat_disable_accounting(bfqd->queue); ++ clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags); + wbt_enable_default(bfqd->queue); + + kfree(bfqd); +@@ -7204,6 +7205,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + ++ set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags); + wbt_disable_default(q); + blk_stat_enable_accounting(q); + +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index c5a8c10028a08..afb1782b4255e 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -27,6 +27,7 @@ + + #include "blk-wbt.h" + #include "blk-rq-qos.h" ++#include "elevator.h" + + #define CREATE_TRACE_POINTS + #include +@@ -638,11 +639,15 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) + */ + void wbt_enable_default(struct request_queue *q) + { +- struct rq_qos *rqos = wbt_rq_qos(q); ++ struct rq_qos *rqos; ++ bool disable_flag = q->elevator && ++ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags); + + /* Throttling already enabled? */ ++ rqos = wbt_rq_qos(q); + if (rqos) { +- if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) ++ if (!disable_flag && ++ RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) + RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; + return; + } +@@ -651,7 +656,7 @@ void wbt_enable_default(struct request_queue *q) + if (!blk_queue_registered(q)) + return; + +- if (queue_is_mq(q)) ++ if (queue_is_mq(q) && !disable_flag) + wbt_init(q); + } + EXPORT_SYMBOL_GPL(wbt_enable_default); +diff --git a/block/elevator.h b/block/elevator.h +index ed574bf3e629e..75382471222d1 100644 +--- a/block/elevator.h ++++ b/block/elevator.h +@@ -104,7 +104,8 @@ struct elevator_queue + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); + }; + +-#define ELEVATOR_FLAG_REGISTERED 0 ++#define ELEVATOR_FLAG_REGISTERED 0 ++#define ELEVATOR_FLAG_DISABLE_WBT 1 + + /* + * block elevator interface +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch b/queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch new file mode 100644 index 00000000000..c065067107b --- /dev/null +++ b/queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch @@ -0,0 +1,115 @@ +From ecf5ea95f5102ff71cf1675020f9bff184b40208 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 18:58:26 +0100 +Subject: blk-wbt: Fix detection of dirty-throttled tasks + +From: Jan Kara + +[ Upstream commit f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 ] + +The detection of dirty-throttled tasks in blk-wbt has been subtly broken +since its beginning in 2016. Namely if we are doing cgroup writeback and +the throttled task is not in the root cgroup, balance_dirty_pages() will +set dirty_sleep for the non-root bdi_writeback structure. 
However +blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback +structure. Thus detection of recently throttled tasks is not working in +this case (we noticed this when we switched to cgroup v2 and suddently +writeback was slow). + +Since blk-wbt has no easy way to get to proper bdi_writeback and +furthermore its intention has always been to work on the whole device +rather than on individual cgroups, just move the dirty_sleep timestamp +from bdi_writeback to backing_dev_info. That fixes the checking for +recently throttled task and saves memory for everybody as a bonus. + +CC: stable@vger.kernel.org +Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz +[axboe: fixup indentation errors] +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-wbt.c | 4 ++-- + include/linux/backing-dev-defs.h | 7 +++++-- + mm/backing-dev.c | 2 +- + mm/page-writeback.c | 2 +- + 4 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index e9206b1406e76..fcacdff8af93b 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -98,9 +98,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) + */ + static bool wb_recent_wait(struct rq_wb *rwb) + { +- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; ++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi; + +- return time_before(jiffies, wb->dirty_sleep + HZ); ++ return time_before(jiffies, bdi->last_bdp_sleep + HZ); + } + + static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, +diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h +index ae12696ec492c..2ad261082bba5 100644 +--- a/include/linux/backing-dev-defs.h ++++ b/include/linux/backing-dev-defs.h +@@ -141,8 +141,6 @@ struct bdi_writeback { + struct delayed_work dwork; /* work item used for writeback */ + struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ + +- unsigned long dirty_sleep; /* last wait */ +- + struct list_head bdi_node; /* anchored at bdi->wb_list */ + + #ifdef CONFIG_CGROUP_WRITEBACK +@@ -179,6 +177,11 @@ struct backing_dev_info { + * any dirty wbs, which is depended upon by bdi_has_dirty(). + */ + atomic_long_t tot_write_bandwidth; ++ /* ++ * Jiffies when last process was dirty throttled on this bdi. Used by ++ * blk-wbt. 
++ */ ++ unsigned long last_bdp_sleep; + + struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct list_head wb_list; /* list of all wbs */ +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index bf5525c2e561a..c070ff9ef9cf3 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -305,7 +305,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, + INIT_LIST_HEAD(&wb->work_list); + INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); +- wb->dirty_sleep = jiffies; + + err = fprop_local_init_percpu(&wb->completions, gfp); + if (err) +@@ -793,6 +792,7 @@ int bdi_init(struct backing_dev_info *bdi) + INIT_LIST_HEAD(&bdi->bdi_list); + INIT_LIST_HEAD(&bdi->wb_list); + init_waitqueue_head(&bdi->wb_waitq); ++ bdi->last_bdp_sleep = jiffies; + + return cgwb_bdi_init(bdi); + } +diff --git a/mm/page-writeback.c b/mm/page-writeback.c +index d3e9d12860b9f..9046d1f1b408e 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1809,7 +1809,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb, + break; + } + __set_current_state(TASK_KILLABLE); +- wb->dirty_sleep = now; ++ bdi->last_bdp_sleep = jiffies; + io_schedule_timeout(pause); + + current->dirty_paused_when = now + pause; +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch b/queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch new file mode 100644 index 00000000000..487c08fc13f --- /dev/null +++ b/queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch @@ -0,0 +1,65 @@ +From 5376a7667a1f2430589c3b2f5f0bccafd1dd761b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 May 2023 20:18:54 +0800 +Subject: blk-wbt: fix that wbt can't be disabled by default + +From: Yu Kuai + +[ Upstream commit 8a2b20a997a3779ae9fcae268f2959eb82ec05a1 ] + +commit b11d31ae01e6 ("blk-wbt: remove unnecessary check in +wbt_enable_default()") removes the checking of CONFIG_BLK_WBT_MQ by +mistake, which is used to control enable or disable wbt by default. + +Fix the problem by adding back the checking. This patch also do a litter +cleanup to make related code more readable. + +Fixes: b11d31ae01e6 ("blk-wbt: remove unnecessary check in wbt_enable_default()") +Reported-by: Lukas Bulwahn +Link: https://lore.kernel.org/lkml/CAKXUXMzfKq_J9nKHGyr5P5rvUETY4B-fxoQD4sO+NYjFOfVtZA@mail.gmail.com/t/ +Signed-off-by: Yu Kuai +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20230522121854.2928880-1-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-wbt.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index fcacdff8af93b..526fb12c3e4cf 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -640,14 +640,16 @@ void wbt_enable_default(struct gendisk *disk) + { + struct request_queue *q = disk->queue; + struct rq_qos *rqos; +- bool disable_flag = q->elevator && +- test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags); ++ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ); ++ ++ if (q->elevator && ++ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags)) ++ enable = false; + + /* Throttling already enabled? 
*/ + rqos = wbt_rq_qos(q); + if (rqos) { +- if (!disable_flag && +- RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) ++ if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) + RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; + return; + } +@@ -656,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk) + if (!blk_queue_registered(q)) + return; + +- if (queue_is_mq(q) && !disable_flag) ++ if (queue_is_mq(q) && enable) + wbt_init(disk); + } + EXPORT_SYMBOL_GPL(wbt_enable_default); +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch new file mode 100644 index 00000000000..93821b3354c --- /dev/null +++ b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch @@ -0,0 +1,139 @@ +From 3cab63f95634875a1501abbda551e69098f6c978 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:49 +0100 +Subject: blk-wbt: pass a gendisk to wbt_{enable,disable}_default + +From: Christoph Hellwig + +[ Upstream commit 04aad37be1a88de6a1919996a615437ac74de479 ] + +Pass a gendisk to wbt_enable_default and wbt_disable_default to +prepare for phasing out usage of the request_queue in the blk-cgroup +code. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Andreas Herrmann +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-9-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/bfq-iosched.c | 4 ++-- + block/blk-iocost.c | 4 ++-- + block/blk-sysfs.c | 2 +- + block/blk-wbt.c | 7 ++++--- + block/blk-wbt.h | 8 ++++---- + 5 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index e4699291aee23..84b4763b2b223 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -7060,7 +7060,7 @@ static void bfq_exit_queue(struct elevator_queue *e) + + blk_stat_disable_accounting(bfqd->queue); + clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags); +- wbt_enable_default(bfqd->queue); ++ wbt_enable_default(bfqd->queue->disk); + + kfree(bfqd); + } +@@ -7206,7 +7206,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + + set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags); +- wbt_disable_default(q); ++ wbt_disable_default(q->disk); + blk_stat_enable_accounting(q); + + return 0; +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index 3788774a7b729..72ca07f24b3c0 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -3281,11 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, + blk_stat_enable_accounting(disk->queue); + blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); + ioc->enabled = true; +- wbt_disable_default(disk->queue); ++ wbt_disable_default(disk); + } else { + blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); + ioc->enabled = false; +- wbt_enable_default(disk->queue); ++ wbt_enable_default(disk); + } + + if (user) { +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index a82bdec923b21..c59c4d3ee7a27 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -837,7 +837,7 @@ int blk_register_queue(struct gendisk *disk) + goto put_dev; + + blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); +- wbt_enable_default(q); ++ wbt_enable_default(disk); + blk_throtl_register(disk); + + /* Now everything is ready and send out KOBJ_ADD uevent */ +diff --git 
a/block/blk-wbt.c b/block/blk-wbt.c +index afb1782b4255e..8d4f075f13e2f 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -637,8 +637,9 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) + /* + * Enable wbt if defaults are configured that way + */ +-void wbt_enable_default(struct request_queue *q) ++void wbt_enable_default(struct gendisk *disk) + { ++ struct request_queue *q = disk->queue; + struct rq_qos *rqos; + bool disable_flag = q->elevator && + test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags); +@@ -705,9 +706,9 @@ static void wbt_exit(struct rq_qos *rqos) + /* + * Disable wbt, if enabled by default. + */ +-void wbt_disable_default(struct request_queue *q) ++void wbt_disable_default(struct gendisk *disk) + { +- struct rq_qos *rqos = wbt_rq_qos(q); ++ struct rq_qos *rqos = wbt_rq_qos(disk->queue); + struct rq_wb *rwb; + if (!rqos) + return; +diff --git a/block/blk-wbt.h b/block/blk-wbt.h +index 7e44eccc676dd..58c226fe33d48 100644 +--- a/block/blk-wbt.h ++++ b/block/blk-wbt.h +@@ -89,8 +89,8 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) + #ifdef CONFIG_BLK_WBT + + int wbt_init(struct request_queue *); +-void wbt_disable_default(struct request_queue *); +-void wbt_enable_default(struct request_queue *); ++void wbt_disable_default(struct gendisk *disk); ++void wbt_enable_default(struct gendisk *disk); + + u64 wbt_get_min_lat(struct request_queue *q); + void wbt_set_min_lat(struct request_queue *q, u64 val); +@@ -105,10 +105,10 @@ static inline int wbt_init(struct request_queue *q) + { + return -EINVAL; + } +-static inline void wbt_disable_default(struct request_queue *q) ++static inline void wbt_disable_default(struct gendisk *disk) + { + } +-static inline void wbt_enable_default(struct request_queue *q) ++static inline void wbt_enable_default(struct gendisk *disk) + { + } + static inline void wbt_set_write_cache(struct request_queue *q, bool wc) +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch new file mode 100644 index 00000000000..c2e6a0ab19d --- /dev/null +++ b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch @@ -0,0 +1,87 @@ +From 64436d303bf9f3e4b615121498533f1e7b068e19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 16:03:50 +0100 +Subject: blk-wbt: pass a gendisk to wbt_init + +From: Christoph Hellwig + +[ Upstream commit 958f29654747a54f2272eb478e493eb97f492e06 ] + +Pass a gendisk to wbt_init to prepare for phasing out usage of the +request_queue in the blk-cgroup code. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Andreas Herrmann +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230203150400.3199230-10-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-sysfs.c | 2 +- + block/blk-wbt.c | 5 +++-- + block/blk-wbt.h | 4 ++-- + 3 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index c59c4d3ee7a27..31f53ef01982d 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -488,7 +488,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, + + rqos = wbt_rq_qos(q); + if (!rqos) { +- ret = wbt_init(q); ++ ret = wbt_init(q->disk); + if (ret) + return ret; + } +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index 8d4f075f13e2f..95bec9244e9f3 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -658,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk) + return; + + if (queue_is_mq(q) && !disable_flag) +- wbt_init(q); ++ wbt_init(disk); + } + EXPORT_SYMBOL_GPL(wbt_enable_default); + +@@ -822,8 +822,9 @@ static struct rq_qos_ops wbt_rqos_ops = { + #endif + }; + +-int wbt_init(struct request_queue *q) ++int wbt_init(struct gendisk *disk) + { ++ struct request_queue *q = disk->queue; + struct rq_wb *rwb; + int i; + int ret; +diff --git a/block/blk-wbt.h b/block/blk-wbt.h +index 58c226fe33d48..8170439b89d6e 100644 +--- a/block/blk-wbt.h ++++ b/block/blk-wbt.h +@@ -88,7 +88,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) + + #ifdef CONFIG_BLK_WBT + +-int wbt_init(struct request_queue *); ++int wbt_init(struct gendisk *disk); + void wbt_disable_default(struct gendisk *disk); + void wbt_enable_default(struct gendisk *disk); + +@@ -101,7 +101,7 @@ u64 wbt_default_latency_nsec(struct request_queue *); + + #else + +-static inline int wbt_init(struct request_queue *q) ++static inline int wbt_init(struct gendisk *disk) + { + return -EINVAL; + } +-- +2.43.0 + diff --git a/queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch b/queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch new file mode 100644 index 00000000000..b65683926d0 --- /dev/null +++ b/queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch @@ -0,0 +1,37 @@ +From 631dc45a1e1ca5721ec23d80d60381e818e3c409 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 20:15:14 +0800 +Subject: blk-wbt: remove unnecessary check in wbt_enable_default() + +From: Yu Kuai + +[ Upstream commit b11d31ae01e6b0762b28e645ad6718a12faa8d14 ] + +If CONFIG_BLK_WBT_MQ is disabled, wbt_init() won't do anything. 
+ +Signed-off-by: Yu Kuai +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20221019121518.3865235-3-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/blk-wbt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index c293e08b301ff..c5a8c10028a08 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -651,7 +651,7 @@ void wbt_enable_default(struct request_queue *q) + if (!blk_queue_registered(q)) + return; + +- if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ)) ++ if (queue_is_mq(q)) + wbt_init(q); + } + EXPORT_SYMBOL_GPL(wbt_enable_default); +-- +2.43.0 + diff --git a/queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch b/queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch new file mode 100644 index 00000000000..70bcfc7bc53 --- /dev/null +++ b/queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch @@ -0,0 +1,126 @@ +From 525bbc796fc5729946f6d394ea8f72487384a8a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 13:43:46 -0500 +Subject: drm/amd/display: Fix MST Null Ptr for RV + +From: Fangzhi Zuo + +[ Upstream commit e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 ] + +The change try to fix below error specific to RV platform: + +BUG: kernel NULL pointer dereference, address: 0000000000000008 +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP NOPTI +CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 +Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 +RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] +Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> +RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 +RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 +RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 +R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 +R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 +FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 +Call Trace: + + ? __die+0x23/0x70 + ? page_fault_oops+0x171/0x4e0 + ? plist_add+0xbe/0x100 + ? exc_page_fault+0x7c/0x180 + ? asm_exc_page_fault+0x26/0x30 + ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] + ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] + compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] + ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] + compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] + amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] + drm_atomic_check_only+0x5c5/0xa40 + drm_mode_atomic_ioctl+0x76e/0xbc0 + ? _copy_to_user+0x25/0x30 + ? drm_ioctl+0x296/0x4b0 + ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 + drm_ioctl_kernel+0xcd/0x170 + drm_ioctl+0x26d/0x4b0 + ? 
__pfx_drm_mode_atomic_ioctl+0x10/0x10 + amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] + __x64_sys_ioctl+0x94/0xd0 + do_syscall_64+0x60/0x90 + ? do_syscall_64+0x6c/0x90 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7f4dad17f76f +Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c> +RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f +RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b +RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc +R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0 + +Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep > + typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas> +CR2: 0000000000000008 +---[ end trace 0000000000000000 ]--- +RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] +Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> +RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 +RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 +RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 +R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 +R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 +FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 + +With a second DP monitor connected, drm_atomic_state in dm atomic check +sequence does not include the connector state for the old/existing/first +DP monitor. In such case, dsc determination policy would hit a null ptr +when it tries to iterate the old/existing stream that does not have a +valid connector state attached to it. When that happens, dm atomic check +should call drm_atomic_get_connector_state for a new connector state. +Existing dm has already done that, except for RV due to it does not have +official support of dsc where .num_dsc is not defined in dcn10 resource +cap, that prevent from getting drm_atomic_get_connector_state called. +So, skip dsc determination policy for ASICs that don't have DSC support. 
+ +Cc: stable@vger.kernel.org # 6.1+ +Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314 +Reviewed-by: Wayne Lin +Acked-by: Hamza Mahfooz +Signed-off-by: Fangzhi Zuo +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index bea49befdcacc..a6c6f286a5988 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -10123,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, + } + + #if defined(CONFIG_DRM_AMD_DC_DCN) +- ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); +- if (ret) { +- DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); +- ret = -EINVAL; +- goto fail; ++ if (dc_resource_is_dsc_encoding_supported(dc)) { ++ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); ++ if (ret) { ++ DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); ++ ret = -EINVAL; ++ goto fail; ++ } + } + + ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); +-- +2.43.0 + diff --git a/queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch b/queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch new file mode 100644 index 00000000000..e5f8a855a6d --- /dev/null +++ b/queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch @@ -0,0 +1,59 @@ +From a06f08e22b6af5f25d8f4a6abddfaf9548d74e5e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jan 2024 08:41:52 +0530 +Subject: drm/amd/display: Fix uninitialized variable usage in core_link_ + 'read_dpcd() & write_dpcd()' functions + +From: Srinivasan Shanmugam + +[ Upstream commit a58371d632ebab9ea63f10893a6b6731196b6f8d ] + +The 'status' variable in 'core_link_read_dpcd()' & +'core_link_write_dpcd()' was uninitialized. + +Thus, initializing 'status' variable to 'DC_ERROR_UNEXPECTED' by default. + +Fixes the below: +drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'. +drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'. 
+ +Cc: stable@vger.kernel.org +Cc: Jerry Zuo +Cc: Jun Lei +Cc: Wayne Lin +Cc: Aurabindo Pillai +Cc: Rodrigo Siqueira +Cc: Hamza Mahfooz +Signed-off-by: Srinivasan Shanmugam +Reviewed-by: Rodrigo Siqueira +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +index af110bf9470fa..aefca9756dbe8 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +@@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd( + uint32_t extended_size; + /* size of the remaining partitioned address space */ + uint32_t size_left_to_read; +- enum dc_status status; ++ enum dc_status status = DC_ERROR_UNEXPECTED; + /* size of the next partition to be read from */ + uint32_t partition_size; + uint32_t data_index = 0; +@@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd( + { + uint32_t partition_size; + uint32_t data_index = 0; +- enum dc_status status; ++ enum dc_status status = DC_ERROR_UNEXPECTED; + + while (size) { + partition_size = dpcd_get_next_partition_size(address, size); +-- +2.43.0 + diff --git a/queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch b/queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch new file mode 100644 index 00000000000..3462268e824 --- /dev/null +++ b/queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch @@ -0,0 +1,119 @@ +From db5f2cf82b018c2d5cf047cde55d1e9baa8f6b31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 11:46:56 -0400 +Subject: drm/amd/display: Wrong colorimetry workaround + +From: Ma Hanghong + +[ Upstream commit b1a98cf89a695d36c414653634ea7ba91b6e701f ] + +[Why] +For FreeSync HDR, native color space flag in AMD VSIF(BT.709) should be +used when intepreting content and color space flag in VSC or AVI +infoFrame should be ignored. However, it turned out some userspace +application still use color flag in VSC or AVI infoFrame which is +incorrect. + +[How] +Transfer function is used when building the VSC and AVI infoFrame. Set +colorimetry to BT.709 when all the following match: + +1. Pixel format is YCbCr; +2. In FreeSync 2 HDR, color is COLOR_SPACE_2020_YCBCR; +3. Transfer function is TRANSFER_FUNC_GAMMA_22; + +Tested-by: Mark Broadworth +Reviewed-by: Krunoslav Kovac +Acked-by: Rodrigo Siqueira +Signed-off-by: Ma Hanghong +Signed-off-by: Alex Deucher +Stable-dep-of: e6a7df96facd ("drm/amd/display: Fix MST Null Ptr for RV") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- + drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++ + drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h | 3 ++- + .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 6 +++++- + 4 files changed, 17 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index da16048bf1004..bea49befdcacc 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -5938,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, + bool scale = dm_state ? 
(dm_state->scaling != RMX_OFF) : false; + int mode_refresh; + int preferred_refresh = 0; ++ enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; + #if defined(CONFIG_DRM_AMD_DC_DCN) + struct dsc_dec_dpcd_caps dsc_caps; + #endif +@@ -6071,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, + if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) + stream->use_vsc_sdp_for_colorimetry = true; + } +- mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); ++ if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) ++ tf = TRANSFER_FUNC_GAMMA_22; ++ mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; + + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +index 66923f51037a3..e2f80cd0ca8cb 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +@@ -3038,6 +3038,12 @@ static void set_avi_info_frame( + hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; + } + ++ if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && ++ stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { ++ hdmi_info.bits.EC0_EC2 = 0; ++ hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; ++ } ++ + /* TODO: un-hardcode aspect ratio */ + aspect = stream->timing.aspect_ratio; + +diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +index 1d8b746b02f24..edf5845f6a1f7 100644 +--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h ++++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +@@ -35,7 +35,8 @@ struct mod_vrr_params; + + void mod_build_vsc_infopacket(const struct dc_stream_state *stream, + struct dc_info_packet *info_packet, +- enum dc_color_space cs); ++ enum dc_color_space cs, ++ enum color_transfer_func tf); + + void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, + struct dc_info_packet *info_packet); +diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +index 27ceba9d6d658..69691058ab898 100644 +--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c ++++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +@@ -132,7 +132,8 @@ enum ColorimetryYCCDP { + + void mod_build_vsc_infopacket(const struct dc_stream_state *stream, + struct dc_info_packet *info_packet, +- enum dc_color_space cs) ++ enum dc_color_space cs, ++ enum color_transfer_func tf) + { + unsigned int vsc_packet_revision = vsc_packet_undefined; + unsigned int i; +@@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, + colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; + else if (cs == COLOR_SPACE_2020_YCBCR) + colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; ++ ++ if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) ++ colorimetryFormat = ColorimetryYCC_DP_ITU709; + break; + + default: +-- +2.43.0 + diff --git a/queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch b/queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch new file mode 100644 index 00000000000..16035f2dd67 --- /dev/null +++ b/queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch @@ -0,0 +1,188 @@ +From 288715497704306fceb8c2cb307a168cfb241320 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 
12:52:03 +0100 +Subject: drm/amdgpu: Reset IH OVERFLOW_CLEAR bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Friedrich Vock + +[ Upstream commit 7330256268664ea0a7dd5b07a3fed363093477dd ] + +Allows us to detect subsequent IH ring buffer overflows as well. + +Cc: Joshua Ashton +Cc: Alex Deucher +Cc: Christian König +Cc: stable@vger.kernel.org +Signed-off-by: Friedrich Vock +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++ + drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++ + drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++ + 9 files changed, 52 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c +index df385ffc97683..6578ca1b90afa 100644 +--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c +@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, + tmp = RREG32(mmIH_RB_CNTL); + tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); ++ ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; ++ WREG32(mmIH_RB_CNTL, tmp); + } + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c +index b8c47e0cf37ad..c19681492efa7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c +@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32(mmIH_RB_CNTL, tmp); + + out: + return (wptr & ih->ptr_mask); +diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +index aecad530b10a6..2c02ae69883d2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32(mmIH_RB_CNTL, tmp); + + out: + return (wptr & ih->ptr_mask); +diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +index 7cd79a3844b24..657e4ca6f9dd2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +@@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); ++ ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. 
++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +index eec13cb5bf758..84e8e8b008ef6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); ++ ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c +index 9a24f17a57502..cada9f300a7f5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c +@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, + tmp = RREG32(IH_RB_CNTL); + tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); ++ ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; ++ WREG32(IH_RB_CNTL, tmp); + } + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +index b08905d1c00f0..07a5d95be07f5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32(mmIH_RB_CNTL, tmp); ++ + out: + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +index 1e83db0c5438d..74c94df423455 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. ++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); ++ + out: + return (wptr & ih->ptr_mask); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +index 59dfca093155c..f1ba76c35cd6e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +@@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows ++ * can be detected. 
++ */ ++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); ++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); ++ + out: + return (wptr & ih->ptr_mask); + } +-- +2.43.0 + diff --git a/queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch b/queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch new file mode 100644 index 00000000000..864e5a1a5aa --- /dev/null +++ b/queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch @@ -0,0 +1,70 @@ +From 85ad0276e21822aca9c6a80d8a03247daa354d1c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 20:15:17 +0800 +Subject: elevator: add new field flags in struct elevator_queue + +From: Yu Kuai + +[ Upstream commit 181d06637451b5348d746039478e71fa53dfbff6 ] + +There are only one flag to indicate that elevator is registered currently, +prepare to add a flag to disable wbt if default elevator is bfq. + +Signed-off-by: Yu Kuai +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20221019121518.3865235-6-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/elevator.c | 6 ++---- + block/elevator.h | 4 +++- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/elevator.c b/block/elevator.c +index 20e70fd3f77f9..9e12706e8d8cb 100644 +--- a/block/elevator.c ++++ b/block/elevator.c +@@ -512,7 +512,7 @@ int elv_register_queue(struct request_queue *q, bool uevent) + if (uevent) + kobject_uevent(&e->kobj, KOBJ_ADD); + +- e->registered = 1; ++ set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags); + } + return error; + } +@@ -523,11 +523,9 @@ void elv_unregister_queue(struct request_queue *q) + + lockdep_assert_held(&q->sysfs_lock); + +- if (e && e->registered) { ++ if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) { + kobject_uevent(&e->kobj, KOBJ_REMOVE); + kobject_del(&e->kobj); +- +- e->registered = 0; + } + } + +diff --git a/block/elevator.h b/block/elevator.h +index 3f0593b3bf9d3..ed574bf3e629e 100644 +--- a/block/elevator.h ++++ b/block/elevator.h +@@ -100,10 +100,12 @@ struct elevator_queue + void *elevator_data; + struct kobject kobj; + struct mutex sysfs_lock; +- unsigned int registered:1; ++ unsigned long flags; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); + }; + ++#define ELEVATOR_FLAG_REGISTERED 0 ++ + /* + * block elevator interface + */ +-- +2.43.0 + diff --git a/queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch b/queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch new file mode 100644 index 00000000000..ede298aeb06 --- /dev/null +++ b/queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch @@ -0,0 +1,39 @@ +From efa75e7a472dd9a1c9519c1cf50e37d2b5d3ca47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Oct 2022 20:15:13 +0800 +Subject: elevator: remove redundant code in elv_unregister_queue() + +From: Yu Kuai + +[ Upstream commit 6d9f4cf125585ebf0718abcf5ce9ca898877c6d2 ] + +"elevator_queue *e" is already declared and initialized in the beginning +of elv_unregister_queue(). 
+ +Signed-off-by: Yu Kuai +Reviewed-by: Christoph Hellwig +Reviewed-by: Eric Biggers +Link: https://lore.kernel.org/r/20221019121518.3865235-2-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks") +Signed-off-by: Sasha Levin +--- + block/elevator.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/block/elevator.c b/block/elevator.c +index bd71f0fc4e4b6..20e70fd3f77f9 100644 +--- a/block/elevator.c ++++ b/block/elevator.c +@@ -524,8 +524,6 @@ void elv_unregister_queue(struct request_queue *q) + lockdep_assert_held(&q->sysfs_lock); + + if (e && e->registered) { +- struct elevator_queue *e = q->elevator; +- + kobject_uevent(&e->kobj, KOBJ_REMOVE); + kobject_del(&e->kobj); + +-- +2.43.0 + diff --git a/queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch b/queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch new file mode 100644 index 00000000000..102d36035b6 --- /dev/null +++ b/queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch @@ -0,0 +1,65 @@ +From 253749176dc92e57dc90d7bf99dd82310f3bf2ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 16:34:00 +0100 +Subject: exit: wait_task_zombie: kill the no longer necessary + spin_lock_irq(siglock) + +From: Oleg Nesterov + +[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ] + +After the recent changes nobody use siglock to read the values protected +by stats_lock, we can kill spin_lock_irq(¤t->sighand->siglock) and +update the comment. + +With this patch only __exit_signal() and thread_group_start_cputime() take +stats_lock under siglock. + +Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/exit.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/kernel/exit.c b/kernel/exit.c +index bccfa4218356e..c95fffc625fcd 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1146,17 +1146,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) + * and nobody can change them. + * + * psig->stats_lock also protects us from our sub-threads +- * which can reap other children at the same time. Until +- * we change k_getrusage()-like users to rely on this lock +- * we have to take ->siglock as well. ++ * which can reap other children at the same time. + * + * We use thread_group_cputime_adjusted() to get times for + * the thread group, which consolidates times for all threads + * in the group including the group leader. 
+ */ + thread_group_cputime_adjusted(p, &tgutime, &tgstime); +- spin_lock_irq(¤t->sighand->siglock); +- write_seqlock(&psig->stats_lock); ++ write_seqlock_irq(&psig->stats_lock); + psig->cutime += tgutime + sig->cutime; + psig->cstime += tgstime + sig->cstime; + psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; +@@ -1179,8 +1176,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) + psig->cmaxrss = maxrss; + task_io_accounting_add(&psig->ioac, &p->ioac); + task_io_accounting_add(&psig->ioac, &sig->ioac); +- write_sequnlock(&psig->stats_lock); +- spin_unlock_irq(¤t->sighand->siglock); ++ write_sequnlock_irq(&psig->stats_lock); + } + + if (wo->wo_rusage) +-- +2.43.0 + diff --git a/queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch b/queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch new file mode 100644 index 00000000000..9ad1d0e760d --- /dev/null +++ b/queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch @@ -0,0 +1,45 @@ +From 30a5f96454071a295334641ab88e22da9c9bcd99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 18:45:01 +0200 +Subject: fs/proc: do_task_stat: use __for_each_thread() + +From: Oleg Nesterov + +[ Upstream commit 7904e53ed5a20fc678c01d5d1b07ec486425bb6a ] + +do/while_each_thread should be avoided when possible. + +Link: https://lkml.kernel.org/r/20230909164501.GA11581@redhat.com +Signed-off-by: Oleg Nesterov +Cc: Eric W. Biederman +Signed-off-by: Andrew Morton +Stable-dep-of: 7601df8031fd ("fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats") +Signed-off-by: Sasha Levin +--- + fs/proc/array.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 1b0d78dfd20f9..bcb645627991e 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -526,12 +526,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + + /* add up live thread stats at the group level */ + if (whole) { +- struct task_struct *t = task; +- do { ++ struct task_struct *t; ++ ++ __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); +- } while_each_thread(task, t); ++ } + + min_flt += sig->min_flt; + maj_flt += sig->maj_flt; +-- +2.43.0 + diff --git a/queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch b/queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch new file mode 100644 index 00000000000..228aa6db29e --- /dev/null +++ b/queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch @@ -0,0 +1,130 @@ +From ccfb929b0f854215d56556ebff5261bc0f01227c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 16:33:57 +0100 +Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the + threads/children stats + +From: Oleg Nesterov + +[ Upstream commit 7601df8031fd67310af891897ef6cc0df4209305 ] + +lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call +do_task_stat() at the same time and the process has NR_THREADS, it will +spin with irqs disabled O(NR_CPUS * NR_THREADS) time. + +Change do_task_stat() to use sig->stats_lock to gather the statistics +outside of ->siglock protected section, in the likely case this code will +run lockless. + +Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. 
Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/array.c | 58 +++++++++++++++++++++++++++---------------------- + 1 file changed, 32 insertions(+), 26 deletions(-) + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index bcb645627991e..d210b2f8b7ed5 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + int permitted; + struct mm_struct *mm; + unsigned long long start_time; +- unsigned long cmin_flt = 0, cmaj_flt = 0; +- unsigned long min_flt = 0, maj_flt = 0; +- u64 cutime, cstime, utime, stime; +- u64 cgtime, gtime; ++ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; ++ u64 cutime, cstime, cgtime, utime, stime, gtime; + unsigned long rsslim = 0; + unsigned long flags; + int exit_code = task->exit_code; ++ struct signal_struct *sig = task->signal; ++ unsigned int seq = 1; + + state = *get_task_state(task); + vsize = eip = esp = 0; +@@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + + sigemptyset(&sigign); + sigemptyset(&sigcatch); +- cutime = cstime = 0; +- cgtime = gtime = 0; + + if (lock_task_sighand(task, &flags)) { +- struct signal_struct *sig = task->signal; +- + if (sig->tty) { + struct pid *pgrp = tty_get_pgrp(sig->tty); + tty_pgrp = pid_nr_ns(pgrp, ns); +@@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + num_threads = get_nr_threads(task); + collect_sigign_sigcatch(task, &sigign, &sigcatch); + +- cmin_flt = sig->cmin_flt; +- cmaj_flt = sig->cmaj_flt; +- cutime = sig->cutime; +- cstime = sig->cstime; +- cgtime = sig->cgtime; + rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); + +- /* add up live thread stats at the group level */ + if (whole) { +- struct task_struct *t; +- +- __for_each_thread(sig, t) { +- min_flt += t->min_flt; +- maj_flt += t->maj_flt; +- gtime += task_gtime(t); +- } +- +- min_flt += sig->min_flt; +- maj_flt += sig->maj_flt; +- gtime += sig->gtime; +- + if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) + exit_code = sig->group_exit_code; + } +@@ -552,6 +530,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + if (permitted && (!whole || num_threads < 2)) + wchan = !task_is_running(task); + ++ do { ++ seq++; /* 2 on the 1st/lockless path, otherwise odd */ ++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); ++ ++ cmin_flt = sig->cmin_flt; ++ cmaj_flt = sig->cmaj_flt; ++ cutime = sig->cutime; ++ cstime = sig->cstime; ++ cgtime = sig->cgtime; ++ ++ if (whole) { ++ struct task_struct *t; ++ ++ min_flt = sig->min_flt; ++ maj_flt = sig->maj_flt; ++ gtime = sig->gtime; ++ ++ rcu_read_lock(); ++ __for_each_thread(sig, t) { ++ min_flt += t->min_flt; ++ maj_flt += t->maj_flt; ++ gtime += task_gtime(t); ++ } ++ rcu_read_unlock(); ++ } ++ } while (need_seqretry(&sig->stats_lock, seq)); ++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags); ++ + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { +-- +2.43.0 + diff --git a/queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch b/queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch new file mode 100644 index 00000000000..3b77b3fb5f2 --- /dev/null +++ b/queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch @@ -0,0 +1,93 @@ +From 81ff25ceeea37b4c83ad30633828b50019a78f16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 19:25:54 +0200 +Subject: getrusage: add the "signal_struct 
*sig" local variable + +From: Oleg Nesterov + +[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ] + +No functional changes, cleanup/preparation. + +Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com +Signed-off-by: Oleg Nesterov +Cc: Eric W. Biederman +Signed-off-by: Andrew Morton +Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()") +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 37 +++++++++++++++++++------------------ + 1 file changed, 19 insertions(+), 18 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index c85e1abf7b7c7..177155ba50cd3 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1779,6 +1779,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + unsigned long flags; + u64 tgutime, tgstime, utime, stime; + unsigned long maxrss = 0; ++ struct signal_struct *sig = p->signal; + + memset((char *)r, 0, sizeof (*r)); + utime = stime = 0; +@@ -1786,7 +1787,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + if (who == RUSAGE_THREAD) { + task_cputime_adjusted(current, &utime, &stime); + accumulate_thread_rusage(p, r); +- maxrss = p->signal->maxrss; ++ maxrss = sig->maxrss; + goto out; + } + +@@ -1796,15 +1797,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + switch (who) { + case RUSAGE_BOTH: + case RUSAGE_CHILDREN: +- utime = p->signal->cutime; +- stime = p->signal->cstime; +- r->ru_nvcsw = p->signal->cnvcsw; +- r->ru_nivcsw = p->signal->cnivcsw; +- r->ru_minflt = p->signal->cmin_flt; +- r->ru_majflt = p->signal->cmaj_flt; +- r->ru_inblock = p->signal->cinblock; +- r->ru_oublock = p->signal->coublock; +- maxrss = p->signal->cmaxrss; ++ utime = sig->cutime; ++ stime = sig->cstime; ++ r->ru_nvcsw = sig->cnvcsw; ++ r->ru_nivcsw = sig->cnivcsw; ++ r->ru_minflt = sig->cmin_flt; ++ r->ru_majflt = sig->cmaj_flt; ++ r->ru_inblock = sig->cinblock; ++ r->ru_oublock = sig->coublock; ++ maxrss = sig->cmaxrss; + + if (who == RUSAGE_CHILDREN) + break; +@@ -1814,14 +1815,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; +- r->ru_nvcsw += p->signal->nvcsw; +- r->ru_nivcsw += p->signal->nivcsw; +- r->ru_minflt += p->signal->min_flt; +- r->ru_majflt += p->signal->maj_flt; +- r->ru_inblock += p->signal->inblock; +- r->ru_oublock += p->signal->oublock; +- if (maxrss < p->signal->maxrss) +- maxrss = p->signal->maxrss; ++ r->ru_nvcsw += sig->nvcsw; ++ r->ru_nivcsw += sig->nivcsw; ++ r->ru_minflt += sig->min_flt; ++ r->ru_majflt += sig->maj_flt; ++ r->ru_inblock += sig->inblock; ++ r->ru_oublock += sig->oublock; ++ if (maxrss < sig->maxrss) ++ maxrss = sig->maxrss; + t = p; + do { + accumulate_thread_rusage(t, r); +-- +2.43.0 + diff --git a/queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch b/queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch new file mode 100644 index 00000000000..aecdb3ca8c2 --- /dev/null +++ b/queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch @@ -0,0 +1,111 @@ +From 915ff491a0f50b26e3e1c864d6331479e6056eeb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 16:50:50 +0100 +Subject: getrusage: move thread_group_cputime_adjusted() outside of + lock_task_sighand() + +From: Oleg Nesterov + +[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ] + +Patch series "getrusage: use sig->stats_lock", v2. 
+ +This patch (of 2): + +thread_group_cputime() does its own locking, we can safely shift +thread_group_cputime_adjusted() which does another for_each_thread loop +outside of ->siglock protected section. + +This is also preparation for the next patch which changes getrusage() to +use stats_lock instead of siglock, thread_group_cputime() takes the same +lock. With the current implementation recursive read_seqbegin_or_lock() +is fine, thread_group_cputime() can't enter the slow mode if the caller +holds stats_lock, yet this looks more safe and better performance-wise. + +Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com +Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com +Signed-off-by: Oleg Nesterov +Reported-by: Dylan Hatch +Tested-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index 177155ba50cd3..2646047fe5513 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1778,17 +1778,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + struct task_struct *t; + unsigned long flags; + u64 tgutime, tgstime, utime, stime; +- unsigned long maxrss = 0; ++ unsigned long maxrss; ++ struct mm_struct *mm; + struct signal_struct *sig = p->signal; + +- memset((char *)r, 0, sizeof (*r)); ++ memset(r, 0, sizeof(*r)); + utime = stime = 0; ++ maxrss = 0; + + if (who == RUSAGE_THREAD) { + task_cputime_adjusted(current, &utime, &stime); + accumulate_thread_rusage(p, r); + maxrss = sig->maxrss; +- goto out; ++ goto out_thread; + } + + if (!lock_task_sighand(p, &flags)) +@@ -1812,9 +1814,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + fallthrough; + + case RUSAGE_SELF: +- thread_group_cputime_adjusted(p, &tgutime, &tgstime); +- utime += tgutime; +- stime += tgstime; + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; +@@ -1834,19 +1833,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + } + unlock_task_sighand(p, &flags); + +-out: +- r->ru_utime = ns_to_kernel_old_timeval(utime); +- r->ru_stime = ns_to_kernel_old_timeval(stime); ++ if (who == RUSAGE_CHILDREN) ++ goto out_children; + +- if (who != RUSAGE_CHILDREN) { +- struct mm_struct *mm = get_task_mm(p); ++ thread_group_cputime_adjusted(p, &tgutime, &tgstime); ++ utime += tgutime; ++ stime += tgstime; + +- if (mm) { +- setmax_mm_hiwater_rss(&maxrss, mm); +- mmput(mm); +- } ++out_thread: ++ mm = get_task_mm(p); ++ if (mm) { ++ setmax_mm_hiwater_rss(&maxrss, mm); ++ mmput(mm); + } ++ ++out_children: + r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ ++ r->ru_utime = ns_to_kernel_old_timeval(utime); ++ r->ru_stime = ns_to_kernel_old_timeval(stime); + } + + SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) +-- +2.43.0 + diff --git a/queue-6.1/getrusage-use-__for_each_thread.patch b/queue-6.1/getrusage-use-__for_each_thread.patch new file mode 100644 index 00000000000..81c6b5ff66e --- /dev/null +++ b/queue-6.1/getrusage-use-__for_each_thread.patch @@ -0,0 +1,43 @@ +From c2606554387cfe68ca114e907e6556c86ef1b3ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 19:26:29 +0200 +Subject: getrusage: use __for_each_thread() + +From: Oleg Nesterov + +[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ] + +do/while_each_thread should be avoided when possible. 
+ +Plus this change allows to avoid lock_task_sighand(), we can use rcu +and/or sig->stats_lock instead. + +Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com +Signed-off-by: Oleg Nesterov +Cc: Eric W. Biederman +Signed-off-by: Andrew Morton +Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()") +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index 2646047fe5513..04102538cf43f 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1822,10 +1822,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; +- t = p; +- do { ++ __for_each_thread(sig, t) + accumulate_thread_rusage(t, r); +- } while_each_thread(p, t); + break; + + default: +-- +2.43.0 + diff --git a/queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch b/queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch new file mode 100644 index 00000000000..df97529f40d --- /dev/null +++ b/queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch @@ -0,0 +1,92 @@ +From a9c7d357939f612d4a8a5533af560a4b4ad4a57a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 16:50:53 +0100 +Subject: getrusage: use sig->stats_lock rather than lock_task_sighand() + +From: Oleg Nesterov + +[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ] + +lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call +getrusage() at the same time and the process has NR_THREADS, spin_lock_irq +will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. + +Change getrusage() to use sig->stats_lock, it was specifically designed +for this type of use. This way it runs lockless in the likely case. + +TODO: + - Change do_task_stat() to use sig->stats_lock too, then we can + remove spin_lock_irq(siglock) in wait_task_zombie(). + + - Turn sig->stats_lock into seqcount_rwlock_t, this way the + readers in the slow mode won't exclude each other. See + https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ + + - stats_lock has to disable irqs because ->siglock can be taken + in irq context, it would be very nice to change __exit_signal() + to avoid the siglock->stats_lock dependency. + +Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com +Signed-off-by: Oleg Nesterov +Reported-by: Dylan Hatch +Tested-by: Dylan Hatch +Cc: Eric W. 
Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index 04102538cf43f..d06eda1387b69 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1781,7 +1781,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + unsigned long maxrss; + struct mm_struct *mm; + struct signal_struct *sig = p->signal; ++ unsigned int seq = 0; + ++retry: + memset(r, 0, sizeof(*r)); + utime = stime = 0; + maxrss = 0; +@@ -1793,8 +1795,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + goto out_thread; + } + +- if (!lock_task_sighand(p, &flags)) +- return; ++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + switch (who) { + case RUSAGE_BOTH: +@@ -1822,14 +1823,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; ++ ++ rcu_read_lock(); + __for_each_thread(sig, t) + accumulate_thread_rusage(t, r); ++ rcu_read_unlock(); ++ + break; + + default: + BUG(); + } +- unlock_task_sighand(p, &flags); ++ ++ if (need_seqretry(&sig->stats_lock, seq)) { ++ seq = 1; ++ goto retry; ++ } ++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + + if (who == RUSAGE_CHILDREN) + goto out_children; +-- +2.43.0 + diff --git a/queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch b/queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch new file mode 100644 index 00000000000..9547060367f --- /dev/null +++ b/queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch @@ -0,0 +1,168 @@ +From 9ff0df5b0577368409e200af8042ed5138f3cc34 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Oct 2023 11:32:52 +0200 +Subject: KVM: s390: add stat counter for shadow gmap events + +From: Nico Boehr + +[ Upstream commit c3235e2dd6956448a562d6b1112205eeebc8ab43 ] + +The shadow gmap tracks memory of nested guests (guest-3). In certain +scenarios, the shadow gmap needs to be rebuilt, which is a costly operation +since it involves a SIE exit into guest-1 for every entry in the respective +shadow level. + +Add kvm stat counters when new shadow structures are created at various +levels. Also add a counter gmap_shadow_create when a completely fresh +shadow gmap is created as well as a counter gmap_shadow_reuse when an +existing gmap is being reused. + +Note that when several levels are shadowed at once, counters on all +affected levels will be increased. + +Also note that not all page table levels need to be present and a ASCE +can directly point to e.g. a segment table. In this case, a new segment +table will always be equivalent to a new shadow gmap and hence will be +counted as gmap_shadow_create and not as gmap_shadow_segment. 
+ +Signed-off-by: Nico Boehr +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Reviewed-by: Janosch Frank +Signed-off-by: Janosch Frank +Link: https://lore.kernel.org/r/20231009093304.2555344-2-nrb@linux.ibm.com +Message-Id: <20231009093304.2555344-2-nrb@linux.ibm.com> +Stable-dep-of: fe752331d4b3 ("KVM: s390: vsie: fix race during shadow creation") +Signed-off-by: Sasha Levin +--- + arch/s390/include/asm/kvm_host.h | 7 +++++++ + arch/s390/kvm/gaccess.c | 7 +++++++ + arch/s390/kvm/kvm-s390.c | 9 ++++++++- + arch/s390/kvm/vsie.c | 5 ++++- + 4 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h +index b1e98a9ed152b..09abf000359f8 100644 +--- a/arch/s390/include/asm/kvm_host.h ++++ b/arch/s390/include/asm/kvm_host.h +@@ -777,6 +777,13 @@ struct kvm_vm_stat { + u64 inject_service_signal; + u64 inject_virtio; + u64 aen_forward; ++ u64 gmap_shadow_create; ++ u64 gmap_shadow_reuse; ++ u64 gmap_shadow_r1_entry; ++ u64 gmap_shadow_r2_entry; ++ u64 gmap_shadow_r3_entry; ++ u64 gmap_shadow_sg_entry; ++ u64 gmap_shadow_pg_entry; + }; + + struct kvm_arch_memory_slot { +diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c +index 0243b6e38d364..3beceff5f1c09 100644 +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection, + int *fake) + { ++ struct kvm *kvm; + struct gmap *parent; + union asce asce; + union vaddress vaddr; +@@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + + *fake = 0; + *dat_protection = 0; ++ kvm = sg->private; + parent = sg->parent; + vaddr.addr = saddr; + asce.val = sg->orig_asce; +@@ -1341,6 +1343,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); + if (rc) + return rc; ++ kvm->stat.gmap_shadow_r1_entry++; + } + fallthrough; + case ASCE_TYPE_REGION2: { +@@ -1369,6 +1372,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); + if (rc) + return rc; ++ kvm->stat.gmap_shadow_r2_entry++; + } + fallthrough; + case ASCE_TYPE_REGION3: { +@@ -1406,6 +1410,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); + if (rc) + return rc; ++ kvm->stat.gmap_shadow_r3_entry++; + } + fallthrough; + case ASCE_TYPE_SEGMENT: { +@@ -1439,6 +1444,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); + if (rc) + return rc; ++ kvm->stat.gmap_shadow_sg_entry++; + } + } + /* Return the parent address of the page table */ +@@ -1509,6 +1515,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, + pte.p |= dat_protection; + if (!rc) + rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); ++ vcpu->kvm->stat.gmap_shadow_pg_entry++; + ipte_unlock(vcpu->kvm); + mmap_read_unlock(sg->mm); + return rc; +diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c +index f604946ab2c85..348d49268a7ec 100644 +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { + STATS_DESC_COUNTER(VM, inject_pfault_done), + STATS_DESC_COUNTER(VM, inject_service_signal), + STATS_DESC_COUNTER(VM, inject_virtio), +- STATS_DESC_COUNTER(VM, aen_forward) ++ 
STATS_DESC_COUNTER(VM, aen_forward), ++ STATS_DESC_COUNTER(VM, gmap_shadow_reuse), ++ STATS_DESC_COUNTER(VM, gmap_shadow_create), ++ STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), ++ STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), ++ STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), ++ STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), ++ STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), + }; + + const struct kvm_stats_header kvm_vm_stats_header = { +diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c +index 740f8b56e63f9..b2dbf08a961e5 100644 +--- a/arch/s390/kvm/vsie.c ++++ b/arch/s390/kvm/vsie.c +@@ -1206,8 +1206,10 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + * we're holding has been unshadowed. If the gmap is still valid, + * we can safely reuse it. + */ +- if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) ++ if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) { ++ vcpu->kvm->stat.gmap_shadow_reuse++; + return 0; ++ } + + /* release the old shadow - if any, and mark the prefix as unmapped */ + release_gmap_shadow(vsie_page); +@@ -1215,6 +1217,7 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + if (IS_ERR(gmap)) + return PTR_ERR(gmap); + gmap->private = vcpu->kvm; ++ vcpu->kvm->stat.gmap_shadow_create++; + WRITE_ONCE(vsie_page->gmap, gmap); + return 0; + } +-- +2.43.0 + diff --git a/queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch b/queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch new file mode 100644 index 00000000000..db7f6483fc4 --- /dev/null +++ b/queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch @@ -0,0 +1,66 @@ +From 5a150283ce6521f9a844d5c5f14c8f1dc7e26bab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Dec 2023 13:53:17 +0100 +Subject: KVM: s390: vsie: fix race during shadow creation + +From: Christian Borntraeger + +[ Upstream commit fe752331d4b361d43cfd0b89534b4b2176057c32 ] + +Right now it is possible to see gmap->private being zero in +kvm_s390_vsie_gmap_notifier resulting in a crash. This is due to the +fact that we add gmap->private == kvm after creation: + +static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + struct vsie_page *vsie_page) +{ +[...] + gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); + if (IS_ERR(gmap)) + return PTR_ERR(gmap); + gmap->private = vcpu->kvm; + +Let children inherit the private field of the parent. 
+ +Reported-by: Marc Hartmayer +Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") +Cc: +Cc: David Hildenbrand +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Signed-off-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com +Signed-off-by: Sasha Levin +--- + arch/s390/kvm/vsie.c | 1 - + arch/s390/mm/gmap.c | 1 + + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c +index b2dbf08a961e5..d90c818a9ae71 100644 +--- a/arch/s390/kvm/vsie.c ++++ b/arch/s390/kvm/vsie.c +@@ -1216,7 +1216,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); + if (IS_ERR(gmap)) + return PTR_ERR(gmap); +- gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; + WRITE_ONCE(vsie_page->gmap, gmap); + return 0; +diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c +index 243f673fa6515..662cf23a1b44b 100644 +--- a/arch/s390/mm/gmap.c ++++ b/arch/s390/mm/gmap.c +@@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); ++ new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; +-- +2.43.0 + diff --git a/queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch b/queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch new file mode 100644 index 00000000000..720de5b3b00 --- /dev/null +++ b/queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch @@ -0,0 +1,95 @@ +From 571c5e0b16a801d079f9d65fde6131b1e7141702 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Mar 2023 08:36:08 +0200 +Subject: nfp: flower: add goto_chain_index for ct entry + +From: Wentao Jia + +[ Upstream commit 3e44d19934b92398785b3ffc2353b9eba264140e ] + +The chain_index has different means in pre ct entry and post ct entry. +In pre ct entry, it means chain index, but in post ct entry, it means +goto chain index, it is confused. + +chain_index and goto_chain_index may be present in one flow rule, It +cannot be distinguished by one field chain_index, both chain_index +and goto_chain_index are required in the follow-up patch to support +multiple ct zones + +Another field goto_chain_index is added to record the goto chain index. +If no goto action in post ct entry, goto_chain_index is 0. + +Signed-off-by: Wentao Jia +Acked-by: Simon Horman +Signed-off-by: Louis Peens +Signed-off-by: Jakub Kicinski +Stable-dep-of: cefa98e806fd ("nfp: flower: add hardware offload check for post ct entry") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++-- + drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +index 7af03b45555dd..da7a47416a208 100644 +--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +@@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, + /* Checks that the chain_index of the filter matches the + * chain_index of the GOTO action. 
+ */ +- if (post_ct_entry->chain_index != pre_ct_entry->chain_index) ++ if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) + return -EINVAL; + + err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); +@@ -1776,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + ct_entry->type = CT_TYPE_PRE_CT; +- ct_entry->chain_index = ct_goto->chain_index; ++ ct_entry->chain_index = flow->common.chain_index; ++ ct_entry->goto_chain_index = ct_goto->chain_index; + list_add(&ct_entry->list_node, &zt->pre_ct_list); + zt->pre_ct_count++; + +@@ -1799,6 +1800,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + struct nfp_fl_ct_zone_entry *zt; + bool wildcarded = false; + struct flow_match_ct ct; ++ struct flow_action_entry *ct_goto; + + flow_rule_match_ct(rule, &ct); + if (!ct.mask->ct_zone) { +@@ -1823,6 +1825,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + + ct_entry->type = CT_TYPE_POST_CT; + ct_entry->chain_index = flow->common.chain_index; ++ ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); ++ ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; + list_add(&ct_entry->list_node, &zt->post_ct_list); + zt->post_ct_count++; + +diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +index 762c0b36e269b..9440ab776ecea 100644 +--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h ++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +@@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { + * @cookie: Flow cookie, same as original TC flow, used as key + * @list_node: Used by the list + * @chain_index: Chain index of the original flow ++ * @goto_chain_index: goto chain index of the flow + * @netdev: netdev structure. + * @type: Type of pre-entry from enum ct_entry_type + * @zt: Reference to the zone table this belongs to +@@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { + unsigned long cookie; + struct list_head list_node; + u32 chain_index; ++ u32 goto_chain_index; + enum ct_entry_type type; + struct net_device *netdev; + struct nfp_fl_ct_zone_entry *zt; +-- +2.43.0 + diff --git a/queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch b/queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch new file mode 100644 index 00000000000..6d00f776483 --- /dev/null +++ b/queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch @@ -0,0 +1,68 @@ +From 435ba0cb7080cb3f0960b93523f5da947205147a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Jan 2024 17:19:08 +0200 +Subject: nfp: flower: add hardware offload check for post ct entry + +From: Hui Zhou + +[ Upstream commit cefa98e806fd4e2a5e2047457a11ae5f17b8f621 ] + +The nfp offload flow pay will not allocate a mask id when the out port +is openvswitch internal port. This is because these flows are used to +configure the pre_tun table and are never actually send to the firmware +as an add-flow message. When a tc rule which action contains ct and +the post ct entry's out port is openvswitch internal port, the merge +offload flow pay with the wrong mask id of 0 will be send to the +firmware. Actually, the nfp can not support hardware offload for this +situation, so return EOPNOTSUPP. 
+ +Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") +CC: stable@vger.kernel.org # 5.14+ +Signed-off-by: Hui Zhou +Signed-off-by: Louis Peens +Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +index da7a47416a208..497766ecdd91d 100644 +--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +@@ -1797,10 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct nfp_fl_ct_flow_entry *ct_entry; ++ struct flow_action_entry *ct_goto; + struct nfp_fl_ct_zone_entry *zt; ++ struct flow_action_entry *act; + bool wildcarded = false; + struct flow_match_ct ct; +- struct flow_action_entry *ct_goto; ++ int i; ++ ++ flow_action_for_each(i, act, &rule->action) { ++ switch (act->id) { ++ case FLOW_ACTION_REDIRECT: ++ case FLOW_ACTION_REDIRECT_INGRESS: ++ case FLOW_ACTION_MIRRED: ++ case FLOW_ACTION_MIRRED_INGRESS: ++ if (act->dev->rtnl_link_ops && ++ !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { ++ NL_SET_ERR_MSG_MOD(extack, ++ "unsupported offload: out port is openvswitch internal port"); ++ return -EOPNOTSUPP; ++ } ++ break; ++ default: ++ break; ++ } ++ } + + flow_rule_match_ct(rule, &ct); + if (!ct.mask->ct_zone) { +-- +2.43.0 + diff --git a/queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch b/queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch new file mode 100644 index 00000000000..acba474893f --- /dev/null +++ b/queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch @@ -0,0 +1,97 @@ +From 751dd31cb25b1fda2357852e790cffcc04bb0544 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Jan 2024 09:58:39 +0100 +Subject: readahead: avoid multiple marked readahead pages + +From: Jan Kara + +[ Upstream commit ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 ] + +ra_alloc_folio() marks a page that should trigger next round of async +readahead. However it rounds up computed index to the order of page being +allocated. This can however lead to multiple consecutive pages being +marked with readahead flag. Consider situation with index == 1, mark == +1, order == 0. We insert order 0 page at index 1 and mark it. Then we +bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page +at index 2 is marked as well. Then we bump order to 2, index is +incremented to 4, mark gets rounded to 4 so page at index 4 is marked as +well. The fact that multiple pages get marked within a single readahead +window confuses the readahead logic and results in readahead window being +trimmed back to 1. This situation is triggered in particular when maximum +readahead window size is not a power of two (in the observed case it was +768 KB) and as a result sequential read throughput suffers. + +Fix the problem by rounding 'mark' down instead of up. Because the index +is naturally aligned to 'order', we are guaranteed 'rounded mark' == index +iff 'mark' is within the page we are allocating at 'index' and thus +exactly one page is marked with readahead flag as required by the +readahead code and sequential read performance is restored. 
+ +This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix +readahead with large folios"). The commit changed the rounding with the +rationale: + +"... we were setting the readahead flag on the folio which contains the +last byte read from the block. This is wrong because we will trigger +readahead at the end of the read without waiting to see if a subsequent +read is going to use the pages we just read." + +Although this is true, the fact is this was always the case with read +sizes not aligned to folio boundaries and large folios in the page cache +just make the situation more obvious (and frequent). Also for sequential +read workloads it is better to trigger the readahead earlier rather than +later. It is true that the difference in the rounding and thus earlier +triggering of the readahead can result in reading more for semi-random +workloads. However workloads really suffering from this seem to be rare. +In particular I have verified that the workload described in commit +b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading +random 100k blocks from a file like: + +[reader] +bs=100k +rw=randread +numjobs=1 +size=64g +runtime=60s + +is not impacted by the rounding change and achieves ~70MB/s in both cases. + +[jack@suse.cz: fix one more place where mark rounding was done as well] + Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz +Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz +Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") +Signed-off-by: Jan Kara +Cc: Matthew Wilcox +Cc: Guo Xuenan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/readahead.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/mm/readahead.c b/mm/readahead.c +index ba43428043a35..e4b772bb70e68 100644 +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, + + if (!folio) + return -ENOMEM; +- mark = round_up(mark, 1UL << order); ++ mark = round_down(mark, 1UL << order); + if (index == mark) + folio_set_readahead(folio); + err = filemap_add_folio(ractl->mapping, folio, index, gfp); +@@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl, + * It's the expected callback index, assume sequential access. + * Ramp up sizes, and push forward the readahead window. + */ +- expected = round_up(ra->start + ra->size - ra->async_size, ++ expected = round_down(ra->start + ra->size - ra->async_size, + 1UL << order); + if (index == expected || index == (ra->start + ra->size)) { + ra->start += ra->size; +-- +2.43.0 + diff --git a/queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch b/queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch new file mode 100644 index 00000000000..f6f376fbac1 --- /dev/null +++ b/queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch @@ -0,0 +1,61 @@ +From edc5869301a9444152614d751e3bb60d6c2a75db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 Jan 2024 06:14:29 -0700 +Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems + +From: Nico Pache + +[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ] + +On systems with 64k page size and 512M huge page sizes, the allocation and +test succeeds but errors out at the munmap. As the comment states, munmap +will failure if its not HUGEPAGE aligned. 
This is due to the length of +the mapping being 1/2 the size of the hugepage causing the munmap to not +be hugepage aligned. Fix this by making the mapping length the full +hugepage if the hugepage is larger than the length of the mapping. + +Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com +Signed-off-by: Nico Pache +Cc: Donet Tom +Cc: Shuah Khan +Cc: Christophe Leroy +Cc: Michael Ellerman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c +index 312889edb84ab..c65c55b7a789f 100644 +--- a/tools/testing/selftests/vm/map_hugetlb.c ++++ b/tools/testing/selftests/vm/map_hugetlb.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include "vm_util.h" + + #define LENGTH (256UL*1024*1024) + #define PROTECTION (PROT_READ | PROT_WRITE) +@@ -70,10 +71,16 @@ int main(int argc, char **argv) + { + void *addr; + int ret; ++ size_t hugepage_size; + size_t length = LENGTH; + int flags = FLAGS; + int shift = 0; + ++ hugepage_size = default_huge_page_size(); ++ /* munmap with fail if the length is not page aligned */ ++ if (hugepage_size > length) ++ length = hugepage_size; ++ + if (argc > 1) + length = atol(argv[1]) << 20; + if (argc > 2) { +-- +2.43.0 + diff --git a/queue-6.1/selftests-mm-switch-to-bash-from-sh.patch b/queue-6.1/selftests-mm-switch-to-bash-from-sh.patch new file mode 100644 index 00000000000..0b6dc0a459d --- /dev/null +++ b/queue-6.1/selftests-mm-switch-to-bash-from-sh.patch @@ -0,0 +1,58 @@ +From a5559df581c2a4189c25ad561a0b87f7bccd22ce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Jan 2024 14:04:54 +0500 +Subject: selftests/mm: switch to bash from sh + +From: Muhammad Usama Anjum + +[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ] + +Running charge_reserved_hugetlb.sh generates errors if sh is set to +dash: + +./charge_reserved_hugetlb.sh: 9: [[: not found +./charge_reserved_hugetlb.sh: 19: [[: not found +./charge_reserved_hugetlb.sh: 27: [[: not found +./charge_reserved_hugetlb.sh: 37: [[: not found +./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected + +Switch to using /bin/bash instead of /bin/sh. Make the switch for +write_hugetlb_memory.sh as well which is called from +charge_reserved_hugetlb.sh. + +Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com +Signed-off-by: Muhammad Usama Anjum +Cc: Muhammad Usama Anjum +Cc: Shuah Khan +Cc: David Laight +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +- + tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +index 0899019a7fcb4..e14bdd4455f2d 100644 +--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh ++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + + # Kselftest framework requirement - SKIP code is 4. 
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh +index 70a02301f4c27..3d2d2eb9d6fff 100644 +--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh ++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + + set -e +-- +2.43.0 + diff --git a/queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch b/queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch new file mode 100644 index 00000000000..7e62dd591d6 --- /dev/null +++ b/queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch @@ -0,0 +1,53 @@ +From 29eb96441917da2cf892c37a0d2fb87ceabfed17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Jan 2024 22:49:51 +0100 +Subject: selftests: mptcp: decrease BW in simult flows + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 5e2f3c65af47e527ccac54060cf909e3306652ff ] + +When running the simult_flow selftest in slow environments -- e.g. QEmu +without KVM support --, the results can be unstable. This selftest +checks if the aggregated bandwidth is (almost) fully used as expected. + +To help improving the stability while still keeping the same validation +in place, the BW and the delay are reduced to lower the pressure on the +CPU. + +Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") +Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") +Cc: stable@vger.kernel.org +Suggested-by: Paolo Abeni +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index 6b0014f538a22..6bda70af03a83 100755 +--- a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -303,12 +303,12 @@ done + + setup + run_test 10 10 0 0 "balanced bwidth" +-run_test 10 10 1 50 "balanced bwidth with unbalanced delay" ++run_test 10 10 1 25 "balanced bwidth with unbalanced delay" + + # we still need some additional infrastructure to pass the following test-cases +-run_test 30 10 0 0 "unbalanced bwidth" +-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" ++run_test 10 3 0 0 "unbalanced bwidth" ++run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" ++run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" + + mptcp_lib_result_print_all_tap + exit $ret +-- +2.43.0 + diff --git a/queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch b/queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch new file mode 100644 index 00000000000..58f1540fc0a --- /dev/null +++ b/queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch @@ -0,0 +1,42 @@ +From 94a73f9dec7fef0ebc322fac05236c755fb6aa29 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Feb 2024 19:25:37 +0100 +Subject: selftests: mptcp: simult flows: fix some subtest names + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 4d8e0dde0403b5a86aa83e243f020711a9c3e31f ] + +The selftest was correctly recording all the results, but the 'reverse +direction' part was missing in the name when needed. 
+ +It is important to have a unique (sub)test name in TAP, because some CI +environments drop tests with duplicated name. + +Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index 6bda70af03a83..41d2f4991b35c 100755 +--- a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -269,7 +269,8 @@ run_test() + [ $bail -eq 0 ] || exit $ret + fi + +- printf "%-60s" "$msg - reverse direction" ++ msg+=" - reverse direction" ++ printf "%-60s" "${msg}" + do_transfer $large $small $time + lret=$? + mptcp_lib_result_code "${lret}" "${msg}" +-- +2.43.0 + diff --git a/queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch b/queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch new file mode 100644 index 00000000000..19529613cd3 --- /dev/null +++ b/queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch @@ -0,0 +1,68 @@ +From 4eeef0aaffa567f812390612c30f800de02edd73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 15:21:31 +0200 +Subject: selftests: mptcp: simult flows: format subtests results in TAP + +From: Matthieu Baerts + +[ Upstream commit 675d99338e7a6cd925d61d7dbf8c26612f7f08a9 ] + +The current selftests infrastructure formats the results in TAP 13. This +version doesn't support subtests and only the end result of each +selftest is taken into account. It means that a single issue in a +subtest of a selftest containing multiple subtests forces the whole +selftest to be marked as failed. It also means that subtests results are +not tracked by CIs executing selftests. + +MPTCP selftests run hundreds of various subtests. It is then important +to track each of them and not one result per selftest. + +It is particularly interesting to do that when validating stable kernels +with the last version of the test suite: tests might fail because a +feature is not supported but the test didn't skip that part. In this +case, if subtests are not tracked, the whole selftest will be marked as +failed making the other subtests useless because their results are +ignored. + +This patch formats subtests results in TAP in simult_flows.sh selftest. + +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 +Acked-by: Paolo Abeni +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. Miller +Stable-dep-of: 5e2f3c65af47 ("selftests: mptcp: decrease BW in simult flows") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index 4a417f9d51d67..6b0014f538a22 100755 +--- a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -263,6 +263,7 @@ run_test() + printf "%-60s" "$msg" + do_transfer $small $large $time + lret=$? 
++ mptcp_lib_result_code "${lret}" "${msg}" + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret +@@ -271,6 +272,7 @@ run_test() + printf "%-60s" "$msg - reverse direction" + do_transfer $large $small $time + lret=$? ++ mptcp_lib_result_code "${lret}" "${msg}" + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret +@@ -307,4 +309,6 @@ run_test 10 10 1 50 "balanced bwidth with unbalanced delay" + run_test 30 10 0 0 "unbalanced bwidth" + run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" + run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" ++ ++mptcp_lib_result_print_all_tap + exit $ret +-- +2.43.0 + diff --git a/queue-6.1/series b/queue-6.1/series index ce57a431ba3..6aa0f5bb096 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -28,3 +28,41 @@ netrom-fix-a-data-race-around-sysctl_netrom_transpor.patch-15916 netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch netrom-fix-data-races-around-sysctl_net_busy_read.patch +kvm-s390-add-stat-counter-for-shadow-gmap-events.patch +kvm-s390-vsie-fix-race-during-shadow-creation.patch +asoc-codecs-wcd938x-fix-headphones-volume-controls.patch +drm-amd-display-fix-uninitialized-variable-usage-in-.patch +nfp-flower-add-goto_chain_index-for-ct-entry.patch +nfp-flower-add-hardware-offload-check-for-post-ct-en.patch +readahead-avoid-multiple-marked-readahead-pages.patch +selftests-mm-switch-to-bash-from-sh.patch +selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch +xhci-process-isoc-td-properly-when-there-was-a-trans.patch +xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch +drm-amdgpu-reset-ih-overflow_clear-bit.patch +selftests-mptcp-simult-flows-format-subtests-results.patch +selftests-mptcp-decrease-bw-in-simult-flows.patch +blk-iocost-disable-writeback-throttling.patch +elevator-remove-redundant-code-in-elv_unregister_que.patch +blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch +elevator-add-new-field-flags-in-struct-elevator_queu.patch +blk-wbt-don-t-enable-throttling-if-default-elevator-.patch +blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch +blk-wbt-pass-a-gendisk-to-wbt_init.patch +blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch +blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch +blk-rq-qos-constify-rq_qos_ops.patch +blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch +blk-wbt-fix-detection-of-dirty-throttled-tasks.patch +drm-amd-display-wrong-colorimetry-workaround.patch +drm-amd-display-fix-mst-null-ptr-for-rv.patch +getrusage-add-the-signal_struct-sig-local-variable.patch +getrusage-move-thread_group_cputime_adjusted-outside.patch +getrusage-use-__for_each_thread.patch +getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch +fs-proc-do_task_stat-use-__for_each_thread.patch +fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch +exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch +selftests-mptcp-simult-flows-fix-some-subtest-names.patch +blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch +blk-iocost-pass-gendisk-to-ioc_refresh_params.patch diff --git a/queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch b/queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch new file mode 100644 index 00000000000..3094e1d3753 --- /dev/null +++ b/queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch @@ -0,0 +1,57 @@ +From 9b5b2f37f69d5ac40e70bb8ba57cc444a5731800 Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Thu, 25 Jan 2024 17:27:37 +0200 +Subject: xhci: handle isoc Babble and Buffer Overrun events properly + +From: Michal Pecio + +[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] + +xHCI 4.9 explicitly forbids assuming that the xHC has released its +ownership of a multi-TRB TD when it reports an error on one of the +early TRBs. Yet the driver makes such assumption and releases the TD, +allowing the remaining TRBs to be freed or overwritten by new TDs. + +The xHC should also report completion of the final TRB due to its IOC +flag being set by us, regardless of prior errors. This event cannot +be recognized if the TD has already been freed earlier, resulting in +"Transfer event TRB DMA ptr not part of current TD" error message. + +Fix this by reusing the logic for processing isoc Transaction Errors. +This also handles hosts which fail to report the final completion. + +Fix transfer length reporting on Babble errors. They may be caused by +device malfunction, no guarantee that the buffer has been filled. + +Signed-off-by: Michal Pecio +Cc: stable@vger.kernel.org +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index e4441a71368e5..239b5edee3268 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2381,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + case COMP_BANDWIDTH_OVERRUN_ERROR: + frame->status = -ECOMM; + break; +- case COMP_ISOCH_BUFFER_OVERRUN: + case COMP_BABBLE_DETECTED_ERROR: ++ sum_trbs_for_length = true; ++ fallthrough; ++ case COMP_ISOCH_BUFFER_OVERRUN: + frame->status = -EOVERFLOW; ++ if (ep_trb != td->last_trb) ++ td->error_mid_td = true; + break; + case COMP_INCOMPATIBLE_DEVICE_ERROR: + case COMP_STALL_ERROR: +-- +2.43.0 + diff --git a/queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch b/queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch new file mode 100644 index 00000000000..473cfd55ddc --- /dev/null +++ b/queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch @@ -0,0 +1,186 @@ +From 2ca20b347c88d00cc5e407823e29358788366800 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Jan 2024 17:27:36 +0200 +Subject: xhci: process isoc TD properly when there was a transaction error mid + TD. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mathias Nyman + +[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] + +The last TRB of a isoc TD might not trigger an event if there was +an error event for a TRB mid TD. This is seen on a NEC Corporation +uPD720200 USB 3.0 Host + +After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 +generate events for passed TRBs with IOC flag set if it proceeds to the +next TD. This event is either a copy of the original error, or a +"success" transfer event. + +If that event is missing then the driver and xHC host get out of sync as +the driver is still expecting a transfer event for that first TD, while +xHC host is already sending events for the next TD in the list. +This leads to +"Transfer event TRB DMA ptr not part of current TD" messages. + +As a solution we tag the isoc TDs that get error events mid TD. 
+If an event doesn't match the first TD, then check if the tag is +set, and event points to the next TD. +In that case give back the fist TD and process the next TD normally + +Make sure TD status and transferred length stay valid in both cases +with and without final TD completion event. + +Reported-by: Michał Pecio +Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ +Tested-by: Michał Pecio +Cc: stable@vger.kernel.org +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- + drivers/usb/host/xhci.h | 1 + + 2 files changed, 61 insertions(+), 14 deletions(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index 1239e06dfe411..e4441a71368e5 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + /* handle completion code */ + switch (trb_comp_code) { + case COMP_SUCCESS: ++ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ ++ if (td->error_mid_td) ++ break; + if (remaining) { + frame->status = short_framestatus; + if (xhci->quirks & XHCI_TRUST_TX_LENGTH) +@@ -2388,8 +2391,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + break; + case COMP_USB_TRANSACTION_ERROR: + frame->status = -EPROTO; ++ sum_trbs_for_length = true; + if (ep_trb != td->last_trb) +- return 0; ++ td->error_mid_td = true; + break; + case COMP_STOPPED: + sum_trbs_for_length = true; +@@ -2409,6 +2413,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + break; + } + ++ if (td->urb_length_set) ++ goto finish_td; ++ + if (sum_trbs_for_length) + frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + + ep_trb_len - remaining; +@@ -2417,6 +2424,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + + td->urb->actual_length += frame->actual_length; + ++finish_td: ++ /* Don't give back TD yet if we encountered an error mid TD */ ++ if (td->error_mid_td && ep_trb != td->last_trb) { ++ xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); ++ td->urb_length_set = true; ++ return 0; ++ } ++ + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); + } + +@@ -2801,17 +2816,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, + } + + if (!ep_seg) { +- if (!ep->skip || +- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { +- /* Some host controllers give a spurious +- * successful event after a short transfer. +- * Ignore it. +- */ +- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && +- ep_ring->last_td_was_short) { +- ep_ring->last_td_was_short = false; +- goto cleanup; ++ ++ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { ++ skip_isoc_td(xhci, td, ep, status); ++ goto cleanup; ++ } ++ ++ /* ++ * Some hosts give a spurious success event after a short ++ * transfer. Ignore it. ++ */ ++ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && ++ ep_ring->last_td_was_short) { ++ ep_ring->last_td_was_short = false; ++ goto cleanup; ++ } ++ ++ /* ++ * xhci 4.10.2 states isoc endpoints should continue ++ * processing the next TD if there was an error mid TD. ++ * So host like NEC don't generate an event for the last ++ * isoc TRB even if the IOC flag is set. 
++ * xhci 4.9.1 states that if there are errors in mult-TRB ++ * TDs xHC should generate an error for that TRB, and if xHC ++ * proceeds to the next TD it should genete an event for ++ * any TRB with IOC flag on the way. Other host follow this. ++ * So this event might be for the next TD. ++ */ ++ if (td->error_mid_td && ++ !list_is_last(&td->td_list, &ep_ring->td_list)) { ++ struct xhci_td *td_next = list_next_entry(td, td_list); ++ ++ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, ++ td_next->last_trb, ep_trb_dma, false); ++ if (ep_seg) { ++ /* give back previous TD, start handling new */ ++ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); ++ ep_ring->dequeue = td->last_trb; ++ ep_ring->deq_seg = td->last_trb_seg; ++ inc_deq(xhci, ep_ring); ++ xhci_td_cleanup(xhci, td, ep_ring, td->status); ++ td = td_next; + } ++ } ++ ++ if (!ep_seg) { + /* HC is busted, give up! */ + xhci_err(xhci, + "ERROR Transfer event TRB DMA ptr not " +@@ -2823,9 +2872,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, + ep_trb_dma, true); + return -ESHUTDOWN; + } +- +- skip_isoc_td(xhci, td, ep, status); +- goto cleanup; + } + if (trb_comp_code == COMP_SHORT_PACKET) + ep_ring->last_td_was_short = true; +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 1354310cb37b1..fc25a5b09710c 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1570,6 +1570,7 @@ struct xhci_td { + struct xhci_segment *bounce_seg; + /* actual_length of the URB has already been set */ + bool urb_length_set; ++ bool error_mid_td; + unsigned int num_trbs; + }; + +-- +2.43.0 +
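Read together, the isoc changes in the two xhci patches above boil down to the following handling when a transfer event does not point into the TD at the head of the ring. This is a condensed sketch for orientation only; the real code in handle_tx_event() also updates the ring dequeue pointer and keeps the existing skip and spurious-event handling around it:

/* Sketch: event DMA pointer is not inside the expected TD (!ep_seg). */
if (td->error_mid_td && !list_is_last(&td->td_list, &ep_ring->td_list)) {
	struct xhci_td *td_next = list_next_entry(td, td_list);

	if (trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
		      td_next->last_trb, ep_trb_dma, false)) {
		/* The host never completed the failed TD: give it back
		 * with the status recorded at the mid-TD error and handle
		 * this event as part of the next TD instead. */
		xhci_td_cleanup(xhci, td, ep_ring, td->status);
		td = td_next;
	}
}

If no TD on the ring matches even after this, the event is still treated as the fatal "Transfer event TRB DMA ptr not part of current TD" case.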