--- /dev/null
+From cb6503e46264caaec048dda33ad58268a5f559bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 10:11:30 +0100
+Subject: ASoC: codecs: wcd938x: fix headphones volume controls
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 4d0e8bdfa4a57099dc7230952a460903f2e2f8de ]
+
+The lowest headphones volume setting does not mute, so leave the TLV
+mute flag unset.
+
+This is specifically needed to let the sound server use the lowest gain
+setting.
+
+Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR")
+Cc: <stable@vger.kernel.org> # 6.5
+Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index e80be4e4fa8b4..555b74e7172d8 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -210,7 +210,7 @@ struct wcd938x_priv {
+ };
+
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
+-static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
+
+ struct wcd938x_mbhc_zdet_param {
+--
+2.43.0
+
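For context, DECLARE_TLV_DB_SCALE() takes (name, min, step, mute), with min and step in 0.01 dB units; the last argument tells user space whether the lowest control value means "muted". A minimal standalone C sketch of that mapping, with simplified stand-in types (not the ALSA implementation):

#include <stdio.h>

/* Simplified stand-in for an ALSA DB_SCALE TLV: min and step are in
 * 0.01 dB units, and "mute" says whether control value 0 means muted. */
struct db_scale {
	int min;
	int step;
	int mute;
};

static void print_gain(const struct db_scale *s, int ctl_val)
{
	if (s->mute && ctl_val == 0)
		printf("value %2d -> mute\n", ctl_val);
	else
		printf("value %2d -> %.2f dB\n", ctl_val,
		       (s->min + ctl_val * s->step) / 100.0);
}

int main(void)
{
	/* After the fix above: value 0 is a real -30 dB gain, not mute. */
	struct db_scale line_gain = { .min = -3000, .step = 150, .mute = 0 };

	print_gain(&line_gain, 0);	/* -30.00 dB */
	print_gain(&line_gain, 20);	/*   0.00 dB */
	return 0;
}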
--- /dev/null
+From c287453564ed11a8d05e35a279e773fa882d33a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 17:40:32 +0800
+Subject: blk-iocost: disable writeback throttling
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 8796acbc9a0eceeddd99eaef833bdda1241d39b9 ]
+
+Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") disables
+wbt for bfq, because different write-throttling heuristics should not
+work together.
+
+For the same reason, wbt and iocost should not work together either,
+unless the admin really wants that, despite the performance impact.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20221012094035.390056-2-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index e6557024e3da8..3788774a7b729 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -3281,9 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
+ blk_stat_enable_accounting(disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+ ioc->enabled = true;
++ wbt_disable_default(disk->queue);
+ } else {
+ blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+ ioc->enabled = false;
++ wbt_enable_default(disk->queue);
+ }
+
+ if (user) {
+--
+2.43.0
+
--- /dev/null
+From 4d92df6c36fe4a84ee71df9bbf00ad1bf65633f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Feb 2023 03:16:54 -0800
+Subject: blk-iocost: Pass gendisk to ioc_refresh_params
+
+From: Breno Leitao <leitao@debian.org>
+
+[ Upstream commit e33b93650fc5364f773985a3e961e24349330d97 ]
+
+The current kernel (d2980d8d826554fa6981d621e569a453787472f8) crashes
+when blk_iocost_init() is called for the `nvme1` disk.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000050
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+
+ blk_iocost_init (include/asm-generic/qspinlock.h:128
+ include/linux/spinlock.h:203
+ include/linux/spinlock_api_smp.h:158
+ include/linux/spinlock.h:400
+ block/blk-iocost.c:2884)
+ ioc_qos_write (block/blk-iocost.c:3198)
+ ? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566)
+ ? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311)
+ ? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491)
+ ? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46
+ arch/x86/include/asm/uaccess_64.h:52
+ lib/iov_iter.c:183 lib/iov_iter.c:628)
+ ? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693)
+ cgroup_file_write (kernel/cgroup/cgroup.c:4061)
+ kernfs_fop_write_iter (fs/kernfs/file.c:334)
+ vfs_write (include/linux/fs.h:1849 fs/read_write.c:491
+ fs/read_write.c:584)
+ ksys_write (fs/read_write.c:637)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+This happens because ioc_refresh_params() is called before ioc->rqos has
+been properly initialized; that initialization only happens later, on the
+callee side.
+
+ioc_refresh_params() -> ioc_autop_idx() tries to access
+ioc->rqos.disk->queue, but ioc->rqos.disk is NULL, causing the BUG above.
+
+Create a new function, ioc_refresh_params_disk(), that is similar to
+ioc_refresh_params() but takes the "struct gendisk" as an explicit
+argument. This function is called when ioc->rqos.disk is not yet
+initialized.
+
+Fixes: ce57b558604e ("blk-rq-qos: make rq_qos_add and rq_qos_del more useful")
+
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230228111654.1778120-1-leitao@debian.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index ab5830ba23e0f..0d4bc9d8f2cac 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -801,7 +801,11 @@ static void ioc_refresh_period_us(struct ioc *ioc)
+ ioc_refresh_margins(ioc);
+ }
+
+-static int ioc_autop_idx(struct ioc *ioc)
++/*
++ * ioc->rqos.disk isn't initialized when this function is called from
++ * the init path.
++ */
++static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
+ {
+ int idx = ioc->autop_idx;
+ const struct ioc_params *p = &autop[idx];
+@@ -809,11 +813,11 @@ static int ioc_autop_idx(struct ioc *ioc)
+ u64 now_ns;
+
+ /* rotational? */
+- if (!blk_queue_nonrot(ioc->rqos.disk->queue))
++ if (!blk_queue_nonrot(disk->queue))
+ return AUTOP_HDD;
+
+ /* handle SATA SSDs w/ broken NCQ */
+- if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
++ if (blk_queue_depth(disk->queue) == 1)
+ return AUTOP_SSD_QD1;
+
+ /* use one of the normal ssd sets */
+@@ -902,14 +906,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc)
+ &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
+ }
+
+-static bool ioc_refresh_params(struct ioc *ioc, bool force)
++/*
++ * struct gendisk is required as an argument because ioc->rqos.disk
++ * is not properly initialized when called from the init path.
++ */
++static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
++ struct gendisk *disk)
+ {
+ const struct ioc_params *p;
+ int idx;
+
+ lockdep_assert_held(&ioc->lock);
+
+- idx = ioc_autop_idx(ioc);
++ idx = ioc_autop_idx(ioc, disk);
+ p = &autop[idx];
+
+ if (idx == ioc->autop_idx && !force)
+@@ -938,6 +947,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
+ return true;
+ }
+
++static bool ioc_refresh_params(struct ioc *ioc, bool force)
++{
++ return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
++}
++
+ /*
+ * When an iocg accumulates too much vtime or gets deactivated, we throw away
+ * some vtime, which lowers the overall device utilization. As the exact amount
+@@ -2884,7 +2898,7 @@ static int blk_iocost_init(struct gendisk *disk)
+
+ spin_lock_irq(&ioc->lock);
+ ioc->autop_idx = AUTOP_INVALID;
+- ioc_refresh_params(ioc, true);
++ ioc_refresh_params_disk(ioc, true, disk);
+ spin_unlock_irq(&ioc->lock);
+
+ /*
+--
+2.43.0
+
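A minimal standalone C sketch of the pattern this patch uses, an explicit-argument helper plus a thin wrapper, with made-up names; it only illustrates why passing the disk explicitly avoids dereferencing a back-pointer the init path has not set yet:

#include <stdbool.h>
#include <stdio.h>

struct disk { int queue_depth; };

struct ioc {
	struct disk *disk;	/* back-pointer, only set after registration */
	int params;
};

/* Takes the disk explicitly so it is safe on the init path,
 * where ioc->disk is still NULL. */
static bool refresh_params_disk(struct ioc *ioc, struct disk *disk)
{
	ioc->params = (disk->queue_depth == 1) ? 1 : 2;
	return true;
}

/* Convenience wrapper for callers that run after ioc->disk is set. */
static bool refresh_params(struct ioc *ioc)
{
	return refresh_params_disk(ioc, ioc->disk);
}

int main(void)
{
	struct disk d = { .queue_depth = 32 };
	struct ioc ioc = { .disk = NULL };

	refresh_params_disk(&ioc, &d);	/* init path: no back-pointer yet */
	ioc.disk = &d;			/* registration sets the back-pointer */
	refresh_params(&ioc);		/* later callers use the wrapper */
	printf("params=%d\n", ioc.params);
	return 0;
}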
--- /dev/null
+From aa235b97093a21478dc99fd9638fc62d88af5f17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:55 +0100
+Subject: blk-rq-qos: constify rq_qos_ops
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 3963d84df7974b6687cb34bce3b9e0b2686f839c ]
+
+These op vectors are constant, so mark them const.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-15-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 2 +-
+ block/blk-iolatency.c | 2 +-
+ block/blk-rq-qos.c | 2 +-
+ block/blk-rq-qos.h | 4 ++--
+ block/blk-wbt.c | 2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index a8a7d2ce927b9..78958c5bece08 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2836,7 +2836,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
+ kfree(ioc);
+ }
+
+-static struct rq_qos_ops ioc_rqos_ops = {
++static const struct rq_qos_ops ioc_rqos_ops = {
+ .throttle = ioc_rqos_throttle,
+ .merge = ioc_rqos_merge,
+ .done_bio = ioc_rqos_done_bio,
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index c64cfec34ac37..b0f8550f87cd2 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -651,7 +651,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
+ kfree(blkiolat);
+ }
+
+-static struct rq_qos_ops blkcg_iolatency_ops = {
++static const struct rq_qos_ops blkcg_iolatency_ops = {
+ .throttle = blkcg_iolatency_throttle,
+ .done_bio = blkcg_iolatency_done_bio,
+ .exit = blkcg_iolatency_exit,
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 14bee1bd76136..8e83734cfe8db 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -296,7 +296,7 @@ void rq_qos_exit(struct request_queue *q)
+ }
+
+ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+- struct rq_qos_ops *ops)
++ const struct rq_qos_ops *ops)
+ {
+ struct request_queue *q = disk->queue;
+
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 22552785aa31e..2b7b668479f71 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -25,7 +25,7 @@ struct rq_wait {
+ };
+
+ struct rq_qos {
+- struct rq_qos_ops *ops;
++ const struct rq_qos_ops *ops;
+ struct request_queue *q;
+ enum rq_qos_id id;
+ struct rq_qos *next;
+@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+ }
+
+ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+- struct rq_qos_ops *ops);
++ const struct rq_qos_ops *ops);
+ void rq_qos_del(struct rq_qos *rqos);
+
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index aec4e37c89c4a..d9398347b08d8 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -808,7 +808,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
+ };
+ #endif
+
+-static struct rq_qos_ops wbt_rqos_ops = {
++static const struct rq_qos_ops wbt_rqos_ops = {
+ .throttle = wbt_wait,
+ .issue = wbt_issue,
+ .track = wbt_track,
+--
+2.43.0
+
--- /dev/null
+From 581958da857b8e9faf3303ba6ebc2f7e0b7a15fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:54 +0100
+Subject: blk-rq-qos: make rq_qos_add and rq_qos_del more useful
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit ce57b558604e68277d31ca5ce49ec4579a8618c5 ]
+
+Switch to passing a gendisk, make rq_qos_add() initialize all required
+fields, and drop the no-longer-needed q argument from rq_qos_del().
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-14-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 13 +++----------
+ block/blk-iolatency.c | 14 ++++----------
+ block/blk-rq-qos.c | 13 ++++++++++---
+ block/blk-rq-qos.h | 5 +++--
+ block/blk-wbt.c | 5 +----
+ 5 files changed, 21 insertions(+), 29 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 72ca07f24b3c0..a8a7d2ce927b9 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2847,9 +2847,7 @@ static struct rq_qos_ops ioc_rqos_ops = {
+
+ static int blk_iocost_init(struct gendisk *disk)
+ {
+- struct request_queue *q = disk->queue;
+ struct ioc *ioc;
+- struct rq_qos *rqos;
+ int i, cpu, ret;
+
+ ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
+@@ -2872,11 +2870,6 @@ static int blk_iocost_init(struct gendisk *disk)
+ local64_set(&ccs->rq_wait_ns, 0);
+ }
+
+- rqos = &ioc->rqos;
+- rqos->id = RQ_QOS_COST;
+- rqos->ops = &ioc_rqos_ops;
+- rqos->q = q;
+-
+ spin_lock_init(&ioc->lock);
+ timer_setup(&ioc->timer, ioc_timer_fn, 0);
+ INIT_LIST_HEAD(&ioc->active_iocgs);
+@@ -2900,17 +2893,17 @@ static int blk_iocost_init(struct gendisk *disk)
+ * called before policy activation completion, can't assume that the
+ * target bio has an iocg associated and need to test for NULL iocg.
+ */
+- ret = rq_qos_add(q, rqos);
++ ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
+ if (ret)
+ goto err_free_ioc;
+
+- ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
++ ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost);
+ if (ret)
+ goto err_del_qos;
+ return 0;
+
+ err_del_qos:
+- rq_qos_del(q, rqos);
++ rq_qos_del(&ioc->rqos);
+ err_free_ioc:
+ free_percpu(ioc->pcpu_stat);
+ kfree(ioc);
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index 571fa95aafe96..c64cfec34ac37 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -758,24 +758,18 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
+
+ int blk_iolatency_init(struct gendisk *disk)
+ {
+- struct request_queue *q = disk->queue;
+ struct blk_iolatency *blkiolat;
+- struct rq_qos *rqos;
+ int ret;
+
+ blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
+ if (!blkiolat)
+ return -ENOMEM;
+
+- rqos = &blkiolat->rqos;
+- rqos->id = RQ_QOS_LATENCY;
+- rqos->ops = &blkcg_iolatency_ops;
+- rqos->q = q;
+-
+- ret = rq_qos_add(q, rqos);
++ ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
++ &blkcg_iolatency_ops);
+ if (ret)
+ goto err_free;
+- ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
++ ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency);
+ if (ret)
+ goto err_qos_del;
+
+@@ -785,7 +779,7 @@ int blk_iolatency_init(struct gendisk *disk)
+ return 0;
+
+ err_qos_del:
+- rq_qos_del(q, rqos);
++ rq_qos_del(&blkiolat->rqos);
+ err_free:
+ kfree(blkiolat);
+ return ret;
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index aae98dcb01ebe..14bee1bd76136 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -295,8 +295,15 @@ void rq_qos_exit(struct request_queue *q)
+ }
+ }
+
+-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
++ struct rq_qos_ops *ops)
+ {
++ struct request_queue *q = disk->queue;
++
++ rqos->q = q;
++ rqos->id = id;
++ rqos->ops = ops;
++
+ /*
+ * No IO can be in-flight when adding rqos, so freeze queue, which
+ * is fine since we only support rq_qos for blk-mq queue.
+@@ -326,11 +333,11 @@ int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+ spin_unlock_irq(&q->queue_lock);
+ blk_mq_unfreeze_queue(q);
+ return -EBUSY;
+-
+ }
+
+-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
++void rq_qos_del(struct rq_qos *rqos)
+ {
++ struct request_queue *q = rqos->q;
+ struct rq_qos **cur;
+
+ /*
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 805eee8b031d0..22552785aa31e 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -85,8 +85,9 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+ init_waitqueue_head(&rq_wait->wait);
+ }
+
+-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos);
+-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos);
++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
++ struct rq_qos_ops *ops);
++void rq_qos_del(struct rq_qos *rqos);
+
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+ typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 95bec9244e9f3..aec4e37c89c4a 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -842,9 +842,6 @@ int wbt_init(struct gendisk *disk)
+ for (i = 0; i < WBT_NUM_RWQ; i++)
+ rq_wait_init(&rwb->rq_wait[i]);
+
+- rwb->rqos.id = RQ_QOS_WBT;
+- rwb->rqos.ops = &wbt_rqos_ops;
+- rwb->rqos.q = q;
+ rwb->last_comp = rwb->last_issue = jiffies;
+ rwb->win_nsec = RWB_WINDOW_NSEC;
+ rwb->enable_state = WBT_STATE_ON_DEFAULT;
+@@ -857,7 +854,7 @@ int wbt_init(struct gendisk *disk)
+ /*
+ * Assign rwb and add the stats callback.
+ */
+- ret = rq_qos_add(q, &rwb->rqos);
++ ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+ if (ret)
+ goto err_free;
+
+--
+2.43.0
+
--- /dev/null
+From 99215e8e45084576ff46f9ed9e23f06d152f879c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:53 +0100
+Subject: blk-rq-qos: move rq_qos_add and rq_qos_del out of line
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit b494f9c566ba5fe2cc8abe67fdeb0332c6b48d4b ]
+
+These two functions are rather large and not in a fast path, so move
+them out of line.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-13-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-rq-qos.c | 60 +++++++++++++++++++++++++++++++++++++++++++++
+ block/blk-rq-qos.h | 61 ++--------------------------------------------
+ 2 files changed, 62 insertions(+), 59 deletions(-)
+
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 88f0fe7dcf545..aae98dcb01ebe 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -294,3 +294,63 @@ void rq_qos_exit(struct request_queue *q)
+ rqos->ops->exit(rqos);
+ }
+ }
++
++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++{
++ /*
++ * No IO can be in-flight when adding rqos, so freeze queue, which
++ * is fine since we only support rq_qos for blk-mq queue.
++ *
++ * Reuse ->queue_lock for protecting against other concurrent
++ * rq_qos adding/deleting
++ */
++ blk_mq_freeze_queue(q);
++
++ spin_lock_irq(&q->queue_lock);
++ if (rq_qos_id(q, rqos->id))
++ goto ebusy;
++ rqos->next = q->rq_qos;
++ q->rq_qos = rqos;
++ spin_unlock_irq(&q->queue_lock);
++
++ blk_mq_unfreeze_queue(q);
++
++ if (rqos->ops->debugfs_attrs) {
++ mutex_lock(&q->debugfs_mutex);
++ blk_mq_debugfs_register_rqos(rqos);
++ mutex_unlock(&q->debugfs_mutex);
++ }
++
++ return 0;
++ebusy:
++ spin_unlock_irq(&q->queue_lock);
++ blk_mq_unfreeze_queue(q);
++ return -EBUSY;
++
++}
++
++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
++{
++ struct rq_qos **cur;
++
++ /*
++ * See comment in rq_qos_add() about freezing queue & using
++ * ->queue_lock.
++ */
++ blk_mq_freeze_queue(q);
++
++ spin_lock_irq(&q->queue_lock);
++ for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
++ if (*cur == rqos) {
++ *cur = rqos->next;
++ break;
++ }
++ }
++ spin_unlock_irq(&q->queue_lock);
++
++ blk_mq_unfreeze_queue(q);
++
++ mutex_lock(&q->debugfs_mutex);
++ blk_mq_debugfs_unregister_rqos(rqos);
++ mutex_unlock(&q->debugfs_mutex);
++}
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 1ef1f7d4bc3cb..805eee8b031d0 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -85,65 +85,8 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+ init_waitqueue_head(&rq_wait->wait);
+ }
+
+-static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+-{
+- /*
+- * No IO can be in-flight when adding rqos, so freeze queue, which
+- * is fine since we only support rq_qos for blk-mq queue.
+- *
+- * Reuse ->queue_lock for protecting against other concurrent
+- * rq_qos adding/deleting
+- */
+- blk_mq_freeze_queue(q);
+-
+- spin_lock_irq(&q->queue_lock);
+- if (rq_qos_id(q, rqos->id))
+- goto ebusy;
+- rqos->next = q->rq_qos;
+- q->rq_qos = rqos;
+- spin_unlock_irq(&q->queue_lock);
+-
+- blk_mq_unfreeze_queue(q);
+-
+- if (rqos->ops->debugfs_attrs) {
+- mutex_lock(&q->debugfs_mutex);
+- blk_mq_debugfs_register_rqos(rqos);
+- mutex_unlock(&q->debugfs_mutex);
+- }
+-
+- return 0;
+-ebusy:
+- spin_unlock_irq(&q->queue_lock);
+- blk_mq_unfreeze_queue(q);
+- return -EBUSY;
+-
+-}
+-
+-static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+-{
+- struct rq_qos **cur;
+-
+- /*
+- * See comment in rq_qos_add() about freezing queue & using
+- * ->queue_lock.
+- */
+- blk_mq_freeze_queue(q);
+-
+- spin_lock_irq(&q->queue_lock);
+- for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+- if (*cur == rqos) {
+- *cur = rqos->next;
+- break;
+- }
+- }
+- spin_unlock_irq(&q->queue_lock);
+-
+- blk_mq_unfreeze_queue(q);
+-
+- mutex_lock(&q->debugfs_mutex);
+- blk_mq_debugfs_unregister_rqos(rqos);
+- mutex_unlock(&q->debugfs_mutex);
+-}
++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos);
++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos);
+
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+ typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
+--
+2.43.0
+
--- /dev/null
+From 8d1a0d757f1cabbee1a542c21443aefc9746b42d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:56 +0100
+Subject: blk-rq-qos: store a gendisk instead of request_queue in struct rq_qos
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit ba91c849fa50dbc6519cf7808177b3a9b7f6bc97 ]
+
+This is what about half of the users already want, and the number is
+only going to grow.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-16-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 12 ++++++------
+ block/blk-iolatency.c | 14 +++++++-------
+ block/blk-mq-debugfs.c | 10 ++++------
+ block/blk-rq-qos.c | 4 ++--
+ block/blk-rq-qos.h | 2 +-
+ block/blk-wbt.c | 16 +++++++---------
+ 6 files changed, 27 insertions(+), 31 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 78958c5bece08..ab5830ba23e0f 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -670,7 +670,7 @@ static struct ioc *q_to_ioc(struct request_queue *q)
+
+ static const char __maybe_unused *ioc_name(struct ioc *ioc)
+ {
+- struct gendisk *disk = ioc->rqos.q->disk;
++ struct gendisk *disk = ioc->rqos.disk;
+
+ if (!disk)
+ return "<unknown>";
+@@ -809,11 +809,11 @@ static int ioc_autop_idx(struct ioc *ioc)
+ u64 now_ns;
+
+ /* rotational? */
+- if (!blk_queue_nonrot(ioc->rqos.q))
++ if (!blk_queue_nonrot(ioc->rqos.disk->queue))
+ return AUTOP_HDD;
+
+ /* handle SATA SSDs w/ broken NCQ */
+- if (blk_queue_depth(ioc->rqos.q) == 1)
++ if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
+ return AUTOP_SSD_QD1;
+
+ /* use one of the normal ssd sets */
+@@ -2653,7 +2653,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
+ if (use_debt) {
+ iocg_incur_debt(iocg, abs_cost, &now);
+ if (iocg_kick_delay(iocg, &now))
+- blkcg_schedule_throttle(rqos->q->disk,
++ blkcg_schedule_throttle(rqos->disk,
+ (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+ iocg_unlock(iocg, ioc_locked, &flags);
+ return;
+@@ -2754,7 +2754,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
+ if (likely(!list_empty(&iocg->active_list))) {
+ iocg_incur_debt(iocg, abs_cost, &now);
+ if (iocg_kick_delay(iocg, &now))
+- blkcg_schedule_throttle(rqos->q->disk,
++ blkcg_schedule_throttle(rqos->disk,
+ (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+ } else {
+ iocg_commit_bio(iocg, bio, abs_cost, cost);
+@@ -2825,7 +2825,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
+ {
+ struct ioc *ioc = rqos_to_ioc(rqos);
+
+- blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
++ blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost);
+
+ spin_lock_irq(&ioc->lock);
+ ioc->running = IOC_STOP;
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index b0f8550f87cd2..268e6653b5a62 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -292,7 +292,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
+ unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
+
+ if (use_delay)
+- blkcg_schedule_throttle(rqos->q->disk, use_memdelay);
++ blkcg_schedule_throttle(rqos->disk, use_memdelay);
+
+ /*
+ * To avoid priority inversions we want to just take a slot if we are
+@@ -330,7 +330,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
+ struct child_latency_info *lat_info,
+ bool up)
+ {
+- unsigned long qd = blkiolat->rqos.q->nr_requests;
++ unsigned long qd = blkiolat->rqos.disk->queue->nr_requests;
+ unsigned long scale = scale_amount(qd, up);
+ unsigned long old = atomic_read(&lat_info->scale_cookie);
+ unsigned long max_scale = qd << 1;
+@@ -370,7 +370,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
+ */
+ static void scale_change(struct iolatency_grp *iolat, bool up)
+ {
+- unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
++ unsigned long qd = iolat->blkiolat->rqos.disk->queue->nr_requests;
+ unsigned long scale = scale_amount(qd, up);
+ unsigned long old = iolat->rq_depth.max_depth;
+
+@@ -647,7 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
+
+ del_timer_sync(&blkiolat->timer);
+ flush_work(&blkiolat->enable_work);
+- blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
++ blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency);
+ kfree(blkiolat);
+ }
+
+@@ -666,7 +666,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
+
+ rcu_read_lock();
+ blkg_for_each_descendant_pre(blkg, pos_css,
+- blkiolat->rqos.q->root_blkg) {
++ blkiolat->rqos.disk->queue->root_blkg) {
+ struct iolatency_grp *iolat;
+ struct child_latency_info *lat_info;
+ unsigned long flags;
+@@ -750,9 +750,9 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
+ */
+ enabled = atomic_read(&blkiolat->enable_cnt);
+ if (enabled != blkiolat->enabled) {
+- blk_mq_freeze_queue(blkiolat->rqos.q);
++ blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
+ blkiolat->enabled = enabled;
+- blk_mq_unfreeze_queue(blkiolat->rqos.q);
++ blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
+ }
+ }
+
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 7675e663df365..c152276736832 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -813,9 +813,9 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
+
+ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+ {
+- lockdep_assert_held(&rqos->q->debugfs_mutex);
++ lockdep_assert_held(&rqos->disk->queue->debugfs_mutex);
+
+- if (!rqos->q->debugfs_dir)
++ if (!rqos->disk->queue->debugfs_dir)
+ return;
+ debugfs_remove_recursive(rqos->debugfs_dir);
+ rqos->debugfs_dir = NULL;
+@@ -823,7 +823,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+
+ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+ {
+- struct request_queue *q = rqos->q;
++ struct request_queue *q = rqos->disk->queue;
+ const char *dir_name = rq_qos_id_to_name(rqos->id);
+
+ lockdep_assert_held(&q->debugfs_mutex);
+@@ -835,9 +835,7 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+ q->rqos_debugfs_dir = debugfs_create_dir("rqos",
+ q->debugfs_dir);
+
+- rqos->debugfs_dir = debugfs_create_dir(dir_name,
+- rqos->q->rqos_debugfs_dir);
+-
++ rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
+ debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
+ }
+
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 8e83734cfe8db..d8cc820a365e3 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -300,7 +300,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+ {
+ struct request_queue *q = disk->queue;
+
+- rqos->q = q;
++ rqos->disk = disk;
+ rqos->id = id;
+ rqos->ops = ops;
+
+@@ -337,7 +337,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+
+ void rq_qos_del(struct rq_qos *rqos)
+ {
+- struct request_queue *q = rqos->q;
++ struct request_queue *q = rqos->disk->queue;
+ struct rq_qos **cur;
+
+ /*
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 2b7b668479f71..b02a1a3d33a89 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -26,7 +26,7 @@ struct rq_wait {
+
+ struct rq_qos {
+ const struct rq_qos_ops *ops;
+- struct request_queue *q;
++ struct gendisk *disk;
+ enum rq_qos_id id;
+ struct rq_qos *next;
+ #ifdef CONFIG_BLK_DEBUG_FS
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index d9398347b08d8..e9206b1406e76 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -98,7 +98,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
+ */
+ static bool wb_recent_wait(struct rq_wb *rwb)
+ {
+- struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
++ struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+
+ return time_before(jiffies, wb->dirty_sleep + HZ);
+ }
+@@ -235,7 +235,7 @@ enum {
+
+ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+ {
+- struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+ struct rq_depth *rqd = &rwb->rq_depth;
+ u64 thislat;
+
+@@ -288,7 +288,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+
+ static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
+ {
+- struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+ struct rq_depth *rqd = &rwb->rq_depth;
+
+ trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
+@@ -358,13 +358,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
+ unsigned int inflight = wbt_inflight(rwb);
+ int status;
+
+- if (!rwb->rqos.q->disk)
++ if (!rwb->rqos.disk)
+ return;
+
+ status = latency_exceeded(rwb, cb->stat);
+
+- trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
+- inflight);
++ trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
+
+ /*
+ * If we exceeded the latency target, step down. If we did not,
+@@ -689,16 +688,15 @@ static int wbt_data_dir(const struct request *rq)
+
+ static void wbt_queue_depth_changed(struct rq_qos *rqos)
+ {
+- RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
++ RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
+ wbt_update_limits(RQWB(rqos));
+ }
+
+ static void wbt_exit(struct rq_qos *rqos)
+ {
+ struct rq_wb *rwb = RQWB(rqos);
+- struct request_queue *q = rqos->q;
+
+- blk_stat_remove_callback(q, rwb->cb);
++ blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
+ blk_stat_free_callback(rwb->cb);
+ kfree(rwb);
+ }
+--
+2.43.0
+
--- /dev/null
+From 51938e4e6ade6005901b700cfe6ecdd7481af216 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:18 +0800
+Subject: blk-wbt: don't enable throttling if default elevator is bfq
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 671fae5e51297fc76b3758ca2edd514858734a6a ]
+
+Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") tries to
+disable wbt for bfq by calling wbt_disable_default() in
+bfq_init_queue(). However, wbt is still enabled if the default elevator
+is bfq:
+
+device_add_disk
+ elevator_init_mq
+ bfq_init_queue
+   wbt_disable_default -> does nothing
+
+ blk_register_queue
+ wbt_enable_default -> wbt is enabled
+
+Fix the problem by adding a new flag, ELEVATOR_FLAG_DISABLE_WBT: bfq
+sets the flag in bfq_init_queue(), and a subsequent wbt_enable_default()
+won't enable wbt while the flag is set.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-7-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bfq-iosched.c | 2 ++
+ block/blk-wbt.c | 11 ++++++++---
+ block/elevator.h | 3 ++-
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 52eb79d60a3f3..e4699291aee23 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7059,6 +7059,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+ #endif
+
+ blk_stat_disable_accounting(bfqd->queue);
++ clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
+ wbt_enable_default(bfqd->queue);
+
+ kfree(bfqd);
+@@ -7204,6 +7205,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ /* We dispatch from request queue wide instead of hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
++ set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
+ wbt_disable_default(q);
+ blk_stat_enable_accounting(q);
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index c5a8c10028a08..afb1782b4255e 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -27,6 +27,7 @@
+
+ #include "blk-wbt.h"
+ #include "blk-rq-qos.h"
++#include "elevator.h"
+
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/wbt.h>
+@@ -638,11 +639,15 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
+ */
+ void wbt_enable_default(struct request_queue *q)
+ {
+- struct rq_qos *rqos = wbt_rq_qos(q);
++ struct rq_qos *rqos;
++ bool disable_flag = q->elevator &&
++ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+
+ /* Throttling already enabled? */
++ rqos = wbt_rq_qos(q);
+ if (rqos) {
+- if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
++ if (!disable_flag &&
++ RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+ return;
+ }
+@@ -651,7 +656,7 @@ void wbt_enable_default(struct request_queue *q)
+ if (!blk_queue_registered(q))
+ return;
+
+- if (queue_is_mq(q))
++ if (queue_is_mq(q) && !disable_flag)
+ wbt_init(q);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+diff --git a/block/elevator.h b/block/elevator.h
+index ed574bf3e629e..75382471222d1 100644
+--- a/block/elevator.h
++++ b/block/elevator.h
+@@ -104,7 +104,8 @@ struct elevator_queue
+ DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
+ };
+
+-#define ELEVATOR_FLAG_REGISTERED 0
++#define ELEVATOR_FLAG_REGISTERED 0
++#define ELEVATOR_FLAG_DISABLE_WBT 1
+
+ /*
+ * block elevator interface
+--
+2.43.0
+
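A standalone C sketch of the gating logic this patch introduces, with simplified stand-ins for the queue and elevator rather than the kernel structures: the elevator opts out once, and any later enable-by-default call respects the flag.

#include <stdbool.h>
#include <stdio.h>

#define FLAG_DISABLE_WBT 1	/* bit number, like ELEVATOR_FLAG_DISABLE_WBT */

struct elevator { unsigned long flags; };
struct queue { struct elevator *elevator; bool wbt_enabled; };

static void wbt_enable_default_sketch(struct queue *q)
{
	/* do nothing if the active elevator opted out of wbt */
	if (q->elevator && (q->elevator->flags & (1UL << FLAG_DISABLE_WBT)))
		return;
	q->wbt_enabled = true;
}

int main(void)
{
	struct elevator bfq = { .flags = 1UL << FLAG_DISABLE_WBT };
	struct queue q = { .elevator = &bfq, .wbt_enabled = false };

	wbt_enable_default_sketch(&q);	/* stays off: bfq set the flag */
	printf("wbt_enabled=%d\n", q.wbt_enabled);
	return 0;
}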
--- /dev/null
+From ecf5ea95f5102ff71cf1675020f9bff184b40208 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 18:58:26 +0100
+Subject: blk-wbt: Fix detection of dirty-throttled tasks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 ]
+
+The detection of dirty-throttled tasks in blk-wbt has been subtly broken
+since its beginning in 2016. Namely, if we are doing cgroup writeback and
+the throttled task is not in the root cgroup, balance_dirty_pages() will
+set dirty_sleep for the non-root bdi_writeback structure. However,
+blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback
+structure. Thus detection of recently throttled tasks does not work in
+this case (we noticed this when we switched to cgroup v2 and writeback
+suddenly became slow).
+
+Since blk-wbt has no easy way to get to the proper bdi_writeback, and
+furthermore its intention has always been to work on the whole device
+rather than on individual cgroups, just move the dirty_sleep timestamp
+from bdi_writeback to backing_dev_info. That fixes the check for
+recently throttled tasks and saves memory for everybody as a bonus.
+
+CC: stable@vger.kernel.org
+Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz
+[axboe: fixup indentation errors]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 4 ++--
+ include/linux/backing-dev-defs.h | 7 +++++--
+ mm/backing-dev.c | 2 +-
+ mm/page-writeback.c | 2 +-
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index e9206b1406e76..fcacdff8af93b 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -98,9 +98,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
+ */
+ static bool wb_recent_wait(struct rq_wb *rwb)
+ {
+- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+
+- return time_before(jiffies, wb->dirty_sleep + HZ);
++ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
+ }
+
+ static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
+index ae12696ec492c..2ad261082bba5 100644
+--- a/include/linux/backing-dev-defs.h
++++ b/include/linux/backing-dev-defs.h
+@@ -141,8 +141,6 @@ struct bdi_writeback {
+ struct delayed_work dwork; /* work item used for writeback */
+ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
+
+- unsigned long dirty_sleep; /* last wait */
+-
+ struct list_head bdi_node; /* anchored at bdi->wb_list */
+
+ #ifdef CONFIG_CGROUP_WRITEBACK
+@@ -179,6 +177,11 @@ struct backing_dev_info {
+ * any dirty wbs, which is depended upon by bdi_has_dirty().
+ */
+ atomic_long_t tot_write_bandwidth;
++ /*
++ * Jiffies when last process was dirty throttled on this bdi. Used by
++ * blk-wbt.
++ */
++ unsigned long last_bdp_sleep;
+
+ struct bdi_writeback wb; /* the root writeback info for this bdi */
+ struct list_head wb_list; /* list of all wbs */
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index bf5525c2e561a..c070ff9ef9cf3 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -305,7 +305,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
+ INIT_LIST_HEAD(&wb->work_list);
+ INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+ INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
+- wb->dirty_sleep = jiffies;
+
+ err = fprop_local_init_percpu(&wb->completions, gfp);
+ if (err)
+@@ -793,6 +792,7 @@ int bdi_init(struct backing_dev_info *bdi)
+ INIT_LIST_HEAD(&bdi->bdi_list);
+ INIT_LIST_HEAD(&bdi->wb_list);
+ init_waitqueue_head(&bdi->wb_waitq);
++ bdi->last_bdp_sleep = jiffies;
+
+ return cgwb_bdi_init(bdi);
+ }
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index d3e9d12860b9f..9046d1f1b408e 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1809,7 +1809,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
+ break;
+ }
+ __set_current_state(TASK_KILLABLE);
+- wb->dirty_sleep = now;
++ bdi->last_bdp_sleep = jiffies;
+ io_schedule_timeout(pause);
+
+ current->dirty_paused_when = now + pause;
+--
+2.43.0
+
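A standalone C sketch of the check after this fix, with wall-clock seconds standing in for jiffies and HZ: the "recently dirty-throttled" timestamp lives on the per-device bdi, so it is visible to blk-wbt no matter which cgroup's writeback path recorded it.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct bdi { time_t last_bdp_sleep; };	/* per-device, as in the patch */

/* Any throttled writer, in any cgroup, records its sleep on the device. */
static void balance_dirty_pages_sketch(struct bdi *bdi)
{
	bdi->last_bdp_sleep = time(NULL);
}

/* "Throttled within the last second" -- the analogue of
 * time_before(jiffies, bdi->last_bdp_sleep + HZ). */
static bool wb_recent_wait_sketch(const struct bdi *bdi)
{
	return time(NULL) < bdi->last_bdp_sleep + 1;
}

int main(void)
{
	struct bdi bdi = { .last_bdp_sleep = 0 };

	balance_dirty_pages_sketch(&bdi);
	printf("recent wait: %d\n", wb_recent_wait_sketch(&bdi));
	return 0;
}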
--- /dev/null
+From 5376a7667a1f2430589c3b2f5f0bccafd1dd761b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 20:18:54 +0800
+Subject: blk-wbt: fix that wbt can't be disabled by default
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 8a2b20a997a3779ae9fcae268f2959eb82ec05a1 ]
+
+commit b11d31ae01e6 ("blk-wbt: remove unnecessary check in
+wbt_enable_default()") removes the CONFIG_BLK_WBT_MQ check by mistake;
+that check is used to control whether wbt is enabled by default.
+
+Fix the problem by adding the check back. This patch also does a little
+cleanup to make the related code more readable.
+
+Fixes: b11d31ae01e6 ("blk-wbt: remove unnecessary check in wbt_enable_default()")
+Reported-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Link: https://lore.kernel.org/lkml/CAKXUXMzfKq_J9nKHGyr5P5rvUETY4B-fxoQD4sO+NYjFOfVtZA@mail.gmail.com/t/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20230522121854.2928880-1-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index fcacdff8af93b..526fb12c3e4cf 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -640,14 +640,16 @@ void wbt_enable_default(struct gendisk *disk)
+ {
+ struct request_queue *q = disk->queue;
+ struct rq_qos *rqos;
+- bool disable_flag = q->elevator &&
+- test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
++ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
++
++ if (q->elevator &&
++ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
++ enable = false;
+
+ /* Throttling already enabled? */
+ rqos = wbt_rq_qos(q);
+ if (rqos) {
+- if (!disable_flag &&
+- RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
++ if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+ return;
+ }
+@@ -656,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk)
+ if (!blk_queue_registered(q))
+ return;
+
+- if (queue_is_mq(q) && !disable_flag)
++ if (queue_is_mq(q) && enable)
+ wbt_init(disk);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+--
+2.43.0
+
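A standalone C sketch of the decision after this fix: the build-time default seeds "enable", and the elevator's opt-out flag can only clear it (simplified stand-ins, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

#define BLK_WBT_MQ_DEFAULT 1	/* stand-in for IS_ENABLED(CONFIG_BLK_WBT_MQ) */

static bool should_enable_wbt(bool elevator_disables_wbt)
{
	bool enable = BLK_WBT_MQ_DEFAULT;

	if (elevator_disables_wbt)
		enable = false;
	return enable;
}

int main(void)
{
	printf("no opt-out: %d\n", should_enable_wbt(false));
	printf("bfq opt-out: %d\n", should_enable_wbt(true));
	return 0;
}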
--- /dev/null
+From 3cab63f95634875a1501abbda551e69098f6c978 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:49 +0100
+Subject: blk-wbt: pass a gendisk to wbt_{enable,disable}_default
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 04aad37be1a88de6a1919996a615437ac74de479 ]
+
+Pass a gendisk to wbt_enable_default and wbt_disable_default to
+prepare for phasing out usage of the request_queue in the blk-cgroup
+code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-9-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bfq-iosched.c | 4 ++--
+ block/blk-iocost.c | 4 ++--
+ block/blk-sysfs.c | 2 +-
+ block/blk-wbt.c | 7 ++++---
+ block/blk-wbt.h | 8 ++++----
+ 5 files changed, 13 insertions(+), 12 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index e4699291aee23..84b4763b2b223 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7060,7 +7060,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+
+ blk_stat_disable_accounting(bfqd->queue);
+ clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
+- wbt_enable_default(bfqd->queue);
++ wbt_enable_default(bfqd->queue->disk);
+
+ kfree(bfqd);
+ }
+@@ -7206,7 +7206,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
+ set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
+- wbt_disable_default(q);
++ wbt_disable_default(q->disk);
+ blk_stat_enable_accounting(q);
+
+ return 0;
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 3788774a7b729..72ca07f24b3c0 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -3281,11 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
+ blk_stat_enable_accounting(disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+ ioc->enabled = true;
+- wbt_disable_default(disk->queue);
++ wbt_disable_default(disk);
+ } else {
+ blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+ ioc->enabled = false;
+- wbt_enable_default(disk->queue);
++ wbt_enable_default(disk);
+ }
+
+ if (user) {
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index a82bdec923b21..c59c4d3ee7a27 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -837,7 +837,7 @@ int blk_register_queue(struct gendisk *disk)
+ goto put_dev;
+
+ blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
+- wbt_enable_default(q);
++ wbt_enable_default(disk);
+ blk_throtl_register(disk);
+
+ /* Now everything is ready and send out KOBJ_ADD uevent */
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index afb1782b4255e..8d4f075f13e2f 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -637,8 +637,9 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
+ /*
+ * Enable wbt if defaults are configured that way
+ */
+-void wbt_enable_default(struct request_queue *q)
++void wbt_enable_default(struct gendisk *disk)
+ {
++ struct request_queue *q = disk->queue;
+ struct rq_qos *rqos;
+ bool disable_flag = q->elevator &&
+ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+@@ -705,9 +706,9 @@ static void wbt_exit(struct rq_qos *rqos)
+ /*
+ * Disable wbt, if enabled by default.
+ */
+-void wbt_disable_default(struct request_queue *q)
++void wbt_disable_default(struct gendisk *disk)
+ {
+- struct rq_qos *rqos = wbt_rq_qos(q);
++ struct rq_qos *rqos = wbt_rq_qos(disk->queue);
+ struct rq_wb *rwb;
+ if (!rqos)
+ return;
+diff --git a/block/blk-wbt.h b/block/blk-wbt.h
+index 7e44eccc676dd..58c226fe33d48 100644
+--- a/block/blk-wbt.h
++++ b/block/blk-wbt.h
+@@ -89,8 +89,8 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+ #ifdef CONFIG_BLK_WBT
+
+ int wbt_init(struct request_queue *);
+-void wbt_disable_default(struct request_queue *);
+-void wbt_enable_default(struct request_queue *);
++void wbt_disable_default(struct gendisk *disk);
++void wbt_enable_default(struct gendisk *disk);
+
+ u64 wbt_get_min_lat(struct request_queue *q);
+ void wbt_set_min_lat(struct request_queue *q, u64 val);
+@@ -105,10 +105,10 @@ static inline int wbt_init(struct request_queue *q)
+ {
+ return -EINVAL;
+ }
+-static inline void wbt_disable_default(struct request_queue *q)
++static inline void wbt_disable_default(struct gendisk *disk)
+ {
+ }
+-static inline void wbt_enable_default(struct request_queue *q)
++static inline void wbt_enable_default(struct gendisk *disk)
+ {
+ }
+ static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
+--
+2.43.0
+
--- /dev/null
+From 64436d303bf9f3e4b615121498533f1e7b068e19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:50 +0100
+Subject: blk-wbt: pass a gendisk to wbt_init
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 958f29654747a54f2272eb478e493eb97f492e06 ]
+
+Pass a gendisk to wbt_init to prepare for phasing out usage of the
+request_queue in the blk-cgroup code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-10-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-sysfs.c | 2 +-
+ block/blk-wbt.c | 5 +++--
+ block/blk-wbt.h | 4 ++--
+ 3 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index c59c4d3ee7a27..31f53ef01982d 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -488,7 +488,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+
+ rqos = wbt_rq_qos(q);
+ if (!rqos) {
+- ret = wbt_init(q);
++ ret = wbt_init(q->disk);
+ if (ret)
+ return ret;
+ }
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 8d4f075f13e2f..95bec9244e9f3 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -658,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk)
+ return;
+
+ if (queue_is_mq(q) && !disable_flag)
+- wbt_init(q);
++ wbt_init(disk);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+
+@@ -822,8 +822,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
+ #endif
+ };
+
+-int wbt_init(struct request_queue *q)
++int wbt_init(struct gendisk *disk)
+ {
++ struct request_queue *q = disk->queue;
+ struct rq_wb *rwb;
+ int i;
+ int ret;
+diff --git a/block/blk-wbt.h b/block/blk-wbt.h
+index 58c226fe33d48..8170439b89d6e 100644
+--- a/block/blk-wbt.h
++++ b/block/blk-wbt.h
+@@ -88,7 +88,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+
+ #ifdef CONFIG_BLK_WBT
+
+-int wbt_init(struct request_queue *);
++int wbt_init(struct gendisk *disk);
+ void wbt_disable_default(struct gendisk *disk);
+ void wbt_enable_default(struct gendisk *disk);
+
+@@ -101,7 +101,7 @@ u64 wbt_default_latency_nsec(struct request_queue *);
+
+ #else
+
+-static inline int wbt_init(struct request_queue *q)
++static inline int wbt_init(struct gendisk *disk)
+ {
+ return -EINVAL;
+ }
+--
+2.43.0
+
--- /dev/null
+From 631dc45a1e1ca5721ec23d80d60381e818e3c409 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:14 +0800
+Subject: blk-wbt: remove unnecessary check in wbt_enable_default()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit b11d31ae01e6b0762b28e645ad6718a12faa8d14 ]
+
+If CONFIG_BLK_WBT_MQ is disabled, wbt_init() won't do anything.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-3-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index c293e08b301ff..c5a8c10028a08 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -651,7 +651,7 @@ void wbt_enable_default(struct request_queue *q)
+ if (!blk_queue_registered(q))
+ return;
+
+- if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
++ if (queue_is_mq(q))
+ wbt_init(q);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+--
+2.43.0
+
--- /dev/null
+From 525bbc796fc5729946f6d394ea8f72487384a8a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 13:43:46 -0500
+Subject: drm/amd/display: Fix MST Null Ptr for RV
+
+From: Fangzhi Zuo <jerry.zuo@amd.com>
+
+[ Upstream commit e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 ]
+
+This change tries to fix the below error, specific to the RV platform:
+
+BUG: kernel NULL pointer dereference, address: 0000000000000008
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2
+Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022
+RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
+Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8>
+RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224
+RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280
+RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850
+R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000
+R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224
+FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0
+Call Trace:
+ <TASK>
+ ? __die+0x23/0x70
+ ? page_fault_oops+0x171/0x4e0
+ ? plist_add+0xbe/0x100
+ ? exc_page_fault+0x7c/0x180
+ ? asm_exc_page_fault+0x26/0x30
+ ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026]
+ ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026]
+ compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ drm_atomic_check_only+0x5c5/0xa40
+ drm_mode_atomic_ioctl+0x76e/0xbc0
+ ? _copy_to_user+0x25/0x30
+ ? drm_ioctl+0x296/0x4b0
+ ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
+ drm_ioctl_kernel+0xcd/0x170
+ drm_ioctl+0x26d/0x4b0
+ ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
+ amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ __x64_sys_ioctl+0x94/0xd0
+ do_syscall_64+0x60/0x90
+ ? do_syscall_64+0x6c/0x90
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7f4dad17f76f
+Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c>
+RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f
+RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b
+RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc
+R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0
+ </TASK>
+Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep >
+ typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas>
+CR2: 0000000000000008
+---[ end trace 0000000000000000 ]---
+RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
+Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8>
+RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224
+RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280
+RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850
+R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000
+R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224
+FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0
+
+With a second DP monitor connected, the drm_atomic_state in the dm atomic
+check sequence does not include the connector state for the
+old/existing/first DP monitor. In such a case, the dsc determination
+policy hits a NULL pointer when it tries to iterate over the
+old/existing stream that does not have a valid connector state attached.
+When that happens, the dm atomic check should call
+drm_atomic_get_connector_state for a new connector state. Existing dm
+code already does that, except on RV, which has no official DSC support:
+.num_dsc is not defined in the dcn10 resource caps, which prevents
+drm_atomic_get_connector_state from being called. So, skip the dsc
+determination policy for ASICs that don't have DSC support.
+
+Cc: stable@vger.kernel.org # 6.1+
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314
+Reviewed-by: Wayne Lin <wayne.lin@amd.com>
+Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Fangzhi Zuo <jerry.zuo@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index bea49befdcacc..a6c6f286a5988 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -10123,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
+ }
+
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+- ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+- if (ret) {
+- DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+- ret = -EINVAL;
+- goto fail;
++ if (dc_resource_is_dsc_encoding_supported(dc)) {
++ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
++ if (ret) {
++ DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
++ ret = -EINVAL;
++ goto fail;
++ }
+ }
+
+ ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
+--
+2.43.0
+
--- /dev/null
+From a06f08e22b6af5f25d8f4a6abddfaf9548d74e5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jan 2024 08:41:52 +0530
+Subject: drm/amd/display: Fix uninitialized variable usage in core_link_
+ 'read_dpcd() & write_dpcd()' functions
+
+From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
+
+[ Upstream commit a58371d632ebab9ea63f10893a6b6731196b6f8d ]
+
+The 'status' variable in 'core_link_read_dpcd()' &
+'core_link_write_dpcd()' was uninitialized.
+
+Thus, initialize the 'status' variable to 'DC_ERROR_UNEXPECTED' by default.
+
+Fixes the below:
+drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'.
+drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'.
+
+Cc: stable@vger.kernel.org
+Cc: Jerry Zuo <jerry.zuo@amd.com>
+Cc: Jun Lei <Jun.Lei@amd.com>
+Cc: Wayne Lin <Wayne.Lin@amd.com>
+Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
+Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
+index af110bf9470fa..aefca9756dbe8 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
+@@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd(
+ uint32_t extended_size;
+ /* size of the remaining partitioned address space */
+ uint32_t size_left_to_read;
+- enum dc_status status;
++ enum dc_status status = DC_ERROR_UNEXPECTED;
+ /* size of the next partition to be read from */
+ uint32_t partition_size;
+ uint32_t data_index = 0;
+@@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd(
+ {
+ uint32_t partition_size;
+ uint32_t data_index = 0;
+- enum dc_status status;
++ enum dc_status status = DC_ERROR_UNEXPECTED;
+
+ while (size) {
+ partition_size = dpcd_get_next_partition_size(address, size);
+--
+2.43.0
+
--- /dev/null
+From db5f2cf82b018c2d5cf047cde55d1e9baa8f6b31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 11:46:56 -0400
+Subject: drm/amd/display: Wrong colorimetry workaround
+
+From: Ma Hanghong <hanghong.ma@amd.com>
+
+[ Upstream commit b1a98cf89a695d36c414653634ea7ba91b6e701f ]
+
+[Why]
+For FreeSync HDR, the native color space flag in the AMD VSIF (BT.709) should
+be used when interpreting content, and the color space flag in the VSC or AVI
+infoFrame should be ignored. However, it turned out some userspace
+applications still use the color flag in the VSC or AVI infoFrame, which is
+incorrect.
+
+[How]
+Transfer function is used when building the VSC and AVI infoFrame. Set
+colorimetry to BT.709 when all the following match:
+
+1. Pixel format is YCbCr;
+2. In FreeSync 2 HDR, color is COLOR_SPACE_2020_YCBCR;
+3. Transfer function is TRANSFER_FUNC_GAMMA_22;
+
+Tested-by: Mark Broadworth <mark.broadworth@amd.com>
+Reviewed-by: Krunoslav Kovac <Krunoslav.Kovac@amd.com>
+Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Ma Hanghong <hanghong.ma@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: e6a7df96facd ("drm/amd/display: Fix MST Null Ptr for RV")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++-
+ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++
+ drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h | 3 ++-
+ .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 6 +++++-
+ 4 files changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index da16048bf1004..bea49befdcacc 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -5938,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+ bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
+ int mode_refresh;
+ int preferred_refresh = 0;
++ enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+ struct dsc_dec_dpcd_caps dsc_caps;
+ #endif
+@@ -6071,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+ if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
+ stream->use_vsc_sdp_for_colorimetry = true;
+ }
+- mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space);
++ if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
++ tf = TRANSFER_FUNC_GAMMA_22;
++ mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+ aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
+
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+index 66923f51037a3..e2f80cd0ca8cb 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+@@ -3038,6 +3038,12 @@ static void set_avi_info_frame(
+ hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
+ }
+
++ if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR &&
++ stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
++ hdmi_info.bits.EC0_EC2 = 0;
++ hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
++ }
++
+ /* TODO: un-hardcode aspect ratio */
+ aspect = stream->timing.aspect_ratio;
+
+diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+index 1d8b746b02f24..edf5845f6a1f7 100644
+--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
++++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+@@ -35,7 +35,8 @@ struct mod_vrr_params;
+
+ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+ struct dc_info_packet *info_packet,
+- enum dc_color_space cs);
++ enum dc_color_space cs,
++ enum color_transfer_func tf);
+
+ void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream,
+ struct dc_info_packet *info_packet);
+diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+index 27ceba9d6d658..69691058ab898 100644
+--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
++++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+@@ -132,7 +132,8 @@ enum ColorimetryYCCDP {
+
+ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+ struct dc_info_packet *info_packet,
+- enum dc_color_space cs)
++ enum dc_color_space cs,
++ enum color_transfer_func tf)
+ {
+ unsigned int vsc_packet_revision = vsc_packet_undefined;
+ unsigned int i;
+@@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+ colorimetryFormat = ColorimetryYCC_DP_AdobeYCC;
+ else if (cs == COLOR_SPACE_2020_YCBCR)
+ colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr;
++
++ if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22)
++ colorimetryFormat = ColorimetryYCC_DP_ITU709;
+ break;
+
+ default:
+--
+2.43.0
+
--- /dev/null
+From 288715497704306fceb8c2cb307a168cfb241320 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 12:52:03 +0100
+Subject: drm/amdgpu: Reset IH OVERFLOW_CLEAR bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Friedrich Vock <friedrich.vock@gmx.de>
+
+[ Upstream commit 7330256268664ea0a7dd5b07a3fed363093477dd ]
+
+Allows us to detect subsequent IH ring buffer overflows as well.
+
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++
+ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++
+ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++
+ 9 files changed, 52 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+index df385ffc97683..6578ca1b90afa 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
++
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
++ WREG32(mmIH_RB_CNTL, tmp);
+ }
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+index b8c47e0cf37ad..c19681492efa7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32(mmIH_RB_CNTL, tmp);
+
+ out:
+ return (wptr & ih->ptr_mask);
+diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+index aecad530b10a6..2c02ae69883d2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32(mmIH_RB_CNTL, tmp);
+
+ out:
+ return (wptr & ih->ptr_mask);
+diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+index 7cd79a3844b24..657e4ca6f9dd2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+@@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ out:
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+index eec13cb5bf758..84e8e8b008ef6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ out:
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
+index 9a24f17a57502..cada9f300a7f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
+@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = RREG32(IH_RB_CNTL);
+ tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
++
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
++ WREG32(IH_RB_CNTL, tmp);
+ }
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+index b08905d1c00f0..07a5d95be07f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32(mmIH_RB_CNTL, tmp);
++
+ out:
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+index 1e83db0c5438d..74c94df423455 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
+ out:
+ return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+index 59dfca093155c..f1ba76c35cd6e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+@@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
++ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++ * can be detected.
++ */
++ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
+ out:
+ return (wptr & ih->ptr_mask);
+ }
+--
+2.43.0
+
--- /dev/null
+From 85ad0276e21822aca9c6a80d8a03247daa354d1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:17 +0800
+Subject: elevator: add new field flags in struct elevator_queue
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 181d06637451b5348d746039478e71fa53dfbff6 ]
+
+There is currently only one flag, which indicates that the elevator is
+registered. Prepare to add a flag to disable wbt if the default elevator is bfq.
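+
+As a hedged illustration of what the new flags word enables (the
+ELEVATOR_FLAG_DISABLE_WBT name is only anticipated from the sentence above;
+treat it as illustrative):
+
+	#define ELEVATOR_FLAG_REGISTERED	0
+	#define ELEVATOR_FLAG_DISABLE_WBT	1	/* added by a later patch */
+
+	/* additional elevator state becomes a simple bit test on e->flags */
+	if (test_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags))
+		wbt_disable_default(q);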
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-6-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/elevator.c | 6 ++----
+ block/elevator.h | 4 +++-
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/block/elevator.c b/block/elevator.c
+index 20e70fd3f77f9..9e12706e8d8cb 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -512,7 +512,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
+ if (uevent)
+ kobject_uevent(&e->kobj, KOBJ_ADD);
+
+- e->registered = 1;
++ set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
+ }
+ return error;
+ }
+@@ -523,11 +523,9 @@ void elv_unregister_queue(struct request_queue *q)
+
+ lockdep_assert_held(&q->sysfs_lock);
+
+- if (e && e->registered) {
++ if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
+ kobject_uevent(&e->kobj, KOBJ_REMOVE);
+ kobject_del(&e->kobj);
+-
+- e->registered = 0;
+ }
+ }
+
+diff --git a/block/elevator.h b/block/elevator.h
+index 3f0593b3bf9d3..ed574bf3e629e 100644
+--- a/block/elevator.h
++++ b/block/elevator.h
+@@ -100,10 +100,12 @@ struct elevator_queue
+ void *elevator_data;
+ struct kobject kobj;
+ struct mutex sysfs_lock;
+- unsigned int registered:1;
++ unsigned long flags;
+ DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
+ };
+
++#define ELEVATOR_FLAG_REGISTERED 0
++
+ /*
+ * block elevator interface
+ */
+--
+2.43.0
+
--- /dev/null
+From efa75e7a472dd9a1c9519c1cf50e37d2b5d3ca47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:13 +0800
+Subject: elevator: remove redundant code in elv_unregister_queue()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 6d9f4cf125585ebf0718abcf5ce9ca898877c6d2 ]
+
+"elevator_queue *e" is already declared and initialized in the beginning
+of elv_unregister_queue().
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20221019121518.3865235-2-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/elevator.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/block/elevator.c b/block/elevator.c
+index bd71f0fc4e4b6..20e70fd3f77f9 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -524,8 +524,6 @@ void elv_unregister_queue(struct request_queue *q)
+ lockdep_assert_held(&q->sysfs_lock);
+
+ if (e && e->registered) {
+- struct elevator_queue *e = q->elevator;
+-
+ kobject_uevent(&e->kobj, KOBJ_REMOVE);
+ kobject_del(&e->kobj);
+
+--
+2.43.0
+
--- /dev/null
+From 253749176dc92e57dc90d7bf99dd82310f3bf2ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:34:00 +0100
+Subject: exit: wait_task_zombie: kill the no longer necessary
+ spin_lock_irq(siglock)
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ]
+
+After the recent changes nobody uses siglock to read the values protected
+by stats_lock, so we can kill spin_lock_irq(&current->sighand->siglock) and
+update the comment.
+
+With this patch only __exit_signal() and thread_group_start_cputime() take
+stats_lock under siglock.
+
+Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index bccfa4218356e..c95fffc625fcd 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1146,17 +1146,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ * and nobody can change them.
+ *
+ * psig->stats_lock also protects us from our sub-threads
+- * which can reap other children at the same time. Until
+- * we change k_getrusage()-like users to rely on this lock
+- * we have to take ->siglock as well.
++ * which can reap other children at the same time.
+ *
+ * We use thread_group_cputime_adjusted() to get times for
+ * the thread group, which consolidates times for all threads
+ * in the group including the group leader.
+ */
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+-	spin_lock_irq(&current->sighand->siglock);
+- write_seqlock(&psig->stats_lock);
++ write_seqlock_irq(&psig->stats_lock);
+ psig->cutime += tgutime + sig->cutime;
+ psig->cstime += tgstime + sig->cstime;
+ psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
+@@ -1179,8 +1176,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ psig->cmaxrss = maxrss;
+ task_io_accounting_add(&psig->ioac, &p->ioac);
+ task_io_accounting_add(&psig->ioac, &sig->ioac);
+- write_sequnlock(&psig->stats_lock);
+-	spin_unlock_irq(&current->sighand->siglock);
++ write_sequnlock_irq(&psig->stats_lock);
+ }
+
+ if (wo->wo_rusage)
+--
+2.43.0
+
--- /dev/null
+From 30a5f96454071a295334641ab88e22da9c9bcd99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 18:45:01 +0200
+Subject: fs/proc: do_task_stat: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 7904e53ed5a20fc678c01d5d1b07ec486425bb6a ]
+
+do/while_each_thread should be avoided when possible.
+
+Link: https://lkml.kernel.org/r/20230909164501.GA11581@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 7601df8031fd ("fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/array.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index 1b0d78dfd20f9..bcb645627991e 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -526,12 +526,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ /* add up live thread stats at the group level */
+ if (whole) {
+- struct task_struct *t = task;
+- do {
++ struct task_struct *t;
++
++ __for_each_thread(sig, t) {
+ min_flt += t->min_flt;
+ maj_flt += t->maj_flt;
+ gtime += task_gtime(t);
+- } while_each_thread(task, t);
++ }
+
+ min_flt += sig->min_flt;
+ maj_flt += sig->maj_flt;
+--
+2.43.0
+
--- /dev/null
+From ccfb929b0f854215d56556ebff5261bc0f01227c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:33:57 +0100
+Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the
+ threads/children stats
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 7601df8031fd67310af891897ef6cc0df4209305 ]
+
+lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
+do_task_stat() at the same time and the process has NR_THREADS threads, they
+will spin with irqs disabled for O(NR_CPUS * NR_THREADS) time.
+
+Change do_task_stat() to use sig->stats_lock to gather the statistics
+outside of the ->siglock protected section; in the likely case this code will
+run lockless.
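+
+A minimal sketch of the lockless-with-fallback read pattern used here (only
+the seqlock helpers are real API; the field read is just an example):
+
+	unsigned int seq = 1;
+	unsigned long flags;
+	u64 val;
+
+	do {
+		seq++;	/* 2 on the first, lockless pass; odd once the lock is taken */
+		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+		val = sig->cutime;	/* read the fields protected by stats_lock */
+	} while (need_seqretry(&sig->stats_lock, seq));
+	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+The common case completes without taking the lock; only when a concurrent
+writer is detected does the loop retry with the lock held.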
+
+Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/array.c | 58 +++++++++++++++++++++++++++----------------------
+ 1 file changed, 32 insertions(+), 26 deletions(-)
+
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index bcb645627991e..d210b2f8b7ed5 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+ int permitted;
+ struct mm_struct *mm;
+ unsigned long long start_time;
+- unsigned long cmin_flt = 0, cmaj_flt = 0;
+- unsigned long min_flt = 0, maj_flt = 0;
+- u64 cutime, cstime, utime, stime;
+- u64 cgtime, gtime;
++ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
++ u64 cutime, cstime, cgtime, utime, stime, gtime;
+ unsigned long rsslim = 0;
+ unsigned long flags;
+ int exit_code = task->exit_code;
++ struct signal_struct *sig = task->signal;
++ unsigned int seq = 1;
+
+ state = *get_task_state(task);
+ vsize = eip = esp = 0;
+@@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ sigemptyset(&sigign);
+ sigemptyset(&sigcatch);
+- cutime = cstime = 0;
+- cgtime = gtime = 0;
+
+ if (lock_task_sighand(task, &flags)) {
+- struct signal_struct *sig = task->signal;
+-
+ if (sig->tty) {
+ struct pid *pgrp = tty_get_pgrp(sig->tty);
+ tty_pgrp = pid_nr_ns(pgrp, ns);
+@@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+ num_threads = get_nr_threads(task);
+ collect_sigign_sigcatch(task, &sigign, &sigcatch);
+
+- cmin_flt = sig->cmin_flt;
+- cmaj_flt = sig->cmaj_flt;
+- cutime = sig->cutime;
+- cstime = sig->cstime;
+- cgtime = sig->cgtime;
+ rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
+
+- /* add up live thread stats at the group level */
+ if (whole) {
+- struct task_struct *t;
+-
+- __for_each_thread(sig, t) {
+- min_flt += t->min_flt;
+- maj_flt += t->maj_flt;
+- gtime += task_gtime(t);
+- }
+-
+- min_flt += sig->min_flt;
+- maj_flt += sig->maj_flt;
+- gtime += sig->gtime;
+-
+ if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
+ exit_code = sig->group_exit_code;
+ }
+@@ -552,6 +530,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+ if (permitted && (!whole || num_threads < 2))
+ wchan = !task_is_running(task);
+
++ do {
++ seq++; /* 2 on the 1st/lockless path, otherwise odd */
++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
++
++ cmin_flt = sig->cmin_flt;
++ cmaj_flt = sig->cmaj_flt;
++ cutime = sig->cutime;
++ cstime = sig->cstime;
++ cgtime = sig->cgtime;
++
++ if (whole) {
++ struct task_struct *t;
++
++ min_flt = sig->min_flt;
++ maj_flt = sig->maj_flt;
++ gtime = sig->gtime;
++
++ rcu_read_lock();
++ __for_each_thread(sig, t) {
++ min_flt += t->min_flt;
++ maj_flt += t->maj_flt;
++ gtime += task_gtime(t);
++ }
++ rcu_read_unlock();
++ }
++ } while (need_seqretry(&sig->stats_lock, seq));
++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
++
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+--
+2.43.0
+
--- /dev/null
+From 81ff25ceeea37b4c83ad30633828b50019a78f16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:25:54 +0200
+Subject: getrusage: add the "signal_struct *sig" local variable
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ]
+
+No functional changes, cleanup/preparation.
+
+Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 37 +++++++++++++++++++------------------
+ 1 file changed, 19 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index c85e1abf7b7c7..177155ba50cd3 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1779,6 +1779,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ unsigned long flags;
+ u64 tgutime, tgstime, utime, stime;
+ unsigned long maxrss = 0;
++ struct signal_struct *sig = p->signal;
+
+ memset((char *)r, 0, sizeof (*r));
+ utime = stime = 0;
+@@ -1786,7 +1787,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ if (who == RUSAGE_THREAD) {
+ task_cputime_adjusted(current, &utime, &stime);
+ accumulate_thread_rusage(p, r);
+- maxrss = p->signal->maxrss;
++ maxrss = sig->maxrss;
+ goto out;
+ }
+
+@@ -1796,15 +1797,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ switch (who) {
+ case RUSAGE_BOTH:
+ case RUSAGE_CHILDREN:
+- utime = p->signal->cutime;
+- stime = p->signal->cstime;
+- r->ru_nvcsw = p->signal->cnvcsw;
+- r->ru_nivcsw = p->signal->cnivcsw;
+- r->ru_minflt = p->signal->cmin_flt;
+- r->ru_majflt = p->signal->cmaj_flt;
+- r->ru_inblock = p->signal->cinblock;
+- r->ru_oublock = p->signal->coublock;
+- maxrss = p->signal->cmaxrss;
++ utime = sig->cutime;
++ stime = sig->cstime;
++ r->ru_nvcsw = sig->cnvcsw;
++ r->ru_nivcsw = sig->cnivcsw;
++ r->ru_minflt = sig->cmin_flt;
++ r->ru_majflt = sig->cmaj_flt;
++ r->ru_inblock = sig->cinblock;
++ r->ru_oublock = sig->coublock;
++ maxrss = sig->cmaxrss;
+
+ if (who == RUSAGE_CHILDREN)
+ break;
+@@ -1814,14 +1815,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+ utime += tgutime;
+ stime += tgstime;
+- r->ru_nvcsw += p->signal->nvcsw;
+- r->ru_nivcsw += p->signal->nivcsw;
+- r->ru_minflt += p->signal->min_flt;
+- r->ru_majflt += p->signal->maj_flt;
+- r->ru_inblock += p->signal->inblock;
+- r->ru_oublock += p->signal->oublock;
+- if (maxrss < p->signal->maxrss)
+- maxrss = p->signal->maxrss;
++ r->ru_nvcsw += sig->nvcsw;
++ r->ru_nivcsw += sig->nivcsw;
++ r->ru_minflt += sig->min_flt;
++ r->ru_majflt += sig->maj_flt;
++ r->ru_inblock += sig->inblock;
++ r->ru_oublock += sig->oublock;
++ if (maxrss < sig->maxrss)
++ maxrss = sig->maxrss;
+ t = p;
+ do {
+ accumulate_thread_rusage(t, r);
+--
+2.43.0
+
--- /dev/null
+From 915ff491a0f50b26e3e1c864d6331479e6056eeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:50 +0100
+Subject: getrusage: move thread_group_cputime_adjusted() outside of
+ lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ]
+
+Patch series "getrusage: use sig->stats_lock", v2.
+
+This patch (of 2):
+
+thread_group_cputime() does its own locking, so we can safely shift
+thread_group_cputime_adjusted(), which does another for_each_thread loop,
+outside of the ->siglock protected section.
+
+This is also preparation for the next patch, which changes getrusage() to
+use stats_lock instead of siglock; thread_group_cputime() takes the same
+lock. With the current implementation recursive read_seqbegin_or_lock() is
+fine, since thread_group_cputime() can't enter the slow mode if the caller
+holds stats_lock, yet this move looks safer and better performance-wise.
+
+Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com
+Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 177155ba50cd3..2646047fe5513 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1778,17 +1778,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ struct task_struct *t;
+ unsigned long flags;
+ u64 tgutime, tgstime, utime, stime;
+- unsigned long maxrss = 0;
++ unsigned long maxrss;
++ struct mm_struct *mm;
+ struct signal_struct *sig = p->signal;
+
+- memset((char *)r, 0, sizeof (*r));
++ memset(r, 0, sizeof(*r));
+ utime = stime = 0;
++ maxrss = 0;
+
+ if (who == RUSAGE_THREAD) {
+ task_cputime_adjusted(current, &utime, &stime);
+ accumulate_thread_rusage(p, r);
+ maxrss = sig->maxrss;
+- goto out;
++ goto out_thread;
+ }
+
+ if (!lock_task_sighand(p, &flags))
+@@ -1812,9 +1814,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ fallthrough;
+
+ case RUSAGE_SELF:
+- thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+- utime += tgutime;
+- stime += tgstime;
+ r->ru_nvcsw += sig->nvcsw;
+ r->ru_nivcsw += sig->nivcsw;
+ r->ru_minflt += sig->min_flt;
+@@ -1834,19 +1833,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ }
+ unlock_task_sighand(p, &flags);
+
+-out:
+- r->ru_utime = ns_to_kernel_old_timeval(utime);
+- r->ru_stime = ns_to_kernel_old_timeval(stime);
++ if (who == RUSAGE_CHILDREN)
++ goto out_children;
+
+- if (who != RUSAGE_CHILDREN) {
+- struct mm_struct *mm = get_task_mm(p);
++ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
++ utime += tgutime;
++ stime += tgstime;
+
+- if (mm) {
+- setmax_mm_hiwater_rss(&maxrss, mm);
+- mmput(mm);
+- }
++out_thread:
++ mm = get_task_mm(p);
++ if (mm) {
++ setmax_mm_hiwater_rss(&maxrss, mm);
++ mmput(mm);
+ }
++
++out_children:
+ r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
++ r->ru_utime = ns_to_kernel_old_timeval(utime);
++ r->ru_stime = ns_to_kernel_old_timeval(stime);
+ }
+
+ SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
+--
+2.43.0
+
--- /dev/null
+From c2606554387cfe68ca114e907e6556c86ef1b3ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:26:29 +0200
+Subject: getrusage: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ]
+
+do/while_each_thread should be avoided when possible.
+
+Plus this change allows us to avoid lock_task_sighand(); we can use rcu
+and/or sig->stats_lock instead.
+
+Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 2646047fe5513..04102538cf43f 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1822,10 +1822,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ r->ru_oublock += sig->oublock;
+ if (maxrss < sig->maxrss)
+ maxrss = sig->maxrss;
+- t = p;
+- do {
++ __for_each_thread(sig, t)
+ accumulate_thread_rusage(t, r);
+- } while_each_thread(p, t);
+ break;
+
+ default:
+--
+2.43.0
+
--- /dev/null
+From a9c7d357939f612d4a8a5533af560a4b4ad4a57a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:53 +0100
+Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ]
+
+lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
+getrusage() at the same time and the process has NR_THREADS threads,
+spin_lock_irq will spin with irqs disabled for O(NR_CPUS * NR_THREADS) time.
+
+Change getrusage() to use sig->stats_lock, which was specifically designed
+for this type of use. This way it runs lockless in the likely case.
+
+TODO:
+ - Change do_task_stat() to use sig->stats_lock too, then we can
+ remove spin_lock_irq(siglock) in wait_task_zombie().
+
+ - Turn sig->stats_lock into seqcount_rwlock_t, this way the
+ readers in the slow mode won't exclude each other. See
+ https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/
+
+ - stats_lock has to disable irqs because ->siglock can be taken
+ in irq context, it would be very nice to change __exit_signal()
+ to avoid the siglock->stats_lock dependency.
+
+Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 04102538cf43f..d06eda1387b69 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1781,7 +1781,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ unsigned long maxrss;
+ struct mm_struct *mm;
+ struct signal_struct *sig = p->signal;
++ unsigned int seq = 0;
+
++retry:
+ memset(r, 0, sizeof(*r));
+ utime = stime = 0;
+ maxrss = 0;
+@@ -1793,8 +1795,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ goto out_thread;
+ }
+
+- if (!lock_task_sighand(p, &flags))
+- return;
++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+ switch (who) {
+ case RUSAGE_BOTH:
+@@ -1822,14 +1823,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ r->ru_oublock += sig->oublock;
+ if (maxrss < sig->maxrss)
+ maxrss = sig->maxrss;
++
++ rcu_read_lock();
+ __for_each_thread(sig, t)
+ accumulate_thread_rusage(t, r);
++ rcu_read_unlock();
++
+ break;
+
+ default:
+ BUG();
+ }
+- unlock_task_sighand(p, &flags);
++
++ if (need_seqretry(&sig->stats_lock, seq)) {
++ seq = 1;
++ goto retry;
++ }
++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+ if (who == RUSAGE_CHILDREN)
+ goto out_children;
+--
+2.43.0
+
--- /dev/null
+From 9ff0df5b0577368409e200af8042ed5138f3cc34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Oct 2023 11:32:52 +0200
+Subject: KVM: s390: add stat counter for shadow gmap events
+
+From: Nico Boehr <nrb@linux.ibm.com>
+
+[ Upstream commit c3235e2dd6956448a562d6b1112205eeebc8ab43 ]
+
+The shadow gmap tracks memory of nested guests (guest-3). In certain
+scenarios, the shadow gmap needs to be rebuilt, which is a costly operation
+since it involves a SIE exit into guest-1 for every entry in the respective
+shadow level.
+
+Add kvm stat counters when new shadow structures are created at various
+levels. Also add a counter gmap_shadow_create when a completely fresh
+shadow gmap is created as well as a counter gmap_shadow_reuse when an
+existing gmap is being reused.
+
+Note that when several levels are shadowed at once, counters on all
+affected levels will be increased.
+
+Also note that not all page table levels need to be present and an ASCE
+can directly point to e.g. a segment table. In this case, a new segment
+table will always be equivalent to a new shadow gmap and hence will be
+counted as gmap_shadow_create and not as gmap_shadow_segment.
+
+Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20231009093304.2555344-2-nrb@linux.ibm.com
+Message-Id: <20231009093304.2555344-2-nrb@linux.ibm.com>
+Stable-dep-of: fe752331d4b3 ("KVM: s390: vsie: fix race during shadow creation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/kvm_host.h | 7 +++++++
+ arch/s390/kvm/gaccess.c | 7 +++++++
+ arch/s390/kvm/kvm-s390.c | 9 ++++++++-
+ arch/s390/kvm/vsie.c | 5 ++++-
+ 4 files changed, 26 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
+index b1e98a9ed152b..09abf000359f8 100644
+--- a/arch/s390/include/asm/kvm_host.h
++++ b/arch/s390/include/asm/kvm_host.h
+@@ -777,6 +777,13 @@ struct kvm_vm_stat {
+ u64 inject_service_signal;
+ u64 inject_virtio;
+ u64 aen_forward;
++ u64 gmap_shadow_create;
++ u64 gmap_shadow_reuse;
++ u64 gmap_shadow_r1_entry;
++ u64 gmap_shadow_r2_entry;
++ u64 gmap_shadow_r3_entry;
++ u64 gmap_shadow_sg_entry;
++ u64 gmap_shadow_pg_entry;
+ };
+
+ struct kvm_arch_memory_slot {
+diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
+index 0243b6e38d364..3beceff5f1c09 100644
+--- a/arch/s390/kvm/gaccess.c
++++ b/arch/s390/kvm/gaccess.c
+@@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ unsigned long *pgt, int *dat_protection,
+ int *fake)
+ {
++ struct kvm *kvm;
+ struct gmap *parent;
+ union asce asce;
+ union vaddress vaddr;
+@@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+
+ *fake = 0;
+ *dat_protection = 0;
++ kvm = sg->private;
+ parent = sg->parent;
+ vaddr.addr = saddr;
+ asce.val = sg->orig_asce;
+@@ -1341,6 +1343,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+ if (rc)
+ return rc;
++ kvm->stat.gmap_shadow_r1_entry++;
+ }
+ fallthrough;
+ case ASCE_TYPE_REGION2: {
+@@ -1369,6 +1372,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+ if (rc)
+ return rc;
++ kvm->stat.gmap_shadow_r2_entry++;
+ }
+ fallthrough;
+ case ASCE_TYPE_REGION3: {
+@@ -1406,6 +1410,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+ if (rc)
+ return rc;
++ kvm->stat.gmap_shadow_r3_entry++;
+ }
+ fallthrough;
+ case ASCE_TYPE_SEGMENT: {
+@@ -1439,6 +1444,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+ if (rc)
+ return rc;
++ kvm->stat.gmap_shadow_sg_entry++;
+ }
+ }
+ /* Return the parent address of the page table */
+@@ -1509,6 +1515,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+ pte.p |= dat_protection;
+ if (!rc)
+ rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
++ vcpu->kvm->stat.gmap_shadow_pg_entry++;
+ ipte_unlock(vcpu->kvm);
+ mmap_read_unlock(sg->mm);
+ return rc;
+diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
+index f604946ab2c85..348d49268a7ec 100644
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ STATS_DESC_COUNTER(VM, inject_pfault_done),
+ STATS_DESC_COUNTER(VM, inject_service_signal),
+ STATS_DESC_COUNTER(VM, inject_virtio),
+- STATS_DESC_COUNTER(VM, aen_forward)
++ STATS_DESC_COUNTER(VM, aen_forward),
++ STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
++ STATS_DESC_COUNTER(VM, gmap_shadow_create),
++ STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
++ STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
++ STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
++ STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
++ STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
+ };
+
+ const struct kvm_stats_header kvm_vm_stats_header = {
+diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
+index 740f8b56e63f9..b2dbf08a961e5 100644
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1206,8 +1206,10 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+ * we're holding has been unshadowed. If the gmap is still valid,
+ * we can safely reuse it.
+ */
+- if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
++ if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
++ vcpu->kvm->stat.gmap_shadow_reuse++;
+ return 0;
++ }
+
+ /* release the old shadow - if any, and mark the prefix as unmapped */
+ release_gmap_shadow(vsie_page);
+@@ -1215,6 +1217,7 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+ if (IS_ERR(gmap))
+ return PTR_ERR(gmap);
+ gmap->private = vcpu->kvm;
++ vcpu->kvm->stat.gmap_shadow_create++;
+ WRITE_ONCE(vsie_page->gmap, gmap);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 5a150283ce6521f9a844d5c5f14c8f1dc7e26bab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Dec 2023 13:53:17 +0100
+Subject: KVM: s390: vsie: fix race during shadow creation
+
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+
+[ Upstream commit fe752331d4b361d43cfd0b89534b4b2176057c32 ]
+
+Right now it is possible to see gmap->private being zero in
+kvm_s390_vsie_gmap_notifier, resulting in a crash. This is due to the
+fact that we only set gmap->private = kvm after creation:
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+ struct vsie_page *vsie_page)
+{
+[...]
+ gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+ if (IS_ERR(gmap))
+ return PTR_ERR(gmap);
+ gmap->private = vcpu->kvm;
+
+Let children inherit the private field of the parent.
+
+Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization")
+Cc: <stable@vger.kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kvm/vsie.c | 1 -
+ arch/s390/mm/gmap.c | 1 +
+ 2 files changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
+index b2dbf08a961e5..d90c818a9ae71 100644
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1216,7 +1216,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+ gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+ if (IS_ERR(gmap))
+ return PTR_ERR(gmap);
+- gmap->private = vcpu->kvm;
+ vcpu->kvm->stat.gmap_shadow_create++;
+ WRITE_ONCE(vsie_page->gmap, gmap);
+ return 0;
+diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
+index 243f673fa6515..662cf23a1b44b 100644
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
++ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+--
+2.43.0
+
--- /dev/null
+From 571c5e0b16a801d079f9d65fde6131b1e7141702 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Mar 2023 08:36:08 +0200
+Subject: nfp: flower: add goto_chain_index for ct entry
+
+From: Wentao Jia <wentao.jia@corigine.com>
+
+[ Upstream commit 3e44d19934b92398785b3ffc2353b9eba264140e ]
+
+The chain_index has different meanings in a pre ct entry and a post ct entry.
+In a pre ct entry it means the chain index, but in a post ct entry it means
+the goto chain index, which is confusing.
+
+chain_index and goto_chain_index may both be present in one flow rule; they
+cannot be distinguished by the single field chain_index, and both chain_index
+and goto_chain_index are required in the follow-up patch to support multiple
+ct zones.
+
+Another field, goto_chain_index, is added to record the goto chain index. If
+there is no goto action in the post ct entry, goto_chain_index is 0.
+
+Signed-off-by: Wentao Jia <wentao.jia@corigine.com>
+Acked-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: Louis Peens <louis.peens@corigine.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: cefa98e806fd ("nfp: flower: add hardware offload check for post ct entry")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++--
+ drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+index 7af03b45555dd..da7a47416a208 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+@@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
+ /* Checks that the chain_index of the filter matches the
+ * chain_index of the GOTO action.
+ */
+- if (post_ct_entry->chain_index != pre_ct_entry->chain_index)
++ if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index)
+ return -EINVAL;
+
+ err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry);
+@@ -1776,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
+ if (IS_ERR(ct_entry))
+ return PTR_ERR(ct_entry);
+ ct_entry->type = CT_TYPE_PRE_CT;
+- ct_entry->chain_index = ct_goto->chain_index;
++ ct_entry->chain_index = flow->common.chain_index;
++ ct_entry->goto_chain_index = ct_goto->chain_index;
+ list_add(&ct_entry->list_node, &zt->pre_ct_list);
+ zt->pre_ct_count++;
+
+@@ -1799,6 +1800,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+ struct nfp_fl_ct_zone_entry *zt;
+ bool wildcarded = false;
+ struct flow_match_ct ct;
++ struct flow_action_entry *ct_goto;
+
+ flow_rule_match_ct(rule, &ct);
+ if (!ct.mask->ct_zone) {
+@@ -1823,6 +1825,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+
+ ct_entry->type = CT_TYPE_POST_CT;
+ ct_entry->chain_index = flow->common.chain_index;
++ ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
++ ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0;
+ list_add(&ct_entry->list_node, &zt->post_ct_list);
+ zt->post_ct_count++;
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+index 762c0b36e269b..9440ab776ecea 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+@@ -112,6 +112,7 @@ enum nfp_nfp_layer_name {
+ * @cookie: Flow cookie, same as original TC flow, used as key
+ * @list_node: Used by the list
+ * @chain_index: Chain index of the original flow
++ * @goto_chain_index: goto chain index of the flow
+ * @netdev: netdev structure.
+ * @type: Type of pre-entry from enum ct_entry_type
+ * @zt: Reference to the zone table this belongs to
+@@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry {
+ unsigned long cookie;
+ struct list_head list_node;
+ u32 chain_index;
++ u32 goto_chain_index;
+ enum ct_entry_type type;
+ struct net_device *netdev;
+ struct nfp_fl_ct_zone_entry *zt;
+--
+2.43.0
+
--- /dev/null
+From 435ba0cb7080cb3f0960b93523f5da947205147a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jan 2024 17:19:08 +0200
+Subject: nfp: flower: add hardware offload check for post ct entry
+
+From: Hui Zhou <hui.zhou@corigine.com>
+
+[ Upstream commit cefa98e806fd4e2a5e2047457a11ae5f17b8f621 ]
+
+The nfp offload flow pay will not allocate a mask id when the out port is an
+openvswitch internal port. This is because these flows are used to configure
+the pre_tun table and are never actually sent to the firmware as an add-flow
+message. When a tc rule whose action contains ct has a post ct entry whose
+out port is an openvswitch internal port, the merged offload flow pay with
+the wrong mask id of 0 will be sent to the firmware. The nfp cannot support
+hardware offload for this situation, so return EOPNOTSUPP.
+
+Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows")
+CC: stable@vger.kernel.org # 5.14+
+Signed-off-by: Hui Zhou <hui.zhou@corigine.com>
+Signed-off-by: Louis Peens <louis.peens@corigine.com>
+Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+index da7a47416a208..497766ecdd91d 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+@@ -1797,10 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+ {
+ struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+ struct nfp_fl_ct_flow_entry *ct_entry;
++ struct flow_action_entry *ct_goto;
+ struct nfp_fl_ct_zone_entry *zt;
++ struct flow_action_entry *act;
+ bool wildcarded = false;
+ struct flow_match_ct ct;
+- struct flow_action_entry *ct_goto;
++ int i;
++
++ flow_action_for_each(i, act, &rule->action) {
++ switch (act->id) {
++ case FLOW_ACTION_REDIRECT:
++ case FLOW_ACTION_REDIRECT_INGRESS:
++ case FLOW_ACTION_MIRRED:
++ case FLOW_ACTION_MIRRED_INGRESS:
++ if (act->dev->rtnl_link_ops &&
++ !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
++ NL_SET_ERR_MSG_MOD(extack,
++ "unsupported offload: out port is openvswitch internal port");
++ return -EOPNOTSUPP;
++ }
++ break;
++ default:
++ break;
++ }
++ }
+
+ flow_rule_match_ct(rule, &ct);
+ if (!ct.mask->ct_zone) {
+--
+2.43.0
+
--- /dev/null
+From 751dd31cb25b1fda2357852e790cffcc04bb0544 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Jan 2024 09:58:39 +0100
+Subject: readahead: avoid multiple marked readahead pages
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 ]
+
+ra_alloc_folio() marks a page that should trigger the next round of async
+readahead. However, it rounds the computed index up to the order of the page
+being allocated. This can lead to multiple consecutive pages being marked
+with the readahead flag. Consider a situation with index == 1, mark == 1,
+order == 0. We insert an order 0 page at index 1 and mark it. Then we bump
+order to 1 and index to 2; mark (still == 1) is rounded up to 2, so the page
+at index 2 is marked as well. Then we bump order to 2 and index to 4; mark
+gets rounded up to 4, so the page at index 4 is marked as well. The fact that
+multiple pages get marked within a single readahead window confuses the
+readahead logic and results in the readahead window being trimmed back to 1.
+This situation is triggered in particular when the maximum readahead window
+size is not a power of two (in the observed case it was 768 KB), and as a
+result sequential read throughput suffers.
+
+Fix the problem by rounding 'mark' down instead of up. Because the index is
+naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff
+'mark' is within the page we are allocating at 'index', and thus exactly one
+page is marked with the readahead flag, as required by the readahead code,
+and sequential read performance is restored.
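+
+Walking through the changelog's own numbers with the new rounding (mark == 1;
+folios allocated at index 1/order 0, index 2/order 1, index 4/order 2):
+
+	round_down(1, 1UL << 0) == 1	/* == index 1 -> marked     */
+	round_down(1, 1UL << 1) == 0	/* != index 2 -> not marked */
+	round_down(1, 1UL << 2) == 0	/* != index 4 -> not marked */
+
+whereas round_up() yields 1, 2 and 4, matching every index and marking all
+three folios.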
+
+This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix
+readahead with large folios"). The commit changed the rounding with the
+rationale:
+
+"... we were setting the readahead flag on the folio which contains the
+last byte read from the block. This is wrong because we will trigger
+readahead at the end of the read without waiting to see if a subsequent
+read is going to use the pages we just read."
+
+Although this is true, the fact is this was always the case with read
+sizes not aligned to folio boundaries and large folios in the page cache
+just make the situation more obvious (and frequent). Also for sequential
+read workloads it is better to trigger the readahead earlier rather than
+later. It is true that the difference in the rounding and thus earlier
+triggering of the readahead can result in reading more for semi-random
+workloads. However workloads really suffering from this seem to be rare.
+In particular I have verified that the workload described in commit
+b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading
+random 100k blocks from a file like:
+
+[reader]
+bs=100k
+rw=randread
+numjobs=1
+size=64g
+runtime=60s
+
+is not impacted by the rounding change and achieves ~70MB/s in both cases.
+
+[jack@suse.cz: fix one more place where mark rounding was done as well]
+ Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz
+Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz
+Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Guo Xuenan <guoxuenan@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/readahead.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/readahead.c b/mm/readahead.c
+index ba43428043a35..e4b772bb70e68 100644
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+
+ if (!folio)
+ return -ENOMEM;
+- mark = round_up(mark, 1UL << order);
++ mark = round_down(mark, 1UL << order);
+ if (index == mark)
+ folio_set_readahead(folio);
+ err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+@@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
+ * It's the expected callback index, assume sequential access.
+ * Ramp up sizes, and push forward the readahead window.
+ */
+- expected = round_up(ra->start + ra->size - ra->async_size,
++ expected = round_down(ra->start + ra->size - ra->async_size,
+ 1UL << order);
+ if (index == expected || index == (ra->start + ra->size)) {
+ ra->start += ra->size;
+--
+2.43.0
+
--- /dev/null
+From edc5869301a9444152614d751e3bb60d6c2a75db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Jan 2024 06:14:29 -0700
+Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
+
+From: Nico Pache <npache@redhat.com>
+
+[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ]
+
+On systems with a 64k page size and a 512M huge page size, the allocation
+and test succeed but error out at the munmap. As the comment states,
+munmap will fail if it is not HUGEPAGE aligned. This is due to the length
+of the mapping being half the size of the huge page, causing the munmap
+length to not be huge page aligned. Fix this by making the mapping length
+the full huge page if the huge page is larger than the length of the
+mapping.
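+
+If it helps to visualize the failure, here is a stripped-down user-space
+sketch (illustration only, not the selftest itself; the 512M huge page
+size is an assumption and error handling is trimmed):
+
+#include <stdio.h>
+#include <sys/mman.h>
+
+int main(void)
+{
+	size_t hugepage_size = 512UL << 20;	/* assumed 512M huge pages */
+	size_t length = 256UL << 20;		/* LENGTH used by map_hugetlb.c */
+	void *addr;
+
+	/* The fix: never munmap less than one full huge page. */
+	if (hugepage_size > length)
+		length = hugepage_size;
+
+	addr = mmap(NULL, length, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		return 1;
+	}
+	/* Without the adjustment above, this munmap() fails with EINVAL. */
+	return munmap(addr, length) ? 1 : 0;
+}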
+
+Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com
+Signed-off-by: Nico Pache <npache@redhat.com>
+Cc: Donet Tom <donettom@linux.vnet.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
+index 312889edb84ab..c65c55b7a789f 100644
+--- a/tools/testing/selftests/vm/map_hugetlb.c
++++ b/tools/testing/selftests/vm/map_hugetlb.c
+@@ -15,6 +15,7 @@
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <fcntl.h>
++#include "vm_util.h"
+
+ #define LENGTH (256UL*1024*1024)
+ #define PROTECTION (PROT_READ | PROT_WRITE)
+@@ -70,10 +71,16 @@ int main(int argc, char **argv)
+ {
+ void *addr;
+ int ret;
++ size_t hugepage_size;
+ size_t length = LENGTH;
+ int flags = FLAGS;
+ int shift = 0;
+
++ hugepage_size = default_huge_page_size();
++ /* munmap with fail if the length is not page aligned */
++ if (hugepage_size > length)
++ length = hugepage_size;
++
+ if (argc > 1)
+ length = atol(argv[1]) << 20;
+ if (argc > 2) {
+--
+2.43.0
+
--- /dev/null
+From a5559df581c2a4189c25ad561a0b87f7bccd22ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 14:04:54 +0500
+Subject: selftests/mm: switch to bash from sh
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ]
+
+Running charge_reserved_hugetlb.sh generates errors if sh is set to
+dash:
+
+./charge_reserved_hugetlb.sh: 9: [[: not found
+./charge_reserved_hugetlb.sh: 19: [[: not found
+./charge_reserved_hugetlb.sh: 27: [[: not found
+./charge_reserved_hugetlb.sh: 37: [[: not found
+./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected
+
+Switch to using /bin/bash instead of /bin/sh. Make the switch for
+write_hugetlb_memory.sh as well, since it is called from
+charge_reserved_hugetlb.sh.
+
+Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +-
+ tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index 0899019a7fcb4..e14bdd4455f2d 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+ # Kselftest framework requirement - SKIP code is 4.
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+index 70a02301f4c27..3d2d2eb9d6fff 100644
+--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+ set -e
+--
+2.43.0
+
--- /dev/null
+From 29eb96441917da2cf892c37a0d2fb87ceabfed17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jan 2024 22:49:51 +0100
+Subject: selftests: mptcp: decrease BW in simult flows
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 5e2f3c65af47e527ccac54060cf909e3306652ff ]
+
+When running the simult_flow selftest in slow environments (e.g. QEMU
+without KVM support), the results can be unstable. This selftest checks
+whether the aggregated bandwidth is (almost) fully used as expected.
+
+To help improve the stability while still keeping the same validation
+in place, the BW and the delay are reduced to lower the pressure on the
+CPU.
+
+Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests")
+Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 6b0014f538a22..6bda70af03a83 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -303,12 +303,12 @@ done
+
+ setup
+ run_test 10 10 0 0 "balanced bwidth"
+-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
++run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
+
+ # we still need some additional infrastructure to pass the following test-cases
+-run_test 30 10 0 0 "unbalanced bwidth"
+-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
++run_test 10 3 0 0 "unbalanced bwidth"
++run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
++run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+
+ mptcp_lib_result_print_all_tap
+ exit $ret
+--
+2.43.0
+
--- /dev/null
+From 94a73f9dec7fef0ebc322fac05236c755fb6aa29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Feb 2024 19:25:37 +0100
+Subject: selftests: mptcp: simult flows: fix some subtest names
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 4d8e0dde0403b5a86aa83e243f020711a9c3e31f ]
+
+The selftest was correctly recording all the results, but the 'reverse
+direction' part was missing from the name when needed.
+
+It is important to have a unique (sub)test name in TAP, because some CI
+environments drop tests with duplicated names.
+
+Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP")
+Cc: stable@vger.kernel.org
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 6bda70af03a83..41d2f4991b35c 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -269,7 +269,8 @@ run_test()
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+- printf "%-60s" "$msg - reverse direction"
++ msg+=" - reverse direction"
++ printf "%-60s" "${msg}"
+ do_transfer $large $small $time
+ lret=$?
+ mptcp_lib_result_code "${lret}" "${msg}"
+--
+2.43.0
+
--- /dev/null
+From 4eeef0aaffa567f812390612c30f800de02edd73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 15:21:31 +0200
+Subject: selftests: mptcp: simult flows: format subtests results in TAP
+
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+
+[ Upstream commit 675d99338e7a6cd925d61d7dbf8c26612f7f08a9 ]
+
+The current selftests infrastructure formats the results in TAP 13. This
+version doesn't support subtests, and only the end result of each
+selftest is taken into account. It means that a single issue in a
+subtest of a selftest containing multiple subtests forces the whole
+selftest to be marked as failed. It also means that subtest results are
+not tracked by CIs executing selftests.
+
+MPTCP selftests run hundreds of various subtests. It is therefore
+important to track each of them and not just one result per selftest.
+
+It is particularly interesting to do that when validating stable kernels
+with the latest version of the test suite: tests might fail because a
+feature is not supported but the test didn't skip that part. In this
+case, if subtests are not tracked, the whole selftest will be marked as
+failed, making the other subtests useless because their results are
+ignored.
+
+This patch formats subtest results in TAP in the simult_flows.sh selftest.
+
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 5e2f3c65af47 ("selftests: mptcp: decrease BW in simult flows")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 4a417f9d51d67..6b0014f538a22 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -263,6 +263,7 @@ run_test()
+ printf "%-60s" "$msg"
+ do_transfer $small $large $time
+ lret=$?
++ mptcp_lib_result_code "${lret}" "${msg}"
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+@@ -271,6 +272,7 @@ run_test()
+ printf "%-60s" "$msg - reverse direction"
+ do_transfer $large $small $time
+ lret=$?
++ mptcp_lib_result_code "${lret}" "${msg}"
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+@@ -307,4 +309,6 @@ run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+ run_test 30 10 0 0 "unbalanced bwidth"
+ run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+ run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
++
++mptcp_lib_result_print_all_tap
+ exit $ret
+--
+2.43.0
+
netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch
netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch
netrom-fix-data-races-around-sysctl_net_busy_read.patch
+kvm-s390-add-stat-counter-for-shadow-gmap-events.patch
+kvm-s390-vsie-fix-race-during-shadow-creation.patch
+asoc-codecs-wcd938x-fix-headphones-volume-controls.patch
+drm-amd-display-fix-uninitialized-variable-usage-in-.patch
+nfp-flower-add-goto_chain_index-for-ct-entry.patch
+nfp-flower-add-hardware-offload-check-for-post-ct-en.patch
+readahead-avoid-multiple-marked-readahead-pages.patch
+selftests-mm-switch-to-bash-from-sh.patch
+selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
+xhci-process-isoc-td-properly-when-there-was-a-trans.patch
+xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
+drm-amdgpu-reset-ih-overflow_clear-bit.patch
+selftests-mptcp-simult-flows-format-subtests-results.patch
+selftests-mptcp-decrease-bw-in-simult-flows.patch
+blk-iocost-disable-writeback-throttling.patch
+elevator-remove-redundant-code-in-elv_unregister_que.patch
+blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch
+elevator-add-new-field-flags-in-struct-elevator_queu.patch
+blk-wbt-don-t-enable-throttling-if-default-elevator-.patch
+blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch
+blk-wbt-pass-a-gendisk-to-wbt_init.patch
+blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch
+blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch
+blk-rq-qos-constify-rq_qos_ops.patch
+blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch
+blk-wbt-fix-detection-of-dirty-throttled-tasks.patch
+drm-amd-display-wrong-colorimetry-workaround.patch
+drm-amd-display-fix-mst-null-ptr-for-rv.patch
+getrusage-add-the-signal_struct-sig-local-variable.patch
+getrusage-move-thread_group_cputime_adjusted-outside.patch
+getrusage-use-__for_each_thread.patch
+getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
+fs-proc-do_task_stat-use-__for_each_thread.patch
+fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch
+exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
+selftests-mptcp-simult-flows-fix-some-subtest-names.patch
+blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch
+blk-iocost-pass-gendisk-to-ioc_refresh_params.patch
--- /dev/null
+From 9b5b2f37f69d5ac40e70bb8ba57cc444a5731800 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:37 +0200
+Subject: xhci: handle isoc Babble and Buffer Overrun events properly
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ]
+
+xHCI 4.9 explicitly forbids assuming that the xHC has released its
+ownership of a multi-TRB TD when it reports an error on one of the
+early TRBs. Yet the driver makes such an assumption and releases the TD,
+allowing the remaining TRBs to be freed or overwritten by new TDs.
+
+The xHC should also report completion of the final TRB due to its IOC
+flag being set by us, regardless of prior errors. This event cannot
+be recognized if the TD has already been freed earlier, resulting in a
+"Transfer event TRB DMA ptr not part of current TD" error message.
+
+Fix this by reusing the logic for processing isoc Transaction Errors.
+This also handles hosts which fail to report the final completion.
+
+Fix transfer length reporting on Babble errors. They may be caused by a
+device malfunction, so there is no guarantee that the buffer has been
+filled.
+
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index e4441a71368e5..239b5edee3268 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2381,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ case COMP_BANDWIDTH_OVERRUN_ERROR:
+ frame->status = -ECOMM;
+ break;
+- case COMP_ISOCH_BUFFER_OVERRUN:
+ case COMP_BABBLE_DETECTED_ERROR:
++ sum_trbs_for_length = true;
++ fallthrough;
++ case COMP_ISOCH_BUFFER_OVERRUN:
+ frame->status = -EOVERFLOW;
++ if (ep_trb != td->last_trb)
++ td->error_mid_td = true;
+ break;
+ case COMP_INCOMPATIBLE_DEVICE_ERROR:
+ case COMP_STALL_ERROR:
+--
+2.43.0
+
--- /dev/null
+From 2ca20b347c88d00cc5e407823e29358788366800 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:36 +0200
+Subject: xhci: process isoc TD properly when there was a transaction error mid
+ TD.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ]
+
+The last TRB of an isoc TD might not trigger an event if there was
+an error event for a TRB mid TD. This is seen on a NEC Corporation
+uPD720200 USB 3.0 host.
+
+After an error in the middle of a multi-TRB TD, the xHC should, according
+to xHCI 4.9.1, generate events for the passed TRBs with the IOC flag set
+if it proceeds to the next TD. This event is either a copy of the
+original error, or a "success" transfer event.
+
+If that event is missing then the driver and the xHC host get out of sync,
+as the driver is still expecting a transfer event for that first TD, while
+the xHC host is already sending events for the next TD in the list.
+This leads to
+"Transfer event TRB DMA ptr not part of current TD" messages.
+
+As a solution, we tag the isoc TDs that get error events mid TD.
+If an event doesn't match the first TD, then check if the tag is
+set and the event points to the next TD.
+In that case, give back the first TD and process the next TD normally.
+
+Make sure TD status and transferred length stay valid in both cases,
+with and without a final TD completion event.
+
+Reported-by: Michał Pecio <michal.pecio@gmail.com>
+Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/
+Tested-by: Michał Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++-------
+ drivers/usb/host/xhci.h | 1 +
+ 2 files changed, 61 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 1239e06dfe411..e4441a71368e5 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ /* handle completion code */
+ switch (trb_comp_code) {
+ case COMP_SUCCESS:
++ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
++ if (td->error_mid_td)
++ break;
+ if (remaining) {
+ frame->status = short_framestatus;
+ if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
+@@ -2388,8 +2391,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ break;
+ case COMP_USB_TRANSACTION_ERROR:
+ frame->status = -EPROTO;
++ sum_trbs_for_length = true;
+ if (ep_trb != td->last_trb)
+- return 0;
++ td->error_mid_td = true;
+ break;
+ case COMP_STOPPED:
+ sum_trbs_for_length = true;
+@@ -2409,6 +2413,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ break;
+ }
+
++ if (td->urb_length_set)
++ goto finish_td;
++
+ if (sum_trbs_for_length)
+ frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
+ ep_trb_len - remaining;
+@@ -2417,6 +2424,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+
+ td->urb->actual_length += frame->actual_length;
+
++finish_td:
++ /* Don't give back TD yet if we encountered an error mid TD */
++ if (td->error_mid_td && ep_trb != td->last_trb) {
++ xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
++ td->urb_length_set = true;
++ return 0;
++ }
++
+ return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+
+@@ -2801,17 +2816,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+ }
+
+ if (!ep_seg) {
+- if (!ep->skip ||
+- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+- /* Some host controllers give a spurious
+- * successful event after a short transfer.
+- * Ignore it.
+- */
+- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+- ep_ring->last_td_was_short) {
+- ep_ring->last_td_was_short = false;
+- goto cleanup;
++
++ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
++ skip_isoc_td(xhci, td, ep, status);
++ goto cleanup;
++ }
++
++ /*
++ * Some hosts give a spurious success event after a short
++ * transfer. Ignore it.
++ */
++ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
++ ep_ring->last_td_was_short) {
++ ep_ring->last_td_was_short = false;
++ goto cleanup;
++ }
++
++ /*
++ * xhci 4.10.2 states isoc endpoints should continue
++ * processing the next TD if there was an error mid TD.
++ * So host like NEC don't generate an event for the last
++ * isoc TRB even if the IOC flag is set.
++ * xhci 4.9.1 states that if there are errors in mult-TRB
++ * TDs xHC should generate an error for that TRB, and if xHC
++ * proceeds to the next TD it should genete an event for
++ * any TRB with IOC flag on the way. Other host follow this.
++ * So this event might be for the next TD.
++ */
++ if (td->error_mid_td &&
++ !list_is_last(&td->td_list, &ep_ring->td_list)) {
++ struct xhci_td *td_next = list_next_entry(td, td_list);
++
++ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
++ td_next->last_trb, ep_trb_dma, false);
++ if (ep_seg) {
++ /* give back previous TD, start handling new */
++ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
++ ep_ring->dequeue = td->last_trb;
++ ep_ring->deq_seg = td->last_trb_seg;
++ inc_deq(xhci, ep_ring);
++ xhci_td_cleanup(xhci, td, ep_ring, td->status);
++ td = td_next;
+ }
++ }
++
++ if (!ep_seg) {
+ /* HC is busted, give up! */
+ xhci_err(xhci,
+ "ERROR Transfer event TRB DMA ptr not "
+@@ -2823,9 +2872,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+ ep_trb_dma, true);
+ return -ESHUTDOWN;
+ }
+-
+- skip_isoc_td(xhci, td, ep, status);
+- goto cleanup;
+ }
+ if (trb_comp_code == COMP_SHORT_PACKET)
+ ep_ring->last_td_was_short = true;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 1354310cb37b1..fc25a5b09710c 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1570,6 +1570,7 @@ struct xhci_td {
+ struct xhci_segment *bounce_seg;
+ /* actual_length of the URB has already been set */
+ bool urb_length_set;
++ bool error_mid_td;
+ unsigned int num_trbs;
+ };
+
+--
+2.43.0
+