git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author    Sasha Levin <sashal@kernel.org>    Sun, 10 Mar 2024 02:31:47 +0000 (21:31 -0500)
committer Sasha Levin <sashal@kernel.org>    Sun, 10 Mar 2024 02:31:47 +0000 (21:31 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
39 files changed:
queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch [new file with mode: 0644]
queue-6.1/blk-iocost-disable-writeback-throttling.patch [new file with mode: 0644]
queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch [new file with mode: 0644]
queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch [new file with mode: 0644]
queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch [new file with mode: 0644]
queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch [new file with mode: 0644]
queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch [new file with mode: 0644]
queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch [new file with mode: 0644]
queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch [new file with mode: 0644]
queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch [new file with mode: 0644]
queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch [new file with mode: 0644]
queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch [new file with mode: 0644]
queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch [new file with mode: 0644]
queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch [new file with mode: 0644]
queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch [new file with mode: 0644]
queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch [new file with mode: 0644]
queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch [new file with mode: 0644]
queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch [new file with mode: 0644]
queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch [new file with mode: 0644]
queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch [new file with mode: 0644]
queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch [new file with mode: 0644]
queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch [new file with mode: 0644]
queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch [new file with mode: 0644]
queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch [new file with mode: 0644]
queue-6.1/getrusage-use-__for_each_thread.patch [new file with mode: 0644]
queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch [new file with mode: 0644]
queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch [new file with mode: 0644]
queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch [new file with mode: 0644]
queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch [new file with mode: 0644]
queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch [new file with mode: 0644]
queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch [new file with mode: 0644]
queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch [new file with mode: 0644]
queue-6.1/selftests-mm-switch-to-bash-from-sh.patch [new file with mode: 0644]
queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch [new file with mode: 0644]
queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch [new file with mode: 0644]
queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch [new file with mode: 0644]
queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch [new file with mode: 0644]

diff --git a/queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch b/queue-6.1/asoc-codecs-wcd938x-fix-headphones-volume-controls.patch
new file mode 100644 (file)
index 0000000..6bee69b
--- /dev/null
@@ -0,0 +1,42 @@
+From cb6503e46264caaec048dda33ad58268a5f559bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 10:11:30 +0100
+Subject: ASoC: codecs: wcd938x: fix headphones volume controls
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 4d0e8bdfa4a57099dc7230952a460903f2e2f8de ]
+
+The lowest headphones volume setting does not mute, so leave the TLV
+mute flag unset.
+
+This is specifically needed to let the sound server use the lowest gain
+setting.
+
+Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR")
+Cc:  <stable@vger.kernel.org>      # 6.5
+Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index e80be4e4fa8b4..555b74e7172d8 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -210,7 +210,7 @@ struct wcd938x_priv {
+ };
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
+-static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
+ struct wcd938x_mbhc_zdet_param {
+-- 
+2.43.0
+
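A note on the one-character fix above: the fourth argument of
DECLARE_TLV_DB_SCALE() is the mute flag, and any non-zero value tells
userspace that the control's lowest step mutes the output. The following
stand-alone sketch mirrors the UAPI encoding from <sound/tlv.h>
(SNDRV_CTL_TLVD_DB_SCALE_MUTE = 0x10000, ..._MASK = 0xffff); the
constants are copied locally so it compiles outside the kernel tree:

#include <stdio.h>

/* Local stand-ins for the UAPI <sound/tlv.h> constants. */
#define TLV_DB_SCALE_MASK  0xffff
#define TLV_DB_SCALE_MUTE  0x10000

/* Mirrors SNDRV_CTL_TLVD_DB_SCALE_MUTE_AND_STEP(mute, step). */
static unsigned int mute_and_step(int mute, int step)
{
        return (mute ? TLV_DB_SCALE_MUTE : 0) | (step & TLV_DB_SCALE_MASK);
}

int main(void)
{
        /* Old: DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000).
         * The non-zero fourth argument set the mute bit, so the sound
         * server treated the lowest (-30 dB) step as mute and avoided it. */
        unsigned int old = mute_and_step(-3000, 150);

        /* Fixed: mute flag 0, the lowest step is simply the minimum gain. */
        unsigned int fixed = mute_and_step(0, 150);

        printf("old:   mute bit %s\n", (old & TLV_DB_SCALE_MUTE) ? "set" : "clear");
        printf("fixed: mute bit %s\n", (fixed & TLV_DB_SCALE_MUTE) ? "set" : "clear");
        return 0;
}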
diff --git a/queue-6.1/blk-iocost-disable-writeback-throttling.patch b/queue-6.1/blk-iocost-disable-writeback-throttling.patch
new file mode 100644 (file)
index 0000000..7225520
--- /dev/null
@@ -0,0 +1,46 @@
+From c287453564ed11a8d05e35a279e773fa882d33a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 17:40:32 +0800
+Subject: blk-iocost: disable writeback throttling
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 8796acbc9a0eceeddd99eaef833bdda1241d39b9 ]
+
+Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") disabled
+wbt for bfq, because different write-throttling heuristics should not
+work together.
+
+For the same reason, wbt and iocost should not work together either,
+unless the admin really wants to do that, despite the performance
+impact.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20221012094035.390056-2-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index e6557024e3da8..3788774a7b729 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -3281,9 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
+               blk_stat_enable_accounting(disk->queue);
+               blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+               ioc->enabled = true;
++              wbt_disable_default(disk->queue);
+       } else {
+               blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+               ioc->enabled = false;
++              wbt_enable_default(disk->queue);
+       }
+       if (user) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch b/queue-6.1/blk-iocost-pass-gendisk-to-ioc_refresh_params.patch
new file mode 100644 (file)
index 0000000..788ce87
--- /dev/null
@@ -0,0 +1,138 @@
+From 4d92df6c36fe4a84ee71df9bbf00ad1bf65633f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Feb 2023 03:16:54 -0800
+Subject: blk-iocost: Pass gendisk to ioc_refresh_params
+
+From: Breno Leitao <leitao@debian.org>
+
+[ Upstream commit e33b93650fc5364f773985a3e961e24349330d97 ]
+
+The current kernel (d2980d8d826554fa6981d621e569a453787472f8) crashes
+in blk_iocost_init() for the `nvme1` disk.
+
+       BUG: kernel NULL pointer dereference, address: 0000000000000050
+       #PF: supervisor read access in kernel mode
+       #PF: error_code(0x0000) - not-present page
+
+       blk_iocost_init (include/asm-generic/qspinlock.h:128
+                        include/linux/spinlock.h:203
+                        include/linux/spinlock_api_smp.h:158
+                        include/linux/spinlock.h:400
+                        block/blk-iocost.c:2884)
+       ioc_qos_write (block/blk-iocost.c:3198)
+       ? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566)
+       ? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311)
+       ? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491)
+       ? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46
+                          arch/x86/include/asm/uaccess_64.h:52
+                          lib/iov_iter.c:183 lib/iov_iter.c:628)
+       ? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693)
+       cgroup_file_write (kernel/cgroup/cgroup.c:4061)
+       kernfs_fop_write_iter (fs/kernfs/file.c:334)
+       vfs_write (include/linux/fs.h:1849 fs/read_write.c:491
+                  fs/read_write.c:584)
+       ksys_write (fs/read_write.c:637)
+       do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+       entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+This happens because ioc_refresh_params() is called before ioc->rqos is
+properly initialized; that initialization happens later, in the callee
+(rq_qos_add()).
+
+ioc_refresh_params() -> ioc_autop_idx() tries to access
+ioc->rqos.disk->queue but ioc->rqos.disk is NULL, causing the BUG above.
+
+Create a function, ioc_refresh_params_disk(), that is similar to
+ioc_refresh_params() but takes the "struct gendisk" as an explicit
+argument. This function is called when ioc->rqos.disk is not yet
+initialized.
+
+Fixes: ce57b558604e ("blk-rq-qos: make rq_qos_add and rq_qos_del more useful")
+
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230228111654.1778120-1-leitao@debian.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index ab5830ba23e0f..0d4bc9d8f2cac 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -801,7 +801,11 @@ static void ioc_refresh_period_us(struct ioc *ioc)
+       ioc_refresh_margins(ioc);
+ }
+-static int ioc_autop_idx(struct ioc *ioc)
++/*
++ *  ioc->rqos.disk isn't initialized when this function is called from
++ *  the init path.
++ */
++static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
+ {
+       int idx = ioc->autop_idx;
+       const struct ioc_params *p = &autop[idx];
+@@ -809,11 +813,11 @@ static int ioc_autop_idx(struct ioc *ioc)
+       u64 now_ns;
+       /* rotational? */
+-      if (!blk_queue_nonrot(ioc->rqos.disk->queue))
++      if (!blk_queue_nonrot(disk->queue))
+               return AUTOP_HDD;
+       /* handle SATA SSDs w/ broken NCQ */
+-      if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
++      if (blk_queue_depth(disk->queue) == 1)
+               return AUTOP_SSD_QD1;
+       /* use one of the normal ssd sets */
+@@ -902,14 +906,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc)
+                   &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
+ }
+-static bool ioc_refresh_params(struct ioc *ioc, bool force)
++/*
++ * struct gendisk is required as an argument because ioc->rqos.disk
++ * is not properly initialized when called from the init path.
++ */
++static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
++                                  struct gendisk *disk)
+ {
+       const struct ioc_params *p;
+       int idx;
+       lockdep_assert_held(&ioc->lock);
+-      idx = ioc_autop_idx(ioc);
++      idx = ioc_autop_idx(ioc, disk);
+       p = &autop[idx];
+       if (idx == ioc->autop_idx && !force)
+@@ -938,6 +947,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
+       return true;
+ }
++static bool ioc_refresh_params(struct ioc *ioc, bool force)
++{
++      return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
++}
++
+ /*
+  * When an iocg accumulates too much vtime or gets deactivated, we throw away
+  * some vtime, which lowers the overall device utilization. As the exact amount
+@@ -2884,7 +2898,7 @@ static int blk_iocost_init(struct gendisk *disk)
+       spin_lock_irq(&ioc->lock);
+       ioc->autop_idx = AUTOP_INVALID;
+-      ioc_refresh_params(ioc, true);
++      ioc_refresh_params_disk(ioc, true, disk);
+       spin_unlock_irq(&ioc->lock);
+       /*
+-- 
+2.43.0
+
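The shape of the fix above -- an explicit-argument variant plus a thin
wrapper for the common case -- is a general pattern for breaking
init-order dependencies. A minimal stand-alone sketch, with simplified
stand-in types rather than the kernel's:

#include <stddef.h>
#include <stdio.h>

struct gendisk { int nonrot; };
struct rq_qos  { struct gendisk *disk; };
struct ioc     { struct rq_qos rqos; };

/* Takes the disk explicitly, so it is safe before rqos.disk is set. */
static int ioc_autop_idx_sketch(struct ioc *ioc, struct gendisk *disk)
{
        (void)ioc;
        return disk->nonrot ? 1 : 0;    /* stand-in for AUTOP_* selection */
}

static int refresh_params_disk(struct ioc *ioc, struct gendisk *disk)
{
        return ioc_autop_idx_sketch(ioc, disk);
}

/* Convenience wrapper for callers where rqos.disk is already valid. */
static int refresh_params(struct ioc *ioc)
{
        return refresh_params_disk(ioc, ioc->rqos.disk);
}

int main(void)
{
        struct gendisk d = { .nonrot = 1 };
        struct ioc ioc = { .rqos = { .disk = NULL } }; /* init path: unset */

        /* Init path: pass the disk explicitly instead of dereferencing
         * the still-NULL ioc.rqos.disk (the old code's crash). */
        printf("init path idx = %d\n", refresh_params_disk(&ioc, &d));

        ioc.rqos.disk = &d;             /* as rq_qos_add() would do later */
        printf("normal path idx = %d\n", refresh_params(&ioc));
        return 0;
}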
diff --git a/queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch b/queue-6.1/blk-rq-qos-constify-rq_qos_ops.patch
new file mode 100644 (file)
index 0000000..594a636
--- /dev/null
@@ -0,0 +1,103 @@
+From aa235b97093a21478dc99fd9638fc62d88af5f17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:55 +0100
+Subject: blk-rq-qos: constify rq_qos_ops
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 3963d84df7974b6687cb34bce3b9e0b2686f839c ]
+
+These op vectors are constant, so mark them const.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-15-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c    | 2 +-
+ block/blk-iolatency.c | 2 +-
+ block/blk-rq-qos.c    | 2 +-
+ block/blk-rq-qos.h    | 4 ++--
+ block/blk-wbt.c       | 2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index a8a7d2ce927b9..78958c5bece08 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2836,7 +2836,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
+       kfree(ioc);
+ }
+-static struct rq_qos_ops ioc_rqos_ops = {
++static const struct rq_qos_ops ioc_rqos_ops = {
+       .throttle = ioc_rqos_throttle,
+       .merge = ioc_rqos_merge,
+       .done_bio = ioc_rqos_done_bio,
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index c64cfec34ac37..b0f8550f87cd2 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -651,7 +651,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
+       kfree(blkiolat);
+ }
+-static struct rq_qos_ops blkcg_iolatency_ops = {
++static const struct rq_qos_ops blkcg_iolatency_ops = {
+       .throttle = blkcg_iolatency_throttle,
+       .done_bio = blkcg_iolatency_done_bio,
+       .exit = blkcg_iolatency_exit,
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 14bee1bd76136..8e83734cfe8db 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -296,7 +296,7 @@ void rq_qos_exit(struct request_queue *q)
+ }
+ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+-              struct rq_qos_ops *ops)
++              const struct rq_qos_ops *ops)
+ {
+       struct request_queue *q = disk->queue;
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 22552785aa31e..2b7b668479f71 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -25,7 +25,7 @@ struct rq_wait {
+ };
+ struct rq_qos {
+-      struct rq_qos_ops *ops;
++      const struct rq_qos_ops *ops;
+       struct request_queue *q;
+       enum rq_qos_id id;
+       struct rq_qos *next;
+@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+ }
+ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+-              struct rq_qos_ops *ops);
++              const struct rq_qos_ops *ops);
+ void rq_qos_del(struct rq_qos *rqos);
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index aec4e37c89c4a..d9398347b08d8 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -808,7 +808,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
+ };
+ #endif
+-static struct rq_qos_ops wbt_rqos_ops = {
++static const struct rq_qos_ops wbt_rqos_ops = {
+       .throttle = wbt_wait,
+       .issue = wbt_issue,
+       .track = wbt_track,
+-- 
+2.43.0
+
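Marking the op vectors const is a hardening and readability cleanup with
no behaviour change: the tables move into read-only data, and an
accidental runtime write becomes a compile-time error. A tiny sketch
with stand-in types (not the kernel's rq_qos_ops):

#include <stdio.h>

struct ops { void (*throttle)(void); };

static void my_throttle(void) { puts("throttle"); }

/* Placed in .rodata; the pointer members can never be reassigned. */
static const struct ops my_ops = { .throttle = my_throttle };

int main(void)
{
        my_ops.throttle();              /* reading and calling are fine */
        /* my_ops.throttle = NULL; */   /* would not compile: object is const */
        return 0;
}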
diff --git a/queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch b/queue-6.1/blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch
new file mode 100644 (file)
index 0000000..348877d
--- /dev/null
@@ -0,0 +1,192 @@
+From 581958da857b8e9faf3303ba6ebc2f7e0b7a15fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:54 +0100
+Subject: blk-rq-qos: make rq_qos_add and rq_qos_del more useful
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit ce57b558604e68277d31ca5ce49ec4579a8618c5 ]
+
+Switch to passing a gendisk, and make rq_qos_add initialize all required
+fields and drop the not required q argument from rq_qos_del.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-14-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c    | 13 +++----------
+ block/blk-iolatency.c | 14 ++++----------
+ block/blk-rq-qos.c    | 13 ++++++++++---
+ block/blk-rq-qos.h    |  5 +++--
+ block/blk-wbt.c       |  5 +----
+ 5 files changed, 21 insertions(+), 29 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 72ca07f24b3c0..a8a7d2ce927b9 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2847,9 +2847,7 @@ static struct rq_qos_ops ioc_rqos_ops = {
+ static int blk_iocost_init(struct gendisk *disk)
+ {
+-      struct request_queue *q = disk->queue;
+       struct ioc *ioc;
+-      struct rq_qos *rqos;
+       int i, cpu, ret;
+       ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
+@@ -2872,11 +2870,6 @@ static int blk_iocost_init(struct gendisk *disk)
+               local64_set(&ccs->rq_wait_ns, 0);
+       }
+-      rqos = &ioc->rqos;
+-      rqos->id = RQ_QOS_COST;
+-      rqos->ops = &ioc_rqos_ops;
+-      rqos->q = q;
+-
+       spin_lock_init(&ioc->lock);
+       timer_setup(&ioc->timer, ioc_timer_fn, 0);
+       INIT_LIST_HEAD(&ioc->active_iocgs);
+@@ -2900,17 +2893,17 @@ static int blk_iocost_init(struct gendisk *disk)
+        * called before policy activation completion, can't assume that the
+        * target bio has an iocg associated and need to test for NULL iocg.
+        */
+-      ret = rq_qos_add(q, rqos);
++      ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
+       if (ret)
+               goto err_free_ioc;
+-      ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
++      ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost);
+       if (ret)
+               goto err_del_qos;
+       return 0;
+ err_del_qos:
+-      rq_qos_del(q, rqos);
++      rq_qos_del(&ioc->rqos);
+ err_free_ioc:
+       free_percpu(ioc->pcpu_stat);
+       kfree(ioc);
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index 571fa95aafe96..c64cfec34ac37 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -758,24 +758,18 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
+ int blk_iolatency_init(struct gendisk *disk)
+ {
+-      struct request_queue *q = disk->queue;
+       struct blk_iolatency *blkiolat;
+-      struct rq_qos *rqos;
+       int ret;
+       blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
+       if (!blkiolat)
+               return -ENOMEM;
+-      rqos = &blkiolat->rqos;
+-      rqos->id = RQ_QOS_LATENCY;
+-      rqos->ops = &blkcg_iolatency_ops;
+-      rqos->q = q;
+-
+-      ret = rq_qos_add(q, rqos);
++      ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
++                       &blkcg_iolatency_ops);
+       if (ret)
+               goto err_free;
+-      ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
++      ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency);
+       if (ret)
+               goto err_qos_del;
+@@ -785,7 +779,7 @@ int blk_iolatency_init(struct gendisk *disk)
+       return 0;
+ err_qos_del:
+-      rq_qos_del(q, rqos);
++      rq_qos_del(&blkiolat->rqos);
+ err_free:
+       kfree(blkiolat);
+       return ret;
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index aae98dcb01ebe..14bee1bd76136 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -295,8 +295,15 @@ void rq_qos_exit(struct request_queue *q)
+       }
+ }
+-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
++              struct rq_qos_ops *ops)
+ {
++      struct request_queue *q = disk->queue;
++
++      rqos->q = q;
++      rqos->id = id;
++      rqos->ops = ops;
++
+       /*
+        * No IO can be in-flight when adding rqos, so freeze queue, which
+        * is fine since we only support rq_qos for blk-mq queue.
+@@ -326,11 +333,11 @@ int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+       spin_unlock_irq(&q->queue_lock);
+       blk_mq_unfreeze_queue(q);
+       return -EBUSY;
+-
+ }
+-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
++void rq_qos_del(struct rq_qos *rqos)
+ {
++      struct request_queue *q = rqos->q;
+       struct rq_qos **cur;
+       /*
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 805eee8b031d0..22552785aa31e 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -85,8 +85,9 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+       init_waitqueue_head(&rq_wait->wait);
+ }
+-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos);
+-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos);
++int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
++              struct rq_qos_ops *ops);
++void rq_qos_del(struct rq_qos *rqos);
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+ typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 95bec9244e9f3..aec4e37c89c4a 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -842,9 +842,6 @@ int wbt_init(struct gendisk *disk)
+       for (i = 0; i < WBT_NUM_RWQ; i++)
+               rq_wait_init(&rwb->rq_wait[i]);
+-      rwb->rqos.id = RQ_QOS_WBT;
+-      rwb->rqos.ops = &wbt_rqos_ops;
+-      rwb->rqos.q = q;
+       rwb->last_comp = rwb->last_issue = jiffies;
+       rwb->win_nsec = RWB_WINDOW_NSEC;
+       rwb->enable_state = WBT_STATE_ON_DEFAULT;
+@@ -857,7 +854,7 @@ int wbt_init(struct gendisk *disk)
+       /*
+        * Assign rwb and add the stats callback.
+        */
+-      ret = rq_qos_add(q, &rwb->rqos);
++      ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+       if (ret)
+               goto err_free;
+-- 
+2.43.0
+
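The refactor above centralizes registration boilerplate: rather than
every user filling in rqos->id, rqos->ops and the queue pointer before
calling rq_qos_add(), the helper now receives them as arguments and
initializes the structure itself. A stand-alone sketch of the pattern
with simplified stand-in types (the sketch stores the disk directly, as
a later patch in this series does; this patch stores disk->queue):

#include <stdio.h>

struct gendisk { const char *name; };
struct rq_qos_ops { int dummy; };       /* opaque stand-in */
struct rq_qos {
        struct gendisk *disk;
        int id;
        const struct rq_qos_ops *ops;
};

/* The helper owns the field setup; callers pass the pieces instead of
 * open-coding three assignments each (the real helper can also fail
 * with -EBUSY if a policy with the same id is already registered). */
static int rq_qos_add_sketch(struct rq_qos *rqos, struct gendisk *disk,
                             int id, const struct rq_qos_ops *ops)
{
        rqos->disk = disk;
        rqos->id = id;
        rqos->ops = ops;
        return 0;
}

int main(void)
{
        static const struct rq_qos_ops wbt_ops = { 0 };
        struct gendisk d = { .name = "sda" };
        struct rq_qos q = { 0 };

        rq_qos_add_sketch(&q, &d, 1 /* RQ_QOS_WBT-like id */, &wbt_ops);
        printf("registered id %d on %s\n", q.id, q.disk->name);
        return 0;
}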
diff --git a/queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch b/queue-6.1/blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch
new file mode 100644 (file)
index 0000000..6669d21
--- /dev/null
@@ -0,0 +1,166 @@
+From 99215e8e45084576ff46f9ed9e23f06d152f879c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:53 +0100
+Subject: blk-rq-qos: move rq_qos_add and rq_qos_del out of line
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit b494f9c566ba5fe2cc8abe67fdeb0332c6b48d4b ]
+
+These two functions are rather large and not in a fast path, so move
+them out of line.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-13-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-rq-qos.c | 60 +++++++++++++++++++++++++++++++++++++++++++++
+ block/blk-rq-qos.h | 61 ++--------------------------------------------
+ 2 files changed, 62 insertions(+), 59 deletions(-)
+
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 88f0fe7dcf545..aae98dcb01ebe 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -294,3 +294,63 @@ void rq_qos_exit(struct request_queue *q)
+               rqos->ops->exit(rqos);
+       }
+ }
++
++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++{
++      /*
++       * No IO can be in-flight when adding rqos, so freeze queue, which
++       * is fine since we only support rq_qos for blk-mq queue.
++       *
++       * Reuse ->queue_lock for protecting against other concurrent
++       * rq_qos adding/deleting
++       */
++      blk_mq_freeze_queue(q);
++
++      spin_lock_irq(&q->queue_lock);
++      if (rq_qos_id(q, rqos->id))
++              goto ebusy;
++      rqos->next = q->rq_qos;
++      q->rq_qos = rqos;
++      spin_unlock_irq(&q->queue_lock);
++
++      blk_mq_unfreeze_queue(q);
++
++      if (rqos->ops->debugfs_attrs) {
++              mutex_lock(&q->debugfs_mutex);
++              blk_mq_debugfs_register_rqos(rqos);
++              mutex_unlock(&q->debugfs_mutex);
++      }
++
++      return 0;
++ebusy:
++      spin_unlock_irq(&q->queue_lock);
++      blk_mq_unfreeze_queue(q);
++      return -EBUSY;
++
++}
++
++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
++{
++      struct rq_qos **cur;
++
++      /*
++       * See comment in rq_qos_add() about freezing queue & using
++       * ->queue_lock.
++       */
++      blk_mq_freeze_queue(q);
++
++      spin_lock_irq(&q->queue_lock);
++      for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
++              if (*cur == rqos) {
++                      *cur = rqos->next;
++                      break;
++              }
++      }
++      spin_unlock_irq(&q->queue_lock);
++
++      blk_mq_unfreeze_queue(q);
++
++      mutex_lock(&q->debugfs_mutex);
++      blk_mq_debugfs_unregister_rqos(rqos);
++      mutex_unlock(&q->debugfs_mutex);
++}
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 1ef1f7d4bc3cb..805eee8b031d0 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -85,65 +85,8 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+       init_waitqueue_head(&rq_wait->wait);
+ }
+-static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+-{
+-      /*
+-       * No IO can be in-flight when adding rqos, so freeze queue, which
+-       * is fine since we only support rq_qos for blk-mq queue.
+-       *
+-       * Reuse ->queue_lock for protecting against other concurrent
+-       * rq_qos adding/deleting
+-       */
+-      blk_mq_freeze_queue(q);
+-
+-      spin_lock_irq(&q->queue_lock);
+-      if (rq_qos_id(q, rqos->id))
+-              goto ebusy;
+-      rqos->next = q->rq_qos;
+-      q->rq_qos = rqos;
+-      spin_unlock_irq(&q->queue_lock);
+-
+-      blk_mq_unfreeze_queue(q);
+-
+-      if (rqos->ops->debugfs_attrs) {
+-              mutex_lock(&q->debugfs_mutex);
+-              blk_mq_debugfs_register_rqos(rqos);
+-              mutex_unlock(&q->debugfs_mutex);
+-      }
+-
+-      return 0;
+-ebusy:
+-      spin_unlock_irq(&q->queue_lock);
+-      blk_mq_unfreeze_queue(q);
+-      return -EBUSY;
+-
+-}
+-
+-static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+-{
+-      struct rq_qos **cur;
+-
+-      /*
+-       * See comment in rq_qos_add() about freezing queue & using
+-       * ->queue_lock.
+-       */
+-      blk_mq_freeze_queue(q);
+-
+-      spin_lock_irq(&q->queue_lock);
+-      for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+-              if (*cur == rqos) {
+-                      *cur = rqos->next;
+-                      break;
+-              }
+-      }
+-      spin_unlock_irq(&q->queue_lock);
+-
+-      blk_mq_unfreeze_queue(q);
+-
+-      mutex_lock(&q->debugfs_mutex);
+-      blk_mq_debugfs_unregister_rqos(rqos);
+-      mutex_unlock(&q->debugfs_mutex);
+-}
++int rq_qos_add(struct request_queue *q, struct rq_qos *rqos);
++void rq_qos_del(struct request_queue *q, struct rq_qos *rqos);
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+ typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
+-- 
+2.43.0
+
diff --git a/queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch b/queue-6.1/blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch
new file mode 100644 (file)
index 0000000..77370a2
--- /dev/null
@@ -0,0 +1,283 @@
+From 8d1a0d757f1cabbee1a542c21443aefc9746b42d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:56 +0100
+Subject: blk-rq-qos: store a gendisk instead of request_queue in struct rq_qos
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit ba91c849fa50dbc6519cf7808177b3a9b7f6bc97 ]
+
+This is what about half of the users already want, and their number is
+only going to grow.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-16-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c     | 12 ++++++------
+ block/blk-iolatency.c  | 14 +++++++-------
+ block/blk-mq-debugfs.c | 10 ++++------
+ block/blk-rq-qos.c     |  4 ++--
+ block/blk-rq-qos.h     |  2 +-
+ block/blk-wbt.c        | 16 +++++++---------
+ 6 files changed, 27 insertions(+), 31 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 78958c5bece08..ab5830ba23e0f 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -670,7 +670,7 @@ static struct ioc *q_to_ioc(struct request_queue *q)
+ static const char __maybe_unused *ioc_name(struct ioc *ioc)
+ {
+-      struct gendisk *disk = ioc->rqos.q->disk;
++      struct gendisk *disk = ioc->rqos.disk;
+       if (!disk)
+               return "<unknown>";
+@@ -809,11 +809,11 @@ static int ioc_autop_idx(struct ioc *ioc)
+       u64 now_ns;
+       /* rotational? */
+-      if (!blk_queue_nonrot(ioc->rqos.q))
++      if (!blk_queue_nonrot(ioc->rqos.disk->queue))
+               return AUTOP_HDD;
+       /* handle SATA SSDs w/ broken NCQ */
+-      if (blk_queue_depth(ioc->rqos.q) == 1)
++      if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
+               return AUTOP_SSD_QD1;
+       /* use one of the normal ssd sets */
+@@ -2653,7 +2653,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
+       if (use_debt) {
+               iocg_incur_debt(iocg, abs_cost, &now);
+               if (iocg_kick_delay(iocg, &now))
+-                      blkcg_schedule_throttle(rqos->q->disk,
++                      blkcg_schedule_throttle(rqos->disk,
+                                       (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+               iocg_unlock(iocg, ioc_locked, &flags);
+               return;
+@@ -2754,7 +2754,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
+       if (likely(!list_empty(&iocg->active_list))) {
+               iocg_incur_debt(iocg, abs_cost, &now);
+               if (iocg_kick_delay(iocg, &now))
+-                      blkcg_schedule_throttle(rqos->q->disk,
++                      blkcg_schedule_throttle(rqos->disk,
+                                       (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+       } else {
+               iocg_commit_bio(iocg, bio, abs_cost, cost);
+@@ -2825,7 +2825,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
+ {
+       struct ioc *ioc = rqos_to_ioc(rqos);
+-      blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
++      blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost);
+       spin_lock_irq(&ioc->lock);
+       ioc->running = IOC_STOP;
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index b0f8550f87cd2..268e6653b5a62 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -292,7 +292,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
+       unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
+       if (use_delay)
+-              blkcg_schedule_throttle(rqos->q->disk, use_memdelay);
++              blkcg_schedule_throttle(rqos->disk, use_memdelay);
+       /*
+        * To avoid priority inversions we want to just take a slot if we are
+@@ -330,7 +330,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
+                               struct child_latency_info *lat_info,
+                               bool up)
+ {
+-      unsigned long qd = blkiolat->rqos.q->nr_requests;
++      unsigned long qd = blkiolat->rqos.disk->queue->nr_requests;
+       unsigned long scale = scale_amount(qd, up);
+       unsigned long old = atomic_read(&lat_info->scale_cookie);
+       unsigned long max_scale = qd << 1;
+@@ -370,7 +370,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
+  */
+ static void scale_change(struct iolatency_grp *iolat, bool up)
+ {
+-      unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
++      unsigned long qd = iolat->blkiolat->rqos.disk->queue->nr_requests;
+       unsigned long scale = scale_amount(qd, up);
+       unsigned long old = iolat->rq_depth.max_depth;
+@@ -647,7 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
+       del_timer_sync(&blkiolat->timer);
+       flush_work(&blkiolat->enable_work);
+-      blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
++      blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency);
+       kfree(blkiolat);
+ }
+@@ -666,7 +666,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
+       rcu_read_lock();
+       blkg_for_each_descendant_pre(blkg, pos_css,
+-                                   blkiolat->rqos.q->root_blkg) {
++                                   blkiolat->rqos.disk->queue->root_blkg) {
+               struct iolatency_grp *iolat;
+               struct child_latency_info *lat_info;
+               unsigned long flags;
+@@ -750,9 +750,9 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
+        */
+       enabled = atomic_read(&blkiolat->enable_cnt);
+       if (enabled != blkiolat->enabled) {
+-              blk_mq_freeze_queue(blkiolat->rqos.q);
++              blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
+               blkiolat->enabled = enabled;
+-              blk_mq_unfreeze_queue(blkiolat->rqos.q);
++              blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
+       }
+ }
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 7675e663df365..c152276736832 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -813,9 +813,9 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
+ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+ {
+-      lockdep_assert_held(&rqos->q->debugfs_mutex);
++      lockdep_assert_held(&rqos->disk->queue->debugfs_mutex);
+-      if (!rqos->q->debugfs_dir)
++      if (!rqos->disk->queue->debugfs_dir)
+               return;
+       debugfs_remove_recursive(rqos->debugfs_dir);
+       rqos->debugfs_dir = NULL;
+@@ -823,7 +823,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+ {
+-      struct request_queue *q = rqos->q;
++      struct request_queue *q = rqos->disk->queue;
+       const char *dir_name = rq_qos_id_to_name(rqos->id);
+       lockdep_assert_held(&q->debugfs_mutex);
+@@ -835,9 +835,7 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+               q->rqos_debugfs_dir = debugfs_create_dir("rqos",
+                                                        q->debugfs_dir);
+-      rqos->debugfs_dir = debugfs_create_dir(dir_name,
+-                                             rqos->q->rqos_debugfs_dir);
+-
++      rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
+       debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
+ }
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index 8e83734cfe8db..d8cc820a365e3 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -300,7 +300,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+ {
+       struct request_queue *q = disk->queue;
+-      rqos->q = q;
++      rqos->disk = disk;
+       rqos->id = id;
+       rqos->ops = ops;
+@@ -337,7 +337,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+ void rq_qos_del(struct rq_qos *rqos)
+ {
+-      struct request_queue *q = rqos->q;
++      struct request_queue *q = rqos->disk->queue;
+       struct rq_qos **cur;
+       /*
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 2b7b668479f71..b02a1a3d33a89 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -26,7 +26,7 @@ struct rq_wait {
+ struct rq_qos {
+       const struct rq_qos_ops *ops;
+-      struct request_queue *q;
++      struct gendisk *disk;
+       enum rq_qos_id id;
+       struct rq_qos *next;
+ #ifdef CONFIG_BLK_DEBUG_FS
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index d9398347b08d8..e9206b1406e76 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -98,7 +98,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
+  */
+ static bool wb_recent_wait(struct rq_wb *rwb)
+ {
+-      struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
++      struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+       return time_before(jiffies, wb->dirty_sleep + HZ);
+ }
+@@ -235,7 +235,7 @@ enum {
+ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+ {
+-      struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
++      struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+       struct rq_depth *rqd = &rwb->rq_depth;
+       u64 thislat;
+@@ -288,7 +288,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+ static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
+ {
+-      struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
++      struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+       struct rq_depth *rqd = &rwb->rq_depth;
+       trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
+@@ -358,13 +358,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
+       unsigned int inflight = wbt_inflight(rwb);
+       int status;
+-      if (!rwb->rqos.q->disk)
++      if (!rwb->rqos.disk)
+               return;
+       status = latency_exceeded(rwb, cb->stat);
+-      trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
+-                      inflight);
++      trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
+       /*
+        * If we exceeded the latency target, step down. If we did not,
+@@ -689,16 +688,15 @@ static int wbt_data_dir(const struct request *rq)
+ static void wbt_queue_depth_changed(struct rq_qos *rqos)
+ {
+-      RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
++      RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
+       wbt_update_limits(RQWB(rqos));
+ }
+ static void wbt_exit(struct rq_qos *rqos)
+ {
+       struct rq_wb *rwb = RQWB(rqos);
+-      struct request_queue *q = rqos->q;
+-      blk_stat_remove_callback(q, rwb->cb);
++      blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
+       blk_stat_free_callback(rwb->cb);
+       kfree(rwb);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch b/queue-6.1/blk-wbt-don-t-enable-throttling-if-default-elevator-.patch
new file mode 100644 (file)
index 0000000..3d91bef
--- /dev/null
@@ -0,0 +1,114 @@
+From 51938e4e6ade6005901b700cfe6ecdd7481af216 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:18 +0800
+Subject: blk-wbt: don't enable throttling if default elevator is bfq
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 671fae5e51297fc76b3758ca2edd514858734a6a ]
+
+Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") tries to
+disable wbt for bfq; it does so by calling wbt_disable_default() in
+bfq_init_queue(). However, wbt is still enabled if the default elevator
+is bfq:
+
+device_add_disk
+ elevator_init_mq
+  bfq_init_queue
+   wbt_disable_default -> does nothing
+
+ blk_register_queue
+  wbt_enable_default -> wbt is enabled
+
+Fix the problem by adding a new flag, ELEVATOR_FLAG_DISABLE_WBT: bfq
+sets the flag in bfq_init_queue(), and a subsequent wbt_enable_default()
+won't enable wbt while the flag is set.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-7-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bfq-iosched.c |  2 ++
+ block/blk-wbt.c     | 11 ++++++++---
+ block/elevator.h    |  3 ++-
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 52eb79d60a3f3..e4699291aee23 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7059,6 +7059,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+ #endif
+       blk_stat_disable_accounting(bfqd->queue);
++      clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
+       wbt_enable_default(bfqd->queue);
+       kfree(bfqd);
+@@ -7204,6 +7205,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+       /* We dispatch from request queue wide instead of hw queue */
+       blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
++      set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
+       wbt_disable_default(q);
+       blk_stat_enable_accounting(q);
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index c5a8c10028a08..afb1782b4255e 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -27,6 +27,7 @@
+ #include "blk-wbt.h"
+ #include "blk-rq-qos.h"
++#include "elevator.h"
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/wbt.h>
+@@ -638,11 +639,15 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
+  */
+ void wbt_enable_default(struct request_queue *q)
+ {
+-      struct rq_qos *rqos = wbt_rq_qos(q);
++      struct rq_qos *rqos;
++      bool disable_flag = q->elevator &&
++                  test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+       /* Throttling already enabled? */
++      rqos = wbt_rq_qos(q);
+       if (rqos) {
+-              if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
++              if (!disable_flag &&
++                  RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+                       RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+               return;
+       }
+@@ -651,7 +656,7 @@ void wbt_enable_default(struct request_queue *q)
+       if (!blk_queue_registered(q))
+               return;
+-      if (queue_is_mq(q))
++      if (queue_is_mq(q) && !disable_flag)
+               wbt_init(q);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+diff --git a/block/elevator.h b/block/elevator.h
+index ed574bf3e629e..75382471222d1 100644
+--- a/block/elevator.h
++++ b/block/elevator.h
+@@ -104,7 +104,8 @@ struct elevator_queue
+       DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
+ };
+-#define ELEVATOR_FLAG_REGISTERED 0
++#define ELEVATOR_FLAG_REGISTERED      0
++#define ELEVATOR_FLAG_DISABLE_WBT     1
+ /*
+  * block elevator interface
+-- 
+2.43.0
+
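The call chain in the commit message is the crux: bfq_init_queue() runs
from elevator_init_mq() before blk_register_queue(), so its
wbt_disable_default() finds no wbt instance to disable, and the later
wbt_enable_default() happily creates one. The flag turns that ordering
problem into state the later call can observe. A stand-alone sketch with
plain booleans standing in for the elevator flag bit:

#include <stdbool.h>
#include <stdio.h>

static bool elevator_disable_wbt;  /* ELEVATOR_FLAG_DISABLE_WBT stand-in */
static bool wbt_enabled;

/* Runs first (from elevator_init_mq()): nothing to disable yet, so it
 * records the intent in the flag instead. */
static void bfq_init_queue_sketch(void)
{
        elevator_disable_wbt = true;
}

/* Runs later (from blk_register_queue()): honours the recorded intent. */
static void wbt_enable_default_sketch(void)
{
        if (!elevator_disable_wbt)
                wbt_enabled = true;
}

int main(void)
{
        bfq_init_queue_sketch();
        wbt_enable_default_sketch();
        printf("wbt enabled: %d\n", wbt_enabled);   /* 0: stays disabled */
        return 0;
}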
diff --git a/queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch b/queue-6.1/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch
new file mode 100644 (file)
index 0000000..c065067
--- /dev/null
@@ -0,0 +1,115 @@
+From ecf5ea95f5102ff71cf1675020f9bff184b40208 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 18:58:26 +0100
+Subject: blk-wbt: Fix detection of dirty-throttled tasks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 ]
+
+The detection of dirty-throttled tasks in blk-wbt has been subtly broken
+since its beginning in 2016. Namely if we are doing cgroup writeback and
+the throttled task is not in the root cgroup, balance_dirty_pages() will
+set dirty_sleep for the non-root bdi_writeback structure. However
+blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback
+structure. Thus detection of recently throttled tasks is not working in
+this case (we noticed this when we switched to cgroup v2 and suddenly
+writeback was slow).
+
+Since blk-wbt has no easy way to get to proper bdi_writeback and
+furthermore its intention has always been to work on the whole device
+rather than on individual cgroups, just move the dirty_sleep timestamp
+from bdi_writeback to backing_dev_info. That fixes the checking for
+recently throttled task and saves memory for everybody as a bonus.
+
+CC: stable@vger.kernel.org
+Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz
+[axboe: fixup indentation errors]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c                  | 4 ++--
+ include/linux/backing-dev-defs.h | 7 +++++--
+ mm/backing-dev.c                 | 2 +-
+ mm/page-writeback.c              | 2 +-
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index e9206b1406e76..fcacdff8af93b 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -98,9 +98,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
+  */
+ static bool wb_recent_wait(struct rq_wb *rwb)
+ {
+-      struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
++      struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
+-      return time_before(jiffies, wb->dirty_sleep + HZ);
++      return time_before(jiffies, bdi->last_bdp_sleep + HZ);
+ }
+ static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
+index ae12696ec492c..2ad261082bba5 100644
+--- a/include/linux/backing-dev-defs.h
++++ b/include/linux/backing-dev-defs.h
+@@ -141,8 +141,6 @@ struct bdi_writeback {
+       struct delayed_work dwork;      /* work item used for writeback */
+       struct delayed_work bw_dwork;   /* work item used for bandwidth estimate */
+-      unsigned long dirty_sleep;      /* last wait */
+-
+       struct list_head bdi_node;      /* anchored at bdi->wb_list */
+ #ifdef CONFIG_CGROUP_WRITEBACK
+@@ -179,6 +177,11 @@ struct backing_dev_info {
+        * any dirty wbs, which is depended upon by bdi_has_dirty().
+        */
+       atomic_long_t tot_write_bandwidth;
++      /*
++       * Jiffies when last process was dirty throttled on this bdi. Used by
++       * blk-wbt.
++       */
++      unsigned long last_bdp_sleep;
+       struct bdi_writeback wb;  /* the root writeback info for this bdi */
+       struct list_head wb_list; /* list of all wbs */
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index bf5525c2e561a..c070ff9ef9cf3 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -305,7 +305,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
+       INIT_LIST_HEAD(&wb->work_list);
+       INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+       INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
+-      wb->dirty_sleep = jiffies;
+       err = fprop_local_init_percpu(&wb->completions, gfp);
+       if (err)
+@@ -793,6 +792,7 @@ int bdi_init(struct backing_dev_info *bdi)
+       INIT_LIST_HEAD(&bdi->bdi_list);
+       INIT_LIST_HEAD(&bdi->wb_list);
+       init_waitqueue_head(&bdi->wb_waitq);
++      bdi->last_bdp_sleep = jiffies;
+       return cgwb_bdi_init(bdi);
+ }
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index d3e9d12860b9f..9046d1f1b408e 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1809,7 +1809,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
+                       break;
+               }
+               __set_current_state(TASK_KILLABLE);
+-              wb->dirty_sleep = now;
++              bdi->last_bdp_sleep = jiffies;
+               io_schedule_timeout(pause);
+               current->dirty_paused_when = now + pause;
+-- 
+2.43.0
+
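The bug being fixed is a granularity mismatch: balance_dirty_pages()
recorded the throttle timestamp in whichever per-cgroup bdi_writeback it
was operating on, while blk-wbt only ever consulted the root cgroup's
writeback, so throttling inside any child cgroup was invisible to it.
Moving the timestamp to the per-device backing_dev_info makes both sides
agree. A stand-alone before/after sketch, with simplified stand-in types,
a made-up HZ of 100, and a plain comparison in place of the kernel's
wraparound-safe time_before():

#include <stdbool.h>
#include <stdio.h>

#define HZ 100

struct wb  { unsigned long dirty_sleep; };      /* per-cgroup writeback */
struct bdi {
        struct wb root_wb;                      /* the only wb blk-wbt read */
        unsigned long last_bdp_sleep;           /* new: device-wide stamp */
};

/* Old check: only sees throttling recorded on the root cgroup's wb. */
static bool recent_wait_old(const struct bdi *bdi, unsigned long now)
{
        return now < bdi->root_wb.dirty_sleep + HZ;
}

/* New check: sees throttling recorded anywhere on the device. */
static bool recent_wait_new(const struct bdi *bdi, unsigned long now)
{
        return now < bdi->last_bdp_sleep + HZ;
}

int main(void)
{
        struct bdi bdi = { { 0 }, 0 };
        unsigned long now = 1000;

        /* A task in a *child* cgroup gets dirty-throttled at `now`: the
         * old code updated only that child's wb (never root_wb), while
         * the fix records it on the bdi itself. */
        bdi.last_bdp_sleep = now;

        printf("old check notices: %d\n", recent_wait_old(&bdi, now)); /* 0 */
        printf("new check notices: %d\n", recent_wait_new(&bdi, now)); /* 1 */
        return 0;
}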
diff --git a/queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch b/queue-6.1/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch
new file mode 100644 (file)
index 0000000..487c08f
--- /dev/null
@@ -0,0 +1,65 @@
+From 5376a7667a1f2430589c3b2f5f0bccafd1dd761b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 20:18:54 +0800
+Subject: blk-wbt: fix that wbt can't be disabled by default
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 8a2b20a997a3779ae9fcae268f2959eb82ec05a1 ]
+
+Commit b11d31ae01e6 ("blk-wbt: remove unnecessary check in
+wbt_enable_default()") mistakenly removed the check of
+CONFIG_BLK_WBT_MQ, which controls whether wbt is enabled by default.
+
+Fix the problem by adding the check back. This patch also does a little
+cleanup to make the related code more readable.
+
+Fixes: b11d31ae01e6 ("blk-wbt: remove unnecessary check in wbt_enable_default()")
+Reported-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Link: https://lore.kernel.org/lkml/CAKXUXMzfKq_J9nKHGyr5P5rvUETY4B-fxoQD4sO+NYjFOfVtZA@mail.gmail.com/t/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20230522121854.2928880-1-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index fcacdff8af93b..526fb12c3e4cf 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -640,14 +640,16 @@ void wbt_enable_default(struct gendisk *disk)
+ {
+       struct request_queue *q = disk->queue;
+       struct rq_qos *rqos;
+-      bool disable_flag = q->elevator &&
+-                  test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
++      bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
++
++      if (q->elevator &&
++          test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
++              enable = false;
+       /* Throttling already enabled? */
+       rqos = wbt_rq_qos(q);
+       if (rqos) {
+-              if (!disable_flag &&
+-                  RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
++              if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+                       RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+               return;
+       }
+@@ -656,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk)
+       if (!blk_queue_registered(q))
+               return;
+-      if (queue_is_mq(q) && !disable_flag)
++      if (queue_is_mq(q) && enable)
+               wbt_init(disk);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+-- 
+2.43.0
+
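The restored logic is a compile-time default that runtime conditions may
only veto: IS_ENABLED(CONFIG_BLK_WBT_MQ) seeds `enable`, and the
elevator's disable flag can clear it but never set it. A stand-alone
sketch, with a plain macro standing in for the Kconfig switch:

#include <stdbool.h>
#include <stdio.h>

#define CONFIG_BLK_WBT_MQ_SKETCH 1      /* pretend Kconfig said =y */

int main(void)
{
        /* Seed from build configuration, as IS_ENABLED() does. */
        bool enable = CONFIG_BLK_WBT_MQ_SKETCH;

        /* Runtime conditions (e.g. bfq's ELEVATOR_FLAG_DISABLE_WBT) may
         * only turn the default off, never back on. */
        bool elevator_disables_wbt = false;
        if (elevator_disables_wbt)
                enable = false;

        printf("wbt default: %s\n", enable ? "on" : "off");
        return 0;
}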
diff --git a/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch
new file mode 100644 (file)
index 0000000..93821b3
--- /dev/null
@@ -0,0 +1,139 @@
+From 3cab63f95634875a1501abbda551e69098f6c978 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:49 +0100
+Subject: blk-wbt: pass a gendisk to wbt_{enable,disable}_default
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 04aad37be1a88de6a1919996a615437ac74de479 ]
+
+Pass a gendisk to wbt_enable_default and wbt_disable_default to
+prepare for phasing out usage of the request_queue in the blk-cgroup
+code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-9-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bfq-iosched.c | 4 ++--
+ block/blk-iocost.c  | 4 ++--
+ block/blk-sysfs.c   | 2 +-
+ block/blk-wbt.c     | 7 ++++---
+ block/blk-wbt.h     | 8 ++++----
+ 5 files changed, 13 insertions(+), 12 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index e4699291aee23..84b4763b2b223 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7060,7 +7060,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+       blk_stat_disable_accounting(bfqd->queue);
+       clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
+-      wbt_enable_default(bfqd->queue);
++      wbt_enable_default(bfqd->queue->disk);
+       kfree(bfqd);
+ }
+@@ -7206,7 +7206,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+       blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+       set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
+-      wbt_disable_default(q);
++      wbt_disable_default(q->disk);
+       blk_stat_enable_accounting(q);
+       return 0;
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 3788774a7b729..72ca07f24b3c0 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -3281,11 +3281,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
+               blk_stat_enable_accounting(disk->queue);
+               blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+               ioc->enabled = true;
+-              wbt_disable_default(disk->queue);
++              wbt_disable_default(disk);
+       } else {
+               blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
+               ioc->enabled = false;
+-              wbt_enable_default(disk->queue);
++              wbt_enable_default(disk);
+       }
+       if (user) {
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index a82bdec923b21..c59c4d3ee7a27 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -837,7 +837,7 @@ int blk_register_queue(struct gendisk *disk)
+               goto put_dev;
+       blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
+-      wbt_enable_default(q);
++      wbt_enable_default(disk);
+       blk_throtl_register(disk);
+       /* Now everything is ready and send out KOBJ_ADD uevent */
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index afb1782b4255e..8d4f075f13e2f 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -637,8 +637,9 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
+ /*
+  * Enable wbt if defaults are configured that way
+  */
+-void wbt_enable_default(struct request_queue *q)
++void wbt_enable_default(struct gendisk *disk)
+ {
++      struct request_queue *q = disk->queue;
+       struct rq_qos *rqos;
+       bool disable_flag = q->elevator &&
+                   test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+@@ -705,9 +706,9 @@ static void wbt_exit(struct rq_qos *rqos)
+ /*
+  * Disable wbt, if enabled by default.
+  */
+-void wbt_disable_default(struct request_queue *q)
++void wbt_disable_default(struct gendisk *disk)
+ {
+-      struct rq_qos *rqos = wbt_rq_qos(q);
++      struct rq_qos *rqos = wbt_rq_qos(disk->queue);
+       struct rq_wb *rwb;
+       if (!rqos)
+               return;
+diff --git a/block/blk-wbt.h b/block/blk-wbt.h
+index 7e44eccc676dd..58c226fe33d48 100644
+--- a/block/blk-wbt.h
++++ b/block/blk-wbt.h
+@@ -89,8 +89,8 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+ #ifdef CONFIG_BLK_WBT
+ int wbt_init(struct request_queue *);
+-void wbt_disable_default(struct request_queue *);
+-void wbt_enable_default(struct request_queue *);
++void wbt_disable_default(struct gendisk *disk);
++void wbt_enable_default(struct gendisk *disk);
+ u64 wbt_get_min_lat(struct request_queue *q);
+ void wbt_set_min_lat(struct request_queue *q, u64 val);
+@@ -105,10 +105,10 @@ static inline int wbt_init(struct request_queue *q)
+ {
+       return -EINVAL;
+ }
+-static inline void wbt_disable_default(struct request_queue *q)
++static inline void wbt_disable_default(struct gendisk *disk)
+ {
+ }
+-static inline void wbt_enable_default(struct request_queue *q)
++static inline void wbt_enable_default(struct gendisk *disk)
+ {
+ }
+ static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
+-- 
+2.43.0
+
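The conversion pattern above repeats across this series: the public entry point now takes a `struct gendisk *` and derives the `request_queue` locally, so callers stay disk-centric while the internals keep operating on the queue. A minimal standalone sketch of that shape, with stand-in types rather than the real block-layer definitions:

    /* Stand-in types; the real ones live in <linux/blkdev.h>. */
    struct request_queue { int registered; };
    struct gendisk       { struct request_queue *queue; };

    /* Disk-first entry point: derive the queue once, locally. */
    static int wbt_enable_default_sketch(struct gendisk *disk)
    {
            struct request_queue *q = disk->queue;

            return q->registered;   /* stand-in for the real enable logic */
    }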
diff --git a/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch b/queue-6.1/blk-wbt-pass-a-gendisk-to-wbt_init.patch
new file mode 100644 (file)
index 0000000..c2e6a0a
--- /dev/null
@@ -0,0 +1,87 @@
+From 64436d303bf9f3e4b615121498533f1e7b068e19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 16:03:50 +0100
+Subject: blk-wbt: pass a gendisk to wbt_init
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 958f29654747a54f2272eb478e493eb97f492e06 ]
+
+Pass a gendisk to wbt_init to prepare for phasing out usage of the
+request_queue in the blk-cgroup code.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230203150400.3199230-10-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-sysfs.c | 2 +-
+ block/blk-wbt.c   | 5 +++--
+ block/blk-wbt.h   | 4 ++--
+ 3 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index c59c4d3ee7a27..31f53ef01982d 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -488,7 +488,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+       rqos = wbt_rq_qos(q);
+       if (!rqos) {
+-              ret = wbt_init(q);
++              ret = wbt_init(q->disk);
+               if (ret)
+                       return ret;
+       }
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 8d4f075f13e2f..95bec9244e9f3 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -658,7 +658,7 @@ void wbt_enable_default(struct gendisk *disk)
+               return;
+       if (queue_is_mq(q) && !disable_flag)
+-              wbt_init(q);
++              wbt_init(disk);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+@@ -822,8 +822,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
+ #endif
+ };
+-int wbt_init(struct request_queue *q)
++int wbt_init(struct gendisk *disk)
+ {
++      struct request_queue *q = disk->queue;
+       struct rq_wb *rwb;
+       int i;
+       int ret;
+diff --git a/block/blk-wbt.h b/block/blk-wbt.h
+index 58c226fe33d48..8170439b89d6e 100644
+--- a/block/blk-wbt.h
++++ b/block/blk-wbt.h
+@@ -88,7 +88,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+ #ifdef CONFIG_BLK_WBT
+-int wbt_init(struct request_queue *);
++int wbt_init(struct gendisk *disk);
+ void wbt_disable_default(struct gendisk *disk);
+ void wbt_enable_default(struct gendisk *disk);
+@@ -101,7 +101,7 @@ u64 wbt_default_latency_nsec(struct request_queue *);
+ #else
+-static inline int wbt_init(struct request_queue *q)
++static inline int wbt_init(struct gendisk *disk)
+ {
+       return -EINVAL;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch b/queue-6.1/blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch
new file mode 100644 (file)
index 0000000..b656839
--- /dev/null
@@ -0,0 +1,37 @@
+From 631dc45a1e1ca5721ec23d80d60381e818e3c409 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:14 +0800
+Subject: blk-wbt: remove unnecessary check in wbt_enable_default()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit b11d31ae01e6b0762b28e645ad6718a12faa8d14 ]
+
+If CONFIG_BLK_WBT_MQ is disabled, wbt_init() won't do anything.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-3-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index c293e08b301ff..c5a8c10028a08 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -651,7 +651,7 @@ void wbt_enable_default(struct request_queue *q)
+       if (!blk_queue_registered(q))
+               return;
+-      if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
++      if (queue_is_mq(q))
+               wbt_init(q);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+-- 
+2.43.0
+
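The check was redundant because the header already handles the compiled-out case: when the wbt config option is off, wbt_init() is a static inline stub, so calling it unconditionally is harmless. A sketch of that pattern, simplified from the blk-wbt.h hunks shown earlier:

    #include <errno.h>      /* EINVAL; the kernel uses <linux/errno.h> */

    struct request_queue;   /* opaque here; its fields don't matter */

    #ifdef CONFIG_BLK_WBT
    int wbt_init(struct request_queue *q);
    #else
    /* Compiled-out stub: callers need no IS_ENABLED() guard of their own. */
    static inline int wbt_init(struct request_queue *q)
    {
            return -EINVAL;
    }
    #endif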
diff --git a/queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch b/queue-6.1/drm-amd-display-fix-mst-null-ptr-for-rv.patch
new file mode 100644 (file)
index 0000000..70bcfc7
--- /dev/null
@@ -0,0 +1,126 @@
+From 525bbc796fc5729946f6d394ea8f72487384a8a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 13:43:46 -0500
+Subject: drm/amd/display: Fix MST Null Ptr for RV
+
+From: Fangzhi Zuo <jerry.zuo@amd.com>
+
+[ Upstream commit e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 ]
+
+This change tries to fix the below error, which is specific to the RV platform:
+
+BUG: kernel NULL pointer dereference, address: 0000000000000008
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2
+Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022
+RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
+Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8>
+RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224
+RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280
+RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850
+R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000
+R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224
+FS:  00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0
+Call Trace:
+ <TASK>
+ ? __die+0x23/0x70
+ ? page_fault_oops+0x171/0x4e0
+ ? plist_add+0xbe/0x100
+ ? exc_page_fault+0x7c/0x180
+ ? asm_exc_page_fault+0x26/0x30
+ ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026]
+ ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026]
+ compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ drm_atomic_check_only+0x5c5/0xa40
+ drm_mode_atomic_ioctl+0x76e/0xbc0
+ ? _copy_to_user+0x25/0x30
+ ? drm_ioctl+0x296/0x4b0
+ ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
+ drm_ioctl_kernel+0xcd/0x170
+ drm_ioctl+0x26d/0x4b0
+ ? __pfx_drm_mode_atomic_ioctl+0x10/0x10
+ amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054]
+ __x64_sys_ioctl+0x94/0xd0
+ do_syscall_64+0x60/0x90
+ ? do_syscall_64+0x6c/0x90
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7f4dad17f76f
+Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c>
+RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f
+RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b
+RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc
+R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0
+ </TASK>
+Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep >
+ typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas>
+CR2: 0000000000000008
+---[ end trace 0000000000000000 ]---
+RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
+Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8>
+RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224
+RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280
+RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850
+R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000
+R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224
+FS:  00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0
+
+With a second DP monitor connected, the drm_atomic_state in the dm
+atomic check sequence does not include the connector state for the
+old/existing/first DP monitor. In that case, the dsc determination
+policy hits a null ptr when it iterates over the old/existing stream,
+which has no valid connector state attached. When that happens, dm
+atomic check should call drm_atomic_get_connector_state for a new
+connector state. Existing dm code already does that, except on RV:
+since RV has no official dsc support, .num_dsc is not defined in the
+dcn10 resource caps, which prevents drm_atomic_get_connector_state
+from being called. So, skip the dsc determination policy on ASICs
+that don't have DSC support.
+
+Cc: stable@vger.kernel.org # 6.1+
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314
+Reviewed-by: Wayne Lin <wayne.lin@amd.com>
+Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Fangzhi Zuo <jerry.zuo@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index bea49befdcacc..a6c6f286a5988 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -10123,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
+               }
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+-              ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+-              if (ret) {
+-                      DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+-                      ret = -EINVAL;
+-                      goto fail;
++              if (dc_resource_is_dsc_encoding_supported(dc)) {
++                      ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
++                      if (ret) {
++                              DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
++                              ret = -EINVAL;
++                              goto fail;
++                      }
+               }
+               ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
+-- 
+2.43.0
+
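A note on the shape of the fix: rather than teaching RV's path to fetch the missing connector state, it gates the whole DSC pass on a capability query (dc_resource_is_dsc_encoding_supported() in the hunk above), so hardware without DSC never reaches the code that trips on the null state. A hedged standalone sketch of that gating, with stand-in names:

    /* Stand-in resource caps; RV would report num_dsc == 0. */
    struct res_caps { int num_dsc; };

    static int dsc_supported(const struct res_caps *caps)
    {
            return caps->num_dsc > 0;
    }

    static int atomic_check_sketch(const struct res_caps *caps)
    {
            if (dsc_supported(caps)) {
                    /* DSC config pass runs only when the hw can use it */
                    return 1;
            }
            return 0;       /* unsupported ASICs skip the pass entirely */
    }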
diff --git a/queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch b/queue-6.1/drm-amd-display-fix-uninitialized-variable-usage-in-.patch
new file mode 100644 (file)
index 0000000..e5f8a85
--- /dev/null
@@ -0,0 +1,59 @@
+From a06f08e22b6af5f25d8f4a6abddfaf9548d74e5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jan 2024 08:41:52 +0530
+Subject: drm/amd/display: Fix uninitialized variable usage in core_link_
+ 'read_dpcd() & write_dpcd()' functions
+
+From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
+
+[ Upstream commit a58371d632ebab9ea63f10893a6b6731196b6f8d ]
+
+The 'status' variable in 'core_link_read_dpcd()' &
+'core_link_write_dpcd()' was uninitialized.
+
+Thus, initialize the 'status' variable to 'DC_ERROR_UNEXPECTED' by default.
+
+Fixes the below:
+drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'.
+drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'.
+
+Cc: stable@vger.kernel.org
+Cc: Jerry Zuo <jerry.zuo@amd.com>
+Cc: Jun Lei <Jun.Lei@amd.com>
+Cc: Wayne Lin <Wayne.Lin@amd.com>
+Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
+Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
+index af110bf9470fa..aefca9756dbe8 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c
+@@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd(
+       uint32_t extended_size;
+       /* size of the remaining partitioned address space */
+       uint32_t size_left_to_read;
+-      enum dc_status status;
++      enum dc_status status = DC_ERROR_UNEXPECTED;
+       /* size of the next partition to be read from */
+       uint32_t partition_size;
+       uint32_t data_index = 0;
+@@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd(
+ {
+       uint32_t partition_size;
+       uint32_t data_index = 0;
+-      enum dc_status status;
++      enum dc_status status = DC_ERROR_UNEXPECTED;
+       while (size) {
+               partition_size = dpcd_get_next_partition_size(address, size);
+-- 
+2.43.0
+
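The bug class here is worth spelling out: `status` is only assigned inside the transfer loop, so a call that performs zero iterations would return whatever happened to be on the stack. A standalone sketch of the hazard and the fix:

    /* Sketch: loop-assigned return values need a safe default. */
    enum status_sketch { STATUS_UNEXPECTED = -1, STATUS_OK = 0 };

    static enum status_sketch write_partitions(unsigned int size)
    {
            /* The fix: seed with an error default, as the patch does with
             * DC_ERROR_UNEXPECTED; without it, size == 0 returns garbage. */
            enum status_sketch status = STATUS_UNEXPECTED;

            while (size) {
                    status = STATUS_OK;     /* stand-in for the real write */
                    size--;
            }
            return status;
    }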
diff --git a/queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch b/queue-6.1/drm-amd-display-wrong-colorimetry-workaround.patch
new file mode 100644 (file)
index 0000000..3462268
--- /dev/null
@@ -0,0 +1,119 @@
+From db5f2cf82b018c2d5cf047cde55d1e9baa8f6b31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 11:46:56 -0400
+Subject: drm/amd/display: Wrong colorimetry workaround
+
+From: Ma Hanghong <hanghong.ma@amd.com>
+
+[ Upstream commit b1a98cf89a695d36c414653634ea7ba91b6e701f ]
+
+[Why]
+For FreeSync HDR, the native color space flag in the AMD VSIF (BT.709)
+should be used when interpreting content, and the color space flag in
+the VSC or AVI infoFrame should be ignored. However, it turned out
+some userspace applications still use the color flag in the VSC or AVI
+infoFrame, which is incorrect.
+
+[How]
+Transfer function is used when building the VSC and AVI infoFrame. Set
+colorimetry to BT.709 when all the following match:
+
+1. Pixel format is YCbCr;
+2. In FreeSync 2 HDR, color is COLOR_SPACE_2020_YCBCR;
+3. Transfer function is TRANSFER_FUNC_GAMMA_22;
+
+Tested-by: Mark Broadworth <mark.broadworth@amd.com>
+Reviewed-by: Krunoslav Kovac <Krunoslav.Kovac@amd.com>
+Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Ma Hanghong <hanghong.ma@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: e6a7df96facd ("drm/amd/display: Fix MST Null Ptr for RV")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c           | 5 ++++-
+ drivers/gpu/drm/amd/display/dc/core/dc_resource.c           | 6 ++++++
+ drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h   | 3 ++-
+ .../gpu/drm/amd/display/modules/info_packet/info_packet.c   | 6 +++++-
+ 4 files changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index da16048bf1004..bea49befdcacc 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -5938,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+       bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
+       int mode_refresh;
+       int preferred_refresh = 0;
++      enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+       struct dsc_dec_dpcd_caps dsc_caps;
+ #endif
+@@ -6071,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+                       if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
+                               stream->use_vsc_sdp_for_colorimetry = true;
+               }
+-              mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space);
++              if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
++                      tf = TRANSFER_FUNC_GAMMA_22;
++              mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+               aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
+       }
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+index 66923f51037a3..e2f80cd0ca8cb 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+@@ -3038,6 +3038,12 @@ static void set_avi_info_frame(
+               hdmi_info.bits.C0_C1   = COLORIMETRY_EXTENDED;
+       }
++      if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR &&
++                      stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
++              hdmi_info.bits.EC0_EC2 = 0;
++              hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
++      }
++
+       /* TODO: un-hardcode aspect ratio */
+       aspect = stream->timing.aspect_ratio;
+diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+index 1d8b746b02f24..edf5845f6a1f7 100644
+--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
++++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+@@ -35,7 +35,8 @@ struct mod_vrr_params;
+ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+               struct dc_info_packet *info_packet,
+-              enum dc_color_space cs);
++              enum dc_color_space cs,
++              enum color_transfer_func tf);
+ void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream,
+               struct dc_info_packet *info_packet);
+diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+index 27ceba9d6d658..69691058ab898 100644
+--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
++++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+@@ -132,7 +132,8 @@ enum ColorimetryYCCDP {
+ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+               struct dc_info_packet *info_packet,
+-              enum dc_color_space cs)
++              enum dc_color_space cs,
++              enum color_transfer_func tf)
+ {
+       unsigned int vsc_packet_revision = vsc_packet_undefined;
+       unsigned int i;
+@@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+                               colorimetryFormat = ColorimetryYCC_DP_AdobeYCC;
+                       else if (cs == COLOR_SPACE_2020_YCBCR)
+                               colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr;
++
++                      if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22)
++                              colorimetryFormat = ColorimetryYCC_DP_ITU709;
+                       break;
+               default:
+-- 
+2.43.0
+
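Condensing the [Why]/[How] above: the override fires only when all three conditions line up, and then forces BT.709 colorimetry in both infoFrame paths. A standalone sketch of that decision, with stand-in enums rather than the real dc types:

    /* Stand-in enums for the three inputs and the result. */
    enum cs_sketch  { CS_2020_YCBCR, CS_OTHER };
    enum tf_sketch  { TF_GAMMA_22, TF_OTHER };
    enum col_sketch { COL_ITU2020, COL_ITU709 };

    static enum col_sketch pick_colorimetry(int is_ycbcr, enum cs_sketch cs,
                                            enum tf_sketch tf)
    {
            /* Workaround: FreeSync 2 HDR content flagged BT.2020 YCbCr with
             * a gamma-2.2 transfer function is signalled as BT.709. */
            if (is_ycbcr && cs == CS_2020_YCBCR && tf == TF_GAMMA_22)
                    return COL_ITU709;
            return COL_ITU2020;     /* otherwise keep native signalling */
    }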
diff --git a/queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch b/queue-6.1/drm-amdgpu-reset-ih-overflow_clear-bit.patch
new file mode 100644 (file)
index 0000000..16035f2
--- /dev/null
@@ -0,0 +1,188 @@
+From 288715497704306fceb8c2cb307a168cfb241320 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 12:52:03 +0100
+Subject: drm/amdgpu: Reset IH OVERFLOW_CLEAR bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Friedrich Vock <friedrich.vock@gmx.de>
+
+[ Upstream commit 7330256268664ea0a7dd5b07a3fed363093477dd ]
+
+Allows us to detect subsequent IH ring buffer overflows as well.
+
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/cik_ih.c     | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/cz_ih.c      | 5 +++++
+ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++
+ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c    | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/si_ih.c      | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/tonga_ih.c   | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 6 ++++++
+ drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 6 ++++++
+ 9 files changed, 52 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+index df385ffc97683..6578ca1b90afa 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
+               tmp = RREG32(mmIH_RB_CNTL);
+               tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+               WREG32(mmIH_RB_CNTL, tmp);
++
++              /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++               * can be detected.
++               */
++              tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
++              WREG32(mmIH_RB_CNTL, tmp);
+       }
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+index b8c47e0cf37ad..c19681492efa7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32(mmIH_RB_CNTL, tmp);
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32(mmIH_RB_CNTL, tmp);
+ out:
+       return (wptr & ih->ptr_mask);
+diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+index aecad530b10a6..2c02ae69883d2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32(mmIH_RB_CNTL, tmp);
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32(mmIH_RB_CNTL, tmp);
+ out:
+       return (wptr & ih->ptr_mask);
+diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+index 7cd79a3844b24..657e4ca6f9dd2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+@@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
+       tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ out:
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+index eec13cb5bf758..84e8e8b008ef6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ out:
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
+index 9a24f17a57502..cada9f300a7f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
+@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
+               tmp = RREG32(IH_RB_CNTL);
+               tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+               WREG32(IH_RB_CNTL, tmp);
++
++              /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++               * can be detected.
++               */
++              tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
++              WREG32(IH_RB_CNTL, tmp);
+       }
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+index b08905d1c00f0..07a5d95be07f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32(mmIH_RB_CNTL, tmp);
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32(mmIH_RB_CNTL, tmp);
++
+ out:
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+index 1e83db0c5438d..74c94df423455 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
+ out:
+       return (wptr & ih->ptr_mask);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+index 59dfca093155c..f1ba76c35cd6e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+@@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++      /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
++       * can be detected.
++       */
++      tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
++      WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
++
+ out:
+       return (wptr & ih->ptr_mask);
+ }
+-- 
+2.43.0
+
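The same two-step sequence lands in all nine IH blocks: setting WPTR_OVERFLOW_CLEAR acknowledges the current overflow, and clearing it right afterwards re-arms detection, since a bit left set would hide the next overflow. A standalone sketch of that ack-then-re-arm idiom, with a hypothetical bit position and stand-in register accessors:

    #define WPTR_OVERFLOW_CLEAR (1u << 31)  /* hypothetical bit position */

    static unsigned int ih_rb_cntl;         /* stand-in for the MMIO register */

    static unsigned int rreg(void)           { return ih_rb_cntl; }
    static void         wreg(unsigned int v) { ih_rb_cntl = v; }

    static void ih_ack_overflow(void)
    {
            unsigned int tmp = rreg();

            wreg(tmp | WPTR_OVERFLOW_CLEAR);    /* ack the current overflow */
            wreg(tmp & ~WPTR_OVERFLOW_CLEAR);   /* re-arm: unset immediately */
    }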
diff --git a/queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch b/queue-6.1/elevator-add-new-field-flags-in-struct-elevator_queu.patch
new file mode 100644 (file)
index 0000000..864e5a1
--- /dev/null
@@ -0,0 +1,70 @@
+From 85ad0276e21822aca9c6a80d8a03247daa354d1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:17 +0800
+Subject: elevator: add new field flags in struct elevator_queue
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 181d06637451b5348d746039478e71fa53dfbff6 ]
+
+There is currently only one flag, used to indicate that the elevator is
+registered; prepare to add a flag to disable wbt if the default
+elevator is bfq.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20221019121518.3865235-6-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/elevator.c | 6 ++----
+ block/elevator.h | 4 +++-
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/block/elevator.c b/block/elevator.c
+index 20e70fd3f77f9..9e12706e8d8cb 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -512,7 +512,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
+               if (uevent)
+                       kobject_uevent(&e->kobj, KOBJ_ADD);
+-              e->registered = 1;
++              set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
+       }
+       return error;
+ }
+@@ -523,11 +523,9 @@ void elv_unregister_queue(struct request_queue *q)
+       lockdep_assert_held(&q->sysfs_lock);
+-      if (e && e->registered) {
++      if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
+               kobject_uevent(&e->kobj, KOBJ_REMOVE);
+               kobject_del(&e->kobj);
+-
+-              e->registered = 0;
+       }
+ }
+diff --git a/block/elevator.h b/block/elevator.h
+index 3f0593b3bf9d3..ed574bf3e629e 100644
+--- a/block/elevator.h
++++ b/block/elevator.h
+@@ -100,10 +100,12 @@ struct elevator_queue
+       void *elevator_data;
+       struct kobject kobj;
+       struct mutex sysfs_lock;
+-      unsigned int registered:1;
++      unsigned long flags;
+       DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
+ };
++#define ELEVATOR_FLAG_REGISTERED 0
++
+ /*
+  * block elevator interface
+  */
+-- 
+2.43.0
+
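Why an `unsigned long flags` word instead of the old `registered:1` bitfield: bit numbers work with the kernel's atomic bitops, and test_and_clear_bit() folds the check and the clear into one step. A short kernel-context sketch, assuming the <linux/bitops.h> API:

    #include <linux/bitops.h>       /* set_bit, test_and_clear_bit */

    #define ELEVATOR_FLAG_REGISTERED 0      /* a bit number, not a mask */

    struct elevator_queue_sketch {
            unsigned long flags;            /* room for future flags too */
    };

    static void register_sketch(struct elevator_queue_sketch *e)
    {
            set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
    }

    static int unregister_sketch(struct elevator_queue_sketch *e)
    {
            /* Atomic check-and-clear; a :1 bitfield can't offer this. */
            return test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
    }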
diff --git a/queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch b/queue-6.1/elevator-remove-redundant-code-in-elv_unregister_que.patch
new file mode 100644 (file)
index 0000000..ede298a
--- /dev/null
@@ -0,0 +1,39 @@
+From efa75e7a472dd9a1c9519c1cf50e37d2b5d3ca47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Oct 2022 20:15:13 +0800
+Subject: elevator: remove redundant code in elv_unregister_queue()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 6d9f4cf125585ebf0718abcf5ce9ca898877c6d2 ]
+
+"elevator_queue *e" is already declared and initialized in the beginning
+of elv_unregister_queue().
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20221019121518.3865235-2-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: f814bdda774c ("blk-wbt: Fix detection of dirty-throttled tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/elevator.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/block/elevator.c b/block/elevator.c
+index bd71f0fc4e4b6..20e70fd3f77f9 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -524,8 +524,6 @@ void elv_unregister_queue(struct request_queue *q)
+       lockdep_assert_held(&q->sysfs_lock);
+       if (e && e->registered) {
+-              struct elevator_queue *e = q->elevator;
+-
+               kobject_uevent(&e->kobj, KOBJ_REMOVE);
+               kobject_del(&e->kobj);
+-- 
+2.43.0
+
diff --git a/queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch b/queue-6.1/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
new file mode 100644 (file)
index 0000000..102d360
--- /dev/null
@@ -0,0 +1,65 @@
+From 253749176dc92e57dc90d7bf99dd82310f3bf2ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:34:00 +0100
+Subject: exit: wait_task_zombie: kill the no longer necessary
+ spin_lock_irq(siglock)
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ]
+
+After the recent changes nobody uses siglock to read the values
+protected by stats_lock, so we can kill
+spin_lock_irq(&current->sighand->siglock) and update the comment.
+
+With this patch only __exit_signal() and thread_group_start_cputime() take
+stats_lock under siglock.
+
+Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index bccfa4218356e..c95fffc625fcd 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1146,17 +1146,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+                * and nobody can change them.
+                *
+                * psig->stats_lock also protects us from our sub-threads
+-               * which can reap other children at the same time. Until
+-               * we change k_getrusage()-like users to rely on this lock
+-               * we have to take ->siglock as well.
++               * which can reap other children at the same time.
+                *
+                * We use thread_group_cputime_adjusted() to get times for
+                * the thread group, which consolidates times for all threads
+                * in the group including the group leader.
+                */
+               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+-              spin_lock_irq(&current->sighand->siglock);
+-              write_seqlock(&psig->stats_lock);
++              write_seqlock_irq(&psig->stats_lock);
+               psig->cutime += tgutime + sig->cutime;
+               psig->cstime += tgstime + sig->cstime;
+               psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
+@@ -1179,8 +1176,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+                       psig->cmaxrss = maxrss;
+               task_io_accounting_add(&psig->ioac, &p->ioac);
+               task_io_accounting_add(&psig->ioac, &sig->ioac);
+-              write_sequnlock(&psig->stats_lock);
+-              spin_unlock_irq(&current->sighand->siglock);
++              write_sequnlock_irq(&psig->stats_lock);
+       }
+       if (wo->wo_rusage)
+-- 
+2.43.0
+
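For intuition on why dropping siglock is safe here: a seqlock writer bumps a sequence counter around its update, and readers retry until they see a stable, even count, so the stats_lock write section alone orders the readers. A toy single-threaded sketch of that protocol (real seqlocks add memory barriers and irq handling):

    /* Toy model of a seqlock; illustration only, no barriers. */
    static unsigned int seq;                /* even: idle, odd: write going on */
    static unsigned long long cutime;       /* stand-in stats field */

    static void writer_add(unsigned long long delta)
    {
            seq++;                          /* like write_seqlock_irq() */
            cutime += delta;
            seq++;                          /* like write_sequnlock_irq() */
    }

    static unsigned long long reader_snapshot(void)
    {
            unsigned int s;
            unsigned long long v;

            do {
                    s = seq;                /* like read_seqbegin() */
                    v = cutime;
            } while ((s & 1) || s != seq);  /* like read_seqretry() */
            return v;
    }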
diff --git a/queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch b/queue-6.1/fs-proc-do_task_stat-use-__for_each_thread.patch
new file mode 100644 (file)
index 0000000..9ad1d0e
--- /dev/null
@@ -0,0 +1,45 @@
+From 30a5f96454071a295334641ab88e22da9c9bcd99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 18:45:01 +0200
+Subject: fs/proc: do_task_stat: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 7904e53ed5a20fc678c01d5d1b07ec486425bb6a ]
+
+do/while_each_thread should be avoided when possible.
+
+Link: https://lkml.kernel.org/r/20230909164501.GA11581@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 7601df8031fd ("fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/array.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index 1b0d78dfd20f9..bcb645627991e 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -526,12 +526,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+               /* add up live thread stats at the group level */
+               if (whole) {
+-                      struct task_struct *t = task;
+-                      do {
++                      struct task_struct *t;
++
++                      __for_each_thread(sig, t) {
+                               min_flt += t->min_flt;
+                               maj_flt += t->maj_flt;
+                               gtime += task_gtime(t);
+-                      } while_each_thread(task, t);
++                      }
+                       min_flt += sig->min_flt;
+                       maj_flt += sig->maj_flt;
+-- 
+2.43.0
+
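For context, __for_each_thread() walks the signal_struct's thread list directly, so it needs no starting task the way while_each_thread() does. A standalone sketch of that accumulation shape over a stand-in thread list:

    /* Stand-in thread list; the kernel iterates an RCU-protected list. */
    struct task_sketch {
            unsigned long min_flt;
            struct task_sketch *next;
    };

    struct signal_sketch {
            struct task_sketch *threads;
    };

    #define for_each_thread_sketch(sig, t) \
            for ((t) = (sig)->threads; (t); (t) = (t)->next)

    static unsigned long group_min_flt(struct signal_sketch *sig)
    {
            struct task_sketch *t;
            unsigned long sum = 0;

            for_each_thread_sketch(sig, t)
                    sum += t->min_flt;      /* same shape as do_task_stat() */
            return sum;
    }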
diff --git a/queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch b/queue-6.1/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch
new file mode 100644 (file)
index 0000000..228aa6d
--- /dev/null
@@ -0,0 +1,130 @@
+From ccfb929b0f854215d56556ebff5261bc0f01227c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:33:57 +0100
+Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the
+ threads/children stats
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 7601df8031fd67310af891897ef6cc0df4209305 ]
+
+lock_task_sighand() can trigger a hard lockup.  If NR_CPUS threads call
+do_task_stat() at the same time and the process has NR_THREADS threads,
+it will spin with irqs disabled for O(NR_CPUS * NR_THREADS) time.
+
+Change do_task_stat() to use sig->stats_lock to gather the statistics
+outside of the ->siglock protected section; in the likely case this
+code will run lockless.
+
+Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/array.c | 58 +++++++++++++++++++++++++++----------------------
+ 1 file changed, 32 insertions(+), 26 deletions(-)
+
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index bcb645627991e..d210b2f8b7ed5 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+       int permitted;
+       struct mm_struct *mm;
+       unsigned long long start_time;
+-      unsigned long cmin_flt = 0, cmaj_flt = 0;
+-      unsigned long  min_flt = 0,  maj_flt = 0;
+-      u64 cutime, cstime, utime, stime;
+-      u64 cgtime, gtime;
++      unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
++      u64 cutime, cstime, cgtime, utime, stime, gtime;
+       unsigned long rsslim = 0;
+       unsigned long flags;
+       int exit_code = task->exit_code;
++      struct signal_struct *sig = task->signal;
++      unsigned int seq = 1;
+       state = *get_task_state(task);
+       vsize = eip = esp = 0;
+@@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+       sigemptyset(&sigign);
+       sigemptyset(&sigcatch);
+-      cutime = cstime = 0;
+-      cgtime = gtime = 0;
+       if (lock_task_sighand(task, &flags)) {
+-              struct signal_struct *sig = task->signal;
+-
+               if (sig->tty) {
+                       struct pid *pgrp = tty_get_pgrp(sig->tty);
+                       tty_pgrp = pid_nr_ns(pgrp, ns);
+@@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+               num_threads = get_nr_threads(task);
+               collect_sigign_sigcatch(task, &sigign, &sigcatch);
+-              cmin_flt = sig->cmin_flt;
+-              cmaj_flt = sig->cmaj_flt;
+-              cutime = sig->cutime;
+-              cstime = sig->cstime;
+-              cgtime = sig->cgtime;
+               rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
+-              /* add up live thread stats at the group level */
+               if (whole) {
+-                      struct task_struct *t;
+-
+-                      __for_each_thread(sig, t) {
+-                              min_flt += t->min_flt;
+-                              maj_flt += t->maj_flt;
+-                              gtime += task_gtime(t);
+-                      }
+-
+-                      min_flt += sig->min_flt;
+-                      maj_flt += sig->maj_flt;
+-                      gtime += sig->gtime;
+-
+                       if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
+                               exit_code = sig->group_exit_code;
+               }
+@@ -552,6 +530,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+       if (permitted && (!whole || num_threads < 2))
+               wchan = !task_is_running(task);
++      do {
++              seq++; /* 2 on the 1st/lockless path, otherwise odd */
++              flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
++
++              cmin_flt = sig->cmin_flt;
++              cmaj_flt = sig->cmaj_flt;
++              cutime = sig->cutime;
++              cstime = sig->cstime;
++              cgtime = sig->cgtime;
++
++              if (whole) {
++                      struct task_struct *t;
++
++                      min_flt = sig->min_flt;
++                      maj_flt = sig->maj_flt;
++                      gtime = sig->gtime;
++
++                      rcu_read_lock();
++                      __for_each_thread(sig, t) {
++                              min_flt += t->min_flt;
++                              maj_flt += t->maj_flt;
++                              gtime += task_gtime(t);
++                      }
++                      rcu_read_unlock();
++              }
++      } while (need_seqretry(&sig->stats_lock, seq));
++      done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
++
+       if (whole) {
+               thread_group_cputime_adjusted(task, &utime, &stime);
+       } else {
+-- 
+2.43.0
+
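The reader loop added above follows the kernel's adaptive pattern: the first pass runs lockless under the seqcount, and only if a writer raced does the retry take stats_lock for real. A condensed kernel-context fragment of that control flow, mirroring the hunk above (not standalone; it assumes the seqlock API and the surrounding function):

    unsigned int seq = 1;           /* becomes 2 (even) on the first pass */
    unsigned long flags;

    do {
            seq++;                  /* even: lockless; odd: locked retry */
            flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

            /* ... snapshot sig->c* fields; walk threads under RCU ... */
    } while (need_seqretry(&sig->stats_lock, seq));
    done_seqretry_irqrestore(&sig->stats_lock, seq, flags);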
diff --git a/queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch b/queue-6.1/getrusage-add-the-signal_struct-sig-local-variable.patch
new file mode 100644 (file)
index 0000000..3b77b3f
--- /dev/null
@@ -0,0 +1,93 @@
+From 81ff25ceeea37b4c83ad30633828b50019a78f16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:25:54 +0200
+Subject: getrusage: add the "signal_struct *sig" local variable
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ]
+
+No functional changes, cleanup/preparation.
+
+Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 37 +++++++++++++++++++------------------
+ 1 file changed, 19 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index c85e1abf7b7c7..177155ba50cd3 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1779,6 +1779,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       unsigned long flags;
+       u64 tgutime, tgstime, utime, stime;
+       unsigned long maxrss = 0;
++      struct signal_struct *sig = p->signal;
+       memset((char *)r, 0, sizeof (*r));
+       utime = stime = 0;
+@@ -1786,7 +1787,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       if (who == RUSAGE_THREAD) {
+               task_cputime_adjusted(current, &utime, &stime);
+               accumulate_thread_rusage(p, r);
+-              maxrss = p->signal->maxrss;
++              maxrss = sig->maxrss;
+               goto out;
+       }
+@@ -1796,15 +1797,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       switch (who) {
+       case RUSAGE_BOTH:
+       case RUSAGE_CHILDREN:
+-              utime = p->signal->cutime;
+-              stime = p->signal->cstime;
+-              r->ru_nvcsw = p->signal->cnvcsw;
+-              r->ru_nivcsw = p->signal->cnivcsw;
+-              r->ru_minflt = p->signal->cmin_flt;
+-              r->ru_majflt = p->signal->cmaj_flt;
+-              r->ru_inblock = p->signal->cinblock;
+-              r->ru_oublock = p->signal->coublock;
+-              maxrss = p->signal->cmaxrss;
++              utime = sig->cutime;
++              stime = sig->cstime;
++              r->ru_nvcsw = sig->cnvcsw;
++              r->ru_nivcsw = sig->cnivcsw;
++              r->ru_minflt = sig->cmin_flt;
++              r->ru_majflt = sig->cmaj_flt;
++              r->ru_inblock = sig->cinblock;
++              r->ru_oublock = sig->coublock;
++              maxrss = sig->cmaxrss;
+               if (who == RUSAGE_CHILDREN)
+                       break;
+@@ -1814,14 +1815,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+               utime += tgutime;
+               stime += tgstime;
+-              r->ru_nvcsw += p->signal->nvcsw;
+-              r->ru_nivcsw += p->signal->nivcsw;
+-              r->ru_minflt += p->signal->min_flt;
+-              r->ru_majflt += p->signal->maj_flt;
+-              r->ru_inblock += p->signal->inblock;
+-              r->ru_oublock += p->signal->oublock;
+-              if (maxrss < p->signal->maxrss)
+-                      maxrss = p->signal->maxrss;
++              r->ru_nvcsw += sig->nvcsw;
++              r->ru_nivcsw += sig->nivcsw;
++              r->ru_minflt += sig->min_flt;
++              r->ru_majflt += sig->maj_flt;
++              r->ru_inblock += sig->inblock;
++              r->ru_oublock += sig->oublock;
++              if (maxrss < sig->maxrss)
++                      maxrss = sig->maxrss;
+               t = p;
+               do {
+                       accumulate_thread_rusage(t, r);
+-- 
+2.43.0
+
diff --git a/queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch b/queue-6.1/getrusage-move-thread_group_cputime_adjusted-outside.patch
new file mode 100644 (file)
index 0000000..aecdb3c
--- /dev/null
@@ -0,0 +1,111 @@
+From 915ff491a0f50b26e3e1c864d6331479e6056eeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:50 +0100
+Subject: getrusage: move thread_group_cputime_adjusted() outside of
+ lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ]
+
+Patch series "getrusage: use sig->stats_lock", v2.
+
+This patch (of 2):
+
+thread_group_cputime() does its own locking, so we can safely shift
+thread_group_cputime_adjusted(), which does another for_each_thread
+loop, outside of the ->siglock protected section.
+
+This is also preparation for the next patch, which changes getrusage()
+to use stats_lock instead of siglock; thread_group_cputime() takes the
+same lock.  With the current implementation recursive
+read_seqbegin_or_lock() is fine, as thread_group_cputime() can't enter
+the slow mode if the caller holds stats_lock, yet this looks safer and
+better performance-wise.
+
+Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com
+Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 177155ba50cd3..2646047fe5513 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1778,17 +1778,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       struct task_struct *t;
+       unsigned long flags;
+       u64 tgutime, tgstime, utime, stime;
+-      unsigned long maxrss = 0;
++      unsigned long maxrss;
++      struct mm_struct *mm;
+       struct signal_struct *sig = p->signal;
+-      memset((char *)r, 0, sizeof (*r));
++      memset(r, 0, sizeof(*r));
+       utime = stime = 0;
++      maxrss = 0;
+       if (who == RUSAGE_THREAD) {
+               task_cputime_adjusted(current, &utime, &stime);
+               accumulate_thread_rusage(p, r);
+               maxrss = sig->maxrss;
+-              goto out;
++              goto out_thread;
+       }
+       if (!lock_task_sighand(p, &flags))
+@@ -1812,9 +1814,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               fallthrough;
+       case RUSAGE_SELF:
+-              thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+-              utime += tgutime;
+-              stime += tgstime;
+               r->ru_nvcsw += sig->nvcsw;
+               r->ru_nivcsw += sig->nivcsw;
+               r->ru_minflt += sig->min_flt;
+@@ -1834,19 +1833,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       }
+       unlock_task_sighand(p, &flags);
+-out:
+-      r->ru_utime = ns_to_kernel_old_timeval(utime);
+-      r->ru_stime = ns_to_kernel_old_timeval(stime);
++      if (who == RUSAGE_CHILDREN)
++              goto out_children;
+-      if (who != RUSAGE_CHILDREN) {
+-              struct mm_struct *mm = get_task_mm(p);
++      thread_group_cputime_adjusted(p, &tgutime, &tgstime);
++      utime += tgutime;
++      stime += tgstime;
+-              if (mm) {
+-                      setmax_mm_hiwater_rss(&maxrss, mm);
+-                      mmput(mm);
+-              }
++out_thread:
++      mm = get_task_mm(p);
++      if (mm) {
++              setmax_mm_hiwater_rss(&maxrss, mm);
++              mmput(mm);
+       }
++
++out_children:
+       r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
++      r->ru_utime = ns_to_kernel_old_timeval(utime);
++      r->ru_stime = ns_to_kernel_old_timeval(stime);
+ }
+ SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
+-- 
+2.43.0
+
diff --git a/queue-6.1/getrusage-use-__for_each_thread.patch b/queue-6.1/getrusage-use-__for_each_thread.patch
new file mode 100644 (file)
index 0000000..81c6b5f
--- /dev/null
@@ -0,0 +1,43 @@
+From c2606554387cfe68ca114e907e6556c86ef1b3ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:26:29 +0200
+Subject: getrusage: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ]
+
+do/while_each_thread should be avoided when possible.
+
+Plus, this change allows us to avoid lock_task_sighand(); we can use
+rcu and/or sig->stats_lock instead.
+
+Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 2646047fe5513..04102538cf43f 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1822,10 +1822,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               r->ru_oublock += sig->oublock;
+               if (maxrss < sig->maxrss)
+                       maxrss = sig->maxrss;
+-              t = p;
+-              do {
++              __for_each_thread(sig, t)
+                       accumulate_thread_rusage(t, r);
+-              } while_each_thread(p, t);
+               break;
+       default:
+-- 
+2.43.0
+
diff --git a/queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch b/queue-6.1/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
new file mode 100644 (file)
index 0000000..df97529
--- /dev/null
@@ -0,0 +1,92 @@
+From a9c7d357939f612d4a8a5533af560a4b4ad4a57a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:53 +0100
+Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ]
+
+lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
+getrusage() at the same time and the process has NR_THREADS threads,
+spin_lock_irq will spin with irqs disabled for O(NR_CPUS * NR_THREADS)
+time.
+
+Change getrusage() to use sig->stats_lock; it was specifically designed
+for this type of use. This way it runs lockless in the likely case.
+
+TODO:
+       - Change do_task_stat() to use sig->stats_lock too, then we can
+         remove spin_lock_irq(siglock) in wait_task_zombie().
+
+       - Turn sig->stats_lock into seqcount_rwlock_t, this way the
+         readers in the slow mode won't exclude each other. See
+         https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/
+
+       - stats_lock has to disable irqs because ->siglock can be taken
+         in irq context, it would be very nice to change __exit_signal()
+         to avoid the siglock->stats_lock dependency.
+
+Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 04102538cf43f..d06eda1387b69 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1781,7 +1781,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       unsigned long maxrss;
+       struct mm_struct *mm;
+       struct signal_struct *sig = p->signal;
++      unsigned int seq = 0;
++retry:
+       memset(r, 0, sizeof(*r));
+       utime = stime = 0;
+       maxrss = 0;
+@@ -1793,8 +1795,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               goto out_thread;
+       }
+-      if (!lock_task_sighand(p, &flags))
+-              return;
++      flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+       switch (who) {
+       case RUSAGE_BOTH:
+@@ -1822,14 +1823,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               r->ru_oublock += sig->oublock;
+               if (maxrss < sig->maxrss)
+                       maxrss = sig->maxrss;
++
++              rcu_read_lock();
+               __for_each_thread(sig, t)
+                       accumulate_thread_rusage(t, r);
++              rcu_read_unlock();
++
+               break;
+       default:
+               BUG();
+       }
+-      unlock_task_sighand(p, &flags);
++
++      if (need_seqretry(&sig->stats_lock, seq)) {
++              seq = 1;
++              goto retry;
++      }
++      done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+       if (who == RUSAGE_CHILDREN)
+               goto out_children;
+-- 
+2.43.0
+
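One subtlety in this variant of the pattern: the snapshot accumulates straight into *r, so a retry has to restart from a zeroed rusage, which is why the `retry:` label sits above the memset instead of a do/while wrapping only the read section. A condensed kernel-context fragment mirroring the hunks above (assumes the seqlock API and the surrounding function):

    unsigned int seq = 0;           /* even seed: first pass is lockless */
    unsigned long flags;

    retry:
            memset(r, 0, sizeof(*r));       /* drop any partial accumulation */
            flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

            /* ... accumulate sig->* and per-thread counters into r ... */

            if (need_seqretry(&sig->stats_lock, seq)) {
                    seq = 1;                /* odd: take the lock this time */
                    goto retry;
            }
            done_seqretry_irqrestore(&sig->stats_lock, seq, flags);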
diff --git a/queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch b/queue-6.1/kvm-s390-add-stat-counter-for-shadow-gmap-events.patch
new file mode 100644 (file)
index 0000000..9547060
--- /dev/null
@@ -0,0 +1,168 @@
+From 9ff0df5b0577368409e200af8042ed5138f3cc34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Oct 2023 11:32:52 +0200
+Subject: KVM: s390: add stat counter for shadow gmap events
+
+From: Nico Boehr <nrb@linux.ibm.com>
+
+[ Upstream commit c3235e2dd6956448a562d6b1112205eeebc8ab43 ]
+
+The shadow gmap tracks memory of nested guests (guest-3). In certain
+scenarios, the shadow gmap needs to be rebuilt, which is a costly operation
+since it involves a SIE exit into guest-1 for every entry in the respective
+shadow level.
+
+Add kvm stat counters for when new shadow structures are created at
+various levels. Also add a counter gmap_shadow_create for when a
+completely fresh shadow gmap is created, as well as a counter
+gmap_shadow_reuse for when an existing gmap is being reused.
+
+Note that when several levels are shadowed at once, counters on all
+affected levels will be increased.
+
+Also note that not all page table levels need to be present and an ASCE
+can directly point to e.g. a segment table. In this case, a new segment
+table will always be equivalent to a new shadow gmap and hence will be
+counted as gmap_shadow_create and not as gmap_shadow_sg_entry.
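+
+With the counters in place they show up like any other KVM VM stat,
+e.g. via debugfs (assuming debugfs is mounted; the exact path is an
+assumption based on the usual KVM stats layout):
+
+  # cat /sys/kernel/debug/kvm/gmap_shadow_create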
+
+Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20231009093304.2555344-2-nrb@linux.ibm.com
+Message-Id: <20231009093304.2555344-2-nrb@linux.ibm.com>
+Stable-dep-of: fe752331d4b3 ("KVM: s390: vsie: fix race during shadow creation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/kvm_host.h | 7 +++++++
+ arch/s390/kvm/gaccess.c          | 7 +++++++
+ arch/s390/kvm/kvm-s390.c         | 9 ++++++++-
+ arch/s390/kvm/vsie.c             | 5 ++++-
+ 4 files changed, 26 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
+index b1e98a9ed152b..09abf000359f8 100644
+--- a/arch/s390/include/asm/kvm_host.h
++++ b/arch/s390/include/asm/kvm_host.h
+@@ -777,6 +777,13 @@ struct kvm_vm_stat {
+       u64 inject_service_signal;
+       u64 inject_virtio;
+       u64 aen_forward;
++      u64 gmap_shadow_create;
++      u64 gmap_shadow_reuse;
++      u64 gmap_shadow_r1_entry;
++      u64 gmap_shadow_r2_entry;
++      u64 gmap_shadow_r3_entry;
++      u64 gmap_shadow_sg_entry;
++      u64 gmap_shadow_pg_entry;
+ };
+ struct kvm_arch_memory_slot {
+diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
+index 0243b6e38d364..3beceff5f1c09 100644
+--- a/arch/s390/kvm/gaccess.c
++++ b/arch/s390/kvm/gaccess.c
+@@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+                                 unsigned long *pgt, int *dat_protection,
+                                 int *fake)
+ {
++      struct kvm *kvm;
+       struct gmap *parent;
+       union asce asce;
+       union vaddress vaddr;
+@@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+       *fake = 0;
+       *dat_protection = 0;
++      kvm = sg->private;
+       parent = sg->parent;
+       vaddr.addr = saddr;
+       asce.val = sg->orig_asce;
+@@ -1341,6 +1343,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+               rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+               if (rc)
+                       return rc;
++              kvm->stat.gmap_shadow_r1_entry++;
+       }
+               fallthrough;
+       case ASCE_TYPE_REGION2: {
+@@ -1369,6 +1372,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+               rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+               if (rc)
+                       return rc;
++              kvm->stat.gmap_shadow_r2_entry++;
+       }
+               fallthrough;
+       case ASCE_TYPE_REGION3: {
+@@ -1406,6 +1410,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+               rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+               if (rc)
+                       return rc;
++              kvm->stat.gmap_shadow_r3_entry++;
+       }
+               fallthrough;
+       case ASCE_TYPE_SEGMENT: {
+@@ -1439,6 +1444,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+               rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+               if (rc)
+                       return rc;
++              kvm->stat.gmap_shadow_sg_entry++;
+       }
+       }
+       /* Return the parent address of the page table */
+@@ -1509,6 +1515,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+       pte.p |= dat_protection;
+       if (!rc)
+               rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
++      vcpu->kvm->stat.gmap_shadow_pg_entry++;
+       ipte_unlock(vcpu->kvm);
+       mmap_read_unlock(sg->mm);
+       return rc;
+diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
+index f604946ab2c85..348d49268a7ec 100644
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+       STATS_DESC_COUNTER(VM, inject_pfault_done),
+       STATS_DESC_COUNTER(VM, inject_service_signal),
+       STATS_DESC_COUNTER(VM, inject_virtio),
+-      STATS_DESC_COUNTER(VM, aen_forward)
++      STATS_DESC_COUNTER(VM, aen_forward),
++      STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
++      STATS_DESC_COUNTER(VM, gmap_shadow_create),
++      STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
++      STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
++      STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
++      STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
++      STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
+ };
+ const struct kvm_stats_header kvm_vm_stats_header = {
+diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
+index 740f8b56e63f9..b2dbf08a961e5 100644
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1206,8 +1206,10 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+        * we're holding has been unshadowed. If the gmap is still valid,
+        * we can safely reuse it.
+        */
+-      if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
++      if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
++              vcpu->kvm->stat.gmap_shadow_reuse++;
+               return 0;
++      }
+       /* release the old shadow - if any, and mark the prefix as unmapped */
+       release_gmap_shadow(vsie_page);
+@@ -1215,6 +1217,7 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+       if (IS_ERR(gmap))
+               return PTR_ERR(gmap);
+       gmap->private = vcpu->kvm;
++      vcpu->kvm->stat.gmap_shadow_create++;
+       WRITE_ONCE(vsie_page->gmap, gmap);
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch b/queue-6.1/kvm-s390-vsie-fix-race-during-shadow-creation.patch
new file mode 100644 (file)
index 0000000..db7f648
--- /dev/null
@@ -0,0 +1,66 @@
+From 5a150283ce6521f9a844d5c5f14c8f1dc7e26bab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Dec 2023 13:53:17 +0100
+Subject: KVM: s390: vsie: fix race during shadow creation
+
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+
+[ Upstream commit fe752331d4b361d43cfd0b89534b4b2176057c32 ]
+
+Right now it is possible to see gmap->private being zero in
+kvm_s390_vsie_gmap_notifier, resulting in a crash.  This is due to the
+fact that we only set gmap->private = kvm after creation:
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+                               struct vsie_page *vsie_page)
+{
+[...]
+        gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+        if (IS_ERR(gmap))
+                return PTR_ERR(gmap);
+        gmap->private = vcpu->kvm;
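+
+Schematically (an illustrative interleaving, not actual code):
+
+  vCPU thread                             invalidation path
+  -----------                             -----------------
+  gmap = gmap_shadow(...);
+    /* shadow gmap already visible */
+                                          kvm_s390_vsie_gmap_notifier(gmap, ...)
+                                            /* gmap->private still NULL -> crash */
+  gmap->private = vcpu->kvm;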
+
+Let children inherit the private field of the parent.
+
+Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization")
+Cc: <stable@vger.kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kvm/vsie.c | 1 -
+ arch/s390/mm/gmap.c  | 1 +
+ 2 files changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
+index b2dbf08a961e5..d90c818a9ae71 100644
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1216,7 +1216,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+       gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+       if (IS_ERR(gmap))
+               return PTR_ERR(gmap);
+-      gmap->private = vcpu->kvm;
+       vcpu->kvm->stat.gmap_shadow_create++;
+       WRITE_ONCE(vsie_page->gmap, gmap);
+       return 0;
+diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
+index 243f673fa6515..662cf23a1b44b 100644
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+               return ERR_PTR(-ENOMEM);
+       new->mm = parent->mm;
+       new->parent = gmap_get(parent);
++      new->private = parent->private;
+       new->orig_asce = asce;
+       new->edat_level = edat_level;
+       new->initialized = false;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch b/queue-6.1/nfp-flower-add-goto_chain_index-for-ct-entry.patch
new file mode 100644 (file)
index 0000000..720de5b
--- /dev/null
@@ -0,0 +1,95 @@
+From 571c5e0b16a801d079f9d65fde6131b1e7141702 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Mar 2023 08:36:08 +0200
+Subject: nfp: flower: add goto_chain_index for ct entry
+
+From: Wentao Jia <wentao.jia@corigine.com>
+
+[ Upstream commit 3e44d19934b92398785b3ffc2353b9eba264140e ]
+
+The chain_index field has different meanings in pre ct entries and post
+ct entries. In a pre ct entry it holds the chain index, but in a post ct
+entry it holds the goto chain index, which is confusing.
+
+A chain index and a goto chain index may both be present in one flow
+rule, so the single field chain_index cannot distinguish them, and the
+follow-up patch needs both chain_index and goto_chain_index to support
+multiple ct zones.
+
+Add another field, goto_chain_index, to record the goto chain index.
+If a post ct entry has no goto action, its goto_chain_index is 0.
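+
+For example (hypothetical rules; only the resulting indices matter):
+
+  pre ct entry:  chain 0 ... action ct ... goto chain 1
+                 -> chain_index = 0, goto_chain_index = 1
+  post ct entry: chain 1 ... ct_state +trk+est ...
+                 -> chain_index = 1, goto_chain_index = 0 (no goto action)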
+
+Signed-off-by: Wentao Jia <wentao.jia@corigine.com>
+Acked-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: Louis Peens <louis.peens@corigine.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: cefa98e806fd ("nfp: flower: add hardware offload check for post ct entry")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++--
+ drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+index 7af03b45555dd..da7a47416a208 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+@@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
+       /* Checks that the chain_index of the filter matches the
+        * chain_index of the GOTO action.
+        */
+-      if (post_ct_entry->chain_index != pre_ct_entry->chain_index)
++      if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index)
+               return -EINVAL;
+       err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry);
+@@ -1776,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
+       if (IS_ERR(ct_entry))
+               return PTR_ERR(ct_entry);
+       ct_entry->type = CT_TYPE_PRE_CT;
+-      ct_entry->chain_index = ct_goto->chain_index;
++      ct_entry->chain_index = flow->common.chain_index;
++      ct_entry->goto_chain_index = ct_goto->chain_index;
+       list_add(&ct_entry->list_node, &zt->pre_ct_list);
+       zt->pre_ct_count++;
+@@ -1799,6 +1800,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+       struct nfp_fl_ct_zone_entry *zt;
+       bool wildcarded = false;
+       struct flow_match_ct ct;
++      struct flow_action_entry *ct_goto;
+       flow_rule_match_ct(rule, &ct);
+       if (!ct.mask->ct_zone) {
+@@ -1823,6 +1825,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+       ct_entry->type = CT_TYPE_POST_CT;
+       ct_entry->chain_index = flow->common.chain_index;
++      ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
++      ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0;
+       list_add(&ct_entry->list_node, &zt->post_ct_list);
+       zt->post_ct_count++;
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+index 762c0b36e269b..9440ab776ecea 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+@@ -112,6 +112,7 @@ enum nfp_nfp_layer_name {
+  * @cookie:   Flow cookie, same as original TC flow, used as key
+  * @list_node:        Used by the list
+  * @chain_index:      Chain index of the original flow
++ * @goto_chain_index: goto chain index of the flow
+  * @netdev:   netdev structure.
+  * @type:     Type of pre-entry from enum ct_entry_type
+  * @zt:               Reference to the zone table this belongs to
+@@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry {
+       unsigned long cookie;
+       struct list_head list_node;
+       u32 chain_index;
++      u32 goto_chain_index;
+       enum ct_entry_type type;
+       struct net_device *netdev;
+       struct nfp_fl_ct_zone_entry *zt;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch b/queue-6.1/nfp-flower-add-hardware-offload-check-for-post-ct-en.patch
new file mode 100644 (file)
index 0000000..6d00f77
--- /dev/null
@@ -0,0 +1,68 @@
+From 435ba0cb7080cb3f0960b93523f5da947205147a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jan 2024 17:19:08 +0200
+Subject: nfp: flower: add hardware offload check for post ct entry
+
+From: Hui Zhou <hui.zhou@corigine.com>
+
+[ Upstream commit cefa98e806fd4e2a5e2047457a11ae5f17b8f621 ]
+
+The nfp offload flow pay will not allocate a mask id when the out port
+is an openvswitch internal port. This is because these flows are used to
+configure the pre_tun table and are never actually sent to the firmware
+as an add-flow message. When a tc rule whose action contains ct has a
+post ct entry whose out port is an openvswitch internal port, the merged
+offload flow pay is sent to the firmware with a wrong mask id of 0.
+The nfp cannot support hardware offload for this situation, so return
+EOPNOTSUPP.
+
+Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows")
+CC: stable@vger.kernel.org # 5.14+
+Signed-off-by: Hui Zhou <hui.zhou@corigine.com>
+Signed-off-by: Louis Peens <louis.peens@corigine.com>
+Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+index da7a47416a208..497766ecdd91d 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+@@ -1797,10 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+ {
+       struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+       struct nfp_fl_ct_flow_entry *ct_entry;
++      struct flow_action_entry *ct_goto;
+       struct nfp_fl_ct_zone_entry *zt;
++      struct flow_action_entry *act;
+       bool wildcarded = false;
+       struct flow_match_ct ct;
+-      struct flow_action_entry *ct_goto;
++      int i;
++
++      flow_action_for_each(i, act, &rule->action) {
++              switch (act->id) {
++              case FLOW_ACTION_REDIRECT:
++              case FLOW_ACTION_REDIRECT_INGRESS:
++              case FLOW_ACTION_MIRRED:
++              case FLOW_ACTION_MIRRED_INGRESS:
++                      if (act->dev->rtnl_link_ops &&
++                          !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
++                              NL_SET_ERR_MSG_MOD(extack,
++                                                 "unsupported offload: out port is openvswitch internal port");
++                              return -EOPNOTSUPP;
++                      }
++                      break;
++              default:
++                      break;
++              }
++      }
+       flow_rule_match_ct(rule, &ct);
+       if (!ct.mask->ct_zone) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch b/queue-6.1/readahead-avoid-multiple-marked-readahead-pages.patch
new file mode 100644 (file)
index 0000000..acba474
--- /dev/null
@@ -0,0 +1,97 @@
+From 751dd31cb25b1fda2357852e790cffcc04bb0544 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Jan 2024 09:58:39 +0100
+Subject: readahead: avoid multiple marked readahead pages
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 ]
+
+ra_alloc_folio() marks the page that should trigger the next round of
+async readahead.  However, it rounds the computed index up to the order
+of the page being allocated.  This can lead to multiple consecutive
+pages being marked with the readahead flag.  Consider a situation with
+index == 1, mark == 1, order == 0.  We insert an order 0 page at index 1
+and mark it.  Then we bump order to 1 and index to 2; mark (still == 1)
+is rounded up to 2, so the page at index 2 is marked as well.  Then we
+bump order to 2 and index to 4; mark gets rounded up to 4, so the page at
+index 4 is marked as well.  The fact that multiple pages get marked
+within a single readahead
+window confuses the readahead logic and results in readahead window being
+trimmed back to 1.  This situation is triggered in particular when maximum
+readahead window size is not a power of two (in the observed case it was
+768 KB) and as a result sequential read throughput suffers.
+
+Fix the problem by rounding 'mark' down instead of up.  Because the index
+is naturally aligned to 'order', we are guaranteed 'rounded mark' == index
+iff 'mark' is within the page we are allocating at 'index' and thus
+exactly one page is marked with readahead flag as required by the
+readahead code and sequential read performance is restored.
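+
+Worked example (order == 1, i.e. a two-page folio allocated at index 2):
+
+  round_down(2, 2) == 2 == index  -> mark inside the folio, flag set
+  round_down(3, 2) == 2 == index  -> mark inside the folio, flag set
+  round_down(4, 2) == 4 != index  -> mark in the next folio, flag not set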
+
+This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix
+readahead with large folios").  The commit changed the rounding with the
+rationale:
+
+"...  we were setting the readahead flag on the folio which contains the
+last byte read from the block.  This is wrong because we will trigger
+readahead at the end of the read without waiting to see if a subsequent
+read is going to use the pages we just read."
+
+Although this is true, the fact is this was always the case with read
+sizes not aligned to folio boundaries and large folios in the page cache
+just make the situation more obvious (and frequent).  Also for sequential
+read workloads it is better to trigger the readahead earlier rather than
+later.  It is true that the difference in the rounding and thus earlier
+triggering of the readahead can result in reading more for semi-random
+workloads.  However workloads really suffering from this seem to be rare.
+In particular I have verified that the workload described in commit
+b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading
+random 100k blocks from a file like:
+
+[reader]
+bs=100k
+rw=randread
+numjobs=1
+size=64g
+runtime=60s
+
+is not impacted by the rounding change and achieves ~70MB/s in both cases.
+
+[jack@suse.cz: fix one more place where mark rounding was done as well]
+  Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz
+Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz
+Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Guo Xuenan <guoxuenan@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/readahead.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/readahead.c b/mm/readahead.c
+index ba43428043a35..e4b772bb70e68 100644
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+       if (!folio)
+               return -ENOMEM;
+-      mark = round_up(mark, 1UL << order);
++      mark = round_down(mark, 1UL << order);
+       if (index == mark)
+               folio_set_readahead(folio);
+       err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+@@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
+        * It's the expected callback index, assume sequential access.
+        * Ramp up sizes, and push forward the readahead window.
+        */
+-      expected = round_up(ra->start + ra->size - ra->async_size,
++      expected = round_down(ra->start + ra->size - ra->async_size,
+                       1UL << order);
+       if (index == expected || index == (ra->start + ra->size)) {
+               ra->start += ra->size;
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch b/queue-6.1/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
new file mode 100644 (file)
index 0000000..f6f376f
--- /dev/null
@@ -0,0 +1,61 @@
+From edc5869301a9444152614d751e3bb60d6c2a75db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Jan 2024 06:14:29 -0700
+Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
+
+From: Nico Pache <npache@redhat.com>
+
+[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ]
+
+On systems with a 64k page size and a 512M huge page size, the allocation
+and test succeed but error out at the munmap.  As the comment states,
+munmap will fail if the length is not HUGEPAGE aligned.  This is due to
+the length of the mapping being 1/2 the size of the hugepage, causing the
+munmap length to not be hugepage aligned.  Fix this by making the mapping
+length the full hugepage if the hugepage is larger than the length of the
+mapping.
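+
+Concretely: LENGTH is 256MB, so with a 512MB huge page the mapping is
+backed by a single 512MB page, but munmap(addr, 256MB) passes a length
+that is not a multiple of the huge page size and fails with EINVAL.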
+
+Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com
+Signed-off-by: Nico Pache <npache@redhat.com>
+Cc: Donet Tom <donettom@linux.vnet.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
+index 312889edb84ab..c65c55b7a789f 100644
+--- a/tools/testing/selftests/vm/map_hugetlb.c
++++ b/tools/testing/selftests/vm/map_hugetlb.c
+@@ -15,6 +15,7 @@
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <fcntl.h>
++#include "vm_util.h"
+ #define LENGTH (256UL*1024*1024)
+ #define PROTECTION (PROT_READ | PROT_WRITE)
+@@ -70,10 +71,16 @@ int main(int argc, char **argv)
+ {
+       void *addr;
+       int ret;
++      size_t hugepage_size;
+       size_t length = LENGTH;
+       int flags = FLAGS;
+       int shift = 0;
++      hugepage_size = default_huge_page_size();
++      /* munmap will fail if the length is not hugepage aligned */
++      if (hugepage_size > length)
++              length = hugepage_size;
++
+       if (argc > 1)
+               length = atol(argv[1]) << 20;
+       if (argc > 2) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mm-switch-to-bash-from-sh.patch b/queue-6.1/selftests-mm-switch-to-bash-from-sh.patch
new file mode 100644 (file)
index 0000000..0b6dc0a
--- /dev/null
@@ -0,0 +1,58 @@
+From a5559df581c2a4189c25ad561a0b87f7bccd22ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 14:04:54 +0500
+Subject: selftests/mm: switch to bash from sh
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ]
+
+Running charge_reserved_hugetlb.sh generates errors if sh is set to
+dash:
+
+./charge_reserved_hugetlb.sh: 9: [[: not found
+./charge_reserved_hugetlb.sh: 19: [[: not found
+./charge_reserved_hugetlb.sh: 27: [[: not found
+./charge_reserved_hugetlb.sh: 37: [[: not found
+./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected
+
+Switch to using /bin/bash instead of /bin/sh.  Make the switch for
+write_hugetlb_memory.sh as well, since it is called from
+charge_reserved_hugetlb.sh.
+
+Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +-
+ tools/testing/selftests/vm/write_hugetlb_memory.sh    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index 0899019a7fcb4..e14bdd4455f2d 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+ # Kselftest framework requirement - SKIP code is 4.
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+index 70a02301f4c27..3d2d2eb9d6fff 100644
+--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+ set -e
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch b/queue-6.1/selftests-mptcp-decrease-bw-in-simult-flows.patch
new file mode 100644 (file)
index 0000000..7e62dd5
--- /dev/null
@@ -0,0 +1,53 @@
+From 29eb96441917da2cf892c37a0d2fb87ceabfed17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jan 2024 22:49:51 +0100
+Subject: selftests: mptcp: decrease BW in simult flows
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 5e2f3c65af47e527ccac54060cf909e3306652ff ]
+
+When running the simult_flows selftest in slow environments -- e.g. QEMU
+without KVM support -- the results can be unstable. This selftest
+checks if the aggregated bandwidth is (almost) fully used as expected.
+
+To help improve the stability while still keeping the same validation
+in place, the BW and the delay are reduced to lower the pressure on the
+CPU.
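+
+(For reference, and assuming the script's conventions: run_test takes the
+two link rates in mbit, then the two link delays in ms, then the test
+name, so "run_test 10 3 1 25 ..." means 10 and 3 mbit links with 1 and
+25 ms of delay.)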
+
+Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests")
+Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 6b0014f538a22..6bda70af03a83 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -303,12 +303,12 @@ done
+ setup
+ run_test 10 10 0 0 "balanced bwidth"
+-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
++run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
+ # we still need some additional infrastructure to pass the following test-cases
+-run_test 30 10 0 0 "unbalanced bwidth"
+-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
++run_test 10 3 0 0 "unbalanced bwidth"
++run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
++run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+ mptcp_lib_result_print_all_tap
+ exit $ret
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch b/queue-6.1/selftests-mptcp-simult-flows-fix-some-subtest-names.patch
new file mode 100644 (file)
index 0000000..58f1540
--- /dev/null
@@ -0,0 +1,42 @@
+From 94a73f9dec7fef0ebc322fac05236c755fb6aa29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Feb 2024 19:25:37 +0100
+Subject: selftests: mptcp: simult flows: fix some subtest names
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 4d8e0dde0403b5a86aa83e243f020711a9c3e31f ]
+
+The selftest was correctly recording all the results, but the 'reverse
+direction' part was missing from the name when needed.
+
+It is important to have a unique (sub)test name in TAP, because some CI
+environments drop tests with duplicated names.
+
+Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP")
+Cc: stable@vger.kernel.org
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 6bda70af03a83..41d2f4991b35c 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -269,7 +269,8 @@ run_test()
+               [ $bail -eq 0 ] || exit $ret
+       fi
+-      printf "%-60s" "$msg - reverse direction"
++      msg+=" - reverse direction"
++      printf "%-60s" "${msg}"
+       do_transfer $large $small $time
+       lret=$?
+       mptcp_lib_result_code "${lret}" "${msg}"
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch b/queue-6.1/selftests-mptcp-simult-flows-format-subtests-results.patch
new file mode 100644 (file)
index 0000000..1952961
--- /dev/null
@@ -0,0 +1,68 @@
+From 4eeef0aaffa567f812390612c30f800de02edd73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 15:21:31 +0200
+Subject: selftests: mptcp: simult flows: format subtests results in TAP
+
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+
+[ Upstream commit 675d99338e7a6cd925d61d7dbf8c26612f7f08a9 ]
+
+The current selftests infrastructure formats the results in TAP 13. This
+version doesn't support subtests and only the end result of each
+selftest is taken into account. It means that a single issue in a
+subtest of a selftest containing multiple subtests forces the whole
+selftest to be marked as failed. It also means that subtests results are
+not tracked by CIs executing selftests.
+
+MPTCP selftests run hundreds of various subtests. It is then important
+to track each of them and not one result per selftest.
+
+It is particularly interesting to do that when validating stable kernels
+with the last version of the test suite: tests might fail because a
+feature is not supported but the test didn't skip that part. In this
+case, if subtests are not tracked, the whole selftest will be marked as
+failed making the other subtests useless because their results are
+ignored.
+
+This patch formats subtests results in TAP in simult_flows.sh selftest.
+
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 5e2f3c65af47 ("selftests: mptcp: decrease BW in simult flows")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 4a417f9d51d67..6b0014f538a22 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -263,6 +263,7 @@ run_test()
+       printf "%-60s" "$msg"
+       do_transfer $small $large $time
+       lret=$?
++      mptcp_lib_result_code "${lret}" "${msg}"
+       if [ $lret -ne 0 ]; then
+               ret=$lret
+               [ $bail -eq 0 ] || exit $ret
+@@ -271,6 +272,7 @@ run_test()
+       printf "%-60s" "$msg - reverse direction"
+       do_transfer $large $small $time
+       lret=$?
++      mptcp_lib_result_code "${lret}" "${msg}"
+       if [ $lret -ne 0 ]; then
+               ret=$lret
+               [ $bail -eq 0 ] || exit $ret
+@@ -307,4 +309,6 @@ run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+ run_test 30 10 0 0 "unbalanced bwidth"
+ run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+ run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
++
++mptcp_lib_result_print_all_tap
+ exit $ret
+-- 
+2.43.0
+
diff --git a/queue-6.1/series b/queue-6.1/series
index ce57a431ba374f48b6f7b735eb868f5af1466cef..6aa0f5bb096f93bc4f4d84e98f44cfbf7814900d 100644 (file)
--- a/queue-6.1/series
@@ -28,3 +28,41 @@ netrom-fix-a-data-race-around-sysctl_netrom_transpor.patch-15916
 netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch
 netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch
 netrom-fix-data-races-around-sysctl_net_busy_read.patch
+kvm-s390-add-stat-counter-for-shadow-gmap-events.patch
+kvm-s390-vsie-fix-race-during-shadow-creation.patch
+asoc-codecs-wcd938x-fix-headphones-volume-controls.patch
+drm-amd-display-fix-uninitialized-variable-usage-in-.patch
+nfp-flower-add-goto_chain_index-for-ct-entry.patch
+nfp-flower-add-hardware-offload-check-for-post-ct-en.patch
+readahead-avoid-multiple-marked-readahead-pages.patch
+selftests-mm-switch-to-bash-from-sh.patch
+selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
+xhci-process-isoc-td-properly-when-there-was-a-trans.patch
+xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
+drm-amdgpu-reset-ih-overflow_clear-bit.patch
+selftests-mptcp-simult-flows-format-subtests-results.patch
+selftests-mptcp-decrease-bw-in-simult-flows.patch
+blk-iocost-disable-writeback-throttling.patch
+elevator-remove-redundant-code-in-elv_unregister_que.patch
+blk-wbt-remove-unnecessary-check-in-wbt_enable_defau.patch
+elevator-add-new-field-flags-in-struct-elevator_queu.patch
+blk-wbt-don-t-enable-throttling-if-default-elevator-.patch
+blk-wbt-pass-a-gendisk-to-wbt_-enable-disable-_defau.patch
+blk-wbt-pass-a-gendisk-to-wbt_init.patch
+blk-rq-qos-move-rq_qos_add-and-rq_qos_del-out-of-lin.patch
+blk-rq-qos-make-rq_qos_add-and-rq_qos_del-more-usefu.patch
+blk-rq-qos-constify-rq_qos_ops.patch
+blk-rq-qos-store-a-gendisk-instead-of-request_queue-.patch
+blk-wbt-fix-detection-of-dirty-throttled-tasks.patch
+drm-amd-display-wrong-colorimetry-workaround.patch
+drm-amd-display-fix-mst-null-ptr-for-rv.patch
+getrusage-add-the-signal_struct-sig-local-variable.patch
+getrusage-move-thread_group_cputime_adjusted-outside.patch
+getrusage-use-__for_each_thread.patch
+getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
+fs-proc-do_task_stat-use-__for_each_thread.patch
+fs-proc-do_task_stat-use-sig-stats_lock-to-gather-th.patch
+exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
+selftests-mptcp-simult-flows-fix-some-subtest-names.patch
+blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch
+blk-iocost-pass-gendisk-to-ioc_refresh_params.patch
diff --git a/queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch b/queue-6.1/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
new file mode 100644 (file)
index 0000000..3094e1d
--- /dev/null
@@ -0,0 +1,57 @@
+From 9b5b2f37f69d5ac40e70bb8ba57cc444a5731800 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:37 +0200
+Subject: xhci: handle isoc Babble and Buffer Overrun events properly
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ]
+
+xHCI 4.9 explicitly forbids assuming that the xHC has released its
+ownership of a multi-TRB TD when it reports an error on one of the
+early TRBs. Yet the driver makes such assumption and releases the TD,
+allowing the remaining TRBs to be freed or overwritten by new TDs.
+
+The xHC should also report completion of the final TRB due to its IOC
+flag being set by us, regardless of prior errors. This event cannot
+be recognized if the TD has already been freed earlier, resulting in
+"Transfer event TRB DMA ptr not part of current TD" error message.
+
+Fix this by reusing the logic for processing isoc Transaction Errors.
+This also handles hosts which fail to report the final completion.
+
+Fix transfer length reporting on Babble errors. They may be caused by
+device malfunction, so there is no guarantee that the buffer has been filled.
+
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index e4441a71368e5..239b5edee3268 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2381,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       case COMP_BANDWIDTH_OVERRUN_ERROR:
+               frame->status = -ECOMM;
+               break;
+-      case COMP_ISOCH_BUFFER_OVERRUN:
+       case COMP_BABBLE_DETECTED_ERROR:
++              sum_trbs_for_length = true;
++              fallthrough;
++      case COMP_ISOCH_BUFFER_OVERRUN:
+               frame->status = -EOVERFLOW;
++              if (ep_trb != td->last_trb)
++                      td->error_mid_td = true;
+               break;
+       case COMP_INCOMPATIBLE_DEVICE_ERROR:
+       case COMP_STALL_ERROR:
+-- 
+2.43.0
+
diff --git a/queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch b/queue-6.1/xhci-process-isoc-td-properly-when-there-was-a-trans.patch
new file mode 100644 (file)
index 0000000..473cfd5
--- /dev/null
@@ -0,0 +1,186 @@
+From 2ca20b347c88d00cc5e407823e29358788366800 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:36 +0200
+Subject: xhci: process isoc TD properly when there was a transaction error mid
+ TD.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ]
+
+The last TRB of an isoc TD might not trigger an event if there was
+an error event for a TRB mid TD. This is seen on a NEC Corporation
+uPD720200 USB 3.0 host.
+
+After an error in the middle of a multi-TRB TD, the xHC should, according
+to xhci 4.9.1, generate events for the passed TRBs with the IOC flag set
+if it proceeds to the next TD. This event is either a copy of the
+original error, or a
+"success" transfer event.
+
+If that event is missing then the driver and the xHC get out of sync, as
+the driver is still expecting a transfer event for that first TD, while
+the xHC is already sending events for the next TD in the list.
+This leads to
+"Transfer event TRB DMA ptr not part of current TD" messages.
+
+As a solution we tag the isoc TDs that get error events mid TD.
+If an event doesn't match the first TD, then check if the tag is
+set and the event points to the next TD.
+In that case give back the first TD and process the next TD normally.
+
+Make sure TD status and transferred length stay valid in both cases
+with and without final TD completion event.
+
+Reported-by: Michał Pecio <michal.pecio@gmail.com>
+Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/
+Tested-by: Michał Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++-------
+ drivers/usb/host/xhci.h      |  1 +
+ 2 files changed, 61 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 1239e06dfe411..e4441a71368e5 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       /* handle completion code */
+       switch (trb_comp_code) {
+       case COMP_SUCCESS:
++              /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
++              if (td->error_mid_td)
++                      break;
+               if (remaining) {
+                       frame->status = short_framestatus;
+                       if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
+@@ -2388,8 +2391,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+               break;
+       case COMP_USB_TRANSACTION_ERROR:
+               frame->status = -EPROTO;
++              sum_trbs_for_length = true;
+               if (ep_trb != td->last_trb)
+-                      return 0;
++                      td->error_mid_td = true;
+               break;
+       case COMP_STOPPED:
+               sum_trbs_for_length = true;
+@@ -2409,6 +2413,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+               break;
+       }
++      if (td->urb_length_set)
++              goto finish_td;
++
+       if (sum_trbs_for_length)
+               frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
+                       ep_trb_len - remaining;
+@@ -2417,6 +2424,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       td->urb->actual_length += frame->actual_length;
++finish_td:
++      /* Don't give back TD yet if we encountered an error mid TD */
++      if (td->error_mid_td && ep_trb != td->last_trb) {
++              xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
++              td->urb_length_set = true;
++              return 0;
++      }
++
+       return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+@@ -2801,17 +2816,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+               }
+               if (!ep_seg) {
+-                      if (!ep->skip ||
+-                          !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+-                              /* Some host controllers give a spurious
+-                               * successful event after a short transfer.
+-                               * Ignore it.
+-                               */
+-                              if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+-                                              ep_ring->last_td_was_short) {
+-                                      ep_ring->last_td_was_short = false;
+-                                      goto cleanup;
++
++                      if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
++                              skip_isoc_td(xhci, td, ep, status);
++                              goto cleanup;
++                      }
++
++                      /*
++                       * Some hosts give a spurious success event after a short
++                       * transfer. Ignore it.
++                       */
++                      if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
++                          ep_ring->last_td_was_short) {
++                              ep_ring->last_td_was_short = false;
++                              goto cleanup;
++                      }
++
++                      /*
++                       * xhci 4.10.2 states isoc endpoints should continue
++                       * processing the next TD if there was an error mid TD.
++                       * So hosts like NEC don't generate an event for the last
++                       * isoc TRB even if the IOC flag is set.
++                       * xhci 4.9.1 states that if there are errors in multi-TRB
++                       * TDs the xHC should generate an error for that TRB, and if
++                       * the xHC proceeds to the next TD it should generate an event
++                       * for any TRB with the IOC flag on the way. Other hosts follow this.
++                       * So this event might be for the next TD.
++                       */
++                      if (td->error_mid_td &&
++                          !list_is_last(&td->td_list, &ep_ring->td_list)) {
++                              struct xhci_td *td_next = list_next_entry(td, td_list);
++
++                              ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
++                                                 td_next->last_trb, ep_trb_dma, false);
++                              if (ep_seg) {
++                                      /* give back previous TD, start handling new */
++                                      xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
++                                      ep_ring->dequeue = td->last_trb;
++                                      ep_ring->deq_seg = td->last_trb_seg;
++                                      inc_deq(xhci, ep_ring);
++                                      xhci_td_cleanup(xhci, td, ep_ring, td->status);
++                                      td = td_next;
+                               }
++                      }
++
++                      if (!ep_seg) {
+                               /* HC is busted, give up! */
+                               xhci_err(xhci,
+                                       "ERROR Transfer event TRB DMA ptr not "
+@@ -2823,9 +2872,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+                                         ep_trb_dma, true);
+                               return -ESHUTDOWN;
+                       }
+-
+-                      skip_isoc_td(xhci, td, ep, status);
+-                      goto cleanup;
+               }
+               if (trb_comp_code == COMP_SHORT_PACKET)
+                       ep_ring->last_td_was_short = true;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 1354310cb37b1..fc25a5b09710c 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1570,6 +1570,7 @@ struct xhci_td {
+       struct xhci_segment     *bounce_seg;
+       /* actual_length of the URB has already been set */
+       bool                    urb_length_set;
++      bool                    error_mid_td;
+       unsigned int            num_trbs;
+ };
+-- 
+2.43.0
+