--- /dev/null
+From aa1b46dcdc7baaf5fec0be25782ef24b26aa209e Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Sun, 13 Mar 2022 21:15:02 -1000
+Subject: block: fix rq-qos breakage from skipping rq_qos_done_bio()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit aa1b46dcdc7baaf5fec0be25782ef24b26aa209e upstream.
+
+a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't
+tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set.
+While this fixed a potential oops, it also broke blk-iocost by skipping the
+done_bio callback for merged bios.
+
+Before, whether a bio went through rq_qos_throttle() or rq_qos_merge(),
+rq_qos_done_bio() would be called on the bio on completion, with BIO_TRACKED
+distinguishing the former from the latter. After that commit,
+rq_qos_done_bio() is no longer called for bios which went through
+rq_qos_merge(). This royally confuses blk-iocost as the merged bios never
+finish and are considered perpetually in-flight.
+
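+Conceptually, the accounting that breaks can be sketched as follows (hedged
+pseudo-C with a made-up in_flight counter, not actual blk-iocost code):
+
+  /* rq_qos_merge() path: charge the cgroup for the merged bio */
+  iocg->in_flight++;
+
+  /* rq_qos_done_bio(): the matching uncharge -- skipped for merged
+   * bios after a647a524a467, so in_flight never drops back to zero
+   * and the cgroup looks permanently busy */
+  iocg->in_flight--;
+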
+One reliably reproducible failure mode is an intermediate cgroup getting
+stuck active, preventing its children from being activated due to the
+leaf-only rule and leading to loss of control. The following is from the
+resctl-bench protection scenario, which emulates isolating a web-server-like
+workload from a memory bomb, run on an iocost configuration which should
+yield a reasonable level of protection.
+
+ # cat /sys/block/nvme2n1/device/model
+ Samsung SSD 970 PRO 512GB
+ # cat /sys/fs/cgroup/io.cost.model
+ 259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025
+ # cat /sys/fs/cgroup/io.cost.qos
+ 259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00
+ # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
+ ...
+ Memory Hog Summary
+ ==================
+
+ IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m
+ W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m
+
+ Isolation and Request Latency Impact Distributions:
+
+ min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev
+ isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82
+ lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6
+
+ Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96%
+
+The isolation result of 58.12% is close to what this device would show
+without any IO control.
+
+Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and
+calling rq_qos_done_bio() on them too. For consistency and clarity, rename
+BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into
+rq_qos_done_bio() so that they sit next to the code paths that set the flags.
+
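+A controller's done_bio callback can still tell the two paths apart via the
+flags. Illustrative sketch (the callback name is made up; the flag helpers
+are the real ones):
+
+  static void example_done_bio(struct rq_qos *rqos, struct bio *bio)
+  {
+          if (bio_flagged(bio, BIO_QOS_THROTTLED)) {
+                  /* completion of a bio that took the throttle path */
+          } else if (bio_flagged(bio, BIO_QOS_MERGED)) {
+                  /* release whatever was charged at merge time */
+          }
+  }
+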
+With the patch applied, the same benchmark shows:
+
+ # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
+ ...
+ Memory Hog Summary
+ ==================
+
+ IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m
+ W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m
+
+ Isolation and Request Latency Impact Distributions:
+
+ min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev
+ isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81
+ lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68
+
+ Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0%
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked")
+Cc: stable@vger.kernel.org # v5.15+
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bio.c | 3 +--
+ block/blk-iolatency.c | 2 +-
+ block/blk-rq-qos.h | 20 +++++++++++---------
+ include/linux/blk_types.h | 3 ++-
+ 4 files changed, 15 insertions(+), 13 deletions(-)
+
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -1486,8 +1486,7 @@ again:
+ if (!bio_integrity_endio(bio))
+ return;
+
+- if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
+- rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio);
++ rq_qos_done_bio(bio);
+
+ if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+ trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(str
+ int inflight = 0;
+
+ blkg = bio->bi_blkg;
+- if (!blkg || !bio_flagged(bio, BIO_TRACKED))
++ if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
+ return;
+
+ iolat = blkg_to_lat(bio->bi_blkg);
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct
+ __rq_qos_requeue(q->rq_qos, rq);
+ }
+
+-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
++static inline void rq_qos_done_bio(struct bio *bio)
+ {
+- if (q->rq_qos)
+- __rq_qos_done_bio(q->rq_qos, bio);
++ if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
++ bio_flagged(bio, BIO_QOS_MERGED))) {
++ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
++ if (q->rq_qos)
++ __rq_qos_done_bio(q->rq_qos, bio);
++ }
+ }
+
+ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
+ {
+- /*
+- * BIO_TRACKED lets controllers know that a bio went through the
+- * normal rq_qos path.
+- */
+ if (q->rq_qos) {
+- bio_set_flag(bio, BIO_TRACKED);
++ bio_set_flag(bio, BIO_QOS_THROTTLED);
+ __rq_qos_throttle(q->rq_qos, bio);
+ }
+ }
+@@ -205,8 +205,10 @@ static inline void rq_qos_track(struct r
+ static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+- if (q->rq_qos)
++ if (q->rq_qos) {
++ bio_set_flag(bio, BIO_QOS_MERGED);
+ __rq_qos_merge(q->rq_qos, rq, bio);
++ }
+ }
+
+ static inline void rq_qos_queue_depth_changed(struct request_queue *q)
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -317,7 +317,8 @@ enum {
+ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
+ * of this bio. */
+ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
+- BIO_TRACKED, /* set if bio goes through the rq_qos path */
++ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
++ BIO_QOS_MERGED, /* but went through rq_qos merge path */
+ BIO_REMAPPED,
+ BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
--- /dev/null
+From 572299f03afd676dd4e20669cdaf5ed0fe1379d4 Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Fri, 18 Mar 2022 11:26:41 +0900
+Subject: block: limit request dispatch loop duration
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 572299f03afd676dd4e20669cdaf5ed0fe1379d4 upstream.
+
+When IO requests are made continuously and the target block device
+handles requests faster than they arrive, the request dispatch loop
+keeps repeating to dispatch the arriving requests for a very long time,
+more than a minute. Since the loop runs as a workqueue worker task, the
+very long loop duration triggers a workqueue watchdog timeout and BUG [1].
+
+To avoid the very long loop duration, break the loop periodically. While
+the opportunity to dispatch requests still exists, check need_resched(). If
+need_resched() returns true, the dispatch loop has already consumed its time
+slice, so reschedule the dispatch work and break the loop. Under heavy
+IO load, need_resched() may not return true for 20~30 seconds. To cover
+such a case, also check the time spent in the dispatch loop with jiffies.
+If more than 1 second has been spent, reschedule the dispatch work and
+break the loop.
+
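+Annotated sketch of the resulting loop (it mirrors the hunk below; the
+comments are explanatory, not part of the patch):
+
+  unsigned long end = jiffies + HZ;       /* 1 second time budget */
+
+  do {
+          ret = __blk_mq_do_dispatch_sched(hctx);
+          if (ret != 1)                   /* nothing more to dispatch */
+                  break;
+          /* time_is_before_jiffies(end) is true once 'end' lies in the
+           * past, i.e. more than HZ jiffies were spent in the loop */
+          if (need_resched() || time_is_before_jiffies(end)) {
+                  blk_mq_delay_run_hw_queue(hctx, 0); /* requeue, yield */
+                  break;
+          }
+  } while (1);
+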
+[1]
+
+[ 609.691437] BUG: workqueue lockup - pool cpus=10 node=1 flags=0x0 nice=-20 stuck for 35s!
+[ 609.701820] Showing busy workqueues and worker pools:
+[ 609.707915] workqueue events: flags=0x0
+[ 609.712615] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
+[ 609.712626] pending: drm_fb_helper_damage_work [drm_kms_helper]
+[ 609.712687] workqueue events_freezable: flags=0x4
+[ 609.732943] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
+[ 609.732952] pending: pci_pme_list_scan
+[ 609.732968] workqueue events_power_efficient: flags=0x80
+[ 609.751947] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
+[ 609.751955] pending: neigh_managed_work
+[ 609.752018] workqueue kblockd: flags=0x18
+[ 609.769480] pwq 21: cpus=10 node=1 flags=0x0 nice=-20 active=3/256 refcnt=4
+[ 609.769488] in-flight: 1020:blk_mq_run_work_fn
+[ 609.769498] pending: blk_mq_timeout_work, blk_mq_run_work_fn
+[ 609.769744] pool 21: cpus=10 node=1 flags=0x0 nice=-20 hung=35s workers=2 idle: 67
+[ 639.899730] BUG: workqueue lockup - pool cpus=10 node=1 flags=0x0 nice=-20 stuck for 66s!
+[ 639.909513] Showing busy workqueues and worker pools:
+[ 639.915404] workqueue events: flags=0x0
+[ 639.920197] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
+[ 639.920215] pending: drm_fb_helper_damage_work [drm_kms_helper]
+[ 639.920365] workqueue kblockd: flags=0x18
+[ 639.939932] pwq 21: cpus=10 node=1 flags=0x0 nice=-20 active=3/256 refcnt=4
+[ 639.939942] in-flight: 1020:blk_mq_run_work_fn
+[ 639.939955] pending: blk_mq_timeout_work, blk_mq_run_work_fn
+[ 639.940212] pool 21: cpus=10 node=1 flags=0x0 nice=-20 hung=66s workers=2 idle: 67
+
+Fixes: 6e6fcbc27e778 ("blk-mq: support batching dispatch in case of io")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Cc: stable@vger.kernel.org # v5.10+
+Link: https://lore.kernel.org/linux-block/20220310091649.zypaem5lkyfadymg@shindev/
+Link: https://lore.kernel.org/r/20220318022641.133484-1-shinichiro.kawasaki@wdc.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-mq-sched.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/block/blk-mq-sched.c
++++ b/block/blk-mq-sched.c
+@@ -180,11 +180,18 @@ static int __blk_mq_do_dispatch_sched(st
+
+ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+ {
++ unsigned long end = jiffies + HZ;
+ int ret;
+
+ do {
+ ret = __blk_mq_do_dispatch_sched(hctx);
+- } while (ret == 1);
++ if (ret != 1)
++ break;
++ if (need_resched() || time_is_before_jiffies(end)) {
++ blk_mq_delay_run_hw_queue(hctx, 0);
++ break;
++ }
++ } while (1);
+
+ return ret;
+ }
--- /dev/null
+From d3e29967079c522ce1c5cab0e9fab2c280b977eb Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Mon, 7 Mar 2022 15:30:02 +0200
+Subject: btrfs: zoned: put block group after final usage
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit d3e29967079c522ce1c5cab0e9fab2c280b977eb upstream.
+
+It's counter-intuitive (and wrong) to put the block group _before_ its
+final usage in submit_eb_page(). Fix it by re-ordering the call to
+btrfs_put_block_group() after its final reference. Also fix a minor typo
+in 'implies'.
+
+Fixes: be1a1d7a5d24 ("btrfs: zoned: finish fully written block group")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4780,11 +4780,12 @@ static int submit_eb_page(struct page *p
+ return ret;
+ }
+ if (cache) {
+- /* Impiles write in zoned mode */
+- btrfs_put_block_group(cache);
+- /* Mark the last eb in a block group */
++ /*
++ * Implies write in zoned mode. Mark the last eb in a block group.
++ */
+ if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+ set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
++ btrfs_put_block_group(cache);
+ }
+ ret = write_one_eb(eb, wbc, epd);
+ free_extent_buffer(eb);
--- /dev/null
+From aea0b9f2486da8497f35c7114b764bf55e17c7ea Mon Sep 17 00:00:00 2001
+From: Christian Brauner <christian.brauner@ubuntu.com>
+Date: Mon, 11 Oct 2021 15:37:04 +0200
+Subject: landlock: Use square brackets around "landlock-ruleset"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian Brauner <christian.brauner@ubuntu.com>
+
+commit aea0b9f2486da8497f35c7114b764bf55e17c7ea upstream.
+
+Make the name of the anon inode fd "[landlock-ruleset]" instead of
+"landlock-ruleset". This is minor but most anon inode fds already
+carry square brackets around their name:
+
+ [eventfd]
+ [eventpoll]
+ [fanotify]
+ [fscontext]
+ [io_uring]
+ [pidfd]
+ [signalfd]
+ [timerfd]
+ [userfaultfd]
+
+For the sake of consistency, let's do the same for the landlock-ruleset anon
+inode fd that comes with landlock. We did the same in
+1cdc415f1083 ("uapi, fsopen: use square brackets around "fscontext" [ver #2]")
+for the new mount API.
+
+Cc: linux-security-module@vger.kernel.org
+Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
+Link: https://lore.kernel.org/r/20211011133704.1704369-1-brauner@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/landlock/syscalls.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/landlock/syscalls.c
++++ b/security/landlock/syscalls.c
+@@ -192,7 +192,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
+ return PTR_ERR(ruleset);
+
+ /* Creates anonymous FD referring to the ruleset. */
+- ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops,
++ ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
+ ruleset, O_RDWR | O_CLOEXEC);
+ if (ruleset_fd < 0)
+ landlock_put_ruleset(ruleset);
--- /dev/null
+From 60de2d2dc284e0dd1c2c897d08625bde24ef3454 Mon Sep 17 00:00:00 2001
+From: Pekka Pessi <ppessi@nvidia.com>
+Date: Wed, 2 Mar 2022 16:04:24 +0100
+Subject: mailbox: tegra-hsp: Flush whole channel
+
+From: Pekka Pessi <ppessi@nvidia.com>
+
+commit 60de2d2dc284e0dd1c2c897d08625bde24ef3454 upstream.
+
+The txdone handler can refill the mailbox. Keep polling the mailbox during the
+flush until all the messages have been delivered.
+
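+Sketch of the flush loop flow with this change applied (it mirrors the hunk
+below; the comments are explanatory, not part of the patch):
+
+  value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX);
+  if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) {
+          mbox_chan_txdone(chan, 0);      /* may queue the next message */
+
+          if (chan->active_req != NULL)   /* more to send: poll again */
+                  continue;
+
+          return 0;                       /* mailbox fully drained */
+  }
+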
+This fixes an issue with the Tegra Combined UART (TCU) where output can
+get truncated under high traffic load.
+
+Signed-off-by: Pekka Pessi <ppessi@nvidia.com>
+Tested-by: Jon Hunter <jonathanh@nvidia.com>
+Fixes: 91b1b1c3da8a ("mailbox: tegra-hsp: Add support for shared mailboxes")
+Cc: stable@vger.kernel.org
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mailbox/tegra-hsp.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/mailbox/tegra-hsp.c
++++ b/drivers/mailbox/tegra-hsp.c
+@@ -412,6 +412,11 @@ static int tegra_hsp_mailbox_flush(struc
+ value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX);
+ if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) {
+ mbox_chan_txdone(chan, 0);
++
++ /* Wait until channel is empty */
++ if (chan->active_req != NULL)
++ continue;
++
+ return 0;
+ }
+
--- /dev/null
+From 66b513b7c64a7290c1fbb88e657f7cece992e131 Mon Sep 17 00:00:00 2001
+From: Tom Rix <trix@redhat.com>
+Date: Wed, 28 Apr 2021 14:38:52 -0700
+Subject: samples/landlock: Fix path_list memory leak
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tom Rix <trix@redhat.com>
+
+commit 66b513b7c64a7290c1fbb88e657f7cece992e131 upstream.
+
+Clang static analysis reports this warning:
+
+sandboxer.c:134:8: warning: Potential leak of memory
+ pointed to by 'path_list'
+ ret = 0;
+ ^
+path_list is allocated in parse_path() but never freed.
+
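+For reference, a self-contained illustration of the goto-cleanup pattern the
+fix completes (names simplified from sandboxer.c; the allocations stand in
+for what parse_path() does):
+
+  #include <stdlib.h>
+  #include <string.h>
+
+  static int populate_example(const char *env)
+  {
+          int ret = 1;
+          char *env_path_name = strdup(env);      /* first allocation */
+          char **path_list = NULL;
+
+          if (!env_path_name)
+                  return 1;
+          path_list = malloc(sizeof(*path_list)); /* second allocation */
+          if (!path_list)
+                  goto out_free_name;
+          ret = 0;
+
+  out_free_name:
+          free(path_list);        /* the line this patch adds */
+          free(env_path_name);
+          return ret;
+  }
+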
+Signed-off-by: Tom Rix <trix@redhat.com>
+Link: https://lore.kernel.org/r/20210428213852.2874324-1-trix@redhat.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ samples/landlock/sandboxer.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/samples/landlock/sandboxer.c
++++ b/samples/landlock/sandboxer.c
+@@ -134,6 +134,7 @@ static int populate_ruleset(
+ ret = 0;
+
+ out_free_name:
++ free(path_list);
+ free(env_path_name);
+ return ret;
+ }
ext4-fix-fs-corruption-when-tring-to-remove-a-non-empty-directory-with-io-error.patch
ext4-make-mb_optimize_scan-option-work-with-set-unset-mount-cmd.patch
ext4-make-mb_optimize_scan-performance-mount-option-work-with-extents.patch
+samples-landlock-fix-path_list-memory-leak.patch
+landlock-use-square-brackets-around-landlock-ruleset.patch
+mailbox-tegra-hsp-flush-whole-channel.patch
+btrfs-zoned-put-block-group-after-final-usage.patch
+block-fix-rq-qos-breakage-from-skipping-rq_qos_done_bio.patch
+block-limit-request-dispatch-loop-duration.patch