--- /dev/null
+From 6b2b04590b51aa4cf395fcd185ce439cab5961dc Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 14 Mar 2022 14:30:11 -1000
+Subject: block: don't merge across cgroup boundaries if blkcg is enabled
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 6b2b04590b51aa4cf395fcd185ce439cab5961dc upstream.
+
+blk-iocost and iolatency are cgroup-aware rq-qos policies, but they didn't
+disable merges across different cgroups. This obviously can lead to
+accounting and control errors, but more importantly to priority inversions -
+e.g. an IO which belongs to a higher priority cgroup or IO class may end up
+getting throttled incorrectly because it gets merged with an IO issued from
+a low priority cgroup.
+
+Fix it by adding blk_cgroup_mergeable(), which is called from the merge
+paths and rejects cross-cgroup and cross-issue_as_root merges.
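+
+For reference, the helper's semantics boil down to the following (a
+condensed restatement of the blk-cgroup.h hunk below, with explanatory
+comments added; bio_issue_as_root_blkg() is true for REQ_META/REQ_SWAP
+bios, which are charged to the root cgroup):
+
+  static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
+  {
+          /* same blkcg_gq, i.e. same cgroup on the same request_queue */
+          return rq->bio->bi_blkg == bio->bi_blkg &&
+                  /* both or neither issued as the root blkg */
+                  bio_issue_as_root_blkg(rq->bio) ==
+                          bio_issue_as_root_blkg(bio);
+  }
+
+When CONFIG_BLK_CGROUP is disabled, the stub variant always returns true,
+so merging behavior is unchanged on !blkcg configurations.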
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: d70675121546 ("block: introduce blk-iolatency io controller")
+Cc: stable@vger.kernel.org # v4.19+
+Cc: Josef Bacik <jbacik@fb.com>
+Link: https://lore.kernel.org/r/Yi/eE/6zFNyWJ+qd@slm.duckdns.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-merge.c | 11 +++++++++++
+ include/linux/blk-cgroup.h | 17 +++++++++++++++++
+ 2 files changed, 28 insertions(+)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -9,6 +9,7 @@
+ #include <linux/blk-integrity.h>
+ #include <linux/scatterlist.h>
+ #include <linux/part_stat.h>
++#include <linux/blk-cgroup.h>
+
+ #include <trace/events/block.h>
+
+@@ -600,6 +601,9 @@ static inline unsigned int blk_rq_get_ma
+ static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
+ unsigned int nr_phys_segs)
+ {
++ if (!blk_cgroup_mergeable(req, bio))
++ goto no_merge;
++
+ if (blk_integrity_merge_bio(req->q, req, bio) == false)
+ goto no_merge;
+
+@@ -696,6 +700,9 @@ static int ll_merge_requests_fn(struct r
+ if (total_phys_segments > blk_rq_get_max_segments(req))
+ return 0;
+
++ if (!blk_cgroup_mergeable(req, next->bio))
++ return 0;
++
+ if (blk_integrity_merge_rq(q, req, next) == false)
+ return 0;
+
+@@ -904,6 +911,10 @@ bool blk_rq_merge_ok(struct request *rq,
+ if (bio_data_dir(bio) != rq_data_dir(rq))
+ return false;
+
++ /* don't merge across cgroup boundaries */
++ if (!blk_cgroup_mergeable(rq, bio))
++ return false;
++
+ /* only merge integrity protected bio into ditto rq */
+ if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
+ return false;
+--- a/include/linux/blk-cgroup.h
++++ b/include/linux/blk-cgroup.h
+@@ -24,6 +24,7 @@
+ #include <linux/atomic.h>
+ #include <linux/kthread.h>
+ #include <linux/fs.h>
++#include <linux/blk-mq.h>
+
+ /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
+@@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(str
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ }
+
++/**
++ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
++ * @rq: request to merge into
++ * @bio: bio to merge
++ *
++ * @bio and @rq should belong to the same cgroup and their issue_as_root should
++ * match. The latter is necessary as we don't want to throttle e.g. a metadata
++ * update because it happens to be next to a regular IO.
++ */
++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
++{
++ return rq->bio->bi_blkg == bio->bi_blkg &&
++ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
++}
++
+ void blk_cgroup_bio_start(struct bio *bio);
+ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+@@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg
+ static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
+ static inline void blkcg_bio_issue_init(struct bio *bio) { }
+ static inline void blk_cgroup_bio_start(struct bio *bio) { }
++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
+
+ #define blk_queue_for_each_rl(rl, q) \
+ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)