From 1bb26caf2ad1111e1220b9a1b690996397b0290b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Jul 2023 17:30:04 +0200 Subject: [PATCH] 6.1-stable patches added patches: blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch --- ...lush-stats-before-releasing-blkcg_gq.patch | 160 ++++++++++++++++++ ...-after-clearing-in-blkcg_reset_stats.patch | 53 ++++++ ...raise-vddcore-voltage-to-1.125-volts.patch | 46 +++++ queue-6.1/series | 3 + 4 files changed, 262 insertions(+) create mode 100644 queue-6.1/blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch create mode 100644 queue-6.1/blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch create mode 100644 queue-6.1/mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch diff --git a/queue-6.1/blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch b/queue-6.1/blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch new file mode 100644 index 00000000000..caba5c4b1ff --- /dev/null +++ b/queue-6.1/blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch @@ -0,0 +1,160 @@ +From 20cb1c2fb7568a6054c55defe044311397e01ddb Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Sat, 10 Jun 2023 07:42:49 +0800 +Subject: blk-cgroup: Flush stats before releasing blkcg_gq + +From: Ming Lei + +commit 20cb1c2fb7568a6054c55defe044311397e01ddb upstream. + +As noted by Michal, the blkg_iostat_set's in the lockless list hold +reference to blkg's to protect against their removal. Those blkg's +hold reference to blkcg. When a cgroup is being destroyed, +cgroup_rstat_flush() is only called at css_release_work_fn() which +is called when the blkcg reference count reaches 0. This circular +dependency will prevent blkcg and some blkgs from being freed after +they are made offline. 
+ +It is less a problem if the cgroup to be destroyed also has other +controllers like memory that will call cgroup_rstat_flush() which will +clean up the reference count. If block is the only controller that uses +rstat, these offline blkcg and blkgs may never be freed leaking more +and more memory over time. + +To prevent this potential memory leak: + +- flush blkcg per-cpu stats list in __blkg_release(), when no new stat +can be added + +- add global blkg_stat_lock for covering concurrent parent blkg stat +update + +- don't grab bio->bi_blkg reference when adding the stats into blkcg's +per-cpu stat list since all stats are guaranteed to be consumed before +releasing blkg instance, and grabbing blkg reference for stats was the +most fragile part of original patch + +Based on Waiman's patch: + +https://lore.kernel.org/linux-block/20221215033132.230023-3-longman@redhat.com/ + +Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()") +Cc: stable@vger.kernel.org +Reported-by: Jay Shin +Acked-by: Tejun Heo +Cc: Waiman Long +Cc: mkoutny@suse.com +Cc: Yosry Ahmed +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20230609234249.1412858-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-cgroup.c | 41 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 32 insertions(+), 9 deletions(-) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -35,6 +35,8 @@ + #include "blk-throttle.h" + #include "blk-rq-qos.h" + ++static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); ++ + /* + * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. 
+ * blkcg_pol_register_mutex nests outside of it and synchronizes entire +@@ -58,6 +60,8 @@ static LIST_HEAD(all_blkcgs); /* protec + bool blkcg_debug_stats = false; + static struct workqueue_struct *blkcg_punt_bio_wq; + ++static DEFINE_RAW_SPINLOCK(blkg_stat_lock); ++ + #define BLKG_DESTROY_BATCH_SIZE 64 + + /* +@@ -153,9 +157,20 @@ static void blkg_free(struct blkcg_gq *b + static void __blkg_release(struct rcu_head *rcu) + { + struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); ++ struct blkcg *blkcg = blkg->blkcg; ++ int cpu; + + WARN_ON(!bio_list_empty(&blkg->async_bios)); + ++ /* ++ * Flush all the non-empty percpu lockless lists before releasing ++ * us, given these stat belongs to us. ++ * ++ * blkg_stat_lock is for serializing blkg stat update ++ */ ++ for_each_possible_cpu(cpu) ++ __blkcg_rstat_flush(blkcg, cpu); ++ + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) +@@ -866,17 +881,12 @@ static void blkcg_iostat_update(struct b + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + } + +-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) ++static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) + { +- struct blkcg *blkcg = css_to_blkcg(css); + struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); + struct llist_node *lnode; + struct blkg_iostat_set *bisc, *next_bisc; + +- /* Root-level stats are sourced from system-wide IO stats */ +- if (!cgroup_parent(css->cgroup)) +- return; +- + rcu_read_lock(); + + lnode = llist_del_all(lhead); +@@ -884,6 +894,14 @@ static void blkcg_rstat_flush(struct cgr + goto out; + + /* ++ * For covering concurrent parent blkg update from blkg_release(). ++ * ++ * When flushing from cgroup, cgroup_rstat_lock is always held, so ++ * this lock won't cause contention most of time. ++ */ ++ raw_spin_lock(&blkg_stat_lock); ++ ++ /* + * Iterate only the iostat_cpu's queued in the lockless list. 
+ */ + llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { +@@ -906,13 +924,19 @@ static void blkcg_rstat_flush(struct cgr + if (parent && parent->parent) + blkcg_iostat_update(parent, &blkg->iostat.cur, + &blkg->iostat.last); +- percpu_ref_put(&blkg->refcnt); + } +- ++ raw_spin_unlock(&blkg_stat_lock); + out: + rcu_read_unlock(); + } + ++static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) ++{ ++ /* Root-level stats are sourced from system-wide IO stats */ ++ if (cgroup_parent(css->cgroup)) ++ __blkcg_rstat_flush(css_to_blkcg(css), cpu); ++} ++ + /* + * We source root cgroup stats from the system-wide stats to avoid + * tracking the same information twice and incurring overhead when no +@@ -2043,7 +2067,6 @@ void blk_cgroup_bio_start(struct bio *bi + + llist_add(&bis->lnode, lhead); + WRITE_ONCE(bis->lqueued, true); +- percpu_ref_get(&bis->blkg->refcnt); + } + + u64_stats_update_end_irqrestore(&bis->sync, flags); diff --git a/queue-6.1/blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch b/queue-6.1/blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch new file mode 100644 index 00000000000..ba9c0c8bd7c --- /dev/null +++ b/queue-6.1/blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch @@ -0,0 +1,53 @@ +From 3d2af77e31ade05ff7ccc3658c3635ec1bea0979 Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Tue, 6 Jun 2023 14:07:24 -0400 +Subject: blk-cgroup: Reinit blkg_iostat_set after clearing in blkcg_reset_stats() + +From: Waiman Long + +commit 3d2af77e31ade05ff7ccc3658c3635ec1bea0979 upstream. + +When blkg_alloc() is called to allocate a blkcg_gq structure +with the associated blkg_iostat_set's, there are 2 fields within +blkg_iostat_set that requires proper initialization - blkg & sync. 
+The former field was introduced by commit 3b8cc6298724 ("blk-cgroup: +Optimize blkcg_rstat_flush()") while the latter one was introduced by +commit f73316482977 ("blk-cgroup: reimplement basic IO stats using +cgroup rstat"). + +Unfortunately those fields in the blkg_iostat_set's are not properly +re-initialized when they are cleared in v1's blkcg_reset_stats(). This +can lead to a kernel panic due to NULL pointer access of the blkg +pointer. The missing initialization of sync is less problematic and +can be a problem in a debug kernel due to missing lockdep initialization. + +Fix these problems by re-initializing them after memory clearing. + +Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()") +Fixes: f73316482977 ("blk-cgroup: reimplement basic IO stats using cgroup rstat") +Signed-off-by: Waiman Long +Reviewed-by: Ming Lei +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20230606180724.2455066-1-longman@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-cgroup.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -544,8 +544,13 @@ static int blkcg_reset_stats(struct cgro + struct blkg_iostat_set *bis = + per_cpu_ptr(blkg->iostat_cpu, cpu); + memset(bis, 0, sizeof(*bis)); ++ ++ /* Re-initialize the cleared blkg_iostat_set */ ++ u64_stats_init(&bis->sync); ++ bis->blkg = blkg; + } + memset(&blkg->iostat, 0, sizeof(blkg->iostat)); ++ u64_stats_init(&blkg->iostat.sync); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; diff --git a/queue-6.1/mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch b/queue-6.1/mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch new file mode 100644 index 00000000000..017a4ce20a4 --- /dev/null +++ b/queue-6.1/mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch @@ -0,0 +1,46 @@ +From 944520f85d5b1fb2f9ea243be41f9c9af3d4cef3 Mon Sep 17 00:00:00 2001 +From: Paul Cercueil +Date: Thu, 
22 Jun 2023 19:59:34 +0200 +Subject: MIPS: DTS: CI20: Raise VDDCORE voltage to 1.125 volts + +From: Paul Cercueil + +commit 944520f85d5b1fb2f9ea243be41f9c9af3d4cef3 upstream. + +Commit 08384e80a70f ("MIPS: DTS: CI20: Fix ACT8600 regulator node +names") caused the VDDCORE power supply (regulated by the ACT8600's +DCDC1 output) to drop from a voltage of 1.2V configured by the +bootloader, to the 1.1V set in the Device Tree. + +According to the documentation, the VDDCORE supply should be between +0.99V and 1.21V; both values are therefore within the supported range. + +However, VDDCORE being 1.1V results in the CI20 being very unstable, +with corrupted memory, failures to boot, or reboots at random. The +reason might be succinct drops of the voltage below the minimum required. + +Raising the minimum voltage to 1.125 volts seems to be enough to address +this issue, while still keeping a relatively low core voltage which +helps for power consumption and thermals. + +Fixes: 08384e80a70f ("MIPS: DTS: CI20: Fix ACT8600 regulator node names") +Signed-off-by: Paul Cercueil +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/boot/dts/ingenic/ci20.dts | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/mips/boot/dts/ingenic/ci20.dts ++++ b/arch/mips/boot/dts/ingenic/ci20.dts +@@ -240,8 +240,8 @@ + + regulators { + vddcore: DCDC1 { +- regulator-min-microvolt = <1100000>; +- regulator-max-microvolt = <1100000>; ++ regulator-min-microvolt = <1125000>; ++ regulator-max-microvolt = <1125000>; + regulator-always-on; + }; + vddmem: DCDC2 { diff --git a/queue-6.1/series b/queue-6.1/series index 044cd1ef274..6b739e66c27 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -594,3 +594,6 @@ drm-i915-tc-fix-system-resume-mst-mode-restore-for-dp-alt-sinks.patch mtd-parsers-refer-to-arch_bcmbca-instead-of-arch_bcm4908.patch ovl-fix-null-pointer-dereference-in-ovl_permission.patch 
netfilter-nf_tables-unbind-non-anonymous-set-if-rule-construction-fails.patch +blk-cgroup-reinit-blkg_iostat_set-after-clearing-in-blkcg_reset_stats.patch +blk-cgroup-flush-stats-before-releasing-blkcg_gq.patch +mips-dts-ci20-raise-vddcore-voltage-to-1.125-volts.patch -- 2.47.3