From: Yu Kuai Date: Mon, 8 Jun 2026 03:42:44 +0000 (+0800) Subject: blk-cgroup: don't nest queue_lock under rcu in blkcg_print_blkgs() X-Git-Tag: v7.2-rc1~31^2~7 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=56cc24f59c145ce6938959f792df04b8a4f5a4d8;p=thirdparty%2Flinux.git blk-cgroup: don't nest queue_lock under rcu in blkcg_print_blkgs() With previous modification to delay freeing policy data after an RCU grace period, prfill() can run under RCU instead of taking queue_lock. However, policy teardown can still clear blkg->pd[plid] after blkcg_print_blkgs() observes the policy enabled bit. Load policy data once with READ_ONCE() and skip the blkg if teardown already cleared it. Do the same in recursive stat walks for descendant blkgs. Remove the stale BFQ debug queue_lock assertion because blkcg_print_blkgs() no longer calls prfill() with queue_lock held. This also lets ioc_qos_prfill() and ioc_cost_model_prfill() use IRQ-safe ioc->lock locking without re-enabling IRQs while queue_lock is still held. Signed-off-by: Yu Kuai Link: https://patch.msgid.link/db7633d5e263dd1c2bf9b901762545a84b7d714e.1780621988.git.yukuai@fygo.io Signed-off-by: Jens Axboe --- diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 0d3e32d246a24..e82ff03bda02e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1163,16 +1163,18 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct cgroup_subsys_state *pos_css; u64 sum = 0; - lockdep_assert_held(&blkg->q->queue_lock); - rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { + struct blkg_policy_data *pd; struct bfq_stat *stat; if (!pos_blkg->online) continue; - stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off; + pd = blkg_to_pd(pos_blkg, &blkcg_policy_bfq); + if (!pd) + continue; + stat = (void *)pd + off; sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); } rcu_read_unlock(); diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c index a55fb0c535582..aae910713814f 100644 --- a/block/blk-cgroup-rwstat.c +++ b/block/blk-cgroup-rwstat.c @@ -101,24 +101,27 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, struct cgroup_subsys_state *pos_css; unsigned int i; - lockdep_assert_held(&blkg->q->queue_lock); + WARN_ON_ONCE(!rcu_read_lock_held()); memset(sum, 0, sizeof(*sum)); - rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct blkg_rwstat *rwstat; if (!pos_blkg->online) continue; - if (pol) - rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off; - else + if (pol) { + struct blkg_policy_data *pd = blkg_to_pd(pos_blkg, pol); + + if (!pd) + continue; + rwstat = (void *)pd + off; + } else { rwstat = (void *)pos_blkg + off; + } for (i = 0; i < BLKG_RWSTAT_NR; i++) sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i); } - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c991c263cb5ab..d6355338f2900 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -699,9 +699,9 @@ const char *blkg_dev_name(struct blkcg_gq *blkg) * * This function invokes @prfill on each blkg of @blkcg if pd for the * policy specified by @pol exists. @prfill is invoked with @sf, the - * policy data and @data and the matching queue lock held. If @show_total - * is %true, the sum of the return values from @prfill is printed with - * "Total" label at the end. + * policy data and @data under RCU read lock. If @show_total is %true, the + * sum of the return values from @prfill is printed with "Total" label at the + * end. * * This is to be used to construct print functions for * cftype->read_seq_string method. @@ -717,10 +717,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->q->queue_lock); - if (blkcg_policy_enabled(blkg->q, pol)) - total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkg->q->queue_lock); + struct blkg_policy_data *pd; + + if (!blkcg_policy_enabled(blkg->q, pol)) + continue; + + pd = blkg_to_pd(blkg, pol); + if (pd) + total += prfill(sf, pd, data); } rcu_read_unlock(); @@ -1604,7 +1608,7 @@ retry: pd->blkg = blkg; pd->plid = pol->plid; - blkg->pd[pol->plid] = pd; + WRITE_ONCE(blkg->pd[pol->plid], pd); if (pol->pd_init_fn) pol->pd_init_fn(pd); @@ -1643,7 +1647,7 @@ enomem: pol->pd_offline_fn(pd); pd->online = false; pol->pd_free_fn(pd); - blkg->pd[pol->plid] = NULL; + WRITE_ONCE(blkg->pd[pol->plid], NULL); } spin_unlock(&blkcg->lock); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index cc603ded6ded4..615390f751aa2 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -284,9 +284,9 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, * Return pointer to private data associated with the @blkg-@pol pair. */ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) + const struct blkcg_policy *pol) { - return blkg ? blkg->pd[pol->plid] : NULL; + return blkg ? READ_ONCE(blkg->pd[pol->plid]) : NULL; } static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, @@ -493,7 +493,7 @@ static inline void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } + const struct blkcg_policy *pol) { return NULL; } static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 27d2dcaa65f02..8b2aeba2e1e39 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3221,7 +3221,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd, if (!dname) return 0; - spin_lock(&ioc->lock); + spin_lock_irq(&ioc->lock); seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n", dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto", ioc->params.qos[QOS_RPPM] / 10000, @@ -3234,7 +3234,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd, ioc->params.qos[QOS_MIN] % 10000 / 100, ioc->params.qos[QOS_MAX] / 10000, ioc->params.qos[QOS_MAX] % 10000 / 100); - spin_unlock(&ioc->lock); + spin_unlock_irq(&ioc->lock); return 0; } @@ -3430,14 +3430,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf, if (!dname) return 0; - spin_lock(&ioc->lock); + spin_lock_irq(&ioc->lock); seq_printf(sf, "%s ctrl=%s model=linear " "rbps=%llu rseqiops=%llu rrandiops=%llu " "wbps=%llu wseqiops=%llu wrandiops=%llu\n", dname, ioc->user_cost_model ? "user" : "auto", u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); - spin_unlock(&ioc->lock); + spin_unlock_irq(&ioc->lock); return 0; }