]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
blk-cgroup: don't nest queue_lock under rcu in blkcg_print_blkgs()
authorYu Kuai <yukuai@fygo.io>
Mon, 8 Jun 2026 03:42:44 +0000 (11:42 +0800)
committerJens Axboe <axboe@kernel.dk>
Wed, 24 Jun 2026 12:42:19 +0000 (06:42 -0600)
With previous modification to delay freeing policy data after an RCU grace
period, prfill() can run under RCU instead of taking queue_lock. However,
policy teardown can still clear blkg->pd[plid] after blkcg_print_blkgs()
observes the policy enabled bit.

Load policy data once with READ_ONCE() and skip the blkg if teardown
already cleared it. Do the same in recursive stat walks for descendant
blkgs. Remove the stale BFQ debug queue_lock assertion because
blkcg_print_blkgs() no longer calls prfill() with queue_lock held. This
also lets ioc_qos_prfill() and ioc_cost_model_prfill() use IRQ-safe
ioc->lock locking without re-enabling IRQs while queue_lock is still held.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
Link: https://patch.msgid.link/db7633d5e263dd1c2bf9b901762545a84b7d714e.1780621988.git.yukuai@fygo.io
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/bfq-cgroup.c
block/blk-cgroup-rwstat.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-iocost.c

index 0d3e32d246a24e33ded7185724be2eae1224b388..e82ff03bda02e1e1ea67ff476f7fed1884390c10 100644 (file)
@@ -1163,16 +1163,18 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
        struct cgroup_subsys_state *pos_css;
        u64 sum = 0;
 
-       lockdep_assert_held(&blkg->q->queue_lock);
-
        rcu_read_lock();
        blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+               struct blkg_policy_data *pd;
                struct bfq_stat *stat;
 
                if (!pos_blkg->online)
                        continue;
 
-               stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
+               pd = blkg_to_pd(pos_blkg, &blkcg_policy_bfq);
+               if (!pd)
+                       continue;
+               stat = (void *)pd + off;
                sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
        }
        rcu_read_unlock();
index a55fb0c535582ba182aff6e05a269aa6f9bd9617..aae910713814f34bdf9d34881e5009cd074f6e56 100644 (file)
@@ -101,24 +101,27 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
        struct cgroup_subsys_state *pos_css;
        unsigned int i;
 
-       lockdep_assert_held(&blkg->q->queue_lock);
+       WARN_ON_ONCE(!rcu_read_lock_held());
 
        memset(sum, 0, sizeof(*sum));
-       rcu_read_lock();
        blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
                struct blkg_rwstat *rwstat;
 
                if (!pos_blkg->online)
                        continue;
 
-               if (pol)
-                       rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
-               else
+               if (pol) {
+                       struct blkg_policy_data *pd = blkg_to_pd(pos_blkg, pol);
+
+                       if (!pd)
+                               continue;
+                       rwstat = (void *)pd + off;
+               } else {
                        rwstat = (void *)pos_blkg + off;
+               }
 
                for (i = 0; i < BLKG_RWSTAT_NR; i++)
                        sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i);
        }
-       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
index c991c263cb5ab9981f464fb19edc81ce21c4028e..d6355338f2900b3d08ba430ca6864d143cdf9762 100644 (file)
@@ -699,9 +699,9 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
  *
  * This function invokes @prfill on each blkg of @blkcg if pd for the
  * policy specified by @pol exists.  @prfill is invoked with @sf, the
- * policy data and @data and the matching queue lock held.  If @show_total
- * is %true, the sum of the return values from @prfill is printed with
- * "Total" label at the end.
+ * policy data and @data under RCU read lock.  If @show_total is %true, the
+ * sum of the return values from @prfill is printed with "Total" label at the
+ * end.
  *
  * This is to be used to construct print functions for
  * cftype->read_seq_string method.
@@ -717,10 +717,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 
        rcu_read_lock();
        hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-               spin_lock_irq(&blkg->q->queue_lock);
-               if (blkcg_policy_enabled(blkg->q, pol))
-                       total += prfill(sf, blkg->pd[pol->plid], data);
-               spin_unlock_irq(&blkg->q->queue_lock);
+               struct blkg_policy_data *pd;
+
+               if (!blkcg_policy_enabled(blkg->q, pol))
+                       continue;
+
+               pd = blkg_to_pd(blkg, pol);
+               if (pd)
+                       total += prfill(sf, pd, data);
        }
        rcu_read_unlock();
 
@@ -1604,7 +1608,7 @@ retry:
 
                pd->blkg = blkg;
                pd->plid = pol->plid;
-               blkg->pd[pol->plid] = pd;
+               WRITE_ONCE(blkg->pd[pol->plid], pd);
 
                if (pol->pd_init_fn)
                        pol->pd_init_fn(pd);
@@ -1643,7 +1647,7 @@ enomem:
                                pol->pd_offline_fn(pd);
                        pd->online = false;
                        pol->pd_free_fn(pd);
-                       blkg->pd[pol->plid] = NULL;
+                       WRITE_ONCE(blkg->pd[pol->plid], NULL);
                }
                spin_unlock(&blkcg->lock);
        }
index cc603ded6ded43301c8e4b53f0b8bc28f6cabcda..615390f751aa2545c0ff070eaf3a54bd7b3e0b37 100644 (file)
@@ -284,9 +284,9 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
  * Return pointer to private data associated with the @blkg-@pol pair.
  */
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
-                                                 struct blkcg_policy *pol)
+                                                 const struct blkcg_policy *pol)
 {
-       return blkg ? blkg->pd[pol->plid] : NULL;
+       return blkg ? READ_ONCE(blkg->pd[pol->plid]) : NULL;
 }
 
 static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
@@ -493,7 +493,7 @@ static inline void blkcg_deactivate_policy(struct gendisk *disk,
                                           const struct blkcg_policy *pol) { }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
-                                                 struct blkcg_policy *pol) { return NULL; }
+                                                 const struct blkcg_policy *pol) { return NULL; }
 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
index 27d2dcaa65f02694c7a990cfb53b02f8343a324c..8b2aeba2e1e39c1d652b7e8ce6fb27624dc5d7bb 100644 (file)
@@ -3221,7 +3221,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
        if (!dname)
                return 0;
 
-       spin_lock(&ioc->lock);
+       spin_lock_irq(&ioc->lock);
        seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
                   dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
                   ioc->params.qos[QOS_RPPM] / 10000,
@@ -3234,7 +3234,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
                   ioc->params.qos[QOS_MIN] % 10000 / 100,
                   ioc->params.qos[QOS_MAX] / 10000,
                   ioc->params.qos[QOS_MAX] % 10000 / 100);
-       spin_unlock(&ioc->lock);
+       spin_unlock_irq(&ioc->lock);
        return 0;
 }
 
@@ -3430,14 +3430,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
        if (!dname)
                return 0;
 
-       spin_lock(&ioc->lock);
+       spin_lock_irq(&ioc->lock);
        seq_printf(sf, "%s ctrl=%s model=linear "
                   "rbps=%llu rseqiops=%llu rrandiops=%llu "
                   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
                   dname, ioc->user_cost_model ? "user" : "auto",
                   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
                   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
-       spin_unlock(&ioc->lock);
+       spin_unlock_irq(&ioc->lock);
        return 0;
 }