]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
blk-cgroup: defer blkcg css_put until blkg is unlinked from queue
authorZizhi Wo <wozizhi@huawei.com>
Tue, 16 Jun 2026 01:17:46 +0000 (09:17 +0800)
committerJens Axboe <axboe@kernel.dk>
Mon, 22 Jun 2026 21:59:53 +0000 (15:59 -0600)
[BUG]
Our fuzz testing triggered a blkcg use-after-free issue:

  BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x75/0xe0
  Call Trace:
  ...
  blkcg_deactivate_policy+0x244/0x4d0
  ioc_rqos_exit+0x44/0xe0
  rq_qos_exit+0xba/0x120
  __del_gendisk+0x50b/0x800
  del_gendisk+0xff/0x190
  ...

[CAUSE]
process1 process2
cgroup_rmdir
...
  css_killed_work_fn
    offline_css
    ...
      blkcg_destroy_blkgs
      ...
        __blkg_release
  css_put(&blkg->blkcg->css)
          blkg_free
    INIT_WORK(xxx, blkg_free_workfn)
    schedule_work
    css_put
    ...
      blkcg_css_free
        kfree(blkcg)--------blkcg has been freed!!!
====================================schedule_work
              blkg_free_workfn
__del_gendisk
  rq_qos_exit
    ioc_rqos_exit
      blkcg_deactivate_policy
        mutex_lock(&q->blkcg_mutex)
spin_lock_irq(&q->queue_lock)
        list_for_each_entry(blkg, xxx)
  blkcg = blkg->blkcg
  spin_lock(&blkcg->lock)-------UAF!!!
        mutex_lock(&q->blkcg_mutex)
        spin_lock_irq(&q->queue_lock)
        /* Only then is the blkg removed from the list */
        list_del_init(&blkg->q_node)

As a result, a blkg can still be reachable through q->blkg_list while
its ->blkcg has already been freed.

[Fix]
Fix this by deferring the blkcg css_put() until after the blkg has been
unlinked from q->blkg_list in blkg_free_workfn(). This ensures that the
blkcg outlives every blkg still reachable through q->blkg_list, so any
iterator holding q->queue_lock is guaranteed to observe a valid
blkg->blkcg.

While at it, move css_tryget_online() from blkg_create() into blkg_alloc()
so that the css reference is owned by the alloc/free pair rather than
straddling layers:
blkg_alloc()  <-> blkg_free()
blkg_create() <-> blkg_destroy()

Fixes: f1c006f1c685 ("blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
Suggested-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Yu Kuai <yukuai@fygo.io>
Reviewed-by: Tang Yizhou <yizhou.tang@shopee.com>
Link: https://patch.msgid.link/20260616011746.2451461-1-wozizhi@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-cgroup.c

index 342816cbbd1b9aae2e9f797891e490abccf6572e..ee076ab795d33276af385f07af48194be1abed57 100644 (file)
@@ -136,6 +136,11 @@ static void blkg_free_workfn(struct work_struct *work)
        spin_unlock_irq(&q->queue_lock);
        mutex_unlock(&q->blkcg_mutex);
 
+       /*
+        * Release blkcg css ref only after blkg is removed from q->blkg_list,
+        * so concurrent iterators won't see a blkg with a freed blkcg.
+        */
+       css_put(&blkg->blkcg->css);
        blk_put_queue(q);
        free_percpu(blkg->iostat_cpu);
        percpu_ref_exit(&blkg->refcnt);
@@ -169,8 +174,6 @@ static void __blkg_release(struct rcu_head *rcu)
        WARN_ON(!bio_list_empty(&blkg->async_bios));
 #endif
 
-       /* release the blkcg and parent blkg refs this blkg has been holding */
-       css_put(&blkg->blkcg->css);
        blkg_free(blkg);
 }
 
@@ -314,6 +317,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
                goto out_exit_refcnt;
        if (!blk_get_queue(disk->queue))
                goto out_free_iostat;
+       /* blkg holds a reference to blkcg */
+       if (!css_tryget_online(&blkcg->css))
+               goto out_put_queue;
 
        blkg->q = disk->queue;
        INIT_LIST_HEAD(&blkg->q_node);
@@ -354,6 +360,8 @@ out_free_pds:
        while (--i >= 0)
                if (blkg->pd[i])
                        blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
+       css_put(&blkcg->css);
+out_put_queue:
        blk_put_queue(disk->queue);
 out_free_iostat:
        free_percpu(blkg->iostat_cpu);
@@ -382,18 +390,12 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
                goto err_free_blkg;
        }
 
-       /* blkg holds a reference to blkcg */
-       if (!css_tryget_online(&blkcg->css)) {
-               ret = -ENODEV;
-               goto err_free_blkg;
-       }
-
        /* allocate */
        if (!new_blkg) {
                new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT);
                if (unlikely(!new_blkg)) {
                        ret = -ENOMEM;
-                       goto err_put_css;
+                       goto err_free_blkg;
                }
        }
        blkg = new_blkg;
@@ -403,7 +405,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
                blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue);
                if (WARN_ON_ONCE(!blkg->parent)) {
                        ret = -ENODEV;
-                       goto err_put_css;
+                       goto err_free_blkg;
                }
                blkg_get(blkg->parent);
        }
@@ -443,8 +445,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
        blkg_put(blkg);
        return ERR_PTR(ret);
 
-err_put_css:
-       css_put(&blkcg->css);
 err_free_blkg:
        if (new_blkg)
                blkg_free(new_blkg);