]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
block, bfq: protect async queue reset with blkcg locks
authorCen Zhang <zzzccc427@gmail.com>
Sun, 21 Jun 2026 13:59:30 +0000 (21:59 +0800)
committerJens Axboe <axboe@kernel.dk>
Mon, 22 Jun 2026 21:57:31 +0000 (15:57 -0600)
Writing 0 to BFQ's low_latency attribute ends weight raising for active,
idle and async queues. The async cgroup path walks q->blkg_list, converts
each blkg to BFQ policy data and then reads bfqg->async_bfqq and
bfqg->async_idle_bfqq.

That walk was protected only by bfqd->lock. blkcg release work is
serialized by q->blkcg_mutex and q->queue_lock instead, and
blkg_free_workfn() can call BFQ's pd_free_fn before it removes
blkg->q_node from q->blkg_list. A low_latency reset can therefore still
find the blkg on the queue list after the BFQ policy data has been freed.

The buggy scenario involves two paths, with each column showing the order
within that path:

BFQ low_latency reset:              blkcg blkg release work:
1. bfq_low_latency_store()          1. blkg_free_workfn() takes
   calls bfq_end_wr().                 q->blkcg_mutex.
2. bfq_end_wr_async() walks         2. BFQ pd_free_fn drops the
   q->blkg_list.                       final bfq_group reference.
3. blkg_to_bfqg() returns           3. blkg->q_node remains on
   the stale policy data.              q->blkg_list until list_del_init().
4. bfq_end_wr_async_queues()
   reads async queue fields.

Fix this by taking q->blkcg_mutex and q->queue_lock around the
q->blkg_list walk, then taking bfqd->lock before touching BFQ async
queues. The mutex serializes against policy-data free and queue_lock
stabilizes the list. Move the async reset out of bfq_end_wr()'s existing
bfqd->lock critical section so the lock order matches blkcg policy
callbacks.

Validation reproduced this kernel report:
BUG: KASAN: slab-use-after-free in bfq_end_wr_async_queues+0x246/0x340

Call Trace:
 <TASK>
 dump_stack_lvl+0x66/0xa0
 print_report+0xce/0x630
 ? bfq_end_wr_async_queues+0x246/0x340
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? __virt_addr_valid+0x20d/0x410
 ? bfq_end_wr_async_queues+0x246/0x340
 kasan_report+0xe0/0x110
 ? bfq_end_wr_async_queues+0x246/0x340
 bfq_end_wr_async_queues+0x246/0x340
 bfq_end_wr_async+0xba/0x180
 bfq_low_latency_store+0x4e5/0x690
 ? 0xffffffffc02150da
 ? __pfx_bfq_low_latency_store+0x10/0x10
 ? __pfx_bfq_low_latency_store+0x10/0x10
 elv_attr_store+0xc4/0x110
 kernfs_fop_write_iter+0x2f5/0x4a0
 vfs_write+0x604/0x11f0
 ? __pfx_locks_remove_posix+0x10/0x10
 ? __pfx_vfs_write+0x10/0x10
 ksys_write+0xf9/0x1d0
 ? __pfx_ksys_write+0x10/0x10
 do_syscall_64+0x115/0x6a0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Allocated by task 544:
 kasan_save_stack+0x33/0x60
 kasan_save_track+0x14/0x30
 __kasan_kmalloc+0xaa/0xb0
 bfq_pd_alloc+0xc0/0x1b0
 blkg_alloc+0x346/0x960
 blkg_create+0x8c2/0x10d0
 bio_associate_blkg_from_css+0x9f3/0xfa0
 bio_associate_blkg+0xd9/0x200
 bio_init+0x303/0x640
 __blkdev_direct_IO_simple+0x56b/0x8a0
 blkdev_direct_IO+0x8e7/0x2580
 blkdev_read_iter+0x205/0x400
 vfs_read+0x7b0/0xda0
 ksys_read+0xf9/0x1d0
 do_syscall_64+0x115/0x6a0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 465:
 kasan_save_stack+0x33/0x60
 kasan_save_track+0x14/0x30
 kasan_save_free_info+0x3b/0x60
 __kasan_slab_free+0x5f/0x80
 kfree+0x307/0x580
 blkg_free_workfn+0xef/0x460
 process_one_work+0x8d0/0x1870
 worker_thread+0x575/0xf80
 kthread+0x2e7/0x3c0
 ret_from_fork+0x576/0x810
 ret_from_fork_asm+0x1a/0x30

Fixes: 44e44a1b329e ("block, bfq: improve responsiveness")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
Reviewed-by: Tao Cui <cuitao@kylinos.cn>
Link: https://patch.msgid.link/20260621135930.2657810-1-zzzccc427@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/bfq-cgroup.c
block/bfq-iosched.c

index 0bd0332b3d786dad533406e74b42499634a83c5d..d8fdace464b450040416c9a2566eca0b9235d5b3 100644 (file)
@@ -936,14 +936,23 @@ put_async_queues:
 
 void bfq_end_wr_async(struct bfq_data *bfqd)
 {
+       struct request_queue *q = bfqd->queue;
        struct blkcg_gq *blkg;
 
-       list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+       mutex_lock(&q->blkcg_mutex);
+       spin_lock_irq(&q->queue_lock);
+       spin_lock(&bfqd->lock);
+
+       list_for_each_entry(blkg, &q->blkg_list, q_node) {
                struct bfq_group *bfqg = blkg_to_bfqg(blkg);
 
                bfq_end_wr_async_queues(bfqd, bfqg);
        }
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+
+       spin_unlock(&bfqd->lock);
+       spin_unlock_irq(&q->queue_lock);
+       mutex_unlock(&q->blkcg_mutex);
 }
 
 static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
@@ -1416,7 +1425,9 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
 
 void bfq_end_wr_async(struct bfq_data *bfqd)
 {
+       spin_lock_irq(&bfqd->lock);
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+       spin_unlock_irq(&bfqd->lock);
 }
 
 struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
index 141c602d5e8580a5d0c62bfc9f55c0eaff9a0c83..eec9be62061bdd8737c91bd715c5be5169df50a3 100644 (file)
@@ -2653,9 +2653,10 @@ static void bfq_end_wr(struct bfq_data *bfqd)
        }
        list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
                bfq_bfqq_end_wr(bfqq);
-       bfq_end_wr_async(bfqd);
 
        spin_unlock_irq(&bfqd->lock);
+
+       bfq_end_wr_async(bfqd);
 }
 
 static sector_t bfq_io_struct_pos(void *io_struct, bool request)