git.ipfire.org Git - thirdparty/linux.git/commitdiff
block: Remove queue freezing from several sysfs store callbacks
author: Bart Van Assche <bvanassche@acm.org>
Fri, 14 Nov 2025 21:04:07 +0000 (13:04 -0800)
committer: Jens Axboe <axboe@kernel.dk>
Tue, 18 Nov 2025 22:00:11 +0000 (15:00 -0700)
Freezing the request queue from inside sysfs store callbacks may cause a
deadlock in combination with the dm-multipath driver and the
queue_if_no_path option. Additionally, freezing the request queue slows
down system boot on systems where sysfs attributes are set synchronously.

Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue()
calls from the store callbacks that do not strictly need these calls.
Add the __data_racy annotation to request_queue.rq_timeout to suppress
KCSAN data race reports about the rq_timeout reads.

This patch may cause a small delay in applying the new settings.

For all the attributes affected by this patch, I/O will complete
correctly whether the old or the new value of the attribute is used.

This patch affects the following sysfs attributes:
* io_poll_delay
* io_timeout
* nomerges
* read_ahead_kb
* rq_affinity

Here is an example of a deadlock triggered by running test srp/002
if this patch is not applied:

task:multipathd
Call Trace:
 <TASK>
 __schedule+0x8c1/0x1bf0
 schedule+0xdd/0x270
 schedule_preempt_disabled+0x1c/0x30
 __mutex_lock+0xb89/0x1650
 mutex_lock_nested+0x1f/0x30
 dm_table_set_restrictions+0x823/0xdf0
 __bind+0x166/0x590
 dm_swap_table+0x2a7/0x490
 do_resume+0x1b1/0x610
 dev_suspend+0x55/0x1a0
 ctl_ioctl+0x3a5/0x7e0
 dm_ctl_ioctl+0x12/0x20
 __x64_sys_ioctl+0x127/0x1a0
 x64_sys_call+0xe2b/0x17d0
 do_syscall_64+0x96/0x3a0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
 </TASK>
task:(udev-worker)
Call Trace:
 <TASK>
 __schedule+0x8c1/0x1bf0
 schedule+0xdd/0x270
 blk_mq_freeze_queue_wait+0xf2/0x140
 blk_mq_freeze_queue_nomemsave+0x23/0x30
 queue_ra_store+0x14e/0x290
 queue_attr_store+0x23e/0x2c0
 sysfs_kf_write+0xde/0x140
 kernfs_fop_write_iter+0x3b2/0x630
 vfs_write+0x4fd/0x1390
 ksys_write+0xfd/0x230
 __x64_sys_write+0x76/0xc0
 x64_sys_call+0x276/0x17d0
 do_syscall_64+0x96/0x3a0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
 </TASK>

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Martin Wilck <mwilck@suse.com>
Cc: Benjamin Marzinski <bmarzins@redhat.com>
Cc: stable@vger.kernel.org
Fixes: af2814149883 ("block: freeze the queue in queue_attr_store")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-sysfs.c
include/linux/blkdev.h

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 76c47fe9b8d68c971f015b4c62a7b84a4cf6b22e..8684c57498cc1d98e625460642ed06fb8f5d5089 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
 {
        unsigned long ra_kb;
        ssize_t ret;
-       unsigned int memflags;
        struct request_queue *q = disk->queue;
 
        ret = queue_var_store(&ra_kb, page, count);
        if (ret < 0)
                return ret;
        /*
-        * ->ra_pages is protected by ->limits_lock because it is usually
-        * calculated from the queue limits by queue_limits_commit_update.
+        * The ->ra_pages change below is protected by ->limits_lock because it
+        * is usually calculated from the queue limits by
+        * queue_limits_commit_update().
+        *
+        * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
+        * Use WRITE_ONCE() to write bdi->ra_pages once.
         */
        mutex_lock(&q->limits_lock);
-       memflags = blk_mq_freeze_queue(q);
-       disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+       WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
        mutex_unlock(&q->limits_lock);
-       blk_mq_unfreeze_queue(q, memflags);
 
        return ret;
 }
@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
                                    size_t count)
 {
        unsigned long nm;
-       unsigned int memflags;
        struct request_queue *q = disk->queue;
        ssize_t ret = queue_var_store(&nm, page, count);
 
        if (ret < 0)
                return ret;
 
-       memflags = blk_mq_freeze_queue(q);
        blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
        blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
        if (nm == 2)
                blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        else if (nm)
                blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-       blk_mq_unfreeze_queue(q, memflags);
 
        return ret;
 }
@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
 #ifdef CONFIG_SMP
        struct request_queue *q = disk->queue;
        unsigned long val;
-       unsigned int memflags;
 
        ret = queue_var_store(&val, page, count);
        if (ret < 0)
@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
         * are accessed individually using atomic test_bit operation. So we
         * don't grab any lock while updating these flags.
         */
-       memflags = blk_mq_freeze_queue(q);
        if (val == 2) {
                blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
                blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
                blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
                blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
        }
-       blk_mq_unfreeze_queue(q, memflags);
 #endif
        return ret;
 }
@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
 static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
                                size_t count)
 {
-       unsigned int memflags;
        ssize_t ret = count;
        struct request_queue *q = disk->queue;
 
-       memflags = blk_mq_freeze_queue(q);
        if (!(q->limits.features & BLK_FEAT_POLL)) {
                ret = -EINVAL;
                goto out;
@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
        pr_info_ratelimited("writes to the poll attribute are ignored.\n");
        pr_info_ratelimited("please use driver specific parameters instead.\n");
 out:
-       blk_mq_unfreeze_queue(q, memflags);
        return ret;
 }
 
@@ -472,7 +464,7 @@ static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
                                  size_t count)
 {
-       unsigned int val, memflags;
+       unsigned int val;
        int err;
        struct request_queue *q = disk->queue;
 
@@ -480,9 +472,7 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
        if (err || val == 0)
                return -EINVAL;
 
-       memflags = blk_mq_freeze_queue(q);
        blk_queue_rq_timeout(q, msecs_to_jiffies(val));
-       blk_mq_unfreeze_queue(q, memflags);
 
        return count;
 }
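diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2fff8a80dbd25beb17ba1827238826568d8b0940..cb4ba09959ee4d78d60cb5ec762215b1d1bf43d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h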
@@ -495,7 +495,7 @@ struct request_queue {
         */
        unsigned long           queue_flags;
 
-       unsigned int            rq_timeout;
+       unsigned int __data_racy rq_timeout;
 
        unsigned int            queue_depth;