From: Abd-Alrhman Masalkhi Date: Fri, 1 May 2026 11:46:49 +0000 (+0200) Subject: md/raid1,raid10: fix deadlock in read error recovery path X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7b15c24f805339a585cfe7d72f446b7e88b9bcc0;p=thirdparty%2Flinux.git md/raid1,raid10: fix deadlock in read error recovery path raid1d and raid10d may resubmit a split md cloned bio while handling a read error. In this case, resubmitting the bio can lead to a deadlock if the array is suspended before md_handle_request() acquires an active_io reference via percpu_ref_tryget_live(). Since the cloned bio already holds an active_io reference, trying to acquire another reference via percpu_ref_tryget_live() can lead to a deadlock while the array is suspended. Fix this by using percpu_ref_get() for md cloned bios. Fixes: bb2a9acefaf9 ("md/raid1: switch to use md_account_bio() for io accounting") Fixes: 820455238366 ("md/raid10: switch to use md_account_bio() for io accounting") Signed-off-by: Abd-Alrhman Masalkhi Reviewed-by: Xiao Ni Reviewed-by: Yu Kuai Link: https://patch.msgid.link/20260501114652.590037-2-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- diff --git a/drivers/md/md.c b/drivers/md/md.c index 6cb2c452f963c..096bb64e87bd5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -395,17 +395,24 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio) bool md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: - if (is_suspended(mddev, bio)) { - /* Bail out if REQ_NOWAIT is set for the bio */ - if (bio->bi_opf & REQ_NOWAIT) { - bio_wouldblock_error(bio); - return true; + if (unlikely(md_cloned_bio(mddev, bio))) { + /* + * This bio is an MD cloned bio and already holds an + * active_io reference, so percpu_ref_get() is safe here. + */ + percpu_ref_get(&mddev->active_io); + } else { + if (is_suspended(mddev, bio)) { + /* Bail out if REQ_NOWAIT is set for the bio */ + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return true; + } + wait_event(mddev->sb_wait, !is_suspended(mddev, bio)); } - wait_event(mddev->sb_wait, !is_suspended(mddev, bio)); + if (!percpu_ref_tryget_live(&mddev->active_io)) + goto check_suspended; } - if (!percpu_ref_tryget_live(&mddev->active_io)) - goto check_suspended; - if (!mddev->pers->make_request(mddev, bio)) { percpu_ref_put(&mddev->active_io); if (mddev_is_dm(mddev) && mddev->pers->prepare_suspend) diff --git a/drivers/md/md.h b/drivers/md/md.h index 9e5100609d120..d8daf0f75cbbe 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1044,6 +1044,11 @@ void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes); extern const struct block_device_operations md_fops; +static inline bool md_cloned_bio(struct mddev *mddev, struct bio *bio) +{ + return bio->bi_pool == &mddev->io_clone_set; +} + /* * MD devices can be used undeneath by DM, in which case ->gendisk is NULL. */