From: Qu Wenruo Date: Mon, 16 Jun 2025 22:11:14 +0000 (+0930) Subject: btrfs: delay btrfs_open_devices() until super block is created X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bddf57a70781ef8821d415200bdbcb71f443993a;p=thirdparty%2Flinux.git btrfs: delay btrfs_open_devices() until super block is created Currently we always call btrfs_open_devices() before creating the super block. It's fine for now because: - No blk_holder_ops is provided - btrfs_fs_type is used as a holder This means no matter who wins the device opening race, the holder will be the same thus not affecting the later sget_fc() race. And since no blk_holder_ops is provided, no bdev operation is depending on the holder. But this will no longer be true if we want to implement a proper blk_holder_ops using fs_holder_ops. This means we will need a proper super block as the bdev holder. To prepare for such change: - Add btrfs_fs_devices::holding member This will prevent btrfs_free_stale_devices() and btrfs_close_device() from deleting the fs_devices when there is another process trying to mount the fs. Along with the new member, here come the two helpers, btrfs_fs_devices_inc_holding() and btrfs_fs_devices_dec_holding(). This will allow us to hold fs_devices without opening it. This is needed because we cannot hold uuid_mutex while calling sget_fc(), this will reverse the lock sequence with s_umount, causing a lockdep warning. - Delay btrfs_open_devices() until a super block is returned This means we have to hold the initial fs_devices first, then unlock uuid_mutex, call sget_fc(), then re-lock uuid_mutex, and decrease the holding number. For new super block case, we continue to btrfs_open_devices() with uuid_mutex hold. For existing super block case, we can unlock uuid_mutex and continue. Although this means a more complex error handling path, as if we didn't call btrfs_open_devices() (either got an existing sb, or sget_fc() failed), we cannot let btrfs_put_fs_info() cleanup the fs_devices, as it can be freed at any time after we decrease the hold on fs_devices and unlock uuid_mutex. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77249e0dd5780..e0ee96cc749ce 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1841,7 +1841,6 @@ static int btrfs_get_tree_super(struct fs_context *fc) struct btrfs_fs_info *fs_info = fc->s_fs_info; struct btrfs_fs_context *ctx = fc->fs_private; struct btrfs_fs_devices *fs_devices = NULL; - struct block_device *bdev; struct btrfs_device *device; struct super_block *sb; blk_mode_t mode = btrfs_open_mode(fc); @@ -1860,23 +1859,37 @@ static int btrfs_get_tree_super(struct fs_context *fc) mutex_unlock(&uuid_mutex); return PTR_ERR(device); } - fs_devices = device->fs_devices; + /* + * We cannot hold uuid_mutex calling sget_fc(), it will lead to a + * locking order reversal with s_umount. + * + * So here we increase the holding number of fs_devices, this will ensure + * the fs_devices itself won't be freed. + */ + btrfs_fs_devices_inc_holding(fs_devices); fs_info->fs_devices = fs_devices; - - ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type); mutex_unlock(&uuid_mutex); - if (ret) - return ret; - if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) - return -EACCES; - - bdev = fs_devices->latest_dev->bdev; sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc); - if (IS_ERR(sb)) + if (IS_ERR(sb)) { + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + /* + * Since the fs_devices is not opened, it can be freed at any + * time after unlocking uuid_mutex. We need to avoid double + * free through put_fs_context()->btrfs_free_fs_info(). + * So here we reset fs_info->fs_devices to NULL, and let the + * regular fs_devices reclaim path to handle it. + * + * This applies to all later branches where no fs_devices is + * opened. + */ + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); return PTR_ERR(sb); + } set_device_specific_options(fs_info); @@ -1887,11 +1900,13 @@ static int btrfs_get_tree_super(struct fs_context *fc) * * fc->s_fs_info is not touched and will be later freed by * put_fs_context() through btrfs_free_fs_context(). - * - * The fs_info->fs_devices will also be closed by btrfs_free_fs_context(). */ ASSERT(fc->s_fs_info == fs_info); + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); /* * At this stage we may have RO flag mismatch between * fc->sb_flags and sb->s_flags. Caller should detect such @@ -1899,6 +1914,8 @@ static int btrfs_get_tree_super(struct fs_context *fc) * needed. */ } else { + struct block_device *bdev; + /* * The first mount of the fs thus a new superblock, fc->s_fs_info * must be NULL, and the ownership of our fs_info and fs_devices is @@ -1906,6 +1923,21 @@ static int btrfs_get_tree_super(struct fs_context *fc) */ ASSERT(fc->s_fs_info == NULL); + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type); + if (ret < 0) + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); + if (ret < 0) { + deactivate_locked_super(sb); + return ret; + } + if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) { + deactivate_locked_super(sb); + return -EACCES; + } + bdev = fs_devices->latest_dev->bdev; snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); btrfs_sb(sb)->bdev_holder = &btrfs_fs_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f49eb43548745..b974e2a84ed7c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -413,6 +413,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device *device; WARN_ON(fs_devices->opened); + WARN_ON(fs_devices->holding); while (!list_empty(&fs_devices->devices)) { device = list_first_entry(&fs_devices->devices, struct btrfs_device, dev_list); @@ -540,7 +541,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device continue; if (devt && devt != device->devt) continue; - if (fs_devices->opened) { + if (fs_devices->opened || fs_devices->holding) { if (devt) ret = -EBUSY; break; @@ -1196,7 +1197,7 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) { + if (!fs_devices->opened && !fs_devices->holding) { list_splice_init(&fs_devices->seed_list, &list); /* diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1cfced687ce37..d48969f681bba 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -422,6 +422,16 @@ struct btrfs_fs_devices { /* Count fs-devices opened. */ int opened; + /* + * Counter of the processes that are holding this fs_devices but not + * yet opened. + * This is for mounting handling, as we can only open the fs_devices + * after a super block is created. But we cannot take uuid_mutex + * during sget_fc(), thus we have to hold the fs_devices (meaning it + * cannot be released) until a super block is returned. + */ + int holding; + /* Set when we find or add a device that doesn't have the nonrot flag set. */ bool rotating; /* Devices support TRIM/discard commands. */ @@ -853,6 +863,20 @@ static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocati WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol); } +static inline void btrfs_fs_devices_inc_holding(struct btrfs_fs_devices *fs_devices) +{ + lockdep_assert_held(&uuid_mutex); + ASSERT(fs_devices->holding >= 0); + fs_devices->holding++; +} + +static inline void btrfs_fs_devices_dec_holding(struct btrfs_fs_devices *fs_devices) +{ + lockdep_assert_held(&uuid_mutex); + ASSERT(fs_devices->holding > 0); + fs_devices->holding--; +} + void btrfs_commit_device_sizes(struct btrfs_transaction *trans); struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);