]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
btrfs: implement remove_bdev and shutdown super operation callbacks
authorQu Wenruo <wqu@suse.com>
Sun, 12 Oct 2025 23:52:05 +0000 (10:22 +1030)
committerDavid Sterba <dsterba@suse.com>
Mon, 24 Nov 2025 20:58:40 +0000 (21:58 +0100)
For the ->remove_bdev() callback, btrfs will:

- Mark the target device as missing

- Go degraded if the fs can afford it

- Return an error otherwise
  This falls back to the shutdown callback

For the ->shutdown callback, btrfs will:

- Set the SHUTDOWN flag
  Which will reject all new incoming operations, and make all writeback
  fail.

  The behavior is the same as the NOLOGFLUSH behavior.

To support the lookup from bdev to a btrfs_device,
btrfs_dev_lookup_args is enhanced to have a new @devt member.
If set, we should be able to use that @devt member to uniquely locate a
btrfs device.

I know the shutdown can be a little overkill: if one has RAID1 metadata
and RAID0 data, one can still read data with a 50% chance of getting
some good data.

But a filesystem returning -EIO for half of the time is not really
considered usable.
Furthermore, it can be as bad as the only device going missing on a
single-device btrfs.

So here we go with better safe than sorry when handling a missing device.

And the remove_bdev callback will be hidden behind experimental features
for now, the reasons are:

- There are not enough btrfs specific bdev removal test cases
  The existing test cases are all removing the only device, thus only
  exercises the ->shutdown() behavior.

- The expected behavior is not yet determined
  Although the current auto-degrade behavior is no worse than the old
  behavior, it may not always be what the end users want.

  Before there is a concrete interface, better hide the new feature
  from end users.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anand Jain <asj@kernel.org>
Tested-by: Anand Jain <asj@kernel.org>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/super.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 430e7419349c9a001baf1d4d420ab82f4ca3f888..e606e11d3f5773ed02b961afaf409e6c8009e663 100644 (file)
@@ -2430,6 +2430,66 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
        return 0;
 }
 
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+/*
+ * ->remove_bdev() super operation: react to a block device of this
+ * filesystem going away.
+ *
+ * @sb:   super block of the affected filesystem
+ * @bdev: block device being removed
+ *
+ * The btrfs device is looked up by the dev_t of @bdev, flagged as missing
+ * and, if it was writeable, dropped from the allocation list and the
+ * rw_devices count.  btrfs_check_rw_degradable() then decides whether the
+ * filesystem may keep running.
+ *
+ * Return: 0 when no matching btrfs device exists, when the device was
+ * already flagged missing, or when the filesystem continues as degraded;
+ * -EIO when btrfs_check_rw_degradable() reports that read-write operation
+ * can not be maintained.
+ */
+static int btrfs_remove_bdev(struct super_block *sb, struct block_device *bdev)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+       struct btrfs_device *device;
+       /* Only @devt is set, so the lookup matches purely on the device number. */
+       struct btrfs_dev_lookup_args lookup_args = { .devt = bdev->bd_dev };
+       bool can_rw;
+
+       /*
+        * The lookup and all device state/counter updates below are done
+        * with device_list_mutex held.
+        */
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       device = btrfs_find_device(fs_info->fs_devices, &lookup_args);
+       if (!device) {
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               /* Device not found, should not affect the running fs, just give a warning. */
+               btrfs_warn(fs_info, "unable to find btrfs device for block device '%pg'", bdev);
+               return 0;
+       }
+       /*
+        * The to-be-removed device is already missing?
+        *
+        * That's weird but no special handling needed and can exit right now.
+        */
+       if (unlikely(test_and_set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))) {
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               btrfs_warn(fs_info, "btrfs device id %llu is already missing", device->devid);
+               return 0;
+       }
+
+       /*
+        * The MISSING bit was newly set above, so account for the device
+        * once.  If it was writeable, also take it off the allocation list
+        * and out of the rw_devices count.
+        */
+       device->fs_devices->missing_devices++;
+       if (test_and_clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
+               list_del_init(&device->dev_alloc_list);
+               WARN_ON(device->fs_devices->rw_devices < 1);
+               device->fs_devices->rw_devices--;
+       }
+       /* Evaluated while still holding the mutex; acted upon after unlock. */
+       can_rw = btrfs_check_rw_degradable(fs_info, device);
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+       /*
+        * Now device is considered missing, btrfs_device_name() won't give a
+        * meaningful result anymore, so only output the devid.
+        */
+       if (unlikely(!can_rw)) {
+               btrfs_crit(fs_info,
+               "btrfs device id %llu has gone missing, can not maintain read-write",
+                          device->devid);
+               return -EIO;
+       }
+       btrfs_warn(fs_info,
+                  "btrfs device id %llu has gone missing, continue as degraded",
+                  device->devid);
+       /* Record in the mount options that the fs now runs degraded. */
+       btrfs_set_opt(fs_info->mount_opt, DEGRADED);
+       return 0;
+}
+
+/*
+ * ->shutdown() super operation.
+ *
+ * @sb: super block of the filesystem to shut down
+ *
+ * Resolves the btrfs_fs_info behind @sb and hands it to
+ * btrfs_force_shutdown().
+ */
+static void btrfs_shutdown(struct super_block *sb)
+{
+       btrfs_force_shutdown(btrfs_sb(sb));
+}
+#endif
+
 static const struct super_operations btrfs_super_ops = {
        .drop_inode     = btrfs_drop_inode,
        .evict_inode    = btrfs_evict_inode,
@@ -2445,6 +2505,10 @@ static const struct super_operations btrfs_super_ops = {
        .unfreeze_fs    = btrfs_unfreeze,
        .nr_cached_objects = btrfs_nr_cached_objects,
        .free_cached_objects = btrfs_free_cached_objects,
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+       .remove_bdev    = btrfs_remove_bdev,
+       .shutdown       = btrfs_shutdown,
+#endif
 };
 
 static const struct file_operations btrfs_ctl_fops = {
index 65b02a93db3149e7a46fde11fb6507899d19380e..928fc6a061b656aff07004d98f2d37c649be7b87 100644 (file)
@@ -6802,6 +6802,8 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
 static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
                                  const struct btrfs_device *device)
 {
+       if (args->devt)
+               return device->devt == args->devt;
        if (args->missing) {
                if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
                    !device->bdev)
index 2cbf8080eade061702003d5a8632be0e579e1ccf..adbd9e6c09ff3b2c9020264c37faf20739c2b484 100644 (file)
@@ -662,6 +662,11 @@ struct btrfs_dev_lookup_args {
        u64 devid;
        u8 *uuid;
        u8 *fsid;
+       /*
+        * If devt is specified, all other members will be ignored as it is
+        * enough to uniquely locate a device.
+        */
+       dev_t devt;
        bool missing;
 };