]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
btrfs: introduce a new shutdown state
author: Qu Wenruo <wqu@suse.com>
Sun, 12 Oct 2025 23:52:03 +0000 (10:22 +1030)
committer: David Sterba <dsterba@suse.com>
Mon, 24 Nov 2025 20:45:03 +0000 (21:45 +0100)
A new fs state EMERGENCY_SHUTDOWN is introduced, which is btrfs'
equivalent of XFS_IOC_GOINGDOWN or EXT4_IOC_SHUTDOWN. After entering the
emergency shutdown state, all operations will return errors (-EIO), and
the fs cannot be brought back to the normal state until unmount.

The new state will reject the following file operations:

- read_iter()
- write_iter()
- mmap()
- open()
- fallocate()
- remap_file_range()
- uring_cmd()
- splice_read()
  This requires a small wrapper to do the extra shutdown check, then call
  the regular filemap_splice_read() function

This should reject most of the file operations on a shutdown btrfs.

And for the existing dirty folios, extra shutdown checks are introduced
to the following functions:

- run_delalloc_nocow()
- run_delalloc_compressed()
- cow_file_range()

So that dirty ranges will still be properly cleaned without being
submitted.

Finally the shutdown state will also set the fs error, so that no new
transaction will be committed, protecting the metadata from any possible
further corruption.

And when the fs enters shutdown mode for the first time, a critical
level kernel message will show up to indicate the incident.

That message will be important for end users, as rejected delalloc ranges
will output error messages. Hopefully the shutdown message, and the fact
that all fs operations are returning errors, will prevent end users from
getting too confused about the delalloc error messages.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anand Jain <asj@kernel.org>
Tested-by: Anand Jain <asj@kernel.org>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/file.c
fs/btrfs/fs.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/messages.c
fs/btrfs/reflink.c

index 30986a625bdb368216574882eb8d867d53a3c94e..1e0ff3d7210db0f4b496a8dec14db1dea00e9213 100644 (file)
@@ -1440,6 +1440,8 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
        struct btrfs_inode *inode = BTRFS_I(file_inode(file));
        ssize_t num_written, num_sync;
 
+       if (unlikely(btrfs_is_shutdown(inode->root->fs_info)))
+               return -EIO;
        /*
         * If the fs flips readonly due to some impossible error, although we
         * have opened a file as writable, we have to stop this write operation
@@ -2042,6 +2044,8 @@ static int btrfs_file_mmap_prepare(struct vm_area_desc *desc)
        struct file *filp = desc->file;
        struct address_space *mapping = filp->f_mapping;
 
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(filp)))))
+               return -EIO;
        if (!mapping->a_ops->read_folio)
                return -ENOEXEC;
 
@@ -3111,6 +3115,9 @@ static long btrfs_fallocate(struct file *file, int mode,
        int blocksize = BTRFS_I(inode)->root->fs_info->sectorsize;
        int ret;
 
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
+               return -EIO;
+
        /* Do not allow fallocate in ZONED mode */
        if (btrfs_is_zoned(inode_to_fs_info(inode)))
                return -EOPNOTSUPP;
@@ -3802,6 +3809,9 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
 {
        int ret;
 
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
+               return -EIO;
+
        filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
 
        ret = fsverity_file_open(inode, filp);
@@ -3814,6 +3824,9 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        ssize_t ret = 0;
 
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(iocb->ki_filp)))))
+               return -EIO;
+
        if (iocb->ki_flags & IOCB_DIRECT) {
                ret = btrfs_direct_read(iocb, to);
                if (ret < 0 || !iov_iter_count(to) ||
@@ -3824,10 +3837,20 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return filemap_read(iocb, to, ret);
 }
 
+static ssize_t btrfs_file_splice_read(struct file *in, loff_t *ppos,
+                                     struct pipe_inode_info *pipe,
+                                     size_t len, unsigned int flags)
+{
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(in)))))
+               return -EIO;
+
+       return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
 const struct file_operations btrfs_file_operations = {
        .llseek         = btrfs_file_llseek,
        .read_iter      = btrfs_file_read_iter,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = btrfs_file_splice_read,
        .write_iter     = btrfs_file_write_iter,
        .splice_write   = iter_file_splice_write,
        .mmap_prepare   = btrfs_file_mmap_prepare,
index 814bbc9417d2a29baa088b020a1b2b92857754f0..c83fd192a7dc3e91cf8263e56472ab768feeaa0b 100644 (file)
@@ -29,6 +29,7 @@
 #include "extent-io-tree.h"
 #include "async-thread.h"
 #include "block-rsv.h"
+#include "messages.h"
 
 struct inode;
 struct super_block;
@@ -124,6 +125,12 @@ enum {
        /* No more delayed iput can be queued. */
        BTRFS_FS_STATE_NO_DELAYED_IPUT,
 
+       /*
+        * Emergency shutdown, a step further than transaction aborted by
+        * rejecting all operations.
+        */
+       BTRFS_FS_STATE_EMERGENCY_SHUTDOWN,
+
        BTRFS_FS_STATE_COUNT
 };
 
@@ -1120,6 +1127,27 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
        (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,            \
                           &(fs_info)->fs_state)))
 
+static inline bool btrfs_is_shutdown(struct btrfs_fs_info *fs_info)
+{
+       return test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state);
+}
+
+static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info)
+{
+       /*
+        * Here we do not want to use handle_fs_error(), which will mark the fs
+        * read-only.
+        * Some call sites like shutdown ioctl will mark the fs shutdown when
+        * the fs is frozen. But thaw path will handle RO and RW fs
+        * differently.
+        *
+        * So here we only mark the fs error without flipping it RO.
+        */
+       WRITE_ONCE(fs_info->fs_error, -EIO);
+       if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state))
+               btrfs_crit(fs_info, "emergency shutdown");
+}
+
 /*
  * We use folio flag owner_2 to indicate there is an ordered extent with
  * unfinished IO.
index 6131589aba7cb441ea2305b22e2affcf13f2b8d1..15131873f73da9a65aa89aba32f287f18ca64299 100644 (file)
@@ -864,7 +864,7 @@ static void compress_file_range(struct btrfs_work *work)
        u64 actual_end;
        u64 i_size;
        int ret = 0;
-       struct folio **folios;
+       struct folio **folios = NULL;
        unsigned long nr_folios;
        unsigned long total_compressed = 0;
        unsigned long total_in = 0;
@@ -873,6 +873,9 @@ static void compress_file_range(struct btrfs_work *work)
        int compress_type = fs_info->compress_type;
        int compress_level = fs_info->compress_level;
 
+       if (unlikely(btrfs_is_shutdown(fs_info)))
+               goto cleanup_and_bail_uncompressed;
+
        inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
 
        /*
@@ -1288,6 +1291,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
        unsigned long page_ops;
        int ret = 0;
 
+       if (unlikely(btrfs_is_shutdown(fs_info))) {
+               ret = -EIO;
+               goto out_unlock;
+       }
+
        if (btrfs_is_free_space_inode(inode)) {
                ret = -EINVAL;
                goto out_unlock;
@@ -2006,7 +2014,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_root *root = inode->root;
-       struct btrfs_path *path;
+       struct btrfs_path *path = NULL;
        u64 cow_start = (u64)-1;
        /*
         * If not 0, represents the inclusive end of the last fallback_to_cow()
@@ -2036,6 +2044,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
         */
        ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
 
+       if (unlikely(btrfs_is_shutdown(fs_info))) {
+               ret = -EIO;
+               goto error;
+       }
        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
index 8cb7d5a462ef793e1789c95091c1cfb15060ba45..803556ec0e183965f9b601eed673c86ae88c2431 100644 (file)
@@ -5077,6 +5077,9 @@ out_acct:
 
 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file)))))
+               return -EIO;
+
        switch (cmd->cmd_op) {
        case BTRFS_IOC_ENCODED_READ:
 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
index a0cf8effe008e14950c92d595e7677fc30b67423..2f853de44473982fccbaaf793440ebc24b2f33ec 100644 (file)
@@ -24,6 +24,7 @@ static const char fs_state_chars[] = {
        [BTRFS_FS_STATE_NO_DATA_CSUMS]          = 'C',
        [BTRFS_FS_STATE_SKIP_META_CSUMS]        = 'S',
        [BTRFS_FS_STATE_LOG_CLEANUP_ERROR]      = 'L',
+       [BTRFS_FS_STATE_EMERGENCY_SHUTDOWN]     = 'E',
 };
 
 static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
index 5465a5eae9b2d15bab14484ab994bc293231c544..1bbe3bb7e1bb352c7201b961ca8bbbac3f596fce 100644 (file)
@@ -868,6 +868,9 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
        bool same_inode = dst_inode == src_inode;
        int ret;
 
+       if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(src_file)))))
+               return -EIO;
+
        if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
                return -EINVAL;