]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
btrfs: fix inode lookup error handling during log replay
authorFilipe Manana <fdmanana@suse.com>
Sat, 12 Jul 2025 04:27:22 +0000 (00:27 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 17 Jul 2025 16:35:17 +0000 (18:35 +0200)
[ Upstream commit 5f61b961599acbd2bed028d3089105a1f7d224b8 ]

When replaying log trees we use read_one_inode() to get an inode, which is
just a wrapper around btrfs_iget_logging(), which in turn is a wrapper for
btrfs_iget(). But read_one_inode() always returns NULL for any error
that btrfs_iget_logging() / btrfs_iget() may return and this is a problem
because:

1) In many callers of read_one_inode() we convert the NULL into -EIO,
   which is not accurate since btrfs_iget() may return -ENOMEM and -ENOENT
   for example, besides -EIO and other errors. So during log replay we
   may end up reporting a false -EIO, which is confusing since we may
   not have had any IO error at all;

2) When replaying directory deletes, at replay_dir_deletes(), we assume
   the NULL returned from read_one_inode() means that the inode doesn't
   exist and then proceed as if no error had happened. This is wrong
   because unless btrfs_iget() returned ERR_PTR(-ENOENT), we had an
   actual error and the target inode may exist in the target subvolume
   root - this may later result in the log replay code failing at a
   later stage (if we are "lucky") or succeed but leaving some
   inconsistency in the filesystem.

So fix this by not ignoring errors from btrfs_iget_logging() and as
a consequence remove the read_one_inode() wrapper and just use
btrfs_iget_logging() directly. Also since btrfs_iget_logging() is
supposed to be called only against subvolume roots, just like
read_one_inode() which had a comment about it, add an assertion to
btrfs_iget_logging() to check that the target root corresponds to a
subvolume root.

Fixes: 5d4f98a28c7d ("Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE)")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/btrfs/tree-log.c

index f846dcbd707561990cbab2b78c4d048a1809d5fe..16434106c465db8249e77a5a380fb9543deb50c5 100644 (file)
@@ -145,6 +145,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
        unsigned int nofs_flag;
        struct inode *inode;
 
+       /* Only meant to be called for subvolume roots and not for log roots. */
+       ASSERT(is_fstree(btrfs_root_id(root)));
+
        /*
         * We're holding a transaction handle whether we are logging or
         * replaying a log tree, so we must make sure NOFS semantics apply
@@ -616,20 +619,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
        return 0;
 }
 
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct inode *read_one_inode(struct btrfs_root *root,
-                                            u64 objectid)
-{
-       struct btrfs_inode *inode;
-
-       inode = btrfs_iget_logging(objectid, root);
-       if (IS_ERR(inode))
-               return NULL;
-       return &inode->vfs_inode;
-}
 
 /* replays a single extent in 'eb' at 'slot' with 'key' into the
  * subvolume 'root'.  path is released on entry and should be released
@@ -684,10 +673,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       inode = read_one_inode(root, key->objectid);
-       if (!inode) {
-               ret = -EIO;
-               goto out;
+       {
+               struct btrfs_inode *btrfs_inode;
+
+               btrfs_inode = btrfs_iget_logging(key->objectid, root);
+               if (IS_ERR(btrfs_inode)) {
+                       ret = PTR_ERR(btrfs_inode);
+                       goto out;
+               }
+               inode = &btrfs_inode->vfs_inode;
        }
 
        /*
@@ -966,10 +960,16 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 
        btrfs_release_path(path);
 
-       inode = read_one_inode(root, location.objectid);
-       if (!inode) {
-               ret = -EIO;
-               goto out;
+       {
+               struct btrfs_inode *btrfs_inode;
+
+               btrfs_inode = btrfs_iget_logging(location.objectid, root);
+               if (IS_ERR(btrfs_inode)) {
+                       ret = PTR_ERR(btrfs_inode);
+                       inode = NULL;
+                       goto out;
+               }
+               inode = &btrfs_inode->vfs_inode;
        }
 
        ret = link_to_fixup_dir(trans, root, path, location.objectid);
@@ -1186,18 +1186,21 @@ again:
                                kfree(victim_name.name);
                                return ret;
                        } else if (!ret) {
-                               ret = -ENOENT;
-                               victim_parent = read_one_inode(root,
-                                               parent_objectid);
-                               if (victim_parent) {
+                               struct btrfs_inode *btrfs_victim;
+
+                               btrfs_victim = btrfs_iget_logging(parent_objectid, root);
+                               if (IS_ERR(btrfs_victim)) {
+                                       ret = PTR_ERR(btrfs_victim);
+                               } else {
+                                       victim_parent = &btrfs_victim->vfs_inode;
                                        inc_nlink(&inode->vfs_inode);
                                        btrfs_release_path(path);
 
                                        ret = unlink_inode_for_log_replay(trans,
                                                        BTRFS_I(victim_parent),
                                                        inode, &victim_name);
+                                       iput(victim_parent);
                                }
-                               iput(victim_parent);
                                kfree(victim_name.name);
                                if (ret)
                                        return ret;
@@ -1334,11 +1337,16 @@ again:
                        struct inode *dir;
 
                        btrfs_release_path(path);
-                       dir = read_one_inode(root, parent_id);
-                       if (!dir) {
-                               ret = -ENOENT;
-                               kfree(name.name);
-                               goto out;
+                       {
+                               struct btrfs_inode *btrfs_dir;
+
+                               btrfs_dir = btrfs_iget_logging(parent_id, root);
+                               if (IS_ERR(btrfs_dir)) {
+                                       ret = PTR_ERR(btrfs_dir);
+                                       kfree(name.name);
+                                       goto out;
+                               }
+                               dir = &btrfs_dir->vfs_inode;
                        }
                        ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
                                                 inode, &name);
@@ -1409,16 +1417,28 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
         * copy the back ref in.  The link count fixup code will take
         * care of the rest
         */
-       dir = read_one_inode(root, parent_objectid);
-       if (!dir) {
-               ret = -ENOENT;
-               goto out;
+       {
+               struct btrfs_inode *btrfs_dir;
+
+               btrfs_dir = btrfs_iget_logging(parent_objectid, root);
+               if (IS_ERR(btrfs_dir)) {
+                       ret = PTR_ERR(btrfs_dir);
+                       dir = NULL;
+                       goto out;
+               }
+               dir = &btrfs_dir->vfs_inode;
        }
 
-       inode = read_one_inode(root, inode_objectid);
-       if (!inode) {
-               ret = -EIO;
-               goto out;
+       {
+               struct btrfs_inode *btrfs_inode;
+
+               btrfs_inode = btrfs_iget_logging(inode_objectid, root);
+               if (IS_ERR(btrfs_inode)) {
+                       ret = PTR_ERR(btrfs_inode);
+                       inode = NULL;
+                       goto out;
+               }
+               inode = &btrfs_inode->vfs_inode;
        }
 
        while (ref_ptr < ref_end) {
@@ -1429,11 +1449,16 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                         * parent object can change from one array
                         * item to another.
                         */
-                       if (!dir)
-                               dir = read_one_inode(root, parent_objectid);
                        if (!dir) {
-                               ret = -ENOENT;
-                               goto out;
+                               struct btrfs_inode *btrfs_dir;
+
+                               btrfs_dir = btrfs_iget_logging(parent_objectid, root);
+                               if (IS_ERR(btrfs_dir)) {
+                                       ret = PTR_ERR(btrfs_dir);
+                                       dir = NULL;
+                                       goto out;
+                               }
+                               dir = &btrfs_dir->vfs_inode;
                        }
                } else {
                        ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
@@ -1701,10 +1726,15 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
                        break;
 
                btrfs_release_path(path);
-               inode = read_one_inode(root, key.offset);
-               if (!inode) {
-                       ret = -EIO;
-                       break;
+               {
+                       struct btrfs_inode *btrfs_inode;
+
+                       btrfs_inode = btrfs_iget_logging(key.offset, root);
+                       if (IS_ERR(btrfs_inode)) {
+                               ret = PTR_ERR(btrfs_inode);
+                               break;
+                       }
+                       inode = &btrfs_inode->vfs_inode;
                }
 
                ret = fixup_inode_link_count(trans, inode);
@@ -1738,9 +1768,14 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
        int ret = 0;
        struct inode *inode;
 
-       inode = read_one_inode(root, objectid);
-       if (!inode)
-               return -EIO;
+       {
+               struct btrfs_inode *btrfs_inode;
+
+               btrfs_inode = btrfs_iget_logging(objectid, root);
+               if (IS_ERR(btrfs_inode))
+                       return PTR_ERR(btrfs_inode);
+               inode = &btrfs_inode->vfs_inode;
+       }
 
        key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
        key.type = BTRFS_ORPHAN_ITEM_KEY;
@@ -1778,14 +1813,24 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
        struct inode *dir;
        int ret;
 
-       inode = read_one_inode(root, location->objectid);
-       if (!inode)
-               return -ENOENT;
+       {
+               struct btrfs_inode *btrfs_inode;
 
-       dir = read_one_inode(root, dirid);
-       if (!dir) {
-               iput(inode);
-               return -EIO;
+               btrfs_inode = btrfs_iget_logging(location->objectid, root);
+               if (IS_ERR(btrfs_inode))
+                       return PTR_ERR(btrfs_inode);
+               inode = &btrfs_inode->vfs_inode;
+       }
+
+       {
+               struct btrfs_inode *btrfs_dir;
+
+               btrfs_dir = btrfs_iget_logging(dirid, root);
+               if (IS_ERR(btrfs_dir)) {
+                       iput(inode);
+                       return PTR_ERR(btrfs_dir);
+               }
+               dir = &btrfs_dir->vfs_inode;
        }
 
        ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
@@ -1863,9 +1908,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
        bool update_size = true;
        bool name_added = false;
 
-       dir = read_one_inode(root, key->objectid);
-       if (!dir)
-               return -EIO;
+       {
+               struct btrfs_inode *btrfs_dir;
+
+               btrfs_dir = btrfs_iget_logging(key->objectid, root);
+               if (IS_ERR(btrfs_dir))
+                       return PTR_ERR(btrfs_dir);
+               dir = &btrfs_dir->vfs_inode;
+       }
 
        ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
        if (ret)
@@ -2167,10 +2217,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
        btrfs_dir_item_key_to_cpu(eb, di, &location);
        btrfs_release_path(path);
        btrfs_release_path(log_path);
-       inode = read_one_inode(root, location.objectid);
-       if (!inode) {
-               ret = -EIO;
-               goto out;
+       {
+               struct btrfs_inode *btrfs_inode;
+
+               btrfs_inode = btrfs_iget_logging(location.objectid, root);
+               if (IS_ERR(btrfs_inode)) {
+                       ret = PTR_ERR(btrfs_inode);
+                       inode = NULL;
+                       goto out;
+               }
+               inode = &btrfs_inode->vfs_inode;
        }
 
        ret = link_to_fixup_dir(trans, root, path, location.objectid);
@@ -2321,14 +2377,22 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
        if (!log_path)
                return -ENOMEM;
 
-       dir = read_one_inode(root, dirid);
-       /* it isn't an error if the inode isn't there, that can happen
-        * because we replay the deletes before we copy in the inode item
-        * from the log
-        */
-       if (!dir) {
-               btrfs_free_path(log_path);
-               return 0;
+       {
+               struct btrfs_inode *btrfs_dir;
+
+               btrfs_dir = btrfs_iget_logging(dirid, root);
+               /*
+                * It isn't an error if the inode isn't there, that can happen because
+                * we replay the deletes before we copy in the inode item from the log.
+                */
+               if (IS_ERR(btrfs_dir)) {
+                       btrfs_free_path(log_path);
+                       ret = PTR_ERR(btrfs_dir);
+                       if (ret == -ENOENT)
+                               ret = 0;
+                       return ret;
+               }
+               dir = &btrfs_dir->vfs_inode;
        }
 
        range_start = 0;
@@ -2487,10 +2551,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
                                struct inode *inode;
                                u64 from;
 
-                               inode = read_one_inode(root, key.objectid);
-                               if (!inode) {
-                                       ret = -EIO;
-                                       break;
+                               {
+                                       struct btrfs_inode *btrfs_inode;
+
+                                       btrfs_inode = btrfs_iget_logging(key.objectid, root);
+                                       if (IS_ERR(btrfs_inode)) {
+                                               ret = PTR_ERR(btrfs_inode);
+                                               break;
+                                       }
+                                       inode = &btrfs_inode->vfs_inode;
                                }
                                from = ALIGN(i_size_read(inode),
                                             root->fs_info->sectorsize);