]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 28 May 2019 13:06:19 +0000 (15:06 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 28 May 2019 13:06:19 +0000 (15:06 +0200)
added patches:
btrfs-honor-path-skip_locking-in-backref-code.patch

queue-4.14/btrfs-honor-path-skip_locking-in-backref-code.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/btrfs-honor-path-skip_locking-in-backref-code.patch b/queue-4.14/btrfs-honor-path-skip_locking-in-backref-code.patch
new file mode 100644 (file)
index 0000000..478f0b6
--- /dev/null
@@ -0,0 +1,154 @@
+From 38e3eebff643db725633657d1d87a3be019d1018 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 16 Jan 2019 11:00:57 -0500
+Subject: btrfs: honor path->skip_locking in backref code
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 38e3eebff643db725633657d1d87a3be019d1018 upstream.
+
+Qgroups will do the old roots lookup at delayed ref time, which could be
+while walking down the extent root while running a delayed ref.  This
+should be fine, except we specifically lock eb's in the backref walking
+code irrespective of path->skip_locking, which deadlocks the system.
+Fix up the backref code to honor path->skip_locking, nobody will be
+modifying the commit_root when we're searching so it's completely safe
+to do.
+
+This happens since fb235dc06fac ("btrfs: qgroup: Move half of the qgroup
+accounting time out of commit trans"), kernel may lockup with quota
+enabled.
+
+There is one backref trace triggered by snapshot dropping along with
+write operation in the source subvolume.  The example can be reliably
+reproduced:
+
+  btrfs-cleaner   D    0  4062      2 0x80000000
+  Call Trace:
+   schedule+0x32/0x90
+   btrfs_tree_read_lock+0x93/0x130 [btrfs]
+   find_parent_nodes+0x29b/0x1170 [btrfs]
+   btrfs_find_all_roots_safe+0xa8/0x120 [btrfs]
+   btrfs_find_all_roots+0x57/0x70 [btrfs]
+   btrfs_qgroup_trace_extent_post+0x37/0x70 [btrfs]
+   btrfs_qgroup_trace_leaf_items+0x10b/0x140 [btrfs]
+   btrfs_qgroup_trace_subtree+0xc8/0xe0 [btrfs]
+   do_walk_down+0x541/0x5e3 [btrfs]
+   walk_down_tree+0xab/0xe7 [btrfs]
+   btrfs_drop_snapshot+0x356/0x71a [btrfs]
+   btrfs_clean_one_deleted_snapshot+0xb8/0xf0 [btrfs]
+   cleaner_kthread+0x12b/0x160 [btrfs]
+   kthread+0x112/0x130
+   ret_from_fork+0x27/0x50
+
+When dropping snapshots with qgroup enabled, we will trigger backref
+walk.
+
+However such backref walk at that timing is pretty dangerous, as if one
+of the parent nodes get WRITE locked by other thread, we could cause a
+dead lock.
+
+For example:
+
+           FS 260     FS 261 (Dropped)
+            node A        node B
+           /      \      /      \
+       node C      node D      node E
+      /   \         /  \        /     \
+  leaf F|leaf G|leaf H|leaf I|leaf J|leaf K
+
+The lock sequence would be:
+
+      Thread A (cleaner)             |       Thread B (other writer)
+-----------------------------------------------------------------------
+write_lock(B)                        |
+write_lock(D)                        |
+^^^ called by walk_down_tree()       |
+                                     |       write_lock(A)
+                                     |       write_lock(D) << Stall
+read_lock(H) << for backref walk     |
+read_lock(D) << lock owner is        |
+                the same thread A    |
+                so read lock is OK   |
+read_lock(A) << Stall                |
+
+So thread A hold write lock D, and needs read lock A to unlock.
+While thread B holds write lock A, while needs lock D to unlock.
+
+This will cause a deadlock.
+
+This is not only limited to snapshot dropping case.  As the backref
+walk, even only happens on commit trees, is breaking the normal top-down
+locking order, makes it deadlock prone.
+
+Fixes: fb235dc06fac ("btrfs: qgroup: Move half of the qgroup accounting time out of commit trans")
+CC: stable@vger.kernel.org # 4.14+
+Reported-and-tested-by: David Sterba <dsterba@suse.com>
+Reported-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+[ rebase to latest branch and fix lock assert bug in btrfs/007 ]
+[ backport to linux-4.19.y branch, solve minor conflicts ]
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+[ copy logs and deadlock analysis from Qu's patch ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -718,7 +718,7 @@ out:
+  * read tree blocks and add keys where required.
+  */
+ static int add_missing_keys(struct btrfs_fs_info *fs_info,
+-                          struct preftrees *preftrees)
++                          struct preftrees *preftrees, bool lock)
+ {
+       struct prelim_ref *ref;
+       struct extent_buffer *eb;
+@@ -742,12 +742,14 @@ static int add_missing_keys(struct btrfs
+                       free_extent_buffer(eb);
+                       return -EIO;
+               }
+-              btrfs_tree_read_lock(eb);
++              if (lock)
++                      btrfs_tree_read_lock(eb);
+               if (btrfs_header_level(eb) == 0)
+                       btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+               else
+                       btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+-              btrfs_tree_read_unlock(eb);
++              if (lock)
++                      btrfs_tree_read_unlock(eb);
+               free_extent_buffer(eb);
+               prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
+               cond_resched();
+@@ -1228,7 +1230,7 @@ again:
+       btrfs_release_path(path);
+-      ret = add_missing_keys(fs_info, &preftrees);
++      ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
+       if (ret)
+               goto out;
+@@ -1290,9 +1292,14 @@ again:
+                               }
+                               btrfs_tree_read_lock(eb);
+                               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++                              if (!path->skip_locking) {
++                                      btrfs_tree_read_lock(eb);
++                                      btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++                              }
+                               ret = find_extent_in_eb(eb, bytenr,
+                                                       *extent_item_pos, &eie);
+-                              btrfs_tree_read_unlock_blocking(eb);
++                              if (!path->skip_locking)
++                                      btrfs_tree_read_unlock_blocking(eb);
+                               free_extent_buffer(eb);
+                               if (ret < 0)
+                                       goto out;
index 980e82b60ba1c26477cadefdb982e8ea3703eff1..c92eea6e4bc39c91a3531ff272591ff9b9e5d6b3 100644 (file)
@@ -22,3 +22,4 @@ fbdev-fix-divide-error-in-fb_var_to_videomode.patch
 hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch
 brcmfmac-assure-ssid-length-from-firmware-is-limited.patch
 brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch
+btrfs-honor-path-skip_locking-in-backref-code.patch