4.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)
diff --git a/queue-4.4/btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch b/queue-4.4/btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch

new file mode 100644 (file)

index 0000000..4b07b02
--- /dev/null
+++ b/queue-4.4/btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch
@@ -0,0 +1,39 @@
+From 0be88e367fd8fbdb45257615d691f4675dda062f Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Thu, 6 Sep 2018 17:18:15 -0400
+Subject: btrfs: don't attempt to trim devices that don't support it
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream.
+
+We check whether any device the file system is using supports discard in
+the ioctl call, but then we attempt to trim free extents on every device
+regardless of whether discard is supported.  Due to the way we mask off
+EOPNOTSUPP, we can end up issuing the trim operations on each free range
+on devices that don't support it, just wasting time.
+
+Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10629,6 +10629,10 @@ static int btrfs_trim_free_extents(struc
+ 
+       *trimmed = 0;
+ 
++      /* Discard not supported = nothing to do. */
++      if (!blk_queue_discard(bdev_get_queue(device->bdev)))
++              return 0;
++
+       /* Not writeable = nothing to do. */
+       if (!device->writeable)
+               return 0;
diff --git a/queue-4.4/btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch b/queue-4.4/btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch

new file mode 100644 (file)

index 0000000..43faf3f
--- /dev/null
+++ b/queue-4.4/btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch
@@ -0,0 +1,86 @@
+From 0f375eed92b5a407657532637ed9652611a682f5 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 9 Oct 2018 15:05:29 +0100
+Subject: Btrfs: fix wrong dentries after fsync of file that got its parent replaced
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0f375eed92b5a407657532637ed9652611a682f5 upstream.
+
+In a scenario like the following:
+
+  mkdir /mnt/A               # inode 258
+  mkdir /mnt/B               # inode 259
+  touch /mnt/B/bar           # inode 260
+
+  sync
+
+  mv /mnt/B/bar /mnt/A/bar
+  mv -T /mnt/A /mnt/B
+  fsync /mnt/B/bar
+
+  <power fail>
+
+After replaying the log we end up with file bar having 2 hard links, both
+with the name 'bar' and one in the directory with inode number 258 and the
+other in the directory with inode number 259. Also, we end up with the
+directory inode 259 still existing and with the directory inode 258 still
+named as 'A', instead of 'B'. In this scenario, file 'bar' should only
+have one hard link, located at directory inode 258, the directory inode
+259 should not exist anymore and the name for directory inode 258 should
+be 'B'.
+
+This incorrect behaviour happens because when attempting to log the old
+parents of an inode, we skip any parents that no longer exist. Fix this
+by forcing a full commit if an old parent no longer exists.
+
+A test case for fstests follows soon.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |   30 +++++++++++++++++++++++++++---
+ 1 file changed, 27 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -5240,9 +5240,33 @@ static int btrfs_log_all_parents(struct
+ 
+                       dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
+                                              root, NULL);
+-                      /* If parent inode was deleted, skip it. */
+-                      if (IS_ERR(dir_inode))
+-                              continue;
++                      /*
++                       * If the parent inode was deleted, return an error to
++                       * fallback to a transaction commit. This is to prevent
++                       * getting an inode that was moved from one parent A to
++                       * a parent B, got its former parent A deleted and then
++                       * it got fsync'ed, from existing at both parents after
++                       * a log replay (and the old parent still existing).
++                       * Example:
++                       *
++                       * mkdir /mnt/A
++                       * mkdir /mnt/B
++                       * touch /mnt/B/bar
++                       * sync
++                       * mv /mnt/B/bar /mnt/A/bar
++                       * mv -T /mnt/A /mnt/B
++                       * fsync /mnt/B/bar
++                       * <power fail>
++                       *
++                       * If we ignore the old parent B which got deleted,
++                       * after a log replay we would have file bar linked
++                       * at both parents and the old parent B would still
++                       * exist.
++                       */
++                      if (IS_ERR(dir_inode)) {
++                              ret = PTR_ERR(dir_inode);
++                              goto out;
++                      }
+ 
+                       ret = btrfs_log_inode(trans, root, dir_inode,
+                                             LOG_INODE_ALL, 0, LLONG_MAX, ctx);
diff --git a/queue-4.4/btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch b/queue-4.4/btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch

new file mode 100644 (file)

index 0000000..c1b5070
--- /dev/null
+++ b/queue-4.4/btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch
@@ -0,0 +1,118 @@
+From 65c6e82becec33731f48786e5a30f98662c86b16 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 Aug 2018 09:42:03 +0800
+Subject: btrfs: Handle owner mismatch gracefully when walking up tree
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream.
+
+[BUG]
+When mounting a certain crafted image, btrfs will trigger a kernel BUG_ON()
+when trying to recover balance:
+
+  kernel BUG at fs/btrfs/extent-tree.c:8956!
+  invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+  CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10
+  RIP: 0010:walk_up_proc+0x336/0x480 [btrfs]
+  RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202
+  Call Trace:
+   walk_up_tree+0x172/0x1f0 [btrfs]
+   btrfs_drop_snapshot+0x3a4/0x830 [btrfs]
+   merge_reloc_roots+0xe1/0x1d0 [btrfs]
+   btrfs_recover_relocation+0x3ea/0x420 [btrfs]
+   open_ctree+0x1af3/0x1dd0 [btrfs]
+   btrfs_mount_root+0x66b/0x740 [btrfs]
+   mount_fs+0x3b/0x16a
+   vfs_kern_mount.part.9+0x54/0x140
+   btrfs_mount+0x16d/0x890 [btrfs]
+   mount_fs+0x3b/0x16a
+   vfs_kern_mount.part.9+0x54/0x140
+   do_mount+0x1fd/0xda0
+   ksys_mount+0xba/0xd0
+   __x64_sys_mount+0x21/0x30
+   do_syscall_64+0x60/0x210
+   entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+[CAUSE]
+Extent tree corruption.  In this particular case, reloc tree root's
+owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is
+corrupted and we failed the owner check in walk_up_tree().
+
+[FIX]
+It's pretty hard to take care of every extent tree corruption, but at
+least we can remove such BUG_ON() and exit more gracefully.
+
+And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share
+the same root (which is obviously invalid), we need to make
+__del_reloc_root() more robust to detect such invalid sharing to avoid
+possible NULL dereference as root->node can be NULL in this case.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   18 ++++++++++++------
+ fs/btrfs/relocation.c  |    2 +-
+ 2 files changed, 13 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -8704,15 +8704,14 @@ static noinline int walk_up_proc(struct
+       if (eb == root->node) {
+               if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+                       parent = eb->start;
+-              else
+-                      BUG_ON(root->root_key.objectid !=
+-                             btrfs_header_owner(eb));
++              else if (root->root_key.objectid != btrfs_header_owner(eb))
++                      goto owner_mismatch;
+       } else {
+               if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+                       parent = path->nodes[level + 1]->start;
+-              else
+-                      BUG_ON(root->root_key.objectid !=
+-                             btrfs_header_owner(path->nodes[level + 1]));
++              else if (root->root_key.objectid !=
++                       btrfs_header_owner(path->nodes[level + 1]))
++                      goto owner_mismatch;
+       }
+ 
+       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+@@ -8720,6 +8719,11 @@ out:
+       wc->refs[level] = 0;
+       wc->flags[level] = 0;
+       return 0;
++
++owner_mismatch:
++      btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu",
++                   btrfs_header_owner(eb), root->root_key.objectid);
++      return -EUCLEAN;
+ }
+ 
+ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+@@ -8773,6 +8777,8 @@ static noinline int walk_up_tree(struct
+                       ret = walk_up_proc(trans, root, path, wc);
+                       if (ret > 0)
+                               return 0;
++                      if (ret < 0)
++                              return ret;
+ 
+                       if (path->locks[level]) {
+                               btrfs_tree_unlock_rw(path->nodes[level],
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1318,7 +1318,7 @@ static void __del_reloc_root(struct btrf
+       struct mapping_node *node = NULL;
+       struct reloc_control *rc = root->fs_info->reloc_ctl;
+ 
+-      if (rc) {
++      if (rc && root->node) {
+               spin_lock(&rc->reloc_root_tree.lock);
+               rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+                                     root->node->start);
diff --git a/queue-4.4/btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch b/queue-4.4/btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch

new file mode 100644 (file)

index 0000000..b8a5e85
--- /dev/null
+++ b/queue-4.4/btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch
@@ -0,0 +1,42 @@
+From d4e329de5e5e21594df2e0dd59da9acee71f133b Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Thu, 6 Sep 2018 17:18:14 -0400
+Subject: btrfs: iterate all devices during trim, instead of fs_devices::alloc_list
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream.
+
+btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the
+device_list_mutex.  The problem is that ->alloc_list is protected by the
+chunk mutex.  We don't want to hold the chunk mutex over the trim of the
+entire file system.  Fortunately, the ->dev_list list is protected by
+the dev_list mutex and while it will give us all devices, including
+read-only devices, we already just skip the read-only devices.  Then we
+can continue to take and release the chunk mutex while scanning each
+device.
+
+Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10751,8 +10751,8 @@ int btrfs_trim_fs(struct btrfs_root *roo
+       }
+ 
+       mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+-      devices = &root->fs_info->fs_devices->alloc_list;
+-      list_for_each_entry(device, devices, dev_alloc_list) {
++      devices = &root->fs_info->fs_devices->devices;
++      list_for_each_entry(device, devices, dev_list) {
+               ret = btrfs_trim_free_extents(device, range->minlen,
+                                             &group_trimmed);
+               if (ret)
diff --git a/queue-4.4/btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch b/queue-4.4/btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch

new file mode 100644 (file)

index 0000000..e94e07a
--- /dev/null
+++ b/queue-4.4/btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch
@@ -0,0 +1,131 @@
+From b72c3aba09a53fc7c1824250d71180ca154517a7 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 Aug 2018 09:53:47 +0800
+Subject: btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream.
+
+[BUG]
+For a certain crafted image whose csum root leaf has a missing backref, if
+we try to trigger write with data csum, it could cause deadlock with the
+following kernel WARN_ON():
+
+  WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400
+  CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8
+  Workqueue: btrfs-endio-write btrfs_endio_write_helper
+  RIP: 0010:btrfs_tree_lock+0x3e2/0x400
+  Call Trace:
+   btrfs_alloc_tree_block+0x39f/0x770
+   __btrfs_cow_block+0x285/0x9e0
+   btrfs_cow_block+0x191/0x2e0
+   btrfs_search_slot+0x492/0x1160
+   btrfs_lookup_csum+0xec/0x280
+   btrfs_csum_file_blocks+0x2be/0xa60
+   add_pending_csums+0xaf/0xf0
+   btrfs_finish_ordered_io+0x74b/0xc90
+   finish_ordered_fn+0x15/0x20
+   normal_work_helper+0xf6/0x500
+   btrfs_endio_write_helper+0x12/0x20
+   process_one_work+0x302/0x770
+   worker_thread+0x81/0x6d0
+   kthread+0x180/0x1d0
+   ret_from_fork+0x35/0x40
+
+[CAUSE]
+That crafted image has missing backref for csum tree root leaf.  And
+when we try to allocate new tree block, since there is no
+EXTENT/METADATA_ITEM for csum tree root, btrfs considers it a free slot
+and uses it.
+
+The extent tree of the image looks like:
+
+  Normal image                      |       This fuzzed image
+  ----------------------------------+--------------------------------
+  BG 29360128                       | BG 29360128
+   One empty slot                   |  One empty slot
+  29364224: backref to UUID tree    | 29364224: backref to UUID tree
+   Two empty slots                  |  Two empty slots
+  29376512: backref to CSUM tree    |  One empty slot (bad type) <<<
+  29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree
+  ...                               | ...
+
+Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs tries to
+allocate a tree block, it's a valid slot for btrfs.
+
+And for finish_ordered_write, when we need to insert csum, we try to CoW
+csum tree root.
+
+By accident, empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K,
+BG_OFFSET + 12K are already used by tree block COW for other trees, the
+next empty slot is BG_OFFSET + 16K, which should be the backref for CSUM
+tree.
+
+But due to the bad type, btrfs can't recognize it and still considers it
+an empty slot, and will try to use it for csum tree CoW.
+
+Then in the following call trace, we will try to lock the new tree
+block, which turns out to be the old csum tree root which is already
+locked:
+
+btrfs_search_slot() called on csum tree root, which is at 29376512
+|- btrfs_cow_block()
+   |- btrfs_set_lock_block()
+   |  |- Now locks tree block 29376512 (old csum tree root)
+   |- __btrfs_cow_block()
+      |- btrfs_alloc_tree_block()
+         |- btrfs_reserve_extent()
+            | Now it returns tree block 29376512, which extent tree
+            | shows as an empty slot, but it's already held by csum tree
+            |- btrfs_init_new_buffer()
+               |- btrfs_tree_lock()
+                  | Triggers WARN_ON(eb->lock_owner == current->pid)
+                  |- wait_event()
+                     Wait lock owner to release the lock, but it's
+                     locked by ourself, so it will deadlock
+
+[FIX]
+This patch will do the lock_owner and current->pid check at
+btrfs_init_new_buffer().
+So above deadlock can be avoided.
+
+Since such a problem can only happen in a crafted image, we will still
+trigger kernel warning for later aborted transaction, but with a little
+more meaningful warning message.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -7835,6 +7835,20 @@ btrfs_init_new_buffer(struct btrfs_trans
+       buf = btrfs_find_create_tree_block(root, bytenr);
+       if (!buf)
+               return ERR_PTR(-ENOMEM);
++
++      /*
++       * Extra safety check in case the extent tree is corrupted and extent
++       * allocator chooses to use a tree block which is already used and
++       * locked.
++       */
++      if (buf->lock_owner == current->pid) {
++              btrfs_err_rl(root->fs_info,
++"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
++                      buf->start, btrfs_header_owner(buf), current->pid);
++              free_extent_buffer(buf);
++              return ERR_PTR(-EUCLEAN);
++      }
++
+       btrfs_set_header_generation(buf, trans->transid);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
+       btrfs_tree_lock(buf);
diff --git a/queue-4.4/btrfs-make-sure-we-create-all-new-block-groups.patch b/queue-4.4/btrfs-make-sure-we-create-all-new-block-groups.patch

new file mode 100644 (file)

index 0000000..2b44df8
--- /dev/null
+++ b/queue-4.4/btrfs-make-sure-we-create-all-new-block-groups.patch
@@ -0,0 +1,49 @@
+From 545e3366db823dc3342ca9d7fea803f829c9062f Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 28 Sep 2018 07:18:02 -0400
+Subject: btrfs: make sure we create all new block groups
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream.
+
+Allocating new chunks modifies both the extent and chunk tree, which can
+trigger new chunk allocations.  So instead of doing list_for_each_safe,
+just do while (!list_empty()) so we make sure we don't exit with other
+pending bg's still on our list.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9912,7 +9912,7 @@ error:
+ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root)
+ {
+-      struct btrfs_block_group_cache *block_group, *tmp;
++      struct btrfs_block_group_cache *block_group;
+       struct btrfs_root *extent_root = root->fs_info->extent_root;
+       struct btrfs_block_group_item item;
+       struct btrfs_key key;
+@@ -9920,7 +9920,10 @@ void btrfs_create_pending_block_groups(s
+       bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+ 
+       trans->can_flush_pending_bgs = false;
+-      list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
++      while (!list_empty(&trans->new_bgs)) {
++              block_group = list_first_entry(&trans->new_bgs,
++                                             struct btrfs_block_group_cache,
++                                             bg_list);
+               if (ret)
+                       goto next;
+ 
diff --git a/queue-4.4/btrfs-qgroup-dirty-all-qgroups-before-rescan.patch b/queue-4.4/btrfs-qgroup-dirty-all-qgroups-before-rescan.patch

new file mode 100644 (file)

index 0000000..2edf0db
--- /dev/null
+++ b/queue-4.4/btrfs-qgroup-dirty-all-qgroups-before-rescan.patch
@@ -0,0 +1,85 @@
+From 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 10 Aug 2018 10:20:26 +0800
+Subject: btrfs: qgroup: Dirty all qgroups before rescan
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream.
+
+[BUG]
+In the following case, rescan won't zero out the number of qgroup 1/0:
+
+  $ mkfs.btrfs -fq $DEV
+  $ mount $DEV /mnt
+
+  $ btrfs quota enable /mnt
+  $ btrfs qgroup create 1/0 /mnt
+  $ btrfs sub create /mnt/sub
+  $ btrfs qgroup assign 0/257 1/0 /mnt
+
+  $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
+  $ btrfs sub snap /mnt/sub /mnt/snap
+  $ btrfs quota rescan -w /mnt
+  $ btrfs qgroup show -pcre /mnt
+  qgroupid         rfer         excl     max_rfer     max_excl parent  child
+  --------         ----         ----     --------     -------- ------  -----
+  0/5          16.00KiB     16.00KiB         none         none ---     ---
+  0/257      1016.00KiB     16.00KiB         none         none 1/0     ---
+  0/258      1016.00KiB     16.00KiB         none         none ---     ---
+  1/0        1016.00KiB     16.00KiB         none         none ---     0/257
+
+So far so good, but:
+
+  $ btrfs qgroup remove 0/257 1/0 /mnt
+  WARNING: quotas may be inconsistent, rescan needed
+  $ btrfs quota rescan -w /mnt
+  $ btrfs qgroup show -pcre  /mnt
+  qgroupid         rfer         excl     max_rfer     max_excl parent  child
+  --------         ----         ----     --------     -------- ------  -----
+  0/5          16.00KiB     16.00KiB         none         none ---     ---
+  0/257      1016.00KiB     16.00KiB         none         none ---     ---
+  0/258      1016.00KiB     16.00KiB         none         none ---     ---
+  1/0        1016.00KiB     16.00KiB         none         none ---     ---
+            ^^^^^^^^^^     ^^^^^^^^ not cleared
+
+[CAUSE]
+Before rescan we call qgroup_rescan_zero_tracking() to zero out all
+qgroups' accounting numbers.
+
+However we don't mark all qgroups dirty, but rely on rescan to do so.
+
+If we have any high level qgroup without children, it won't be marked
+dirty during rescan, since we cannot reach that qgroup.
+
+This will cause QGROUP_INFO items of childless qgroups to never get updated
+in the quota tree, thus their numbers will stay the same in "btrfs
+qgroup show" output.
+
+[FIX]
+Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if
+we have childless qgroups, their QGROUP_INFO items will still get
+updated during rescan.
+
+Reported-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Tested-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2446,6 +2446,7 @@ qgroup_rescan_zero_tracking(struct btrfs
+               qgroup->rfer_cmpr = 0;
+               qgroup->excl = 0;
+               qgroup->excl_cmpr = 0;
++              qgroup_dirty(fs_info, qgroup);
+       }
+       spin_unlock(&fs_info->qgroup_lock);
+ }
diff --git a/queue-4.4/btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch b/queue-4.4/btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch

new file mode 100644 (file)

index 0000000..d14dc43
--- /dev/null
+++ b/queue-4.4/btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch
@@ -0,0 +1,34 @@
+From 553cceb49681d60975d00892877d4c871bf220f9 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 28 Sep 2018 07:18:00 -0400
+Subject: btrfs: reset max_extent_size on clear in a bitmap
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 553cceb49681d60975d00892877d4c871bf220f9 upstream.
+
+We need to clear the max_extent_size when we clear bits from a bitmap
+since it could have been from the range that contains the
+max_extent_size.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/free-space-cache.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -1699,6 +1699,8 @@ static inline void __bitmap_clear_bits(s
+       bitmap_clear(info->bitmap, start, count);
+ 
+       info->bytes -= bytes;
++      if (info->max_extent_size > ctl->unit)
++              info->max_extent_size = 0;
+ }
+ 
+ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
diff --git a/queue-4.4/btrfs-wait-on-caching-when-putting-the-bg-cache.patch b/queue-4.4/btrfs-wait-on-caching-when-putting-the-bg-cache.patch

new file mode 100644 (file)

index 0000000..f41dc68
--- /dev/null
+++ b/queue-4.4/btrfs-wait-on-caching-when-putting-the-bg-cache.patch
@@ -0,0 +1,85 @@
+From 3aa7c7a31c26321696b92841d5103461c6f3f517 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 12 Sep 2018 10:45:45 -0400
+Subject: btrfs: wait on caching when putting the bg cache
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream.
+
+While testing my backport I noticed there was a panic if I ran
+generic/416 generic/417 generic/418 all in a row.  This just happened to
+uncover a race where we had outstanding IO after we destroy all of our
+workqueues, and then we'd go to queue the endio work on those free'd
+workqueues.
+
+This is because we aren't waiting for the caching threads to be done
+before freeing everything up, so to fix this make sure we wait on any
+outstanding caching that's being done before we free up the block group,
+so we're sure to be done with all IO by the time we get to
+btrfs_stop_all_workers().  This fixes the panic I was seeing
+consistently in testing.
+
+------------[ cut here ]------------
+kernel BUG at fs/btrfs/volumes.c:6112!
+SMP PTI
+Modules linked in:
+CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Workqueue: btrfs-cache btrfs_cache_helper
+RIP: 0010:btrfs_map_bio+0x346/0x370
+RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000
+RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000
+RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000
+R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000
+R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00
+FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ btree_submit_bio_hook+0x8a/0xd0
+ submit_one_bio+0x5d/0x80
+ read_extent_buffer_pages+0x18a/0x320
+ btree_read_extent_buffer_pages+0xbc/0x200
+ ? alloc_extent_buffer+0x359/0x3e0
+ read_tree_block+0x3d/0x60
+ read_block_for_search.isra.30+0x1a5/0x360
+ btrfs_search_slot+0x41b/0xa10
+ btrfs_next_old_leaf+0x212/0x470
+ caching_thread+0x323/0x490
+ normal_work_helper+0xc5/0x310
+ process_one_work+0x141/0x340
+ worker_thread+0x44/0x3c0
+ kthread+0xf8/0x130
+ ? process_one_work+0x340/0x340
+ ? kthread_bind+0x10/0x10
+ ret_from_fork+0x35/0x40
+RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0
+---[ end trace 827eb13e50846033 ]---
+Kernel panic - not syncing: Fatal exception
+Kernel Offset: disabled
+---[ end Kernel panic - not syncing: Fatal exception
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9521,6 +9521,7 @@ void btrfs_put_block_group_cache(struct
+ 
+               block_group = btrfs_lookup_first_block_group(info, last);
+               while (block_group) {
++                      wait_block_group_cache_done(block_group);
+                       spin_lock(&block_group->lock);
+                       if (block_group->iref)
+                               break;
diff --git a/queue-4.4/series b/queue-4.4/series

index 46698640201bac47886c394f205167ec04134ba0..bcdf8834a0fd6cbeb98f12ab4112df2e0220a39c 100644 (file)
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -85,3 +85,12 @@ ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch
  cramfs-fix-abad-comparison-when-wrap-arounds-occur.patch
  arm64-dts-stratix10-correct-system-manager-register-size.patch
  soc-tegra-pmc-fix-child-node-lookup.patch
+btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch
+btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch
+btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch
+btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch
+btrfs-wait-on-caching-when-putting-the-bg-cache.patch
+btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch
+btrfs-make-sure-we-create-all-new-block-groups.patch
+btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch
+btrfs-qgroup-dirty-all-qgroups-before-rescan.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 11 Nov 2018 20:26:38 +0000 (12:26 -0800)
queue-4.4/btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-make-sure-we-create-all-new-block-groups.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-qgroup-dirty-all-qgroups-before-rescan.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/btrfs-wait-on-caching-when-putting-the-bg-cache.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/series		patch \| blob \| blame \| history