4.8-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2017 09:40:51 +0000 (10:40 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2017 09:40:51 +0000 (10:40 +0100)
diff --git a/queue-4.8/btrfs-bail-out-if-block-group-has-different-mixed-flag.patch b/queue-4.8/btrfs-bail-out-if-block-group-has-different-mixed-flag.patch

new file mode 100644 (file)

index 0000000..5930b19
--- /dev/null
+++ b/queue-4.8/btrfs-bail-out-if-block-group-has-different-mixed-flag.patch
@@ -0,0 +1,62 @@
+From 49303381f19ab16a371a061b67e783d3f570d56e Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Thu, 25 Aug 2016 18:08:27 -0700
+Subject: Btrfs: bail out if block group has different mixed flag
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 49303381f19ab16a371a061b67e783d3f570d56e upstream.
+
+Currently we allow an inconsistency in the mixed flag
+ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA).
+
+We'd get ENOSPC if a block group has the mixed flag but the filesystem
+doesn't.  In that case we have one space_info with the mixed flag and
+another space_info with only BTRFS_BLOCK_GROUP_METADATA;
+global_block_rsv.space_info points to the latter, but all bytes from
+the block group contribute to the mixed space_info, so every
+allocation fails with ENOSPC.
+
+This adds a check for the above case.
+
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+[ updated message ]
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10127,6 +10127,11 @@ int btrfs_read_block_groups(struct btrfs
+       struct extent_buffer *leaf;
+       int need_clear = 0;
+       u64 cache_gen;
++      u64 feature;
++      int mixed;
++
++      feature = btrfs_super_incompat_flags(info->super_copy);
++      mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
+ 
+       root = info->extent_root;
+       key.objectid = 0;
+@@ -10180,6 +10185,15 @@ int btrfs_read_block_groups(struct btrfs
+                                  btrfs_item_ptr_offset(leaf, path->slots[0]),
+                                  sizeof(cache->item));
+               cache->flags = btrfs_block_group_flags(&cache->item);
++              if (!mixed &&
++                  ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
++                  (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
++                      btrfs_err(info,
++"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
++                                cache->key.objectid);
++                      ret = -EINVAL;
++                      goto error;
++              }
+ 
+               key.objectid = found_key.objectid + found_key.offset;
+               btrfs_release_path(path);
diff --git a/queue-4.8/btrfs-clean-the-old-superblocks-before-freeing-the-device.patch b/queue-4.8/btrfs-clean-the-old-superblocks-before-freeing-the-device.patch

new file mode 100644 (file)

index 0000000..3270603
--- /dev/null
+++ b/queue-4.8/btrfs-clean-the-old-superblocks-before-freeing-the-device.patch
@@ -0,0 +1,101 @@
+From cea67ab92d3d4da9f2b4141d87cb8664757daca0 Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Tue, 20 Sep 2016 08:50:21 -0400
+Subject: btrfs: clean the old superblocks before freeing the device
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit cea67ab92d3d4da9f2b4141d87cb8664757daca0 upstream.
+
+btrfs_rm_device frees the block device but then re-opens it using
+the saved device name.  A race exists between the close and the
+re-open that allows the block size to be changed.  The result
+is getting stuck forever in the reclaim loop in __getblk_slow.
+
+This patch moves the superblock cleanup before closing the block
+device, which is also consistent with other callers.  We also don't
+need a private copy of dev_name as the whole routine operates under
+the uuid_mutex.
+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |   38 +++++++++++---------------------------
+ 1 file changed, 11 insertions(+), 27 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1846,7 +1846,6 @@ int btrfs_rm_device(struct btrfs_root *r
+       u64 num_devices;
+       int ret = 0;
+       bool clear_super = false;
+-      char *dev_name = NULL;
+ 
+       mutex_lock(&uuid_mutex);
+ 
+@@ -1882,11 +1881,6 @@ int btrfs_rm_device(struct btrfs_root *r
+               list_del_init(&device->dev_alloc_list);
+               device->fs_devices->rw_devices--;
+               unlock_chunks(root);
+-              dev_name = kstrdup(device->name->str, GFP_KERNEL);
+-              if (!dev_name) {
+-                      ret = -ENOMEM;
+-                      goto error_undo;
+-              }
+               clear_super = true;
+       }
+ 
+@@ -1936,14 +1930,21 @@ int btrfs_rm_device(struct btrfs_root *r
+               btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
+       }
+ 
+-      btrfs_close_bdev(device);
+-
+-      call_rcu(&device->rcu, free_device);
+-
+       num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
+       btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+ 
++      /*
++       * at this point, the device is zero sized and detached from
++       * the devices list.  All that's left is to zero out the old
++       * supers and free the device.
++       */
++      if (device->writeable)
++              btrfs_scratch_superblocks(device->bdev, device->name->str);
++
++      btrfs_close_bdev(device);
++      call_rcu(&device->rcu, free_device);
++
+       if (cur_devices->open_devices == 0) {
+               struct btrfs_fs_devices *fs_devices;
+               fs_devices = root->fs_info->fs_devices;
+@@ -1962,24 +1963,7 @@ int btrfs_rm_device(struct btrfs_root *r
+       root->fs_info->num_tolerated_disk_barrier_failures =
+               btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info);
+ 
+-      /*
+-       * at this point, the device is zero sized.  We want to
+-       * remove it from the devices list and zero out the old super
+-       */
+-      if (clear_super) {
+-              struct block_device *bdev;
+-
+-              bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL,
+-                                              root->fs_info->bdev_holder);
+-              if (!IS_ERR(bdev)) {
+-                      btrfs_scratch_superblocks(bdev, dev_name);
+-                      blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
+-              }
+-      }
+-
+ out:
+-      kfree(dev_name);
+-
+       mutex_unlock(&uuid_mutex);
+       return ret;
+ 
diff --git a/queue-4.8/btrfs-don-t-bug-during-drop-snapshot.patch b/queue-4.8/btrfs-don-t-bug-during-drop-snapshot.patch

new file mode 100644 (file)

index 0000000..bcce7be
--- /dev/null
+++ b/queue-4.8/btrfs-don-t-bug-during-drop-snapshot.patch
@@ -0,0 +1,98 @@
+From 4867268c57ff709a7b6b86ae6f6537d846d1443a Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 23 Sep 2016 13:23:28 +0200
+Subject: Btrfs: don't BUG() during drop snapshot
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 4867268c57ff709a7b6b86ae6f6537d846d1443a upstream.
+
+There are lots of things that can go wrong here, so kill all the
+BUG_ON()s: replace the logic ones with ASSERT()s and return EIO
+instead.
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+[ switched to btrfs_err, errors go to common label ]
+Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   38 +++++++++++++++++++++++++++-----------
+ 1 file changed, 27 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -8884,15 +8884,13 @@ static noinline int do_walk_down(struct
+       ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
+                                      &wc->refs[level - 1],
+                                      &wc->flags[level - 1]);
+-      if (ret < 0) {
+-              btrfs_tree_unlock(next);
+-              free_extent_buffer(next);
+-              return ret;
+-      }
++      if (ret < 0)
++              goto out_unlock;
+ 
+       if (unlikely(wc->refs[level - 1] == 0)) {
+               btrfs_err(root->fs_info, "Missing references.");
+-              BUG();
++              ret = -EIO;
++              goto out_unlock;
+       }
+       *lookup_info = 0;
+ 
+@@ -8944,7 +8942,12 @@ static noinline int do_walk_down(struct
+       }
+ 
+       level--;
+-      BUG_ON(level != btrfs_header_level(next));
++      ASSERT(level == btrfs_header_level(next));
++      if (level != btrfs_header_level(next)) {
++              btrfs_err(root->fs_info, "mismatched level");
++              ret = -EIO;
++              goto out_unlock;
++      }
+       path->nodes[level] = next;
+       path->slots[level] = 0;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+@@ -8959,8 +8962,15 @@ skip:
+               if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+                       parent = path->nodes[level]->start;
+               } else {
+-                      BUG_ON(root->root_key.objectid !=
++                      ASSERT(root->root_key.objectid ==
+                              btrfs_header_owner(path->nodes[level]));
++                      if (root->root_key.objectid !=
++                          btrfs_header_owner(path->nodes[level])) {
++                              btrfs_err(root->fs_info,
++                                              "mismatched block owner");
++                              ret = -EIO;
++                              goto out_unlock;
++                      }
+                       parent = 0;
+               }
+ 
+@@ -8977,12 +8987,18 @@ skip:
+               }
+               ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+                               root->root_key.objectid, level - 1, 0);
+-              BUG_ON(ret); /* -ENOMEM */
++              if (ret)
++                      goto out_unlock;
+       }
++
++      *lookup_info = 1;
++      ret = 1;
++
++out_unlock:
+       btrfs_tree_unlock(next);
+       free_extent_buffer(next);
+-      *lookup_info = 1;
+-      return 1;
++
++      return ret;
+ }
+ 
+ /*
diff --git a/queue-4.8/btrfs-don-t-leak-reloc-root-nodes-on-error.patch b/queue-4.8/btrfs-don-t-leak-reloc-root-nodes-on-error.patch

new file mode 100644 (file)

index 0000000..a61e901
--- /dev/null
+++ b/queue-4.8/btrfs-don-t-leak-reloc-root-nodes-on-error.patch
@@ -0,0 +1,35 @@
+From 6bdf131fac2336adb1a628f992ba32384f653a55 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 2 Sep 2016 15:25:43 -0400
+Subject: Btrfs: don't leak reloc root nodes on error
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 6bdf131fac2336adb1a628f992ba32384f653a55 upstream.
+
+We don't track the reloc roots in any sort of normal way, so the only way the
+root/commit_root nodes get freed is if the relocation finishes successfully and
+the reloc root is deleted.  Fix this by freeing them in free_reloc_roots().
+Thanks,
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2366,6 +2366,10 @@ void free_reloc_roots(struct list_head *
+       while (!list_empty(list)) {
+               reloc_root = list_entry(list->next, struct btrfs_root,
+                                       root_list);
++              free_extent_buffer(reloc_root->node);
++              free_extent_buffer(reloc_root->commit_root);
++              reloc_root->node = NULL;
++              reloc_root->commit_root = NULL;
+               __del_reloc_root(reloc_root);
+       }
+ }
diff --git a/queue-4.8/btrfs-fix-a-possible-umount-deadlock.patch b/queue-4.8/btrfs-fix-a-possible-umount-deadlock.patch

new file mode 100644 (file)

index 0000000..2ae3a0b
--- /dev/null
+++ b/queue-4.8/btrfs-fix-a-possible-umount-deadlock.patch
@@ -0,0 +1,233 @@
+From 0ccd05285e7f5a8e297e1d6dfc41e7c65757d6fa Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Thu, 22 Sep 2016 12:56:13 +0800
+Subject: btrfs: fix a possible umount deadlock
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 0ccd05285e7f5a8e297e1d6dfc41e7c65757d6fa upstream.
+
+btrfs_show_devname() takes the device_list_mutex, and a call to
+blkdev_put() can lead to the VFS calling into that function. So,
+for now, call blkdev_put() outside of the device_list_mutex.
+
+[  983.284212] ======================================================
+[  983.290401] [ INFO: possible circular locking dependency detected ]
+[  983.296677] 4.8.0-rc5-ceph-00023-g1b39cec2 #1 Not tainted
+[  983.302081] -------------------------------------------------------
+[  983.308357] umount/21720 is trying to acquire lock:
+[  983.313243]  (&bdev->bd_mutex){+.+.+.}, at: [<ffffffff9128ec51>] blkdev_put+0x31/0x150
+[  983.321264]
+[  983.321264] but task is already holding lock:
+[  983.327101]  (&fs_devs->device_list_mutex){+.+...}, at: [<ffffffffc033d6f6>] __btrfs_close_devices+0x46/0x200 [btrfs]
+[  983.337839]
+[  983.337839] which lock already depends on the new lock.
+[  983.337839]
+[  983.346024]
+[  983.346024] the existing dependency chain (in reverse order) is:
+[  983.353512]
+-> #4 (&fs_devs->device_list_mutex){+.+...}:
+[  983.359096]        [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.365143]        [<ffffffff91823125>] mutex_lock_nested+0x65/0x350
+[  983.371521]        [<ffffffffc02d8116>] btrfs_show_devname+0x36/0x1f0 [btrfs]
+[  983.378710]        [<ffffffff9129523e>] show_vfsmnt+0x4e/0x150
+[  983.384593]        [<ffffffff9126ffc7>] m_show+0x17/0x20
+[  983.389957]        [<ffffffff91276405>] seq_read+0x2b5/0x3b0
+[  983.395669]        [<ffffffff9124c808>] __vfs_read+0x28/0x100
+[  983.401464]        [<ffffffff9124eb3b>] vfs_read+0xab/0x150
+[  983.407080]        [<ffffffff9124ec32>] SyS_read+0x52/0xb0
+[  983.412609]        [<ffffffff91825fc0>] entry_SYSCALL_64_fastpath+0x23/0xc1
+[  983.419617]
+-> #3 (namespace_sem){++++++}:
+[  983.424024]        [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.430074]        [<ffffffff918239e9>] down_write+0x49/0x80
+[  983.435785]        [<ffffffff91272457>] lock_mount+0x67/0x1c0
+[  983.441582]        [<ffffffff91272ab2>] do_add_mount+0x32/0xf0
+[  983.447458]        [<ffffffff9127363a>] finish_automount+0x5a/0xc0
+[  983.453682]        [<ffffffff91259513>] follow_managed+0x1b3/0x2a0
+[  983.459912]        [<ffffffff9125b750>] lookup_fast+0x300/0x350
+[  983.465875]        [<ffffffff9125d6e7>] path_openat+0x3a7/0xaa0
+[  983.471846]        [<ffffffff9125ef75>] do_filp_open+0x85/0xe0
+[  983.477731]        [<ffffffff9124c41c>] do_sys_open+0x14c/0x1f0
+[  983.483702]        [<ffffffff9124c4de>] SyS_open+0x1e/0x20
+[  983.489240]        [<ffffffff91825fc0>] entry_SYSCALL_64_fastpath+0x23/0xc1
+[  983.496254]
+-> #2 (&sb->s_type->i_mutex_key#3){+.+.+.}:
+[  983.501798]        [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.507855]        [<ffffffff918239e9>] down_write+0x49/0x80
+[  983.513558]        [<ffffffff91366237>] start_creating+0x87/0x100
+[  983.519703]        [<ffffffff91366647>] debugfs_create_dir+0x17/0x100
+[  983.526195]        [<ffffffff911df153>] bdi_register+0x93/0x210
+[  983.532165]        [<ffffffff911df313>] bdi_register_owner+0x43/0x70
+[  983.538570]        [<ffffffff914080fb>] device_add_disk+0x1fb/0x450
+[  983.544888]        [<ffffffff91580226>] loop_add+0x1e6/0x290
+[  983.550596]        [<ffffffff91fec358>] loop_init+0x10b/0x14f
+[  983.556394]        [<ffffffff91002207>] do_one_initcall+0xa7/0x180
+[  983.562618]        [<ffffffff91f932e0>] kernel_init_freeable+0x1cc/0x266
+[  983.569370]        [<ffffffff918174be>] kernel_init+0xe/0x100
+[  983.575166]        [<ffffffff9182620f>] ret_from_fork+0x1f/0x40
+[  983.581131]
+-> #1 (loop_index_mutex){+.+.+.}:
+[  983.585801]        [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.591858]        [<ffffffff91823125>] mutex_lock_nested+0x65/0x350
+[  983.598256]        [<ffffffff9157ed3f>] lo_open+0x1f/0x60
+[  983.603704]        [<ffffffff9128eec3>] __blkdev_get+0x123/0x400
+[  983.609757]        [<ffffffff9128f4ea>] blkdev_get+0x34a/0x350
+[  983.615639]        [<ffffffff9128f554>] blkdev_open+0x64/0x80
+[  983.621428]        [<ffffffff9124aff6>] do_dentry_open+0x1c6/0x2d0
+[  983.627651]        [<ffffffff9124c029>] vfs_open+0x69/0x80
+[  983.633181]        [<ffffffff9125db74>] path_openat+0x834/0xaa0
+[  983.639152]        [<ffffffff9125ef75>] do_filp_open+0x85/0xe0
+[  983.645035]        [<ffffffff9124c41c>] do_sys_open+0x14c/0x1f0
+[  983.650999]        [<ffffffff9124c4de>] SyS_open+0x1e/0x20
+[  983.656535]        [<ffffffff91825fc0>] entry_SYSCALL_64_fastpath+0x23/0xc1
+[  983.663541]
+-> #0 (&bdev->bd_mutex){+.+.+.}:
+[  983.668107]        [<ffffffff910def43>] __lock_acquire+0x1003/0x17b0
+[  983.674510]        [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.680561]        [<ffffffff91823125>] mutex_lock_nested+0x65/0x350
+[  983.686967]        [<ffffffff9128ec51>] blkdev_put+0x31/0x150
+[  983.692761]        [<ffffffffc033481f>] btrfs_close_bdev+0x4f/0x60 [btrfs]
+[  983.699699]        [<ffffffffc033d77b>] __btrfs_close_devices+0xcb/0x200 [btrfs]
+[  983.707178]        [<ffffffffc033d8db>] btrfs_close_devices+0x2b/0xa0 [btrfs]
+[  983.714380]        [<ffffffffc03081c5>] close_ctree+0x265/0x340 [btrfs]
+[  983.721061]        [<ffffffffc02d7959>] btrfs_put_super+0x19/0x20 [btrfs]
+[  983.727908]        [<ffffffff91250e2f>] generic_shutdown_super+0x6f/0x100
+[  983.734744]        [<ffffffff91250f56>] kill_anon_super+0x16/0x30
+[  983.740888]        [<ffffffffc02da97e>] btrfs_kill_super+0x1e/0x130 [btrfs]
+[  983.747909]        [<ffffffff91250fe9>] deactivate_locked_super+0x49/0x80
+[  983.754745]        [<ffffffff912515fd>] deactivate_super+0x5d/0x70
+[  983.760977]        [<ffffffff91270a1c>] cleanup_mnt+0x5c/0x80
+[  983.766773]        [<ffffffff91270a92>] __cleanup_mnt+0x12/0x20
+[  983.772738]        [<ffffffff910aa2fe>] task_work_run+0x7e/0xc0
+[  983.778708]        [<ffffffff91081b5a>] exit_to_usermode_loop+0x7e/0xb4
+[  983.785373]        [<ffffffff910039eb>] syscall_return_slowpath+0xbb/0xd0
+[  983.792212]        [<ffffffff9182605c>] entry_SYSCALL_64_fastpath+0xbf/0xc1
+[  983.799225]
+[  983.799225] other info that might help us debug this:
+[  983.799225]
+[  983.807291] Chain exists of:
+  &bdev->bd_mutex --> namespace_sem --> &fs_devs->device_list_mutex
+
+[  983.816521]  Possible unsafe locking scenario:
+[  983.816521]
+[  983.822489]        CPU0                    CPU1
+[  983.827043]        ----                    ----
+[  983.831599]   lock(&fs_devs->device_list_mutex);
+[  983.836289]                                lock(namespace_sem);
+[  983.842268]                                lock(&fs_devs->device_list_mutex);
+[  983.849478]   lock(&bdev->bd_mutex);
+[  983.853127]
+[  983.853127]  *** DEADLOCK ***
+[  983.853127]
+[  983.859113] 3 locks held by umount/21720:
+[  983.863145]  #0:  (&type->s_umount_key#35){++++..}, at: [<ffffffff912515f5>] deactivate_super+0x55/0x70
+[  983.872713]  #1:  (uuid_mutex){+.+.+.}, at: [<ffffffffc033d8d3>] btrfs_close_devices+0x23/0xa0 [btrfs]
+[  983.882206]  #2:  (&fs_devs->device_list_mutex){+.+...}, at: [<ffffffffc033d6f6>] __btrfs_close_devices+0x46/0x200 [btrfs]
+[  983.893422]
+[  983.893422] stack backtrace:
+[  983.897824] CPU: 6 PID: 21720 Comm: umount Not tainted 4.8.0-rc5-ceph-00023-g1b39cec2 #1
+[  983.905958] Hardware name: Supermicro SYS-5018R-WR/X10SRW-F, BIOS 1.0c 09/07/2015
+[  983.913492]  0000000000000000 ffff8c8a53c17a38 ffffffff91429521 ffffffff9260f4f0
+[  983.921018]  ffffffff92642760 ffff8c8a53c17a88 ffffffff911b2b04 0000000000000050
+[  983.928542]  ffffffff9237d620 ffff8c8a5294aee0 ffff8c8a5294aeb8 ffff8c8a5294aee0
+[  983.936072] Call Trace:
+[  983.938545]  [<ffffffff91429521>] dump_stack+0x85/0xc4
+[  983.943715]  [<ffffffff911b2b04>] print_circular_bug+0x1fb/0x20c
+[  983.949748]  [<ffffffff910def43>] __lock_acquire+0x1003/0x17b0
+[  983.955613]  [<ffffffff910dfd0c>] lock_acquire+0x1bc/0x1f0
+[  983.961123]  [<ffffffff9128ec51>] ? blkdev_put+0x31/0x150
+[  983.966550]  [<ffffffff91823125>] mutex_lock_nested+0x65/0x350
+[  983.972407]  [<ffffffff9128ec51>] ? blkdev_put+0x31/0x150
+[  983.977832]  [<ffffffff9128ec51>] blkdev_put+0x31/0x150
+[  983.983101]  [<ffffffffc033481f>] btrfs_close_bdev+0x4f/0x60 [btrfs]
+[  983.989500]  [<ffffffffc033d77b>] __btrfs_close_devices+0xcb/0x200 [btrfs]
+[  983.996415]  [<ffffffffc033d8db>] btrfs_close_devices+0x2b/0xa0 [btrfs]
+[  984.003068]  [<ffffffffc03081c5>] close_ctree+0x265/0x340 [btrfs]
+[  984.009189]  [<ffffffff9126cc5e>] ? evict_inodes+0x15e/0x170
+[  984.014881]  [<ffffffffc02d7959>] btrfs_put_super+0x19/0x20 [btrfs]
+[  984.021176]  [<ffffffff91250e2f>] generic_shutdown_super+0x6f/0x100
+[  984.027476]  [<ffffffff91250f56>] kill_anon_super+0x16/0x30
+[  984.033082]  [<ffffffffc02da97e>] btrfs_kill_super+0x1e/0x130 [btrfs]
+[  984.039548]  [<ffffffff91250fe9>] deactivate_locked_super+0x49/0x80
+[  984.045839]  [<ffffffff912515fd>] deactivate_super+0x5d/0x70
+[  984.051525]  [<ffffffff91270a1c>] cleanup_mnt+0x5c/0x80
+[  984.056774]  [<ffffffff91270a92>] __cleanup_mnt+0x12/0x20
+[  984.062201]  [<ffffffff910aa2fe>] task_work_run+0x7e/0xc0
+[  984.067625]  [<ffffffff91081b5a>] exit_to_usermode_loop+0x7e/0xb4
+[  984.073747]  [<ffffffff910039eb>] syscall_return_slowpath+0xbb/0xd0
+[  984.080038]  [<ffffffff9182605c>] entry_SYSCALL_64_fastpath+0xbf/0xc1
+
+Reported-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |   26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -859,7 +859,7 @@ static void btrfs_close_bdev(struct btrf
+               blkdev_put(device->bdev, device->mode);
+ }
+ 
+-static void btrfs_close_one_device(struct btrfs_device *device)
++static void btrfs_prepare_close_one_device(struct btrfs_device *device)
+ {
+       struct btrfs_fs_devices *fs_devices = device->fs_devices;
+       struct btrfs_device *new_device;
+@@ -877,8 +877,6 @@ static void btrfs_close_one_device(struc
+       if (device->missing)
+               fs_devices->missing_devices--;
+ 
+-      btrfs_close_bdev(device);
+-
+       new_device = btrfs_alloc_device(NULL, &device->devid,
+                                       device->uuid);
+       BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
+@@ -892,23 +890,39 @@ static void btrfs_close_one_device(struc
+ 
+       list_replace_rcu(&device->dev_list, &new_device->dev_list);
+       new_device->fs_devices = device->fs_devices;
+-
+-      call_rcu(&device->rcu, free_device);
+ }
+ 
+ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+ {
+       struct btrfs_device *device, *tmp;
++      struct list_head pending_put;
++
++      INIT_LIST_HEAD(&pending_put);
+ 
+       if (--fs_devices->opened > 0)
+               return 0;
+ 
+       mutex_lock(&fs_devices->device_list_mutex);
+       list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
+-              btrfs_close_one_device(device);
++              btrfs_prepare_close_one_device(device);
++              list_add(&device->dev_list, &pending_put);
+       }
+       mutex_unlock(&fs_devices->device_list_mutex);
+ 
++      /*
++       * btrfs_show_devname() is using the device_list_mutex,
++       * sometimes call to blkdev_put() leads vfs calling
++       * into this func. So do put outside of device_list_mutex,
++       * as of now.
++       */
++      while (!list_empty(&pending_put)) {
++              device = list_first_entry(&pending_put,
++                              struct btrfs_device, dev_list);
++              list_del(&device->dev_list);
++              btrfs_close_bdev(device);
++              call_rcu(&device->rcu, free_device);
++      }
++
+       WARN_ON(fs_devices->open_devices);
+       WARN_ON(fs_devices->rw_devices);
+       fs_devices->opened = 0;
diff --git a/queue-4.8/btrfs-fix-incremental-send-failure-caused-by-balance.patch b/queue-4.8/btrfs-fix-incremental-send-failure-caused-by-balance.patch

new file mode 100644 (file)

index 0000000..b62f920
--- /dev/null
+++ b/queue-4.8/btrfs-fix-incremental-send-failure-caused-by-balance.patch
@@ -0,0 +1,199 @@
+From d5e84fd8d0634d056248b67463b42f6c85896a19 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 19 Sep 2016 10:57:40 +0100
+Subject: Btrfs: fix incremental send failure caused by balance
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit d5e84fd8d0634d056248b67463b42f6c85896a19 upstream.
+
+Commit 951555856b88 ("Btrfs: send, don't bug on inconsistent snapshots")
+removed some BUG_ON() statements (replacing them with returning errors
+to user space and logging error messages) when a snapshot is in an
+inconsistent state due to failures to update a delayed inode item (ENOMEM
+or ENOSPC) after adding/updating/deleting references, xattrs or file
+extent items.
+
+However there is a case, when no errors happen, where a file extent item
+can be modified without having the corresponding inode item updated. This
+case happens during balance under very specific timings, when relocation
+is in the stage where it updates data pointers and a leaf that contains
+file extent items is COWed. When that happens file extent items get their
+disk_bytenr field updated to a new value that reflects the post relocation
+logical address of the extent, without updating their respective inode
+items (as there is nothing that needs to be updated on them). This is
+performed at relocation.c:replace_file_extents() through
+relocation.c:btrfs_reloc_cow_block().
+
+So make an incremental send deal with this case and don't do any processing
+for a file extent item that got its disk_bytenr field updated by relocation,
+since the extent's data is the same as the one pointed by the file extent
+item in the parent snapshot.
+
+After the recent commit mentioned above this case resulted in EIO errors
+returned to user space (and an error message logged to dmesg/syslog) when
+doing an incremental send, while before it, it resulted in hitting a
+BUG_ON leading to the following trace:
+
+[  952.206705] ------------[ cut here ]------------
+[  952.206714] kernel BUG at ../fs/btrfs/send.c:5653!
+[  952.206719] Internal error: Oops - BUG: 0 [#1] SMP
+[  952.209854] Modules linked in: st dm_mod nls_utf8 isofs fuse nf_log_ipv6 xt_pkttype xt_physdev br_netfilter nf_log_ipv4 nf_log_common xt_LOG xt_limit ebtable_filter ebtables af_packet bridge stp llc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw ipt_REJECT iptable_raw xt_CT iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_defrag_ipv4 ip_tables xt_conntrack nf_conntrack ip6table_filter ip6_tables x_tables xfs libcrc32c nls_iso8859_1 nls_cp437 vfat fat joydev aes_ce_blk ablk_helper cryptd snd_intel8x0 aes_ce_cipher snd_ac97_codec ac97_bus snd_pcm ghash_ce sha2_ce sha1_ce snd_timer snd virtio_net soundcore btrfs xor sr_mod cdrom hid_generic usbhid raid6_pq virtio_blk virtio_scsi bochs_drm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_mmio xhci_pci xhci_hcd usbcore usb_common virtio_pci virtio_ring virtio drm sg efivarfs
+[  952.228333] Supported: Yes
+[  952.228908] CPU: 0 PID: 12779 Comm: snapperd Not tainted 4.4.14-50-default #1
+[  952.230329] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+[  952.231683] task: ffff800058e94100 ti: ffff8000d866c000 task.ti: ffff8000d866c000
+[  952.233279] PC is at changed_cb+0x9f4/0xa48 [btrfs]
+[  952.234375] LR is at changed_cb+0x58/0xa48 [btrfs]
+[  952.236552] pc : [<ffff7ffffc39de7c>] lr : [<ffff7ffffc39d4e0>] pstate: 80000145
+[  952.238049] sp : ffff8000d866fa20
+[  952.238732] x29: ffff8000d866fa20 x28: 0000000000000019
+[  952.239840] x27: 00000000000028d5 x26: 00000000000024a2
+[  952.241008] x25: 0000000000000002 x24: ffff8000e66e92f0
+[  952.242131] x23: ffff8000b8c76800 x22: ffff800092879140
+[  952.243238] x21: 0000000000000002 x20: ffff8000d866fb78
+[  952.244348] x19: ffff8000b8f8c200 x18: 0000000000002710
+[  952.245607] x17: 0000ffff90d42480 x16: ffff800000237dc0
+[  952.246719] x15: 0000ffff90de7510 x14: ab000c000a2faf08
+[  952.247835] x13: 0000000000577c2b x12: ab000c000b696665
+[  952.248981] x11: 2e65726f632f6966 x10: 652d34366d72612f
+[  952.250101] x9 : 32627572672f746f x8 : ab000c00092f1671
+[  952.251352] x7 : 8000000000577c2b x6 : ffff800053eadf45
+[  952.252468] x5 : 0000000000000000 x4 : ffff80005e169494
+[  952.253582] x3 : 0000000000000004 x2 : ffff8000d866fb78
+[  952.254695] x1 : 000000000003e2a3 x0 : 000000000003e2a4
+[  952.255803]
+[  952.256150] Process snapperd (pid: 12779, stack limit = 0xffff8000d866c020)
+[  952.257516] Stack: (0xffff8000d866fa20 to 0xffff8000d8670000)
+[  952.258654] fa20: ffff8000d866fae0 ffff7ffffc308fc0 ffff800092879140 ffff8000e66e92f0
+[  952.260219] fa40: 0000000000000035 ffff800055de6000 ffff8000b8c76800 ffff8000d866fb78
+[  952.261745] fa60: 0000000000000002 00000000000024a2 00000000000028d5 0000000000000019
+[  952.263269] fa80: ffff8000d866fae0 ffff7ffffc3090f0 ffff8000d866fae0 ffff7ffffc309128
+[  952.264797] faa0: ffff800092879140 ffff8000e66e92f0 0000000000000035 ffff800055de6000
+[  952.268261] fac0: ffff8000b8c76800 ffff8000d866fb78 0000000000000002 0000000000001000
+[  952.269822] fae0: ffff8000d866fbc0 ffff7ffffc39ecfc ffff8000b8f8c200 ffff8000b8f8c368
+[  952.271368] fb00: ffff8000b8f8c378 ffff800055de6000 0000000000000001 ffff8000ecb17500
+[  952.272893] fb20: ffff8000b8c76800 ffff800092879140 ffff800062b6d000 ffff80007a9e2470
+[  952.274420] fb40: ffff8000b8f8c208 0000000005784000 ffff8000580a8000 ffff8000b8f8c200
+[  952.276088] fb60: ffff7ffffc39d488 00000002b8f8c368 0000000000000000 000000000003e2a4
+[  952.280275] fb80: 000000000000006c ffff7ffffc39ec00 000000000003e2a4 000000000000006c
+[  952.283219] fba0: ffff8000b8f8c300 0000000000000100 0000000000000001 ffff8000ecb17500
+[  952.286166] fbc0: ffff8000d866fcd0 ffff7ffffc3643c0 ffff8000f8842700 0000ffff8ffe9278
+[  952.289136] fbe0: 0000000040489426 ffff800055de6000 0000ffff8ffe9278 0000000040489426
+[  952.292083] fc00: 000000000000011d 000000000000001d ffff80007a9e4598 ffff80007a9e43e8
+[  952.294959] fc20: ffff8000b8c7693f 0000000000003b24 0000000000000019 ffff8000b8f8c218
+[  952.301161] fc40: 00000001d866fc70 ffff8000b8c76800 0000000000000128 ffffffffffffff84
+[  952.305749] fc60: ffff800058e941ff 0000000000003a58 ffff8000d866fcb0 ffff8000000f7390
+[  952.308875] fc80: 000000000000012a 0000000000010290 ffff8000d866fc00 000000000000007b
+[  952.311915] fca0: 0000000000010290 ffff800046c1b100 74732d7366727462 000001006d616572
+[  952.314937] fcc0: ffff8000fffc4100 cb88537fdc8ba60e ffff8000d866fe10 ffff8000002499e8
+[  952.318008] fce0: 0000000040489426 ffff8000f8842700 0000ffff8ffe9278 ffff80007a9e4598
+[  952.321321] fd00: 0000ffff8ffe9278 0000000040489426 000000000000011d 000000000000001d
+[  952.324280] fd20: ffff80000072c000 ffff8000d866c000 ffff8000d866fda0 ffff8000000e997c
+[  952.327156] fd40: ffff8000fffc4180 00000000000031ed ffff8000fffc4180 ffff800046c1b7d4
+[  952.329895] fd60: 0000000000000140 0000ffff907ea170 000000000000011d 00000000000000dc
+[  952.334641] fd80: ffff80000072c000 ffff8000d866c000 0000000000000000 0000000000000002
+[  952.338002] fda0: ffff8000d866fdd0 ffff8000000ebacc ffff800046c1b080 ffff800046c1b7d4
+[  952.340724] fdc0: ffff8000d866fdf0 ffff8000000db67c 0000000000000040 ffff800000e69198
+[  952.343415] fde0: 0000ffff8ffea790 00000000000031ed ffff8000d866fe20 ffff800000254000
+[  952.346101] fe00: 000000000000001d 0000000000000004 ffff8000d866fe90 ffff800000249d3c
+[  952.348980] fe20: ffff8000f8842700 0000000000000000 ffff8000f8842701 0000000000000008
+[  952.351696] fe40: ffff8000d866fe70 0000000000000008 ffff8000d866fe90 ffff800000249cf8
+[  952.354387] fe60: ffff8000f8842700 0000ffff8ffe9170 ffff8000f8842701 0000000000000008
+[  952.357083] fe80: 0000ffff8ffe9278 ffff80008ff85500 0000ffff8ffe90c0 ffff800000085c84
+[  952.359800] fea0: 0000000000000000 0000ffff8ffe9170 ffffffffffffffff 0000ffff90d473bc
+[  952.365351] fec0: 0000000000000000 0000000000000015 0000000000000008 0000000040489426
+[  952.369550] fee0: 0000ffff8ffe9278 0000ffff907ea790 0000ffff907ea170 0000ffff907ea790
+[  952.372416] ff00: 0000ffff907ea170 0000000000000000 000000000000001d 0000000000000004
+[  952.375223] ff20: 0000ffff90a32220 00000000003d0f00 0000ffff907ea0a0 0000ffff8ffe8f30
+[  952.378099] ff40: 0000ffff9100f554 0000ffff91147000 0000ffff91117bc0 0000ffff90d473b0
+[  952.381115] ff60: 0000ffff9100f620 0000ffff880069b0 0000ffff8ffe9170 0000ffff8ffe91a0
+[  952.384003] ff80: 0000ffff8ffe9160 0000ffff8ffe9140 0000ffff88006990 0000ffff8ffe9278
+[  952.386860] ffa0: 0000ffff88008a60 0000ffff8ffe9480 0000ffff88014ca0 0000ffff8ffe90c0
+[  952.389654] ffc0: 0000ffff910be8e8 0000ffff8ffe90c0 0000ffff90d473bc 0000000000000000
+[  952.410986] ffe0: 0000000000000008 000000000000001d 6e2079747265706f 72616d223d656d61
+[  952.415497] Call trace:
+[  952.417403] [<ffff7ffffc39de7c>] changed_cb+0x9f4/0xa48 [btrfs]
+[  952.420023] [<ffff7ffffc308fc0>] btrfs_compare_trees+0x500/0x6b0 [btrfs]
+[  952.422759] [<ffff7ffffc39ecfc>] btrfs_ioctl_send+0xb4c/0xe10 [btrfs]
+[  952.425601] [<ffff7ffffc3643c0>] btrfs_ioctl+0x374/0x29a4 [btrfs]
+[  952.428031] [<ffff8000002499e8>] do_vfs_ioctl+0x33c/0x600
+[  952.430360] [<ffff800000249d3c>] SyS_ioctl+0x90/0xa4
+[  952.432552] [<ffff800000085c84>] el0_svc_naked+0x38/0x3c
+[  952.434803] Code: 2a1503e0 17fffdac b9404282 17ffff28 (d4210000)
+[  952.437457] ---[ end trace 9afd7090c466cf15 ]---
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/send.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 58 insertions(+)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -5802,6 +5802,64 @@ static int changed_extent(struct send_ct
+       int ret = 0;
+ 
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
++
++              if (result == BTRFS_COMPARE_TREE_CHANGED) {
++                      struct extent_buffer *leaf_l;
++                      struct extent_buffer *leaf_r;
++                      struct btrfs_file_extent_item *ei_l;
++                      struct btrfs_file_extent_item *ei_r;
++
++                      leaf_l = sctx->left_path->nodes[0];
++                      leaf_r = sctx->right_path->nodes[0];
++                      ei_l = btrfs_item_ptr(leaf_l,
++                                            sctx->left_path->slots[0],
++                                            struct btrfs_file_extent_item);
++                      ei_r = btrfs_item_ptr(leaf_r,
++                                            sctx->right_path->slots[0],
++                                            struct btrfs_file_extent_item);
++
++                      /*
++                       * We may have found an extent item that has changed
++                       * only its disk_bytenr field and the corresponding
++                       * inode item was not updated. This case happens due to
++                       * very specific timings during relocation when a leaf
++                       * that contains file extent items is COWed while
++                       * relocation is ongoing and its in the stage where it
++                       * updates data pointers. So when this happens we can
++                       * safely ignore it since we know it's the same extent,
++                       * but just at different logical and physical locations
++                       * (when an extent is fully replaced with a new one, we
++                       * know the generation number must have changed too,
++                       * since snapshot creation implies committing the current
++                       * transaction, and the inode item must have been updated
++                       * as well).
++                       * This replacement of the disk_bytenr happens at
++                       * relocation.c:replace_file_extents() through
++                       * relocation.c:btrfs_reloc_cow_block().
++                       */
++                      if (btrfs_file_extent_generation(leaf_l, ei_l) ==
++                          btrfs_file_extent_generation(leaf_r, ei_r) &&
++                          btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
++                          btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
++                          btrfs_file_extent_compression(leaf_l, ei_l) ==
++                          btrfs_file_extent_compression(leaf_r, ei_r) &&
++                          btrfs_file_extent_encryption(leaf_l, ei_l) ==
++                          btrfs_file_extent_encryption(leaf_r, ei_r) &&
++                          btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
++                          btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
++                          btrfs_file_extent_type(leaf_l, ei_l) ==
++                          btrfs_file_extent_type(leaf_r, ei_r) &&
++                          btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
++                          btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
++                          btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
++                          btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
++                          btrfs_file_extent_offset(leaf_l, ei_l) ==
++                          btrfs_file_extent_offset(leaf_r, ei_r) &&
++                          btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
++                          btrfs_file_extent_num_bytes(leaf_r, ei_r))
++                              return 0;
++              }
++
+               inconsistent_snapshot_error(sctx, result, "extent");
+               return -EIO;
+       }
diff --git a/queue-4.8/btrfs-fix-memory-leak-in-do_walk_down.patch b/queue-4.8/btrfs-fix-memory-leak-in-do_walk_down.patch

new file mode 100644 (file)

index 0000000..47aa5dc
--- /dev/null
+++ b/queue-4.8/btrfs-fix-memory-leak-in-do_walk_down.patch
@@ -0,0 +1,30 @@
+From a958eab0ed7fdc1b977bc25d3af6efedaa945488 Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Tue, 13 Sep 2016 19:02:27 -0700
+Subject: Btrfs: fix memory leak in do_walk_down
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit a958eab0ed7fdc1b977bc25d3af6efedaa945488 upstream.
+
+The extent buffer 'next' needs to be free'd conditionally.
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -8886,6 +8886,7 @@ static noinline int do_walk_down(struct
+                                      &wc->flags[level - 1]);
+       if (ret < 0) {
+               btrfs_tree_unlock(next);
++              free_extent_buffer(next);
+               return ret;
+       }
+ 
diff --git a/queue-4.8/btrfs-fix-memory-leak-in-reading-btree-blocks.patch b/queue-4.8/btrfs-fix-memory-leak-in-reading-btree-blocks.patch

new file mode 100644 (file)

index 0000000..66955b5
--- /dev/null
+++ b/queue-4.8/btrfs-fix-memory-leak-in-reading-btree-blocks.patch
@@ -0,0 +1,79 @@
+From 2571e739677f1e4c0c63f5ed49adcc0857923625 Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Wed, 3 Aug 2016 12:33:01 -0700
+Subject: Btrfs: fix memory leak in reading btree blocks
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 2571e739677f1e4c0c63f5ed49adcc0857923625 upstream.
+
+So we can read a btree block via readahead or intentional read,
+and we can end up with a memory leak when something happens as
+follows,
+1) readahead starts to read block A but does not wait for read
+   completion,
+2) btree_readpage_end_io_hook finds that block A is corrupted,
+   and it needs to clear all block A's pages' uptodate bit.
+3) meanwhile an intentional read kicks in and checks block A's
+   pages' uptodate to decide which page needs to be read.
+4) when some pages have the uptodate bit during 3)'s check so
+   3) doesn't count them for eb->io_pages, but they are later
+   cleared by 2) so we have to readpage on the page, we get
+   the wrong eb->io_pages which results in a memory leak of
+   this block.
+
+This fixes the problem by first locking all of the pages and
+then checking the pages' uptodate bits.
+
+   t1(readahead)                              t2(readahead endio)                                       t3(the following read)
+read_extent_buffer_pages                    end_bio_extent_readpage
+  for pg in eb:                                for page 0,1,2 in eb:
+      if pg is uptodate:                           btree_readpage_end_io_hook(pg)
+          num_reads++                              if uptodate:
+  eb->io_pages = num_reads                             SetPageUptodate(pg)              _______________
+  for pg in eb:                                for page 3 in eb:                                     read_extent_buffer_pages
+       if pg is NOT uptodate:                      btree_readpage_end_io_hook(pg)                       for pg in eb:
+           __extent_read_full_page(pg)                 sanity check reports something wrong                 if pg is uptodate:
+                                                       clear_extent_buffer_uptodate(eb)                         num_reads++
+                                                           for pg in eb:                                eb->io_pages = num_reads
+                                                               ClearPageUptodate(page)  _______________
+                                                                                                        for pg in eb:
+                                                                                                            if pg is NOT uptodate:
+                                                                                                                __extent_read_full_page(pg)
+
+So t3's eb->io_pages is not consistent with the number of pages it's reading,
+and during endio(), atomic_dec_and_test(&eb->io_pages) will get a negative
+number so that we're not able to free the eb.
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5209,11 +5209,20 @@ int read_extent_buffer_pages(struct exte
+                       lock_page(page);
+               }
+               locked_pages++;
++      }
++      /*
++       * We need to firstly lock all pages to make sure that
++       * the uptodate bit of our pages won't be affected by
++       * clear_extent_buffer_uptodate().
++       */
++      for (i = start_i; i < num_pages; i++) {
++              page = eb->pages[i];
+               if (!PageUptodate(page)) {
+                       num_reads++;
+                       all_uptodate = 0;
+               }
+       }
++
+       if (all_uptodate) {
+               if (start_i == 0)
+                       set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
diff --git a/queue-4.8/btrfs-make-file-clone-aware-of-fatal-signals.patch b/queue-4.8/btrfs-make-file-clone-aware-of-fatal-signals.patch

new file mode 100644 (file)

index 0000000..b068fc7
--- /dev/null
+++ b/queue-4.8/btrfs-make-file-clone-aware-of-fatal-signals.patch
@@ -0,0 +1,35 @@
+From 69ae5e4459e43e56f03d0987e865fbac2b05af2a Mon Sep 17 00:00:00 2001
+From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
+Date: Thu, 13 Oct 2016 09:23:39 +0800
+Subject: btrfs: make file clone aware of fatal signals
+
+From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
+
+commit 69ae5e4459e43e56f03d0987e865fbac2b05af2a upstream.
+
+Indeed this just makes the behavior similar to xfs when a process has
+fatal signals pending, and it'll make fstests/generic/298 happy.
+
+Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3813,6 +3813,11 @@ process_slot:
+               }
+               btrfs_release_path(path);
+               key.offset = next_key_min_offset;
++
++              if (fatal_signal_pending(current)) {
++                      ret = -EINTR;
++                      goto out;
++              }
+       }
+       ret = 0;
+ 
diff --git a/queue-4.8/btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch b/queue-4.8/btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch

new file mode 100644 (file)

index 0000000..8d5c6a2
--- /dev/null
+++ b/queue-4.8/btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch
@@ -0,0 +1,69 @@
+From 3561b9db70928f207be4570b48fc19898eeaef54 Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Wed, 14 Sep 2016 08:51:46 -0700
+Subject: Btrfs: return gracefully from balance if fs tree is corrupted
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 3561b9db70928f207be4570b48fc19898eeaef54 upstream.
+
+When relocating tree blocks, we firstly get block information from
+back references in the extent tree, we then search fs tree to try to
+find all parents of a block.
+
+However, if fs tree is corrupted, eg. if there're some missing
+items, we could come across these WARN_ONs and BUG_ONs.
+
+This makes us print some error messages and return gracefully
+from balance.
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |   23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -923,9 +923,16 @@ again:
+                       path2->slots[level]--;
+ 
+               eb = path2->nodes[level];
+-              WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
+-                      cur->bytenr);
+-
++              if (btrfs_node_blockptr(eb, path2->slots[level]) !=
++                  cur->bytenr) {
++                      btrfs_err(root->fs_info,
++      "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
++                                cur->bytenr, level - 1, root->objectid,
++                                node_key->objectid, node_key->type,
++                                node_key->offset);
++                      err = -ENOENT;
++                      goto out;
++              }
+               lower = cur;
+               need_check = true;
+               for (; level < BTRFS_MAX_LEVEL; level++) {
+@@ -2695,11 +2702,15 @@ static int do_relocation(struct btrfs_tr
+ 
+               if (!upper->eb) {
+                       ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+-                      if (ret < 0) {
+-                              err = ret;
++                      if (ret) {
++                              if (ret < 0)
++                                      err = ret;
++                              else
++                                      err = -ENOENT;
++
++                              btrfs_release_path(path);
+                               break;
+                       }
+-                      BUG_ON(ret > 0);
+ 
+                       if (!upper->eb) {
+                               upper->eb = path->nodes[upper->level];
diff --git a/queue-4.8/clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch b/queue-4.8/clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch

new file mode 100644 (file)

index 0000000..aaba36b
--- /dev/null
+++ b/queue-4.8/clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch
@@ -0,0 +1,215 @@
+From 035cd485a47dda64f25ccf8a90b11a07d0b7aa7a Mon Sep 17 00:00:00 2001
+From: Richard Watts <rrw@kynesim.co.uk>
+Date: Fri, 2 Dec 2016 23:14:38 +0200
+Subject: clk: ti: omap36xx: Work around sprz319 advisory 2.1
+
+From: Richard Watts <rrw@kynesim.co.uk>
+
+commit 035cd485a47dda64f25ccf8a90b11a07d0b7aa7a upstream.
+
+The OMAP36xx DPLL5, driving EHCI USB, can be subject to a long-term
+frequency drift. The frequency drift magnitude depends on the VCO update
+rate, which is inversely proportional to the PLL divider. The kernel
+DPLL configuration code results in a high value for the divider, leading
+to a long term drift high enough to cause USB transmission errors. In
+the worst case the USB PHY's ULPI interface can stop responding,
+breaking USB operation completely. This manifests itself on the
+Beagleboard xM by the LAN9514 reporting 'Cannot enable port 2. Maybe the
+cable is bad?' in the kernel log.
+
+Errata sprz319 advisory 2.1 documents PLL values that minimize the
+drift. Use them automatically when DPLL5 is used for USB operation,
+which we detect based on the requested clock rate. The clock framework
+will still compute the PLL parameters and resulting rate as usual, but
+the PLL M and N values will then be overridden. This can result in the
+effective clock rate being slightly different than the rate cached by
+the clock framework, but won't cause any adverse effect to USB
+operation.
+
+Signed-off-by: Richard Watts <rrw@kynesim.co.uk>
+[Upported from v3.2 to v4.9]
+Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Tested-by: Ladislav Michl <ladis@linux-mips.org>
+Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
+Cc: Adam Ford <aford173@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/ti/clk-3xxx.c |   20 ++++++-------
+ drivers/clk/ti/clock.h    |    9 ++++++
+ drivers/clk/ti/dpll.c     |   19 ++++++++++++-
+ drivers/clk/ti/dpll3xxx.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 104 insertions(+), 11 deletions(-)
+
+--- a/drivers/clk/ti/clk-3xxx.c
++++ b/drivers/clk/ti/clk-3xxx.c
+@@ -22,13 +22,6 @@
+ 
+ #include "clock.h"
+ 
+-/*
+- * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks
+- * that are sourced by DPLL5, and both of these require this clock
+- * to be at 120 MHz for proper operation.
+- */
+-#define DPLL5_FREQ_FOR_USBHOST                120000000
+-
+ #define OMAP3430ES2_ST_DSS_IDLE_SHIFT                 1
+ #define OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT            5
+ #define OMAP3430ES2_ST_SSI_IDLE_SHIFT                 8
+@@ -546,14 +539,21 @@ void __init omap3_clk_lock_dpll5(void)
+       struct clk *dpll5_clk;
+       struct clk *dpll5_m2_clk;
+ 
++      /*
++       * Errata sprz319f advisory 2.1 documents a USB host clock drift issue
++       * that can be worked around using specially crafted dpll5 settings
++       * with a dpll5_m2 divider set to 8. Set the dpll5 rate to 8x the USB
++       * host clock rate, its .set_rate handler() will detect that frequency
++       * and use the errata settings.
++       */
+       dpll5_clk = clk_get(NULL, "dpll5_ck");
+-      clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST);
++      clk_set_rate(dpll5_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST * 8);
+       clk_prepare_enable(dpll5_clk);
+ 
+-      /* Program dpll5_m2_clk divider for no division */
++      /* Program dpll5_m2_clk divider */
+       dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck");
+       clk_prepare_enable(dpll5_m2_clk);
+-      clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST);
++      clk_set_rate(dpll5_m2_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST);
+ 
+       clk_disable_unprepare(dpll5_m2_clk);
+       clk_disable_unprepare(dpll5_clk);
+--- a/drivers/clk/ti/clock.h
++++ b/drivers/clk/ti/clock.h
+@@ -257,11 +257,20 @@ long omap2_dpll_round_rate(struct clk_hw
+ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw,
+                                   unsigned long parent_rate);
+ 
++/*
++ * OMAP3_DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks
++ * that are sourced by DPLL5, and both of these require this clock
++ * to be at 120 MHz for proper operation.
++ */
++#define OMAP3_DPLL5_FREQ_FOR_USBHOST  120000000
++
+ unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate);
+ int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate,
+                        unsigned long parent_rate);
+ int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate,
+                                   unsigned long parent_rate, u8 index);
++int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate,
++                       unsigned long parent_rate);
+ void omap3_clk_lock_dpll5(void);
+ 
+ unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw,
+--- a/drivers/clk/ti/dpll.c
++++ b/drivers/clk/ti/dpll.c
+@@ -114,6 +114,18 @@ static const struct clk_ops omap3_dpll_c
+       .round_rate     = &omap2_dpll_round_rate,
+ };
+ 
++static const struct clk_ops omap3_dpll5_ck_ops = {
++      .enable         = &omap3_noncore_dpll_enable,
++      .disable        = &omap3_noncore_dpll_disable,
++      .get_parent     = &omap2_init_dpll_parent,
++      .recalc_rate    = &omap3_dpll_recalc,
++      .set_rate       = &omap3_dpll5_set_rate,
++      .set_parent     = &omap3_noncore_dpll_set_parent,
++      .set_rate_and_parent    = &omap3_noncore_dpll_set_rate_and_parent,
++      .determine_rate = &omap3_noncore_dpll_determine_rate,
++      .round_rate     = &omap2_dpll_round_rate,
++};
++
+ static const struct clk_ops omap3_dpll_per_ck_ops = {
+       .enable         = &omap3_noncore_dpll_enable,
+       .disable        = &omap3_noncore_dpll_disable,
+@@ -474,7 +486,12 @@ static void __init of_ti_omap3_dpll_setu
+               .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
+       };
+ 
+-      of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd);
++      if ((of_machine_is_compatible("ti,omap3630") ||
++           of_machine_is_compatible("ti,omap36xx")) &&
++          !strcmp(node->name, "dpll5_ck"))
++              of_ti_dpll_setup(node, &omap3_dpll5_ck_ops, &dd);
++      else
++              of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd);
+ }
+ CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock",
+              of_ti_omap3_dpll_setup);
+--- a/drivers/clk/ti/dpll3xxx.c
++++ b/drivers/clk/ti/dpll3xxx.c
+@@ -838,3 +838,70 @@ int omap3_dpll4_set_rate_and_parent(stru
+       return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate,
+                                                     index);
+ }
++
++/* Apply DM3730 errata sprz319 advisory 2.1. */
++static bool omap3_dpll5_apply_errata(struct clk_hw *hw,
++                                   unsigned long parent_rate)
++{
++      struct omap3_dpll5_settings {
++              unsigned int rate, m, n;
++      };
++
++      static const struct omap3_dpll5_settings precomputed[] = {
++              /*
++               * From DM3730 errata advisory 2.1, table 35 and 36.
++               * The N value is increased by 1 compared to the tables as the
++               * errata lists register values while last_rounded_field is the
++               * real divider value.
++               */
++              { 12000000,  80,  0 + 1 },
++              { 13000000, 443,  5 + 1 },
++              { 19200000,  50,  0 + 1 },
++              { 26000000, 443, 11 + 1 },
++              { 38400000,  25,  0 + 1 }
++      };
++
++      const struct omap3_dpll5_settings *d;
++      struct clk_hw_omap *clk = to_clk_hw_omap(hw);
++      struct dpll_data *dd;
++      unsigned int i;
++
++      for (i = 0; i < ARRAY_SIZE(precomputed); ++i) {
++              if (parent_rate == precomputed[i].rate)
++                      break;
++      }
++
++      if (i == ARRAY_SIZE(precomputed))
++              return false;
++
++      d = &precomputed[i];
++
++      /* Update the M, N and rounded rate values and program the DPLL. */
++      dd = clk->dpll_data;
++      dd->last_rounded_m = d->m;
++      dd->last_rounded_n = d->n;
++      dd->last_rounded_rate = div_u64((u64)parent_rate * d->m, d->n);
++      omap3_noncore_dpll_program(clk, 0);
++
++      return true;
++}
++
++/**
++ * omap3_dpll5_set_rate - set rate for omap3 dpll5
++ * @hw: clock to change
++ * @rate: target rate for clock
++ * @parent_rate: rate of the parent clock
++ *
++ * Set rate for the DPLL5 clock. Apply the sprz319 advisory 2.1 on OMAP36xx if
++ * the DPLL is used for USB host (detected through the requested rate).
++ */
++int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate,
++                       unsigned long parent_rate)
++{
++      if (rate == OMAP3_DPLL5_FREQ_FOR_USBHOST * 8) {
++              if (omap3_dpll5_apply_errata(hw, parent_rate))
++                      return 0;
++      }
++
++      return omap3_noncore_dpll_set_rate(hw, rate, parent_rate);
++}
diff --git a/queue-4.8/series b/queue-4.8/series

index 84c2ead3e4d51d212a4fc3159087edc38341f7d0..8b3b1b18635548363e2d83e3e3688e02619271b5 100644 (file)
--- a/queue-4.8/series
+++ b/queue-4.8/series
@@ -24,3 +24,14 @@ alsa-hda-ignore-the-assoc-and-seq-when-comparing-pin-configurations.patch
  alsa-hda-fix-headset-mic-problem-on-a-dell-laptop.patch
  alsa-hda-gate-the-mic-jack-on-hp-z1-gen3-aio.patch
  alsa-hda-when-comparing-pin-configurations-ignore-assoc-in-addition-to-seq.patch
+clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch
+btrfs-fix-memory-leak-in-reading-btree-blocks.patch
+btrfs-bail-out-if-block-group-has-different-mixed-flag.patch
+btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch
+btrfs-don-t-leak-reloc-root-nodes-on-error.patch
+btrfs-clean-the-old-superblocks-before-freeing-the-device.patch
+btrfs-fix-memory-leak-in-do_walk_down.patch
+btrfs-fix-a-possible-umount-deadlock.patch
+btrfs-don-t-bug-during-drop-snapshot.patch
+btrfs-fix-incremental-send-failure-caused-by-balance.patch
+btrfs-make-file-clone-aware-of-fatal-signals.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jan 2017 09:40:51 +0000 (10:40 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jan 2017 09:40:51 +0000 (10:40 +0100)
queue-4.8/btrfs-bail-out-if-block-group-has-different-mixed-flag.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-clean-the-old-superblocks-before-freeing-the-device.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-don-t-bug-during-drop-snapshot.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-don-t-leak-reloc-root-nodes-on-error.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-fix-a-possible-umount-deadlock.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-fix-incremental-send-failure-caused-by-balance.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-fix-memory-leak-in-do_walk_down.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-fix-memory-leak-in-reading-btree-blocks.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-make-file-clone-aware-of-fatal-signals.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/series		patch \| blob \| blame \| history