From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 19 Aug 2020 11:27:21 +0000 (+0200)
Subject: 5.8-stable patches
X-Git-Tag: v4.14.194~49
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7e6044d36298e9a2306e14daafeb29f5be2ad1b2;p=thirdparty%2Fkernel%2Fstable-queue.git

5.8-stable patches

added patches:
	btrfs-add-missing-check-for-nocow-and-compression-inode-flags.patch
	btrfs-allow-use-of-global-block-reserve-for-balance-item-deletion.patch
	btrfs-avoid-possible-signal-interruption-of-btrfs_drop_snapshot-on-relocation-tree.patch
	btrfs-don-t-allocate-anonymous-block-device-for-user-invisible-roots.patch
	btrfs-don-t-traverse-into-the-seed-devices-in-show_devname.patch
	btrfs-don-t-warn-if-we-abort-a-transaction-with-erofs.patch
	btrfs-fix-race-between-page-release-and-a-fast-fsync.patch
	btrfs-free-anon-block-device-right-after-subvolume-deletion.patch
	btrfs-move-the-chunk_mutex-in-btrfs_read_chunk_tree.patch
	btrfs-only-commit-delayed-items-at-fsync-if-we-are-logging-a-directory.patch
	btrfs-only-commit-the-delayed-inode-when-doing-a-full-fsync.patch
	btrfs-open-device-without-device_list_mutex.patch
	btrfs-pass-checksum-type-via-btrfs_ioc_fs_info-ioctl.patch
	btrfs-preallocate-anon-block-device-at-first-phase-of-snapshot-creation.patch
	btrfs-ref-verify-fix-memory-leak-in-add_block_entry.patch
	btrfs-relocation-review-the-call-sites-which-can-be-interrupted-by-signal.patch
	btrfs-remove-no-longer-needed-use-of-log_writers-for-the-log-root-tree.patch
	btrfs-return-erofs-for-btrfs_fs_state_error-cases.patch
	btrfs-stop-incremening-log_batch-for-the-log-root-tree-when-syncing-log.patch
	btrfs-sysfs-use-nofs-for-device-creation.patch
---

diff --git a/queue-5.8/btrfs-add-missing-check-for-nocow-and-compression-inode-flags.patch b/queue-5.8/btrfs-add-missing-check-for-nocow-and-compression-inode-flags.patch
new file mode 100644
index 00000000000..582a858e556
--- /dev/null
+++ b/queue-5.8/btrfs-add-missing-check-for-nocow-and-compression-inode-flags.patch
@@ -0,0 +1,111 @@
+From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Fri, 10 Jul 2020 09:49:56 +0200
+Subject: btrfs: add missing check for nocow and compression inode flags
+
+From: David Sterba <dsterba@suse.com>
+
+commit f37c563bab4297024c300b05c8f48430e323809d upstream.
+
+User Forza reported on IRC that some invalid combinations of file
+attributes are accepted by chattr.
+
+The NODATACOW and compression file flags/attributes are mutually
+exclusive, but they could be set by 'chattr +c +C' on an empty file. The
+nodatacow will be in effect because it's checked first in
+btrfs_run_delalloc_range.
+
+Extend the flag validation to catch the following cases:
+
+  - input flags are conflicting
+  - old and new flags are conflicting
+  - initialize the local variable with inode flags after inode is locked
+
+Inode attributes take precedence over mount options and are an
+independent setting.
+
+Nocompress would be a no-op with nodatacow, but we don't want to mix
+any compression-related options with nodatacow.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c |   30 ++++++++++++++++++++++--------
+ 1 file changed, 22 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct f
+ 	return 0;
+ }
+ 
+-/* Check if @flags are a supported and valid set of FS_*_FL flags */
+-static int check_fsflags(unsigned int flags)
++/*
++ * Check if @flags are a supported and valid set of FS_*_FL flags and that
++ * the old and new flags are not conflicting
++ */
++static int check_fsflags(unsigned int old_flags, unsigned int flags)
+ {
+ 	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+ 		      FS_NOATIME_FL | FS_NODUMP_FL | \
+@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int fl
+ 		      FS_NOCOW_FL))
+ 		return -EOPNOTSUPP;
+ 
++	/* COMPR and NOCOMP on new/old are valid */
+ 	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
+ 		return -EINVAL;
+ 
++	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
++		return -EINVAL;
++
++	/* NOCOW and compression options are mutually exclusive */
++	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
++		return -EINVAL;
++	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
++		return -EINVAL;
++
+ 	return 0;
+ }
+ 
+@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct f
+ 	unsigned int fsflags, old_fsflags;
+ 	int ret;
+ 	const char *comp = NULL;
+-	u32 binode_flags = binode->flags;
++	u32 binode_flags;
+ 
+ 	if (!inode_owner_or_capable(inode))
+ 		return -EPERM;
+@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct f
+ 	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
+ 		return -EFAULT;
+ 
+-	ret = check_fsflags(fsflags);
+-	if (ret)
+-		return ret;
+-
+ 	ret = mnt_want_write_file(file);
+ 	if (ret)
+ 		return ret;
+ 
+ 	inode_lock(inode);
+-
+ 	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+ 	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
++
+ 	ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ 	if (ret)
+ 		goto out_unlock;
+ 
++	ret = check_fsflags(old_fsflags, fsflags);
++	if (ret)
++		goto out_unlock;
++
++	binode_flags = binode->flags;
+ 	if (fsflags & FS_SYNC_FL)
+ 		binode_flags |= BTRFS_INODE_SYNC;
+ 	else
diff --git a/queue-5.8/btrfs-allow-use-of-global-block-reserve-for-balance-item-deletion.patch b/queue-5.8/btrfs-allow-use-of-global-block-reserve-for-balance-item-deletion.patch
new file mode 100644
index 00000000000..4912d1ba87c
--- /dev/null
+++ b/queue-5.8/btrfs-allow-use-of-global-block-reserve-for-balance-item-deletion.patch
@@ -0,0 +1,50 @@
+From 3502a8c0dc1bd4b4970b59b06e348f22a1c05581 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Thu, 25 Jun 2020 12:35:28 +0200
+Subject: btrfs: allow use of global block reserve for balance item deletion
+
+From: David Sterba <dsterba@suse.com>
+
+commit 3502a8c0dc1bd4b4970b59b06e348f22a1c05581 upstream.
+
+On a filesystem with exhausted metadata, but still enough to start
+balance, it's possible to hit this error:
+
+[324402.053842] BTRFS info (device loop0): 1 enospc errors during balance
+[324402.060769] BTRFS info (device loop0): balance: ended with status: -28
+[324402.172295] BTRFS: error (device loop0) in reset_balance_state:3321: errno=-28 No space left
+
+It fails inside reset_balance_state and turns the filesystem to
+read-only, which is unnecessary and should be fixed too, but the problem
+is caused by lack of space when the balance item is deleted. This is a
+one-time operation and from the same rank as unlink that is allowed to
+use the global block reserve. So do the same for the balance item.
+
+Status of the filesystem (100GiB) just after the balance fails:
+
+$ btrfs fi df mnt
+Data, single: total=80.01GiB, used=38.58GiB
+System, single: total=4.00MiB, used=16.00KiB
+Metadata, single: total=19.99GiB, used=19.48GiB
+GlobalReserve, single: total=512.00MiB, used=50.11MiB
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -3231,7 +3231,7 @@ static int del_balance_item(struct btrfs
+ 	if (!path)
+ 		return -ENOMEM;
+ 
+-	trans = btrfs_start_transaction(root, 0);
++	trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
+ 	if (IS_ERR(trans)) {
+ 		btrfs_free_path(path);
+ 		return PTR_ERR(trans);
diff --git a/queue-5.8/btrfs-avoid-possible-signal-interruption-of-btrfs_drop_snapshot-on-relocation-tree.patch b/queue-5.8/btrfs-avoid-possible-signal-interruption-of-btrfs_drop_snapshot-on-relocation-tree.patch
new file mode 100644
index 00000000000..ad7f43ce430
--- /dev/null
+++ b/queue-5.8/btrfs-avoid-possible-signal-interruption-of-btrfs_drop_snapshot-on-relocation-tree.patch
@@ -0,0 +1,86 @@
+From f3e3d9cc35252a70a2fd698762c9687718268ec6 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 13 Jul 2020 09:03:20 +0800
+Subject: btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f3e3d9cc35252a70a2fd698762c9687718268ec6 upstream.
+
+[BUG]
+There is a bug report that bad signal timing could lead to read-only
+fs during balance:
+
+  BTRFS info (device xvdb): balance: start -d -m -s
+  BTRFS info (device xvdb): relocating block group 73001861120 flags metadata
+  BTRFS info (device xvdb): found 12236 extents, stage: move data extents
+  BTRFS info (device xvdb): relocating block group 71928119296 flags data
+  BTRFS info (device xvdb): found 3 extents, stage: move data extents
+  BTRFS info (device xvdb): found 3 extents, stage: update data pointers
+  BTRFS info (device xvdb): relocating block group 60922265600 flags metadata
+  BTRFS: error (device xvdb) in btrfs_drop_snapshot:5505: errno=-4 unknown
+  BTRFS info (device xvdb): forced readonly
+  BTRFS info (device xvdb): balance: ended with status: -4
+
+[CAUSE]
+The direct cause is the -EINTR from the following call chain when a
+fatal signal is pending:
+
+ relocate_block_group()
+ |- clean_dirty_subvols()
+    |- btrfs_drop_snapshot()
+       |- btrfs_start_transaction()
+          |- btrfs_delayed_refs_rsv_refill()
+             |- btrfs_reserve_metadata_bytes()
+                |- __reserve_metadata_bytes()
+                   |- wait_reserve_ticket()
+                      |- prepare_to_wait_event();
+                      |- ticket->error = -EINTR;
+
+Normally this behavior is fine for most btrfs_start_transaction()
+callers, as they need to catch any other error, same for the signal, and
+exit ASAP.
+
+However for balance, especially for the clean_dirty_subvols() case, we're
+already doing cleanup works, getting -EINTR from btrfs_drop_snapshot()
+could cause a lot of unexpected problems.
+
+These range from the forced read-only state reported above to later balance
+errors due to half-dropped reloc trees.
+
+[FIX]
+Fix this problem by using btrfs_join_transaction() if
+btrfs_drop_snapshot() is called from relocation context.
+
+Since btrfs_join_transaction() won't get interrupted by signal, we can
+continue the cleanup.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -5298,7 +5298,14 @@ int btrfs_drop_snapshot(struct btrfs_roo
+ 		goto out;
+ 	}
+ 
+-	trans = btrfs_start_transaction(tree_root, 0);
++	/*
++	 * Use join to avoid potential EINTR from transaction start. See
++	 * wait_reserve_ticket and the whole reservation callchain.
++	 */
++	if (for_reloc)
++		trans = btrfs_join_transaction(tree_root);
++	else
++		trans = btrfs_start_transaction(tree_root, 0);
+ 	if (IS_ERR(trans)) {
+ 		err = PTR_ERR(trans);
+ 		goto out_free;
diff --git a/queue-5.8/btrfs-don-t-allocate-anonymous-block-device-for-user-invisible-roots.patch b/queue-5.8/btrfs-don-t-allocate-anonymous-block-device-for-user-invisible-roots.patch
new file mode 100644
index 00000000000..7cd86551520
--- /dev/null
+++ b/queue-5.8/btrfs-don-t-allocate-anonymous-block-device-for-user-invisible-roots.patch
@@ -0,0 +1,90 @@
+From 851fd730a743e072badaf67caf39883e32439431 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 16 Jun 2020 10:17:34 +0800
+Subject: btrfs: don't allocate anonymous block device for user invisible roots
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 851fd730a743e072badaf67caf39883e32439431 upstream.
+
+[BUG]
+When a lot of subvolumes are created, there is a user report about
+transaction aborted:
+
+  BTRFS: Transaction aborted (error -24)
+  WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  Call Trace:
+   create_pending_snapshots+0x82/0xa0 [btrfs]
+   btrfs_commit_transaction+0x275/0x8c0 [btrfs]
+   btrfs_mksubvol+0x4b9/0x500 [btrfs]
+   btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
+   btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
+   btrfs_ioctl+0x11a4/0x2da0 [btrfs]
+   do_vfs_ioctl+0xa9/0x640
+   ksys_ioctl+0x67/0x90
+   __x64_sys_ioctl+0x1a/0x20
+   do_syscall_64+0x5a/0x110
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  ---[ end trace 33f2f83f3d5250e9 ]---
+  BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
+  BTRFS info (device sda1): forced readonly
+  BTRFS warning (device sda1): Skipping commit of aborted transaction.
+  BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
+
+[CAUSE]
+The error is EMFILE (Too many open files) and comes from the anonymous
+block device allocation. The ids are in a shared pool of size 1<<20.
+
+The ids are assigned to live subvolumes, ie. the root structure exists
+in memory (eg. after creation or after the root appears in some path).
+The pool could be exhausted if the numbers are not reclaimed fast
+enough, after subvolume deletion or if other system component uses the
+anon block devices.
+
+[WORKAROUND]
+Since it's not possible to completely solve the problem, we can only
+minimize the time the id is allocated to a subvolume root.
+
+Firstly, we can reduce the use of anon_dev by trees that are not
+subvolume roots, like data reloc tree.
+
+This patch will do extra check on root objectid, to skip roots that
+don't need anon_dev.  Currently it's only data reloc tree and orphan
+roots.
+
+Reported-by: Greed Rong <greedrong@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1428,9 +1428,16 @@ static int btrfs_init_fs_root(struct btr
+ 	spin_lock_init(&root->ino_cache_lock);
+ 	init_waitqueue_head(&root->ino_cache_wait);
+ 
+-	ret = get_anon_bdev(&root->anon_dev);
+-	if (ret)
+-		goto fail;
++	/*
++	 * Don't assign anonymous block device to roots that are not exposed to
++	 * userspace, the id pool is limited to 1M
++	 */
++	if (is_fstree(root->root_key.objectid) &&
++	    btrfs_root_refs(&root->root_item) > 0) {
++		ret = get_anon_bdev(&root->anon_dev);
++		if (ret)
++			goto fail;
++	}
+ 
+ 	mutex_lock(&root->objectid_mutex);
+ 	ret = btrfs_find_highest_objectid(root,
diff --git a/queue-5.8/btrfs-don-t-traverse-into-the-seed-devices-in-show_devname.patch b/queue-5.8/btrfs-don-t-traverse-into-the-seed-devices-in-show_devname.patch
new file mode 100644
index 00000000000..42d8b88a1a4
--- /dev/null
+++ b/queue-5.8/btrfs-don-t-traverse-into-the-seed-devices-in-show_devname.patch
@@ -0,0 +1,123 @@
+From 4faf55b03823e96c44dc4e364520000ed3b12fdb Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 10 Jul 2020 14:37:38 +0800
+Subject: btrfs: don't traverse into the seed devices in show_devname
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 4faf55b03823e96c44dc4e364520000ed3b12fdb upstream.
+
+->show_devname currently shows the lowest devid in the list. As the seed
+devices have the lowest devid in the sprouted filesystem, userland
+tools such as findmnt end up seeing the seed device instead of the device from
+the read-writable sprouted filesystem. As shown below.
+
+ mount /dev/sda /btrfs
+ mount: /btrfs: WARNING: device write-protected, mounted read-only.
+
+ findmnt --output SOURCE,TARGET,UUID /btrfs
+ SOURCE   TARGET UUID
+ /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111
+
+ btrfs dev add -f /dev/sdb /btrfs
+
+ umount /btrfs
+ mount /dev/sdb /btrfs
+
+ findmnt --output SOURCE,TARGET,UUID /btrfs
+ SOURCE   TARGET UUID
+ /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111
+
+All sprouts from a single seed will show the same seed device and the
+same fsid. That's confusing.
+This is causing problems in our prototype as there isn't any reference
+to the sprout file-system(s) which is being used for actual read and
+write.
+
+This was added in the patch which implemented the show_devname in btrfs
+commit 9c5085c14798 ("Btrfs: implement ->show_devname").
+I tried to look for any particular reason that we need to show the seed
+device, there isn't any.
+
+So instead, do not traverse through the seed devices, just show the
+lowest devid in the sprouted fsid.
+
+After the patch:
+
+ mount /dev/sda /btrfs
+ mount: /btrfs: WARNING: device write-protected, mounted read-only.
+
+ findmnt --output SOURCE,TARGET,UUID /btrfs
+ SOURCE   TARGET UUID
+ /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111
+
+ btrfs dev add -f /dev/sdb /btrfs
+ mount -o rw,remount /dev/sdb /btrfs
+
+ findmnt --output SOURCE,TARGET,UUID /btrfs
+ SOURCE   TARGET UUID
+ /dev/sdb /btrfs 595ca0e6-b82e-46b5-b9e2-c72a6928be48
+
+ mount /dev/sda /btrfs1
+ mount: /btrfs1: WARNING: device write-protected, mounted read-only.
+
+ btrfs dev add -f /dev/sdc /btrfs1
+
+ findmnt --output SOURCE,TARGET,UUID /btrfs1
+ SOURCE   TARGET  UUID
+ /dev/sdc /btrfs1 ca1dbb7a-8446-4f95-853c-a20f3f82bdbb
+
+ cat /proc/self/mounts | grep btrfs
+ /dev/sdb /btrfs btrfs rw,relatime,noacl,space_cache,subvolid=5,subvol=/ 0 0
+ /dev/sdc /btrfs1 btrfs ro,relatime,noacl,space_cache,subvolid=5,subvol=/ 0 0
+
+Reported-by: Martin K. Petersen <martin.petersen@oracle.com>
+CC: stable@vger.kernel.org # 4.19+
+Tested-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/super.c |   21 +++++++--------------
+ 1 file changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2296,9 +2296,7 @@ static int btrfs_unfreeze(struct super_b
+ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
+ {
+ 	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
+-	struct btrfs_fs_devices *cur_devices;
+ 	struct btrfs_device *dev, *first_dev = NULL;
+-	struct list_head *head;
+ 
+ 	/*
+ 	 * Lightweight locking of the devices. We should not need
+@@ -2308,18 +2306,13 @@ static int btrfs_show_devname(struct seq
+ 	 * least until the rcu_read_unlock.
+ 	 */
+ 	rcu_read_lock();
+-	cur_devices = fs_info->fs_devices;
+-	while (cur_devices) {
+-		head = &cur_devices->devices;
+-		list_for_each_entry_rcu(dev, head, dev_list) {
+-			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
+-				continue;
+-			if (!dev->name)
+-				continue;
+-			if (!first_dev || dev->devid < first_dev->devid)
+-				first_dev = dev;
+-		}
+-		cur_devices = cur_devices->seed;
++	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
++		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
++			continue;
++		if (!dev->name)
++			continue;
++		if (!first_dev || dev->devid < first_dev->devid)
++			first_dev = dev;
+ 	}
+ 
+ 	if (first_dev)
diff --git a/queue-5.8/btrfs-don-t-warn-if-we-abort-a-transaction-with-erofs.patch b/queue-5.8/btrfs-don-t-warn-if-we-abort-a-transaction-with-erofs.patch
new file mode 100644
index 00000000000..27c12e21e01
--- /dev/null
+++ b/queue-5.8/btrfs-don-t-warn-if-we-abort-a-transaction-with-erofs.patch
@@ -0,0 +1,37 @@
+From f95ebdbed46a4d8b9fdb7bff109fdbb6fc9a6dc8 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Tue, 21 Jul 2020 11:24:27 -0400
+Subject: btrfs: don't WARN if we abort a transaction with EROFS
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit f95ebdbed46a4d8b9fdb7bff109fdbb6fc9a6dc8 upstream.
+
+If we got some sort of corruption via a read and call
+btrfs_handle_fs_error() we'll set BTRFS_FS_STATE_ERROR on the fs and
+complain.  If a subsequent trans handle trips over this it'll get EROFS
+and then abort.  However at that point we're not aborting for the
+original reason, we're aborting because we've been flipped read only.
+We do not need to WARN_ON() here.
+
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3198,7 +3198,7 @@ do {								\
+ 	/* Report first abort since mount */			\
+ 	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,	\
+ 			&((trans)->fs_info->fs_state))) {	\
+-		if ((errno) != -EIO) {				\
++		if ((errno) != -EIO && (errno) != -EROFS) {		\
+ 			WARN(1, KERN_DEBUG				\
+ 			"BTRFS: Transaction aborted (error %d)\n",	\
+ 			(errno));					\
diff --git a/queue-5.8/btrfs-fix-race-between-page-release-and-a-fast-fsync.patch b/queue-5.8/btrfs-fix-race-between-page-release-and-a-fast-fsync.patch
new file mode 100644
index 00000000000..95cba149dea
--- /dev/null
+++ b/queue-5.8/btrfs-fix-race-between-page-release-and-a-fast-fsync.patch
@@ -0,0 +1,92 @@
+From 3d6448e631591756da36efb3ea6355ff6f383c3a Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 22 Jul 2020 12:28:37 +0100
+Subject: btrfs: fix race between page release and a fast fsync
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 3d6448e631591756da36efb3ea6355ff6f383c3a upstream.
+
+When releasing an extent map, done through the page release callback, we
+can race with an ongoing fast fsync and cause the fsync to miss a new
+extent and not log it. The steps for this to happen are the following:
+
+1) A page is dirtied for some inode I;
+
+2) Writeback for that page is triggered by a path other than fsync, for
+   example by the system due to memory pressure;
+
+3) When the ordered extent for the extent (a single 4K page) finishes,
+   we unpin the corresponding extent map and set its generation to N,
+   the current transaction's generation;
+
+4) The btrfs_releasepage() callback is invoked by the system due to
+   memory pressure for that no longer dirty page of inode I;
+
+5) At the same time, some task calls fsync on inode I, joins transaction
+   N, and at btrfs_log_inode() it sees that the inode does not have the
+   full sync flag set, so we proceed with a fast fsync. But before we get
+   into btrfs_log_changed_extents() and lock the inode's extent map tree:
+
+6) Through btrfs_releasepage() we end up at try_release_extent_mapping()
+   and we remove the extent map for the new 4Kb extent, because it is
+   neither pinned anymore nor locked. By calling remove_extent_mapping(),
+   we remove the extent map from the list of modified extents, since the
+   extent map does not have the logging flag set. We unlock the inode's
+   extent map tree;
+
+7) The task doing the fast fsync now enters btrfs_log_changed_extents(),
+   locks the inode's extent map tree and iterates its list of modified
+   extents, which no longer has the 4Kb extent in it, so it does not log
+   the extent;
+
+8) The fsync finishes;
+
+9) Before transaction N is committed, a power failure happens. After
+   replaying the log, the 4K extent of inode I will be missing, since
+   it was not logged due to the race with try_release_extent_mapping().
+
+So fix this by teaching try_release_extent_mapping() to not remove an
+extent map if it's still in the list of modified extents.
+
+Fixes: ff44c6e36dc9dc ("Btrfs: do not hold the write_lock on the extent tree while logging")
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4502,15 +4502,25 @@ int try_release_extent_mapping(struct pa
+ 				free_extent_map(em);
+ 				break;
+ 			}
+-			if (!test_range_bit(tree, em->start,
+-					    extent_map_end(em) - 1,
+-					    EXTENT_LOCKED, 0, NULL)) {
++			if (test_range_bit(tree, em->start,
++					   extent_map_end(em) - 1,
++					   EXTENT_LOCKED, 0, NULL))
++				goto next;
++			/*
++			 * If it's not in the list of modified extents, used
++			 * by a fast fsync, we can remove it. If it's being
++			 * logged we can safely remove it since fsync took an
++			 * extra reference on the em.
++			 */
++			if (list_empty(&em->list) ||
++			    test_bit(EXTENT_FLAG_LOGGING, &em->flags)) {
+ 				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ 					&btrfs_inode->runtime_flags);
+ 				remove_extent_mapping(map, em);
+ 				/* once for the rb tree */
+ 				free_extent_map(em);
+ 			}
++next:
+ 			start = extent_map_end(em);
+ 			write_unlock(&map->lock);
+ 
diff --git a/queue-5.8/btrfs-free-anon-block-device-right-after-subvolume-deletion.patch b/queue-5.8/btrfs-free-anon-block-device-right-after-subvolume-deletion.patch
new file mode 100644
index 00000000000..c0ffb0341ff
--- /dev/null
+++ b/queue-5.8/btrfs-free-anon-block-device-right-after-subvolume-deletion.patch
@@ -0,0 +1,68 @@
+From 082b6c970f02fefd278c7833880cda29691a5f34 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 16 Jun 2020 10:17:37 +0800
+Subject: btrfs: free anon block device right after subvolume deletion
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 082b6c970f02fefd278c7833880cda29691a5f34 upstream.
+
+[BUG]
+When a lot of subvolumes are created, there is a user report about
+transaction aborted caused by slow anonymous block device reclaim:
+
+  BTRFS: Transaction aborted (error -24)
+  WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  Call Trace:
+   create_pending_snapshots+0x82/0xa0 [btrfs]
+   btrfs_commit_transaction+0x275/0x8c0 [btrfs]
+   btrfs_mksubvol+0x4b9/0x500 [btrfs]
+   btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
+   btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
+   btrfs_ioctl+0x11a4/0x2da0 [btrfs]
+   do_vfs_ioctl+0xa9/0x640
+   ksys_ioctl+0x67/0x90
+   __x64_sys_ioctl+0x1a/0x20
+   do_syscall_64+0x5a/0x110
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  ---[ end trace 33f2f83f3d5250e9 ]---
+  BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
+  BTRFS info (device sda1): forced readonly
+  BTRFS warning (device sda1): Skipping commit of aborted transaction.
+  BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
+
+[CAUSE]
+The anonymous device pool is shared and its size is 1M. It's possible to
+hit that limit if the subvolume deletion is not fast enough and the
+subvolumes to be cleaned keep the ids allocated.
+
+[WORKAROUND]
+We can't avoid the anon device pool exhaustion but we can shorten the
+time the id is attached to the subvolume root once the subvolume becomes
+invisible to the user.
+
+Reported-by: Greed Rong <greedrong@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4041,6 +4041,8 @@ int btrfs_delete_subvolume(struct inode
+ 		}
+ 	}
+ 
++	free_anon_bdev(dest->anon_dev);
++	dest->anon_dev = 0;
+ out_end_trans:
+ 	trans->block_rsv = NULL;
+ 	trans->bytes_reserved = 0;
diff --git a/queue-5.8/btrfs-move-the-chunk_mutex-in-btrfs_read_chunk_tree.patch b/queue-5.8/btrfs-move-the-chunk_mutex-in-btrfs_read_chunk_tree.patch
new file mode 100644
index 00000000000..19e27ec8f55
--- /dev/null
+++ b/queue-5.8/btrfs-move-the-chunk_mutex-in-btrfs_read_chunk_tree.patch
@@ -0,0 +1,166 @@
+From 01d01caf19ff7c537527d352d169c4368375c0a1 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 17 Jul 2020 15:12:28 -0400
+Subject: btrfs: move the chunk_mutex in btrfs_read_chunk_tree
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 01d01caf19ff7c537527d352d169c4368375c0a1 upstream.
+
+We are currently getting this lockdep splat in btrfs/161:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.8.0-rc5+ #20 Tainted: G            E
+  ------------------------------------------------------
+  mount/678048 is trying to acquire lock:
+  ffff9b769f15b6e0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: clone_fs_devices+0x4d/0x170 [btrfs]
+
+  but task is already holding lock:
+  ffff9b76abdb08d0 (&fs_info->chunk_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x6a/0x800 [btrfs]
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}:
+	 __mutex_lock+0x8b/0x8f0
+	 btrfs_init_new_device+0x2d2/0x1240 [btrfs]
+	 btrfs_ioctl+0x1de/0x2d20 [btrfs]
+	 ksys_ioctl+0x87/0xc0
+	 __x64_sys_ioctl+0x16/0x20
+	 do_syscall_64+0x52/0xb0
+	 entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #0 (&fs_devs->device_list_mutex){+.+.}-{3:3}:
+	 __lock_acquire+0x1240/0x2460
+	 lock_acquire+0xab/0x360
+	 __mutex_lock+0x8b/0x8f0
+	 clone_fs_devices+0x4d/0x170 [btrfs]
+	 btrfs_read_chunk_tree+0x330/0x800 [btrfs]
+	 open_ctree+0xb7c/0x18ce [btrfs]
+	 btrfs_mount_root.cold+0x13/0xfa [btrfs]
+	 legacy_get_tree+0x30/0x50
+	 vfs_get_tree+0x28/0xc0
+	 fc_mount+0xe/0x40
+	 vfs_kern_mount.part.0+0x71/0x90
+	 btrfs_mount+0x13b/0x3e0 [btrfs]
+	 legacy_get_tree+0x30/0x50
+	 vfs_get_tree+0x28/0xc0
+	 do_mount+0x7de/0xb30
+	 __x64_sys_mount+0x8e/0xd0
+	 do_syscall_64+0x52/0xb0
+	 entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  other info that might help us debug this:
+
+   Possible unsafe locking scenario:
+
+	 CPU0                    CPU1
+	 ----                    ----
+    lock(&fs_info->chunk_mutex);
+				 lock(&fs_devs->device_list_mutex);
+				 lock(&fs_info->chunk_mutex);
+    lock(&fs_devs->device_list_mutex);
+
+   *** DEADLOCK ***
+
+  3 locks held by mount/678048:
+   #0: ffff9b75ff5fb0e0 (&type->s_umount_key#63/1){+.+.}-{3:3}, at: alloc_super+0xb5/0x380
+   #1: ffffffffc0c2fbc8 (uuid_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x54/0x800 [btrfs]
+   #2: ffff9b76abdb08d0 (&fs_info->chunk_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x6a/0x800 [btrfs]
+
+  stack backtrace:
+  CPU: 2 PID: 678048 Comm: mount Tainted: G            E     5.8.0-rc5+ #20
+  Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./890FX Deluxe5, BIOS P1.40 05/03/2011
+  Call Trace:
+   dump_stack+0x96/0xd0
+   check_noncircular+0x162/0x180
+   __lock_acquire+0x1240/0x2460
+   ? asm_sysvec_apic_timer_interrupt+0x12/0x20
+   lock_acquire+0xab/0x360
+   ? clone_fs_devices+0x4d/0x170 [btrfs]
+   __mutex_lock+0x8b/0x8f0
+   ? clone_fs_devices+0x4d/0x170 [btrfs]
+   ? rcu_read_lock_sched_held+0x52/0x60
+   ? cpumask_next+0x16/0x20
+   ? module_assert_mutex_or_preempt+0x14/0x40
+   ? __module_address+0x28/0xf0
+   ? clone_fs_devices+0x4d/0x170 [btrfs]
+   ? static_obj+0x4f/0x60
+   ? lockdep_init_map_waits+0x43/0x200
+   ? clone_fs_devices+0x4d/0x170 [btrfs]
+   clone_fs_devices+0x4d/0x170 [btrfs]
+   btrfs_read_chunk_tree+0x330/0x800 [btrfs]
+   open_ctree+0xb7c/0x18ce [btrfs]
+   ? super_setup_bdi_name+0x79/0xd0
+   btrfs_mount_root.cold+0x13/0xfa [btrfs]
+   ? vfs_parse_fs_string+0x84/0xb0
+   ? rcu_read_lock_sched_held+0x52/0x60
+   ? kfree+0x2b5/0x310
+   legacy_get_tree+0x30/0x50
+   vfs_get_tree+0x28/0xc0
+   fc_mount+0xe/0x40
+   vfs_kern_mount.part.0+0x71/0x90
+   btrfs_mount+0x13b/0x3e0 [btrfs]
+   ? cred_has_capability+0x7c/0x120
+   ? rcu_read_lock_sched_held+0x52/0x60
+   ? legacy_get_tree+0x30/0x50
+   legacy_get_tree+0x30/0x50
+   vfs_get_tree+0x28/0xc0
+   do_mount+0x7de/0xb30
+   ? memdup_user+0x4e/0x90
+   __x64_sys_mount+0x8e/0xd0
+   do_syscall_64+0x52/0xb0
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+This is because btrfs_read_chunk_tree() can come upon DEV_EXTENT's and
+then read the device, which takes the device_list_mutex.  The
+device_list_mutex needs to be taken before the chunk_mutex, so this is a
+problem.  We only really need the chunk mutex around adding the chunk,
+so move the mutex around read_one_chunk.
+
+An argument could be made that we don't even need the chunk_mutex here
+as it's during mount, and we are protected by various other locks.
+However we already have special rules for ->device_list_mutex, and I'd
+rather not have another special case for ->chunk_mutex.
+
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -7064,7 +7064,6 @@ int btrfs_read_chunk_tree(struct btrfs_f
+ 	 * otherwise we don't need it.
+ 	 */
+ 	mutex_lock(&uuid_mutex);
+-	mutex_lock(&fs_info->chunk_mutex);
+ 
+ 	/*
+ 	 * It is possible for mount and umount to race in such a way that
+@@ -7109,7 +7108,9 @@ int btrfs_read_chunk_tree(struct btrfs_f
+ 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
+ 			struct btrfs_chunk *chunk;
+ 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
++			mutex_lock(&fs_info->chunk_mutex);
+ 			ret = read_one_chunk(&found_key, leaf, chunk);
++			mutex_unlock(&fs_info->chunk_mutex);
+ 			if (ret)
+ 				goto error;
+ 		}
+@@ -7139,7 +7140,6 @@ int btrfs_read_chunk_tree(struct btrfs_f
+ 	}
+ 	ret = 0;
+ error:
+-	mutex_unlock(&fs_info->chunk_mutex);
+ 	mutex_unlock(&uuid_mutex);
+ 
+ 	btrfs_free_path(path);
diff --git a/queue-5.8/btrfs-only-commit-delayed-items-at-fsync-if-we-are-logging-a-directory.patch b/queue-5.8/btrfs-only-commit-delayed-items-at-fsync-if-we-are-logging-a-directory.patch
new file mode 100644
index 00000000000..4d75382a23c
--- /dev/null
+++ b/queue-5.8/btrfs-only-commit-delayed-items-at-fsync-if-we-are-logging-a-directory.patch
@@ -0,0 +1,93 @@
+From 5aa7d1a7f4a2f8ca6be1f32415e9365d026e8fa7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 2 Jul 2020 12:32:20 +0100
+Subject: btrfs: only commit delayed items at fsync if we are logging a directory
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 5aa7d1a7f4a2f8ca6be1f32415e9365d026e8fa7 upstream.
+
+When logging an inode we are committing its delayed items if either the
+inode is a directory or if it is a new inode, created in the current
+transaction.
+
+We need to do it for directories, since new directory indexes are stored
+as delayed items of the inode and when logging a directory we need to be
+able to access all indexes from the fs/subvolume tree in order to figure
+out which index ranges need to be logged.
+
+However for new inodes that are not directories, we do not need to do it
+because the only type of delayed item they can have is the inode item, and
+we are guaranteed to always log an up to date version of the inode item:
+
+*) for a full fsync we do it by committing the delayed inode and then
+   copying the item from the fs/subvolume tree with
+   copy_inode_items_to_log();
+
+*) for a fast fsync we always log the inode item based on the contents of
+   the in-memory struct btrfs_inode. We guarantee this is always done since
+   commit e4545de5b035c7 ("Btrfs: fix fsync data loss after append write").
+
+So stop running delayed items for new inodes that are not directories,
+since that forces committing the delayed inode into the fs/subvolume tree,
+wasting time and adding contention to the tree when a full fsync is not
+required. We will only do it in case a full fsync is needed.
+
+This patch is part of a series that has the following patches:
+
+1/4 btrfs: only commit the delayed inode when doing a full fsync
+2/4 btrfs: only commit delayed items at fsync if we are logging a directory
+3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
+4/4 btrfs: remove no longer needed use of log_writers for the log root tree
+
+With the entire patchset applied I saw about a 12% decrease in max latency
+reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
+ram, using kvm and using a raw NVMe device directly (no intermediary fs on
+the host). The test was invoked like the following:
+
+  mkfs.btrfs -f /dev/sdk
+  mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
+  dbench -D /mnt/sdk -t 300 8
+  umount /mnt/sdk
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -5122,7 +5122,6 @@ static int btrfs_log_inode(struct btrfs_
+ 			   const loff_t end,
+ 			   struct btrfs_log_ctx *ctx)
+ {
+-	struct btrfs_fs_info *fs_info = root->fs_info;
+ 	struct btrfs_path *path;
+ 	struct btrfs_path *dst_path;
+ 	struct btrfs_key min_key;
+@@ -5165,15 +5164,17 @@ static int btrfs_log_inode(struct btrfs_
+ 	max_key.offset = (u64)-1;
+ 
+ 	/*
+-	 * Only run delayed items if we are a dir or a new file.
++	 * Only run delayed items if we are a directory. We want to make sure
++	 * all directory indexes hit the fs/subvolume tree so we can find them
++	 * and figure out which index ranges have to be logged.
++	 *
+ 	 * Otherwise commit the delayed inode only if the full sync flag is set,
+ 	 * as we want to make sure an up to date version is in the subvolume
+ 	 * tree so copy_inode_items_to_log() / copy_items() can find it and copy
+ 	 * it to the log tree. For a non full sync, we always log the inode item
+ 	 * based on the in-memory struct btrfs_inode which is always up to date.
+ 	 */
+-	if (S_ISDIR(inode->vfs_inode.i_mode) ||
+-	    inode->generation > fs_info->last_trans_committed)
++	if (S_ISDIR(inode->vfs_inode.i_mode))
+ 		ret = btrfs_commit_inode_delayed_items(trans, inode);
+ 	else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ 		ret = btrfs_commit_inode_delayed_inode(inode);
diff --git a/queue-5.8/btrfs-only-commit-the-delayed-inode-when-doing-a-full-fsync.patch b/queue-5.8/btrfs-only-commit-the-delayed-inode-when-doing-a-full-fsync.patch
new file mode 100644
index 00000000000..dcf746fc4e9
--- /dev/null
+++ b/queue-5.8/btrfs-only-commit-the-delayed-inode-when-doing-a-full-fsync.patch
@@ -0,0 +1,88 @@
+From 8c8648dd1f6d62aeb912deeb788b6ac33cb782e7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 2 Jul 2020 12:31:59 +0100
+Subject: btrfs: only commit the delayed inode when doing a full fsync
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8c8648dd1f6d62aeb912deeb788b6ac33cb782e7 upstream.
+
+Commit 2c2c452b0cafdc ("Btrfs: fix fsync when extend references are added
+to an inode") forced a commit of the delayed inode when logging an inode
+in order to ensure we would end up logging the inode item during a full
+fsync. By committing the delayed inode, we updated the inode item in the
+fs/subvolume tree and then later when copying items from leafs modified in
+the current transaction into the log tree (with copy_inode_items_to_log())
+we ended up copying the inode item from the fs/subvolume tree into the log
+tree. Logging an up to date version of the inode item is required to make
+sure at log replay time we get the link count fixup triggered among other
+things (replay xattr deletes, etc). The test case generic/040 from fstests
+exercises the bug which that commit fixed.
+
+However for a fast fsync we don't need to commit the delayed inode because
+we always log an up to date version of the inode item based on the struct
+btrfs_inode we have in-memory. We started doing this for fast fsyncs since
+commit e4545de5b035c7 ("Btrfs: fix fsync data loss after append write").
+
+So just stop committing the delayed inode if we are doing a fast fsync,
+we are only wasting time and adding contention on fs/subvolume tree.
+
+This patch is part of a series that has the following patches:
+
+1/4 btrfs: only commit the delayed inode when doing a full fsync
+2/4 btrfs: only commit delayed items at fsync if we are logging a directory
+3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
+4/4 btrfs: remove no longer needed use of log_writers for the log root tree
+
+With the entire patchset applied I saw about a 12% decrease in max latency
+reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
+ram, using kvm and using a raw NVMe device directly (no intermediary fs on
+the host). The test was invoked like the following:
+
+  mkfs.btrfs -f /dev/sdk
+  mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
+  dbench -D /mnt/sdk -t 300 8
+  umount /mnt/sdk
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -5130,7 +5130,7 @@ static int btrfs_log_inode(struct btrfs_
+ 	struct btrfs_key max_key;
+ 	struct btrfs_root *log = root->log_root;
+ 	int err = 0;
+-	int ret;
++	int ret = 0;
+ 	bool fast_search = false;
+ 	u64 ino = btrfs_ino(inode);
+ 	struct extent_map_tree *em_tree = &inode->extent_tree;
+@@ -5167,14 +5167,16 @@ static int btrfs_log_inode(struct btrfs_
+ 
+ 	/*
+ 	 * Only run delayed items if we are a dir or a new file.
+-	 * Otherwise commit the delayed inode only, which is needed in
+-	 * order for the log replay code to mark inodes for link count
+-	 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
++	 * Otherwise commit the delayed inode only if the full sync flag is set,
++	 * as we want to make sure an up to date version is in the subvolume
++	 * tree so copy_inode_items_to_log() / copy_items() can find it and copy
++	 * it to the log tree. For a non full sync, we always log the inode item
++	 * based on the in-memory struct btrfs_inode which is always up to date.
+ 	 */
+ 	if (S_ISDIR(inode->vfs_inode.i_mode) ||
+ 	    inode->generation > fs_info->last_trans_committed)
+ 		ret = btrfs_commit_inode_delayed_items(trans, inode);
+-	else
++	else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ 		ret = btrfs_commit_inode_delayed_inode(inode);
+ 
+ 	if (ret) {
diff --git a/queue-5.8/btrfs-open-device-without-device_list_mutex.patch b/queue-5.8/btrfs-open-device-without-device_list_mutex.patch
new file mode 100644
index 00000000000..abfb3defa38
--- /dev/null
+++ b/queue-5.8/btrfs-open-device-without-device_list_mutex.patch
@@ -0,0 +1,253 @@
+From 18c850fdc5a801bad4977b0f1723761d42267e45 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 17 Jul 2020 15:12:27 -0400
+Subject: btrfs: open device without device_list_mutex
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 18c850fdc5a801bad4977b0f1723761d42267e45 upstream.
+
+There has long existed a lockdep splat because we open our bdevs under
+the ->device_list_mutex at mount time, which acquires the bd_mutex.
+Usually this goes unnoticed, but if you do loopback devices at all
+suddenly the bd_mutex comes with a whole host of other dependencies,
+which results in the splat when you mount a btrfs file system.
+
+======================================================
+WARNING: possible circular locking dependency detected
+5.8.0-0.rc3.1.fc33.x86_64+debug #1 Not tainted
+------------------------------------------------------
+systemd-journal/509 is trying to acquire lock:
+ffff970831f84db0 (&fs_info->reloc_mutex){+.+.}-{3:3}, at: btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+
+but task is already holding lock:
+ffff97083144d598 (sb_pagefaults){.+.+}-{0:0}, at: btrfs_page_mkwrite+0x59/0x560 [btrfs]
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+ -> #6 (sb_pagefaults){.+.+}-{0:0}:
+       __sb_start_write+0x13e/0x220
+       btrfs_page_mkwrite+0x59/0x560 [btrfs]
+       do_page_mkwrite+0x4f/0x130
+       do_wp_page+0x3b0/0x4f0
+       handle_mm_fault+0xf47/0x1850
+       do_user_addr_fault+0x1fc/0x4b0
+       exc_page_fault+0x88/0x300
+       asm_exc_page_fault+0x1e/0x30
+
+ -> #5 (&mm->mmap_lock#2){++++}-{3:3}:
+       __might_fault+0x60/0x80
+       _copy_from_user+0x20/0xb0
+       get_sg_io_hdr+0x9a/0xb0
+       scsi_cmd_ioctl+0x1ea/0x2f0
+       cdrom_ioctl+0x3c/0x12b4
+       sr_block_ioctl+0xa4/0xd0
+       block_ioctl+0x3f/0x50
+       ksys_ioctl+0x82/0xc0
+       __x64_sys_ioctl+0x16/0x20
+       do_syscall_64+0x52/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #4 (&cd->lock){+.+.}-{3:3}:
+       __mutex_lock+0x7b/0x820
+       sr_block_open+0xa2/0x180
+       __blkdev_get+0xdd/0x550
+       blkdev_get+0x38/0x150
+       do_dentry_open+0x16b/0x3e0
+       path_openat+0x3c9/0xa00
+       do_filp_open+0x75/0x100
+       do_sys_openat2+0x8a/0x140
+       __x64_sys_openat+0x46/0x70
+       do_syscall_64+0x52/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #3 (&bdev->bd_mutex){+.+.}-{3:3}:
+       __mutex_lock+0x7b/0x820
+       __blkdev_get+0x6a/0x550
+       blkdev_get+0x85/0x150
+       blkdev_get_by_path+0x2c/0x70
+       btrfs_get_bdev_and_sb+0x1b/0xb0 [btrfs]
+       open_fs_devices+0x88/0x240 [btrfs]
+       btrfs_open_devices+0x92/0xa0 [btrfs]
+       btrfs_mount_root+0x250/0x490 [btrfs]
+       legacy_get_tree+0x30/0x50
+       vfs_get_tree+0x28/0xc0
+       vfs_kern_mount.part.0+0x71/0xb0
+       btrfs_mount+0x119/0x380 [btrfs]
+       legacy_get_tree+0x30/0x50
+       vfs_get_tree+0x28/0xc0
+       do_mount+0x8c6/0xca0
+       __x64_sys_mount+0x8e/0xd0
+       do_syscall_64+0x52/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #2 (&fs_devs->device_list_mutex){+.+.}-{3:3}:
+       __mutex_lock+0x7b/0x820
+       btrfs_run_dev_stats+0x36/0x420 [btrfs]
+       commit_cowonly_roots+0x91/0x2d0 [btrfs]
+       btrfs_commit_transaction+0x4e6/0x9f0 [btrfs]
+       btrfs_sync_file+0x38a/0x480 [btrfs]
+       __x64_sys_fdatasync+0x47/0x80
+       do_syscall_64+0x52/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #1 (&fs_info->tree_log_mutex){+.+.}-{3:3}:
+       __mutex_lock+0x7b/0x820
+       btrfs_commit_transaction+0x48e/0x9f0 [btrfs]
+       btrfs_sync_file+0x38a/0x480 [btrfs]
+       __x64_sys_fdatasync+0x47/0x80
+       do_syscall_64+0x52/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #0 (&fs_info->reloc_mutex){+.+.}-{3:3}:
+       __lock_acquire+0x1241/0x20c0
+       lock_acquire+0xb0/0x400
+       __mutex_lock+0x7b/0x820
+       btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+       start_transaction+0xd2/0x500 [btrfs]
+       btrfs_dirty_inode+0x44/0xd0 [btrfs]
+       file_update_time+0xc6/0x120
+       btrfs_page_mkwrite+0xda/0x560 [btrfs]
+       do_page_mkwrite+0x4f/0x130
+       do_wp_page+0x3b0/0x4f0
+       handle_mm_fault+0xf47/0x1850
+       do_user_addr_fault+0x1fc/0x4b0
+       exc_page_fault+0x88/0x300
+       asm_exc_page_fault+0x1e/0x30
+
+other info that might help us debug this:
+
+Chain exists of:
+  &fs_info->reloc_mutex --> &mm->mmap_lock#2 --> sb_pagefaults
+
+Possible unsafe locking scenario:
+
+     CPU0                    CPU1
+     ----                    ----
+ lock(sb_pagefaults);
+                             lock(&mm->mmap_lock#2);
+                             lock(sb_pagefaults);
+ lock(&fs_info->reloc_mutex);
+
+ *** DEADLOCK ***
+
+3 locks held by systemd-journal/509:
+ #0: ffff97083bdec8b8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x12e/0x4b0
+ #1: ffff97083144d598 (sb_pagefaults){.+.+}-{0:0}, at: btrfs_page_mkwrite+0x59/0x560 [btrfs]
+ #2: ffff97083144d6a8 (sb_internal){.+.+}-{0:0}, at: start_transaction+0x3f8/0x500 [btrfs]
+
+stack backtrace:
+CPU: 0 PID: 509 Comm: systemd-journal Not tainted 5.8.0-0.rc3.1.fc33.x86_64+debug #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+Call Trace:
+ dump_stack+0x92/0xc8
+ check_noncircular+0x134/0x150
+ __lock_acquire+0x1241/0x20c0
+ lock_acquire+0xb0/0x400
+ ? btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+ ? lock_acquire+0xb0/0x400
+ ? btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+ __mutex_lock+0x7b/0x820
+ ? btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+ ? kvm_sched_clock_read+0x14/0x30
+ ? sched_clock+0x5/0x10
+ ? sched_clock_cpu+0xc/0xb0
+ btrfs_record_root_in_trans+0x44/0x70 [btrfs]
+ start_transaction+0xd2/0x500 [btrfs]
+ btrfs_dirty_inode+0x44/0xd0 [btrfs]
+ file_update_time+0xc6/0x120
+ btrfs_page_mkwrite+0xda/0x560 [btrfs]
+ ? sched_clock+0x5/0x10
+ do_page_mkwrite+0x4f/0x130
+ do_wp_page+0x3b0/0x4f0
+ handle_mm_fault+0xf47/0x1850
+ do_user_addr_fault+0x1fc/0x4b0
+ exc_page_fault+0x88/0x300
+ ? asm_exc_page_fault+0x8/0x30
+ asm_exc_page_fault+0x1e/0x30
+RIP: 0033:0x7fa3972fdbfe
+Code: Bad RIP value.
+
+Fix this by not holding the ->device_list_mutex at this point.  The
+device_list_mutex exists to protect us from modifying the device list
+while the file system is running.
+
+However it can also be modified by doing a scan on a device.  But this
+action is specifically protected by the uuid_mutex, which we are holding
+here.  We cannot race with opening at this point because we have the
+->s_mount lock held during the mount.  Not having the
+->device_list_mutex here is perfectly safe as we're not going to change
+the devices at this point.
+
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add some comments ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |   21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -245,7 +245,9 @@ static int __btrfs_map_block(struct btrf
+  *
+  * global::fs_devs - add, remove, updates to the global list
+  *
+- * does not protect: manipulation of the fs_devices::devices list!
++ * does not protect: manipulation of the fs_devices::devices list in general
++ * but in mount context it could be used to exclude list modifications by eg.
++ * scan ioctl
+  *
+  * btrfs_device::name - renames (write side), read is RCU
+  *
+@@ -258,6 +260,9 @@ static int __btrfs_map_block(struct btrf
+  * may be used to exclude some operations from running concurrently without any
+  * modifications to the list (see write_all_supers)
+  *
++ * Is not required at mount and close times, because our device list is
++ * protected by the uuid_mutex at that point.
++ *
+  * balance_mutex
+  * -------------
+  * protects balance structures (status, state) and context accessed from
+@@ -602,6 +607,11 @@ static int btrfs_free_stale_devices(cons
+ 	return ret;
+ }
+ 
++/*
++ * This is only used on mount, and we are protected from competing things
++ * messing with our fs_devices by the uuid_mutex, thus we do not need the
++ * fs_devices->device_list_mutex here.
++ */
+ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
+ 			struct btrfs_device *device, fmode_t flags,
+ 			void *holder)
+@@ -1229,8 +1239,14 @@ int btrfs_open_devices(struct btrfs_fs_d
+ 	int ret;
+ 
+ 	lockdep_assert_held(&uuid_mutex);
++	/*
++	 * The device_list_mutex cannot be taken here in case opening the
++	 * underlying device takes further locks like bd_mutex.
++	 *
++	 * We also don't need the lock here as this is called during mount and
++	 * exclusion is provided by uuid_mutex
++	 */
+ 
+-	mutex_lock(&fs_devices->device_list_mutex);
+ 	if (fs_devices->opened) {
+ 		fs_devices->opened++;
+ 		ret = 0;
+@@ -1238,7 +1254,6 @@ int btrfs_open_devices(struct btrfs_fs_d
+ 		list_sort(NULL, &fs_devices->devices, devid_cmp);
+ 		ret = open_fs_devices(fs_devices, flags, holder);
+ 	}
+-	mutex_unlock(&fs_devices->device_list_mutex);
+ 
+ 	return ret;
+ }
diff --git a/queue-5.8/btrfs-pass-checksum-type-via-btrfs_ioc_fs_info-ioctl.patch b/queue-5.8/btrfs-pass-checksum-type-via-btrfs_ioc_fs_info-ioctl.patch
new file mode 100644
index 00000000000..916e536e29a
--- /dev/null
+++ b/queue-5.8/btrfs-pass-checksum-type-via-btrfs_ioc_fs_info-ioctl.patch
@@ -0,0 +1,125 @@
+From 137c541821a83debb63b3fa8abdd1cbc41bdf3a1 Mon Sep 17 00:00:00 2001
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Date: Mon, 13 Jul 2020 21:28:58 +0900
+Subject: btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+commit 137c541821a83debb63b3fa8abdd1cbc41bdf3a1 upstream.
+
+With the recent addition of filesystem checksum types other than CRC32c,
+it is not anymore hard-coded which checksum type a btrfs filesystem uses.
+
+Up to now there is no good way to read the filesystem checksum, apart from
+reading the filesystem UUID and then query sysfs for the checksum type.
+
+Add new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
+command which usually is used to query filesystem features. Also add a
+flags member indicating that the kernel responded with a set csum_type and
+csum_size field.
+
+For compatibility reasons, only return the csum_type and csum_size if
+the BTRFS_FS_INFO_FLAG_CSUM_INFO flag was passed to the kernel. Also
+clear any unknown flags so we don't pass false positives to user-space
+newer than the kernel.
+
+To simplify further additions to the ioctl, also switch the padding to a
+u8 array. Pahole was used to verify the result of this switch:
+
+The csum members are added before flags, which might look odd, but this
+is to keep the alignment requirements and not to introduce holes in the
+structure.
+
+  $ pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
+  struct btrfs_ioctl_fs_info_args {
+	  __u64                      max_id;               /*     0     8 */
+	  __u64                      num_devices;          /*     8     8 */
+	  __u8                       fsid[16];             /*    16    16 */
+	  __u32                      nodesize;             /*    32     4 */
+	  __u32                      sectorsize;           /*    36     4 */
+	  __u32                      clone_alignment;      /*    40     4 */
+	  __u16                      csum_type;            /*    44     2 */
+	  __u16                      csum_size;            /*    46     2 */
+	  __u64                      flags;                /*    48     8 */
+	  __u8                       reserved[968];        /*    56   968 */
+
+	  /* size: 1024, cachelines: 16, members: 10 */
+  };
+
+Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
+Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
+CC: stable@vger.kernel.org # 5.5+
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c           |   16 +++++++++++++---
+ include/uapi/linux/btrfs.h |   14 ++++++++++++--
+ 2 files changed, 25 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3217,11 +3217,15 @@ static long btrfs_ioctl_fs_info(struct b
+ 	struct btrfs_ioctl_fs_info_args *fi_args;
+ 	struct btrfs_device *device;
+ 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
++	u64 flags_in;
+ 	int ret = 0;
+ 
+-	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
+-	if (!fi_args)
+-		return -ENOMEM;
++	fi_args = memdup_user(arg, sizeof(*fi_args));
++	if (IS_ERR(fi_args))
++		return PTR_ERR(fi_args);
++
++	flags_in = fi_args->flags;
++	memset(fi_args, 0, sizeof(*fi_args));
+ 
+ 	rcu_read_lock();
+ 	fi_args->num_devices = fs_devices->num_devices;
+@@ -3237,6 +3241,12 @@ static long btrfs_ioctl_fs_info(struct b
+ 	fi_args->sectorsize = fs_info->sectorsize;
+ 	fi_args->clone_alignment = fs_info->sectorsize;
+ 
++	if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
++		fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
++		fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
++		fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
++	}
++
+ 	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
+ 		ret = -EFAULT;
+ 
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -243,6 +243,13 @@ struct btrfs_ioctl_dev_info_args {
+ 	__u8 path[BTRFS_DEVICE_PATH_NAME_MAX];	/* out */
+ };
+ 
++/*
++ * Retrieve information about the filesystem
++ */
++
++/* Request information about checksum type and size */
++#define BTRFS_FS_INFO_FLAG_CSUM_INFO			(1 << 0)
++
+ struct btrfs_ioctl_fs_info_args {
+ 	__u64 max_id;				/* out */
+ 	__u64 num_devices;			/* out */
+@@ -250,8 +257,11 @@ struct btrfs_ioctl_fs_info_args {
+ 	__u32 nodesize;				/* out */
+ 	__u32 sectorsize;			/* out */
+ 	__u32 clone_alignment;			/* out */
+-	__u32 reserved32;
+-	__u64 reserved[122];			/* pad to 1k */
++	/* See BTRFS_FS_INFO_FLAG_* */
++	__u16 csum_type;			/* out */
++	__u16 csum_size;			/* out */
++	__u64 flags;				/* in/out */
++	__u8 reserved[968];			/* pad to 1k */
+ };
+ 
+ /*
diff --git a/queue-5.8/btrfs-preallocate-anon-block-device-at-first-phase-of-snapshot-creation.patch b/queue-5.8/btrfs-preallocate-anon-block-device-at-first-phase-of-snapshot-creation.patch
new file mode 100644
index 00000000000..dc115441569
--- /dev/null
+++ b/queue-5.8/btrfs-preallocate-anon-block-device-at-first-phase-of-snapshot-creation.patch
@@ -0,0 +1,293 @@
+From 2dfb1e43f57dd3aeaa66f7cf05d068db2d4c8788 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 16 Jun 2020 10:17:36 +0800
+Subject: btrfs: preallocate anon block device at first phase of snapshot creation
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 2dfb1e43f57dd3aeaa66f7cf05d068db2d4c8788 upstream.
+
+[BUG]
+When the anonymous block device pool is exhausted, subvolume/snapshot
+creation fails with EMFILE (Too many open files). This has been reported
+by a user. The allocation happens in the second phase during transaction
+commit where its only way out is to abort the transaction:
+
+  BTRFS: Transaction aborted (error -24)
+  WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
+  Call Trace:
+   create_pending_snapshots+0x82/0xa0 [btrfs]
+   btrfs_commit_transaction+0x275/0x8c0 [btrfs]
+   btrfs_mksubvol+0x4b9/0x500 [btrfs]
+   btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
+   btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
+   btrfs_ioctl+0x11a4/0x2da0 [btrfs]
+   do_vfs_ioctl+0xa9/0x640
+   ksys_ioctl+0x67/0x90
+   __x64_sys_ioctl+0x1a/0x20
+   do_syscall_64+0x5a/0x110
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  ---[ end trace 33f2f83f3d5250e9 ]---
+  BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
+  BTRFS info (device sda1): forced readonly
+  BTRFS warning (device sda1): Skipping commit of aborted transaction.
+  BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
+
+[CAUSE]
+When the global anonymous block device pool is exhausted, the following
+call chain will fail, and lead to transaction abort:
+
+ btrfs_ioctl_snap_create_v2()
+ |- btrfs_ioctl_snap_create_transid()
+    |- btrfs_mksubvol()
+       |- btrfs_commit_transaction()
+          |- create_pending_snapshot()
+             |- btrfs_get_fs_root()
+                |- btrfs_init_fs_root()
+                   |- get_anon_bdev()
+
+[FIX]
+Although we can't enlarge the anonymous block device pool, at least we
+can preallocate anon_dev for subvolume/snapshot in the first phase,
+outside of transaction context and exactly at the moment the user calls
+the creation ioctl.
+
+Reported-by: Greed Rong <greedrong@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c     |   71 ++++++++++++++++++++++++++++++++++++++++++++-----
+ fs/btrfs/disk-io.h     |    2 +
+ fs/btrfs/ioctl.c       |   21 +++++++++++++-
+ fs/btrfs/transaction.c |    2 -
+ fs/btrfs/transaction.h |    2 +
+ 5 files changed, 89 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1395,7 +1395,12 @@ alloc_fail:
+ 	goto out;
+ }
+ 
+-static int btrfs_init_fs_root(struct btrfs_root *root)
++/*
++ * Initialize subvolume root in-memory structure
++ *
++ * @anon_dev:	anonymous device to attach to the root, if zero, allocate new
++ */
++static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
+ {
+ 	int ret;
+ 	unsigned int nofs_flag;
+@@ -1434,9 +1439,13 @@ static int btrfs_init_fs_root(struct btr
+ 	 */
+ 	if (is_fstree(root->root_key.objectid) &&
+ 	    btrfs_root_refs(&root->root_item) > 0) {
+-		ret = get_anon_bdev(&root->anon_dev);
+-		if (ret)
+-			goto fail;
++		if (!anon_dev) {
++			ret = get_anon_bdev(&root->anon_dev);
++			if (ret)
++				goto fail;
++		} else {
++			root->anon_dev = anon_dev;
++		}
+ 	}
+ 
+ 	mutex_lock(&root->objectid_mutex);
+@@ -1541,8 +1550,27 @@ void btrfs_free_fs_info(struct btrfs_fs_
+ }
+ 
+ 
+-struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+-				     u64 objectid, bool check_ref)
++/*
++ * Get an in-memory reference of a root structure.
++ *
++ * For essential trees like root/extent tree, we grab it from fs_info directly.
++ * For subvolume trees, we check the cached filesystem roots first. If not
++ * found, then read it from disk and add it to cached fs roots.
++ *
++ * Caller should release the root by calling btrfs_put_root() after the usage.
++ *
++ * NOTE: Reloc and log trees can't be read by this function as they share the
++ *	 same root objectid.
++ *
++ * @objectid:	root id
++ * @anon_dev:	preallocated anonymous block device number for new roots,
++ * 		pass 0 for new allocation.
++ * @check_ref:	whether to check root item references, If true, return -ENOENT
++ *		for orphan roots
++ */
++static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
++					     u64 objectid, dev_t anon_dev,
++					     bool check_ref)
+ {
+ 	struct btrfs_root *root;
+ 	struct btrfs_path *path;
+@@ -1571,6 +1599,8 @@ struct btrfs_root *btrfs_get_fs_root(str
+ again:
+ 	root = btrfs_lookup_fs_root(fs_info, objectid);
+ 	if (root) {
++		/* Shouldn't get preallocated anon_dev for cached roots */
++		ASSERT(!anon_dev);
+ 		if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
+ 			btrfs_put_root(root);
+ 			return ERR_PTR(-ENOENT);
+@@ -1590,7 +1620,7 @@ again:
+ 		goto fail;
+ 	}
+ 
+-	ret = btrfs_init_fs_root(root);
++	ret = btrfs_init_fs_root(root, anon_dev);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -1623,6 +1653,33 @@ fail:
+ 	return ERR_PTR(ret);
+ }
+ 
++/*
++ * Get in-memory reference of a root structure
++ *
++ * @objectid:	tree objectid
++ * @check_ref:	if set, verify that the tree exists and the item has at least
++ *		one reference
++ */
++struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
++				     u64 objectid, bool check_ref)
++{
++	return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
++}
++
++/*
++ * Get in-memory reference of a root structure, created as new, optionally pass
++ * the anonymous block device id
++ *
++ * @objectid:	tree objectid
++ * @anon_dev:	if zero, allocate a new anonymous block device or use the
++ *		parameter value
++ */
++struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
++					 u64 objectid, dev_t anon_dev)
++{
++	return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
++}
++
+ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
+ {
+ 	struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -67,6 +67,8 @@ void btrfs_free_fs_roots(struct btrfs_fs
+ 
+ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ 				     u64 objectid, bool check_ref);
++struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
++					 u64 objectid, dev_t anon_dev);
+ 
+ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
+ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -566,6 +566,7 @@ static noinline int create_subvol(struct
+ 	struct inode *inode;
+ 	int ret;
+ 	int err;
++	dev_t anon_dev = 0;
+ 	u64 objectid;
+ 	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
+ 	u64 index = 0;
+@@ -578,6 +579,10 @@ static noinline int create_subvol(struct
+ 	if (ret)
+ 		goto fail_free;
+ 
++	ret = get_anon_bdev(&anon_dev);
++	if (ret < 0)
++		goto fail_free;
++
+ 	/*
+ 	 * Don't create subvolume whose level is not zero. Or qgroup will be
+ 	 * screwed up since it assumes subvolume qgroup's level to be 0.
+@@ -660,12 +665,15 @@ static noinline int create_subvol(struct
+ 		goto fail;
+ 
+ 	key.offset = (u64)-1;
+-	new_root = btrfs_get_fs_root(fs_info, objectid, true);
++	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ 	if (IS_ERR(new_root)) {
++		free_anon_bdev(anon_dev);
+ 		ret = PTR_ERR(new_root);
+ 		btrfs_abort_transaction(trans, ret);
+ 		goto fail;
+ 	}
++	/* Freeing will be done in btrfs_put_root() of new_root */
++	anon_dev = 0;
+ 
+ 	btrfs_record_root_in_trans(trans, new_root);
+ 
+@@ -735,6 +743,8 @@ fail:
+ 	return ret;
+ 
+ fail_free:
++	if (anon_dev)
++		free_anon_bdev(anon_dev);
+ 	kfree(root_item);
+ 	return ret;
+ }
+@@ -762,6 +772,9 @@ static int create_snapshot(struct btrfs_
+ 	if (!pending_snapshot)
+ 		return -ENOMEM;
+ 
++	ret = get_anon_bdev(&pending_snapshot->anon_dev);
++	if (ret < 0)
++		goto free_pending;
+ 	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
+ 			GFP_KERNEL);
+ 	pending_snapshot->path = btrfs_alloc_path();
+@@ -823,10 +836,16 @@ static int create_snapshot(struct btrfs_
+ 
+ 	d_instantiate(dentry, inode);
+ 	ret = 0;
++	pending_snapshot->anon_dev = 0;
+ fail:
++	/* Prevent double freeing of anon_dev */
++	if (ret && pending_snapshot->snap)
++		pending_snapshot->snap->anon_dev = 0;
+ 	btrfs_put_root(pending_snapshot->snap);
+ 	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
+ free_pending:
++	if (pending_snapshot->anon_dev)
++		free_anon_bdev(pending_snapshot->anon_dev);
+ 	kfree(pending_snapshot->root_item);
+ 	btrfs_free_path(pending_snapshot->path);
+ 	kfree(pending_snapshot);
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1630,7 +1630,7 @@ static noinline int create_pending_snaps
+ 	}
+ 
+ 	key.offset = (u64)-1;
+-	pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
++	pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ 	if (IS_ERR(pending->snap)) {
+ 		ret = PTR_ERR(pending->snap);
+ 		btrfs_abort_transaction(trans, ret);
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -151,6 +151,8 @@ struct btrfs_pending_snapshot {
+ 	struct btrfs_block_rsv block_rsv;
+ 	/* extra metadata reservation for relocation */
+ 	int error;
++	/* Preallocated anonymous block device number */
++	dev_t anon_dev;
+ 	bool readonly;
+ 	struct list_head list;
+ };
diff --git a/queue-5.8/btrfs-ref-verify-fix-memory-leak-in-add_block_entry.patch b/queue-5.8/btrfs-ref-verify-fix-memory-leak-in-add_block_entry.patch
new file mode 100644
index 00000000000..6cf068cd448
--- /dev/null
+++ b/queue-5.8/btrfs-ref-verify-fix-memory-leak-in-add_block_entry.patch
@@ -0,0 +1,50 @@
+From d60ba8de1164e1b42e296ff270c622a070ef8fe7 Mon Sep 17 00:00:00 2001
+From: Tom Rix <trix@redhat.com>
+Date: Tue, 7 Jul 2020 06:29:08 -0700
+Subject: btrfs: ref-verify: fix memory leak in add_block_entry
+
+From: Tom Rix <trix@redhat.com>
+
+commit d60ba8de1164e1b42e296ff270c622a070ef8fe7 upstream.
+
+clang static analysis flags this error
+
+fs/btrfs/ref-verify.c:290:3: warning: Potential leak of memory pointed to by 're' [unix.Malloc]
+                kfree(be);
+                ^~~~~
+
+The problem is in this block of code:
+
+	if (root_objectid) {
+		struct root_entry *exist_re;
+
+		exist_re = insert_root_entry(&exist->roots, re);
+		if (exist_re)
+			kfree(re);
+	}
+
+There is no 'else' block freeing when root_objectid is 0. Add the
+missing kfree to the else branch.
+
+Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool")
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Tom Rix <trix@redhat.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ref-verify.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/ref-verify.c
++++ b/fs/btrfs/ref-verify.c
+@@ -286,6 +286,8 @@ static struct block_entry *add_block_ent
+ 			exist_re = insert_root_entry(&exist->roots, re);
+ 			if (exist_re)
+ 				kfree(re);
++		} else {
++			kfree(re);
+ 		}
+ 		kfree(be);
+ 		return exist;
diff --git a/queue-5.8/btrfs-relocation-review-the-call-sites-which-can-be-interrupted-by-signal.patch b/queue-5.8/btrfs-relocation-review-the-call-sites-which-can-be-interrupted-by-signal.patch
new file mode 100644
index 00000000000..2921a3728c1
--- /dev/null
+++ b/queue-5.8/btrfs-relocation-review-the-call-sites-which-can-be-interrupted-by-signal.patch
@@ -0,0 +1,104 @@
+From 44d354abf33e92a5e73b965c84caf5a5d5e58a0b Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 13 Jul 2020 09:03:21 +0800
+Subject: btrfs: relocation: review the call sites which can be interrupted by signal
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 44d354abf33e92a5e73b965c84caf5a5d5e58a0b upstream.
+
+Since most metadata reservation calls can return -EINTR when they get
+interrupted by a fatal signal, we need to review all the metadata
+reservation call sites.
+
+In relocation code, the metadata reservation happens in the following
+sites:
+
+- btrfs_block_rsv_refill() in merge_reloc_root()
+  merge_reloc_root() is a pretty critical section, we don't want to be
+  interrupted by signal, so change the flush status to
+  BTRFS_RESERVE_FLUSH_LIMIT, so it won't get interrupted by signal.
+  Since such a change can be ENOSPC-prone, also shrink the amount of
+  metadata to reserve to the least amount, to avoid deadly ENOSPC there.
+
+- btrfs_block_rsv_refill() in reserve_metadata_space()
+  It calls with BTRFS_RESERVE_FLUSH_LIMIT, which won't get interrupted
+  by signal.
+
+- btrfs_block_rsv_refill() in prepare_to_relocate()
+
+- btrfs_block_rsv_add() in prepare_to_relocate()
+
+- btrfs_block_rsv_refill() in relocate_block_group()
+
+- btrfs_delalloc_reserve_metadata() in relocate_file_extent_cluster()
+
+- btrfs_start_transaction() in relocate_block_group()
+
+- btrfs_start_transaction() in create_reloc_inode()
+  Can be interrupted by fatal signal and we can handle it easily.
+  For these call sites, just catch the -EINTR value in btrfs_balance()
+  and count them as canceled.
+
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |   12 ++++++++++--
+ fs/btrfs/volumes.c    |   17 ++++++++++++++++-
+ 2 files changed, 26 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1686,12 +1686,20 @@ static noinline_for_stack int merge_relo
+ 		btrfs_unlock_up_safe(path, 0);
+ 	}
+ 
+-	min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
++	/*
++	 * In merge_reloc_root(), we modify the upper level pointer to swap the
++	 * tree blocks between reloc tree and subvolume tree.  Thus for tree
++	 * block COW, we COW at most from level 1 to root level for each tree.
++	 *
++	 * Thus the needed metadata size is at most root_level * nodesize,
++	 * and * 2 since we have two trees to COW.
++	 */
++	min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
+ 	memset(&next_key, 0, sizeof(next_key));
+ 
+ 	while (1) {
+ 		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
+-					     BTRFS_RESERVE_FLUSH_ALL);
++					     BTRFS_RESERVE_FLUSH_LIMIT);
+ 		if (ret) {
+ 			err = ret;
+ 			goto out;
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4150,7 +4150,22 @@ int btrfs_balance(struct btrfs_fs_info *
+ 	mutex_lock(&fs_info->balance_mutex);
+ 	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
+ 		btrfs_info(fs_info, "balance: paused");
+-	else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
++	/*
++	 * Balance can be canceled by:
++	 *
++	 * - Regular cancel request
++	 *   Then ret == -ECANCELED and balance_cancel_req > 0
++	 *
++	 * - Fatal signal to "btrfs" process
++	 *   Either the signal caught by wait_reserve_ticket() and callers
++	 *   got -EINTR, or caught by btrfs_should_cancel_balance() and
++	 *   got -ECANCELED.
++	 *   Either way, in this case balance_cancel_req = 0, and
++	 *   ret == -EINTR or ret == -ECANCELED.
++	 *
++	 * So here we only check the return value to catch canceled balance.
++	 */
++	else if (ret == -ECANCELED || ret == -EINTR)
+ 		btrfs_info(fs_info, "balance: canceled");
+ 	else
+ 		btrfs_info(fs_info, "balance: ended with status: %d", ret);
diff --git a/queue-5.8/btrfs-remove-no-longer-needed-use-of-log_writers-for-the-log-root-tree.patch b/queue-5.8/btrfs-remove-no-longer-needed-use-of-log_writers-for-the-log-root-tree.patch
new file mode 100644
index 00000000000..5b6c945a87f
--- /dev/null
+++ b/queue-5.8/btrfs-remove-no-longer-needed-use-of-log_writers-for-the-log-root-tree.patch
@@ -0,0 +1,122 @@
+From a93e01682e283f6de09d6ce8f805dc52a2e942fb Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 2 Jul 2020 12:32:40 +0100
+Subject: btrfs: remove no longer needed use of log_writers for the log root tree
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit a93e01682e283f6de09d6ce8f805dc52a2e942fb upstream.
+
+When syncing the log, we used to update the log root tree while holding
+neither the log_mutex of the subvolume root nor the log_mutex of the log
+root tree.
+
+We used to have two critical sections delimited by the log_mutex of the
+log root tree, so in the first one we incremented the log_writers of the
+log root tree and on the second one we decremented it and waited for the
+log_writers counter to go down to zero. This was because the update of
+the log root tree happened between the two critical sections.
+
+The use of two critical sections allowed a little bit more of parallelism
+and required the use of the log_writers counter, necessary to make sure
+we didn't miss any log root tree update when we have multiple tasks trying
+to sync the log in parallel.
+
+However after commit 06989c799f0481 ("Btrfs: fix race updating log root
+item during fsync") the log root tree update was moved into a critical
+section delimited by the subvolume's log_mutex. Later another commit
+moved the log tree update from that critical section into the second
+critical section delimited by the log_mutex of the log root tree. Both
+commits addressed different bugs.
+
+The end result is that the first critical section delimited by the
+log_mutex of the log root tree became pointless, since there's nothing
+done between it and the second critical section, we just have an unlock
+of the log_mutex followed by a lock operation. This means we can merge
+both critical sections, as the first one does almost nothing now, and we
+can stop using the log_writers counter of the log root tree, which was
+incremented in the first critical section and decremented in the second
+critical section, used to make sure no one in the second critical section
+started writeback of the log root tree before some other task updated it.
+
+So just remove the mutex_unlock() followed by mutex_lock() of the log root
+tree, as well as the use of the log_writers counter for the log root tree.
+
+This patch is part of a series that has the following patches:
+
+1/4 btrfs: only commit the delayed inode when doing a full fsync
+2/4 btrfs: only commit delayed items at fsync if we are logging a directory
+3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
+4/4 btrfs: remove no longer needed use of log_writers for the log root tree
+
+After the entire patchset applied I saw about 12% decrease on max latency
+reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
+ram, using kvm and using a raw NVMe device directly (no intermediary fs on
+the host). The test was invoked like the following:
+
+  mkfs.btrfs -f /dev/sdk
+  mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
+  dbench -D /mnt/sdk -t 300 8
+  umount /mnt/sdk
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h    |    1 +
+ fs/btrfs/tree-log.c |   13 -------------
+ 2 files changed, 1 insertion(+), 13 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1059,6 +1059,7 @@ struct btrfs_root {
+ 	wait_queue_head_t log_writer_wait;
+ 	wait_queue_head_t log_commit_wait[2];
+ 	struct list_head log_ctxs[2];
++	/* Used only for log trees of subvolumes, not for the log root tree */
+ 	atomic_t log_writers;
+ 	atomic_t log_commit[2];
+ 	/* Used only for log trees of subvolumes, not for the log root tree */
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -3116,28 +3116,17 @@ int btrfs_sync_log(struct btrfs_trans_ha
+ 	btrfs_init_log_ctx(&root_log_ctx, NULL);
+ 
+ 	mutex_lock(&log_root_tree->log_mutex);
+-	atomic_inc(&log_root_tree->log_writers);
+ 
+ 	index2 = log_root_tree->log_transid % 2;
+ 	list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+ 	root_log_ctx.log_transid = log_root_tree->log_transid;
+ 
+-	mutex_unlock(&log_root_tree->log_mutex);
+-
+-	mutex_lock(&log_root_tree->log_mutex);
+-
+ 	/*
+ 	 * Now we are safe to update the log_root_tree because we're under the
+ 	 * log_mutex, and we're a current writer so we're holding the commit
+ 	 * open until we drop the log_mutex.
+ 	 */
+ 	ret = update_log_root(trans, log, &new_root_item);
+-
+-	if (atomic_dec_and_test(&log_root_tree->log_writers)) {
+-		/* atomic_dec_and_test implies a barrier */
+-		cond_wake_up_nomb(&log_root_tree->log_writer_wait);
+-	}
+-
+ 	if (ret) {
+ 		if (!list_empty(&root_log_ctx.list))
+ 			list_del_init(&root_log_ctx.list);
+@@ -3183,8 +3172,6 @@ int btrfs_sync_log(struct btrfs_trans_ha
+ 				root_log_ctx.log_transid - 1);
+ 	}
+ 
+-	wait_for_writer(log_root_tree);
+-
+ 	/*
+ 	 * now that we've moved on to the tree of log tree roots,
+ 	 * check the full commit flag again
diff --git a/queue-5.8/btrfs-return-erofs-for-btrfs_fs_state_error-cases.patch b/queue-5.8/btrfs-return-erofs-for-btrfs_fs_state_error-cases.patch
new file mode 100644
index 00000000000..b3bd34fd008
--- /dev/null
+++ b/queue-5.8/btrfs-return-erofs-for-btrfs_fs_state_error-cases.patch
@@ -0,0 +1,152 @@
+From fbabd4a36faaf74c83142d0b3d950c11ec14fda1 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Tue, 21 Jul 2020 10:38:37 -0400
+Subject: btrfs: return EROFS for BTRFS_FS_STATE_ERROR cases
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit fbabd4a36faaf74c83142d0b3d950c11ec14fda1 upstream.
+
+Eric reported seeing this message while running generic/475
+
+  BTRFS: error (device dm-3) in btrfs_sync_log:3084: errno=-117 Filesystem corrupted
+
+Full stack trace:
+
+  BTRFS: error (device dm-0) in btrfs_commit_transaction:2323: errno=-5 IO failure (Error while writing out transaction)
+  BTRFS info (device dm-0): forced readonly
+  BTRFS warning (device dm-0): Skipping commit of aborted transaction.
+  ------------[ cut here ]------------
+  BTRFS: error (device dm-0) in cleanup_transaction:1894: errno=-5 IO failure
+  BTRFS: Transaction aborted (error -117)
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c6480 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c6488 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c6490 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c6498 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c64a0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c64a8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c64b0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c64b8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3555 rw 0,0 sector 0x1c64c0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3572 rw 0,0 sector 0x1b85e8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3572 rw 0,0 sector 0x1b85f0 len 4096 err no 10
+  WARNING: CPU: 3 PID: 23985 at fs/btrfs/tree-log.c:3084 btrfs_sync_log+0xbc8/0xd60 [btrfs]
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d4288 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d4290 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d4298 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42a0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42a8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42b0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42b8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42c0 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42c8 len 4096 err no 10
+  BTRFS warning (device dm-0): direct IO failed ino 3548 rw 0,0 sector 0x1d42d0 len 4096 err no 10
+  CPU: 3 PID: 23985 Comm: fsstress Tainted: G        W    L    5.8.0-rc4-default+ #1181
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014
+  RIP: 0010:btrfs_sync_log+0xbc8/0xd60 [btrfs]
+  RSP: 0018:ffff909a44d17bd0 EFLAGS: 00010286
+  RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000001
+  RDX: ffff8f3be41cb940 RSI: ffffffffb0108d2b RDI: ffffffffb0108ff7
+  RBP: ffff909a44d17e70 R08: 0000000000000000 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000037988 R12: ffff8f3bd20e4000
+  R13: ffff8f3bd20e4428 R14: 00000000ffffff8b R15: ffff909a44d17c70
+  FS:  00007f6a6ed3fb80(0000) GS:ffff8f3c3dc00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00007f6a6ed3e000 CR3: 00000000525c0003 CR4: 0000000000160ee0
+  Call Trace:
+   ? finish_wait+0x90/0x90
+   ? __mutex_unlock_slowpath+0x45/0x2a0
+   ? lock_acquire+0xa3/0x440
+   ? lockref_put_or_lock+0x9/0x30
+   ? dput+0x20/0x4a0
+   ? dput+0x20/0x4a0
+   ? do_raw_spin_unlock+0x4b/0xc0
+   ? _raw_spin_unlock+0x1f/0x30
+   btrfs_sync_file+0x335/0x490 [btrfs]
+   do_fsync+0x38/0x70
+   __x64_sys_fsync+0x10/0x20
+   do_syscall_64+0x50/0xe0
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  RIP: 0033:0x7f6a6ef1b6e3
+  Code: Bad RIP value.
+  RSP: 002b:00007ffd01e20038 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
+  RAX: ffffffffffffffda RBX: 000000000007a120 RCX: 00007f6a6ef1b6e3
+  RDX: 00007ffd01e1ffa0 RSI: 00007ffd01e1ffa0 RDI: 0000000000000003
+  RBP: 0000000000000003 R08: 0000000000000001 R09: 00007ffd01e2004c
+  R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000009f
+  R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+  irq event stamp: 0
+  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+  hardirqs last disabled at (0): [<ffffffffb007fe0b>] copy_process+0x67b/0x1b00
+  softirqs last  enabled at (0): [<ffffffffb007fe0b>] copy_process+0x67b/0x1b00
+  softirqs last disabled at (0): [<0000000000000000>] 0x0
+  ---[ end trace af146e0e38433456 ]---
+  BTRFS: error (device dm-0) in btrfs_sync_log:3084: errno=-117 Filesystem corrupted
+
+This ret came from btrfs_write_marked_extents().  If we get an aborted
+transaction via EIO before, we'll see it in btree_write_cache_pages()
+and return EUCLEAN, which gets printed as "Filesystem corrupted".
+
+Except we shouldn't be returning EUCLEAN here, we need to be returning
+EROFS because EUCLEAN is reserved for actual corruption, not IO errors.
+
+We are inconsistent about our handling of BTRFS_FS_STATE_ERROR
+elsewhere, but we want to use EROFS for this particular case.  The
+original transaction abort has the real error code for why we ended up
+with an aborted transaction, all subsequent actions just need to return
+EROFS because they may not have a trans handle and have no idea about
+the original cause of the abort.
+
+After patch "btrfs: don't WARN if we abort a transaction with EROFS" the
+stacktrace will not be dumped either.
+
+Reported-by: Eric Sandeen <esandeen@redhat.com>
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add full test stacktrace ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c   |    2 +-
+ fs/btrfs/scrub.c       |    2 +-
+ fs/btrfs/transaction.c |    5 ++++-
+ 3 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4127,7 +4127,7 @@ retry:
+ 	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ 		ret = flush_write_bio(&epd);
+ 	} else {
+-		ret = -EUCLEAN;
++		ret = -EROFS;
+ 		end_write_bio(&epd, ret);
+ 	}
+ 	return ret;
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -3758,7 +3758,7 @@ static noinline_for_stack int scrub_supe
+ 	struct btrfs_fs_info *fs_info = sctx->fs_info;
+ 
+ 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+-		return -EIO;
++		return -EROFS;
+ 
+ 	/* Seed devices of a new filesystem has their own generation. */
+ 	if (scrub_dev->fs_devices != fs_info->fs_devices)
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -937,7 +937,10 @@ static int __btrfs_end_transaction(struc
+ 	if (TRANS_ABORTED(trans) ||
+ 	    test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
+ 		wake_up_process(info->transaction_kthread);
+-		err = -EIO;
++		if (TRANS_ABORTED(trans))
++			err = trans->aborted;
++		else
++			err = -EROFS;
+ 	}
+ 
+ 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
diff --git a/queue-5.8/btrfs-stop-incremening-log_batch-for-the-log-root-tree-when-syncing-log.patch b/queue-5.8/btrfs-stop-incremening-log_batch-for-the-log-root-tree-when-syncing-log.patch
new file mode 100644
index 00000000000..1cb0b387d2a
--- /dev/null
+++ b/queue-5.8/btrfs-stop-incremening-log_batch-for-the-log-root-tree-when-syncing-log.patch
@@ -0,0 +1,68 @@
+From 28a9579561bcb9082715e720eac93012e708ab94 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 2 Jul 2020 12:32:31 +0100
+Subject: btrfs: stop incremening log_batch for the log root tree when syncing log
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 28a9579561bcb9082715e720eac93012e708ab94 upstream.
+
+We are incrementing the log_batch atomic counter of the root log tree but
+we never use that counter, it's used only for the log trees of subvolume
+roots. We started doing it when we moved the log_batch and log_write
+counters from the global, per fs, btrfs_fs_info structure, into the
+btrfs_root structure in commit 7237f1833601dc ("Btrfs: fix tree logs
+parallel sync").
+
+So just stop doing it for the log root tree and add a comment over the
+field declaration to inform that it's used only for log trees of subvolume
+roots.
+
+This patch is part of a series that has the following patches:
+
+1/4 btrfs: only commit the delayed inode when doing a full fsync
+2/4 btrfs: only commit delayed items at fsync if we are logging a directory
+3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
+4/4 btrfs: remove no longer needed use of log_writers for the log root tree
+
+After the entire patchset applied I saw about 12% decrease on max latency
+reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
+ram, using kvm and using a raw NVMe device directly (no intermediary fs on
+the host). The test was invoked like the following:
+
+  mkfs.btrfs -f /dev/sdk
+  mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
+  dbench -D /mnt/sdk -t 300 8
+  umount /mnt/sdk
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h    |    1 +
+ fs/btrfs/tree-log.c |    1 -
+ 2 files changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1061,6 +1061,7 @@ struct btrfs_root {
+ 	struct list_head log_ctxs[2];
+ 	atomic_t log_writers;
+ 	atomic_t log_commit[2];
++	/* Used only for log trees of subvolumes, not for the log root tree */
+ 	atomic_t log_batch;
+ 	int log_transid;
+ 	/* No matter the commit succeeds or not*/
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -3116,7 +3116,6 @@ int btrfs_sync_log(struct btrfs_trans_ha
+ 	btrfs_init_log_ctx(&root_log_ctx, NULL);
+ 
+ 	mutex_lock(&log_root_tree->log_mutex);
+-	atomic_inc(&log_root_tree->log_batch);
+ 	atomic_inc(&log_root_tree->log_writers);
+ 
+ 	index2 = log_root_tree->log_transid % 2;
diff --git a/queue-5.8/btrfs-sysfs-use-nofs-for-device-creation.patch b/queue-5.8/btrfs-sysfs-use-nofs-for-device-creation.patch
new file mode 100644
index 00000000000..c7d28414e15
--- /dev/null
+++ b/queue-5.8/btrfs-sysfs-use-nofs-for-device-creation.patch
@@ -0,0 +1,181 @@
+From a47bd78d0c44621efb98b525d04d60dc4d1a79b0 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Tue, 21 Jul 2020 10:17:50 -0400
+Subject: btrfs: sysfs: use NOFS for device creation
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit a47bd78d0c44621efb98b525d04d60dc4d1a79b0 upstream.
+
+Dave hit this splat during testing btrfs/078:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.8.0-rc6-default+ #1191 Not tainted
+  ------------------------------------------------------
+  kswapd0/75 is trying to acquire lock:
+  ffffa040e9d04ff8 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+
+  but task is already holding lock:
+  ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #2 (fs_reclaim){+.+.}-{0:0}:
+	 __lock_acquire+0x56f/0xaa0
+	 lock_acquire+0xa3/0x440
+	 fs_reclaim_acquire.part.0+0x25/0x30
+	 __kmalloc_track_caller+0x49/0x330
+	 kstrdup+0x2e/0x60
+	 __kernfs_new_node.constprop.0+0x44/0x250
+	 kernfs_new_node+0x25/0x50
+	 kernfs_create_link+0x34/0xa0
+	 sysfs_do_create_link_sd+0x5e/0xd0
+	 btrfs_sysfs_add_devices_dir+0x65/0x100 [btrfs]
+	 btrfs_init_new_device+0x44c/0x12b0 [btrfs]
+	 btrfs_ioctl+0xc3c/0x25c0 [btrfs]
+	 ksys_ioctl+0x68/0xa0
+	 __x64_sys_ioctl+0x16/0x20
+	 do_syscall_64+0x50/0xe0
+	 entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}:
+	 __lock_acquire+0x56f/0xaa0
+	 lock_acquire+0xa3/0x440
+	 __mutex_lock+0xa0/0xaf0
+	 btrfs_chunk_alloc+0x137/0x3e0 [btrfs]
+	 find_free_extent+0xb44/0xfb0 [btrfs]
+	 btrfs_reserve_extent+0x9b/0x180 [btrfs]
+	 btrfs_alloc_tree_block+0xc1/0x350 [btrfs]
+	 alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs]
+	 __btrfs_cow_block+0x143/0x7a0 [btrfs]
+	 btrfs_cow_block+0x15f/0x310 [btrfs]
+	 push_leaf_right+0x150/0x240 [btrfs]
+	 split_leaf+0x3cd/0x6d0 [btrfs]
+	 btrfs_search_slot+0xd14/0xf70 [btrfs]
+	 btrfs_insert_empty_items+0x64/0xc0 [btrfs]
+	 __btrfs_commit_inode_delayed_items+0xb2/0x840 [btrfs]
+	 btrfs_async_run_delayed_root+0x10e/0x1d0 [btrfs]
+	 btrfs_work_helper+0x2f9/0x650 [btrfs]
+	 process_one_work+0x22c/0x600
+	 worker_thread+0x50/0x3b0
+	 kthread+0x137/0x150
+	 ret_from_fork+0x1f/0x30
+
+  -> #0 (&delayed_node->mutex){+.+.}-{3:3}:
+	 check_prev_add+0x98/0xa20
+	 validate_chain+0xa8c/0x2a00
+	 __lock_acquire+0x56f/0xaa0
+	 lock_acquire+0xa3/0x440
+	 __mutex_lock+0xa0/0xaf0
+	 __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+	 btrfs_evict_inode+0x3bf/0x560 [btrfs]
+	 evict+0xd6/0x1c0
+	 dispose_list+0x48/0x70
+	 prune_icache_sb+0x54/0x80
+	 super_cache_scan+0x121/0x1a0
+	 do_shrink_slab+0x175/0x420
+	 shrink_slab+0xb1/0x2e0
+	 shrink_node+0x192/0x600
+	 balance_pgdat+0x31f/0x750
+	 kswapd+0x206/0x510
+	 kthread+0x137/0x150
+	 ret_from_fork+0x1f/0x30
+
+  other info that might help us debug this:
+
+  Chain exists of:
+    &delayed_node->mutex --> &fs_info->chunk_mutex --> fs_reclaim
+
+   Possible unsafe locking scenario:
+
+	 CPU0                    CPU1
+	 ----                    ----
+    lock(fs_reclaim);
+				 lock(&fs_info->chunk_mutex);
+				 lock(fs_reclaim);
+    lock(&delayed_node->mutex);
+
+   *** DEADLOCK ***
+
+  3 locks held by kswapd0/75:
+   #0: ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+   #1: ffffffff8b0b50b8 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x54/0x2e0
+   #2: ffffa040e057c0e8 (&type->s_umount_key#26){++++}-{3:3}, at: trylock_super+0x16/0x50
+
+  stack backtrace:
+  CPU: 2 PID: 75 Comm: kswapd0 Not tainted 5.8.0-rc6-default+ #1191
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014
+  Call Trace:
+   dump_stack+0x78/0xa0
+   check_noncircular+0x16f/0x190
+   check_prev_add+0x98/0xa20
+   validate_chain+0xa8c/0x2a00
+   __lock_acquire+0x56f/0xaa0
+   lock_acquire+0xa3/0x440
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+   __mutex_lock+0xa0/0xaf0
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+   ? __lock_acquire+0x56f/0xaa0
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+   ? lock_acquire+0xa3/0x440
+   ? btrfs_evict_inode+0x138/0x560 [btrfs]
+   ? btrfs_evict_inode+0x2fe/0x560 [btrfs]
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+   __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+   btrfs_evict_inode+0x3bf/0x560 [btrfs]
+   evict+0xd6/0x1c0
+   dispose_list+0x48/0x70
+   prune_icache_sb+0x54/0x80
+   super_cache_scan+0x121/0x1a0
+   do_shrink_slab+0x175/0x420
+   shrink_slab+0xb1/0x2e0
+   shrink_node+0x192/0x600
+   balance_pgdat+0x31f/0x750
+   kswapd+0x206/0x510
+   ? _raw_spin_unlock_irqrestore+0x3e/0x50
+   ? finish_wait+0x90/0x90
+   ? balance_pgdat+0x750/0x750
+   kthread+0x137/0x150
+   ? kthread_stop+0x2a0/0x2a0
+   ret_from_fork+0x1f/0x30
+
+This is because we're holding the chunk_mutex while adding this device
+and adding its sysfs entries.  We actually hold different locks in
+different places when calling this function, the dev_replace semaphore
+for instance in dev replace, so instead of moving this call around
+simply wrap its operations in NOFS.
+
+CC: stable@vger.kernel.org # 4.14+
+Reported-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/sysfs.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -1273,7 +1273,9 @@ int btrfs_sysfs_add_devices_dir(struct b
+ {
+ 	int error = 0;
+ 	struct btrfs_device *dev;
++	unsigned int nofs_flag;
+ 
++	nofs_flag = memalloc_nofs_save();
+ 	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
+ 
+ 		if (one_device && one_device != dev)
+@@ -1301,6 +1303,7 @@ int btrfs_sysfs_add_devices_dir(struct b
+ 			break;
+ 		}
+ 	}
++	memalloc_nofs_restore(nofs_flag);
+ 
+ 	return error;
+ }
diff --git a/queue-5.8/series b/queue-5.8/series
index 75ce70eafd2..ba8273426cc 100644
--- a/queue-5.8/series
+++ b/queue-5.8/series
@@ -10,3 +10,23 @@ pci-mark-amd-navi10-gpu-rev-0x00-ats-as-broken.patch
 pci-add-device-even-if-driver-attach-failed.patch
 pci-qcom-define-some-parf-params-needed-for-ipq8064-soc.patch
 pci-qcom-add-support-for-tx-term-offset-for-rev-2.1.0.patch
+btrfs-allow-use-of-global-block-reserve-for-balance-item-deletion.patch
+btrfs-free-anon-block-device-right-after-subvolume-deletion.patch
+btrfs-don-t-allocate-anonymous-block-device-for-user-invisible-roots.patch
+btrfs-preallocate-anon-block-device-at-first-phase-of-snapshot-creation.patch
+btrfs-ref-verify-fix-memory-leak-in-add_block_entry.patch
+btrfs-only-commit-the-delayed-inode-when-doing-a-full-fsync.patch
+btrfs-stop-incremening-log_batch-for-the-log-root-tree-when-syncing-log.patch
+btrfs-only-commit-delayed-items-at-fsync-if-we-are-logging-a-directory.patch
+btrfs-remove-no-longer-needed-use-of-log_writers-for-the-log-root-tree.patch
+btrfs-don-t-traverse-into-the-seed-devices-in-show_devname.patch
+btrfs-pass-checksum-type-via-btrfs_ioc_fs_info-ioctl.patch
+btrfs-open-device-without-device_list_mutex.patch
+btrfs-move-the-chunk_mutex-in-btrfs_read_chunk_tree.patch
+btrfs-relocation-review-the-call-sites-which-can-be-interrupted-by-signal.patch
+btrfs-add-missing-check-for-nocow-and-compression-inode-flags.patch
+btrfs-avoid-possible-signal-interruption-of-btrfs_drop_snapshot-on-relocation-tree.patch
+btrfs-return-erofs-for-btrfs_fs_state_error-cases.patch
+btrfs-sysfs-use-nofs-for-device-creation.patch
+btrfs-don-t-warn-if-we-abort-a-transaction-with-erofs.patch
+btrfs-fix-race-between-page-release-and-a-fast-fsync.patch