--- /dev/null
+From 0be88e367fd8fbdb45257615d691f4675dda062f Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Thu, 6 Sep 2018 17:18:15 -0400
+Subject: btrfs: don't attempt to trim devices that don't support it
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream.
+
+We check whether any device the file system is using supports discard in
+the ioctl call, but then we attempt to trim free extents on every device
+regardless of whether discard is supported. Due to the way we mask off
+EOPNOTSUPP, we can end up issuing the trim operations on each free range
+on devices that don't support it, just wasting time.
+
+Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10976,6 +10976,10 @@ static int btrfs_trim_free_extents(struc
+
+ *trimmed = 0;
+
++ /* Discard not supported = nothing to do. */
++ if (!blk_queue_discard(bdev_get_queue(device->bdev)))
++ return 0;
++
+ /* Not writeable = nothing to do. */
+ if (!device->writeable)
+ return 0;
--- /dev/null
+From 93bba24d4b5ad1e5cd8b43f64e66ff9d6355dd20 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 7 Sep 2018 14:16:23 +0800
+Subject: btrfs: Enhance btrfs_trim_fs function to handle error better
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 93bba24d4b5ad1e5cd8b43f64e66ff9d6355dd20 upstream.
+
+Function btrfs_trim_fs() doesn't handle errors in a consistent way. If
+error happens when trimming existing block groups, it will skip the
+remaining blocks and continue to trim unallocated space for each device.
+
+The return value will only reflect the final error from device trimming.
+
+This patch will fix such behavior by:
+
+1) Recording the last error from block group or device trimming
+ The return value will also reflect the last error during trimming.
+ This makes developers more aware of the problem.
+
+2) Continuing trimming if possible
+ If we failed to trim one block group or device, we could still try
+ the next block group or device.
+
+3) Report number of failures during block group and device trimming
+ It would be less noisy, but still gives user a brief summary of
+ what's going wrong.
+
+Such behavior can avoid confusion for cases like failure to trim the
+first block group and then only unallocated space is trimmed.
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add bg_ret and dev_ret to the messages ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 38 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -11037,6 +11037,15 @@ static int btrfs_trim_free_extents(struc
+ return ret;
+ }
+
++/*
++ * Trim the whole filesystem by:
++ * 1) trimming the free space in each block group
++ * 2) trimming the unallocated space on each device
++ *
++ * This will also continue trimming even if a block group or device encounters
++ * an error. The return value will be the last error, or 0 if nothing bad
++ * happens.
++ */
+ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
+ {
+ struct btrfs_block_group_cache *cache = NULL;
+@@ -11047,6 +11056,10 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ u64 end;
+ u64 trimmed = 0;
+ u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
++ u64 bg_failed = 0;
++ u64 dev_failed = 0;
++ int bg_ret = 0;
++ int dev_ret = 0;
+ int ret = 0;
+
+ /*
+@@ -11057,7 +11070,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ else
+ cache = btrfs_lookup_block_group(fs_info, range->start);
+
+- while (cache) {
++ for (; cache; cache = next_block_group(fs_info, cache)) {
+ if (cache->key.objectid >= (range->start + range->len)) {
+ btrfs_put_block_group(cache);
+ break;
+@@ -11071,13 +11084,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ if (!block_group_cache_done(cache)) {
+ ret = cache_block_group(cache, 0);
+ if (ret) {
+- btrfs_put_block_group(cache);
+- break;
++ bg_failed++;
++ bg_ret = ret;
++ continue;
+ }
+ ret = wait_block_group_cache_done(cache);
+ if (ret) {
+- btrfs_put_block_group(cache);
+- break;
++ bg_failed++;
++ bg_ret = ret;
++ continue;
+ }
+ }
+ ret = btrfs_trim_block_group(cache,
+@@ -11088,28 +11103,40 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+
+ trimmed += group_trimmed;
+ if (ret) {
+- btrfs_put_block_group(cache);
+- break;
++ bg_failed++;
++ bg_ret = ret;
++ continue;
+ }
+ }
+-
+- cache = next_block_group(fs_info, cache);
+ }
+
++ if (bg_failed)
++ btrfs_warn(fs_info,
++ "failed to trim %llu block group(s), last error %d",
++ bg_failed, bg_ret);
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ devices = &fs_info->fs_devices->alloc_list;
+ list_for_each_entry(device, devices, dev_alloc_list) {
+ ret = btrfs_trim_free_extents(device, range->minlen,
+ &group_trimmed);
+- if (ret)
++ if (ret) {
++ dev_failed++;
++ dev_ret = ret;
+ break;
++ }
+
+ trimmed += group_trimmed;
+ }
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
++ if (dev_failed)
++ btrfs_warn(fs_info,
++ "failed to trim %llu device(s), last error %d",
++ dev_failed, dev_ret);
+ range->len = trimmed;
+- return ret;
++ if (bg_ret)
++ return bg_ret;
++ return dev_ret;
+ }
+
+ /*
--- /dev/null
+From 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 7 Sep 2018 14:16:24 +0800
+Subject: btrfs: Ensure btrfs_trim_fs can trim the whole filesystem
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 upstream.
+
+[BUG]
+fstrim on some btrfs only trims the unallocated space, not trimming any
+space in existing block groups.
+
+[CAUSE]
+Before fstrim_range is passed to btrfs_trim_fs(), it gets truncated to
+range [0, super->total_bytes). So later btrfs_trim_fs() will only be
+able to trim block groups in range [0, super->total_bytes).
+
+While for btrfs, any bytenr aligned to sectorsize is valid, since btrfs
+uses its logical address space, there is nothing limiting the location
+where we put block groups.
+
+For a filesystem with frequent balance, it's quite easy to relocate all
+block groups and bytenr of block groups will start beyond
+super->total_bytes.
+
+In that case, btrfs will not trim existing block groups.
+
+[FIX]
+Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs()
+can get the unmodified range, which is normally set to [0, U64_MAX].
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl")
+CC: <stable@vger.kernel.org> # v4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 10 +---------
+ fs/btrfs/ioctl.c | 11 +++++++----
+ 2 files changed, 8 insertions(+), 13 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -11055,21 +11055,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ u64 start;
+ u64 end;
+ u64 trimmed = 0;
+- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+ u64 bg_failed = 0;
+ u64 dev_failed = 0;
+ int bg_ret = 0;
+ int dev_ret = 0;
+ int ret = 0;
+
+- /*
+- * try to trim all FS space, our block group may start from non-zero.
+- */
+- if (range->len == total_bytes)
+- cache = btrfs_lookup_first_block_group(fs_info, range->start);
+- else
+- cache = btrfs_lookup_block_group(fs_info, range->start);
+-
++ cache = btrfs_lookup_first_block_group(fs_info, range->start);
+ for (; cache; cache = next_block_group(fs_info, cache)) {
+ if (cache->key.objectid >= (range->start + range->len)) {
+ btrfs_put_block_group(cache);
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -352,7 +352,6 @@ static noinline int btrfs_ioctl_fitrim(s
+ struct fstrim_range range;
+ u64 minlen = ULLONG_MAX;
+ u64 num_devices = 0;
+- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+@@ -376,11 +375,15 @@ static noinline int btrfs_ioctl_fitrim(s
+ return -EOPNOTSUPP;
+ if (copy_from_user(&range, arg, sizeof(range)))
+ return -EFAULT;
+- if (range.start > total_bytes ||
+- range.len < fs_info->sb->s_blocksize)
++
++ /*
++ * NOTE: Don't truncate the range using super->total_bytes. Bytenr of
++ * block group is in the logical address space, which can be any
++ * sectorsize aligned bytenr in the range [0, U64_MAX].
++ */
++ if (range.len < fs_info->sb->s_blocksize)
+ return -EINVAL;
+
+- range.len = min(range.len, total_bytes - range.start);
+ range.minlen = max(range.minlen, minlen);
+ ret = btrfs_trim_fs(fs_info, &range);
+ if (ret < 0)
--- /dev/null
+From 374b0e2d6ba5da7fd1cadb3247731ff27d011f6f Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Thu, 6 Sep 2018 16:59:33 -0400
+Subject: btrfs: fix error handling in free_log_tree
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit 374b0e2d6ba5da7fd1cadb3247731ff27d011f6f upstream.
+
+When we hit an I/O error in free_log_tree->walk_log_tree during file system
+shutdown we can crash due to there not being a valid transaction handle.
+
+Use btrfs_handle_fs_error when there's no transaction handle to use.
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000060
+ IP: free_log_tree+0xd2/0x140 [btrfs]
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
+ Modules linked in: <modules>
+ CPU: 2 PID: 23544 Comm: umount Tainted: G W 4.12.14-kvmsmall #9 SLE15 (unreleased)
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
+ task: ffff96bfd3478880 task.stack: ffffa7cf40d78000
+ RIP: 0010:free_log_tree+0xd2/0x140 [btrfs]
+ RSP: 0018:ffffa7cf40d7bd10 EFLAGS: 00010282
+ RAX: 00000000fffffffb RBX: 00000000fffffffb RCX: 0000000000000002
+ RDX: 0000000000000000 RSI: ffff96c02f07d4c8 RDI: 0000000000000282
+ RBP: ffff96c013cf1000 R08: ffff96c02f07d4c8 R09: ffff96c02f07d4d0
+ R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000
+ R13: ffff96c005e800c0 R14: ffffa7cf40d7bdb8 R15: 0000000000000000
+ FS: 00007f17856bcfc0(0000) GS:ffff96c03f600000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000060 CR3: 0000000045ed6002 CR4: 00000000003606e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ ? wait_for_writer+0xb0/0xb0 [btrfs]
+ btrfs_free_log+0x17/0x30 [btrfs]
+ btrfs_drop_and_free_fs_root+0x9a/0xe0 [btrfs]
+ btrfs_free_fs_roots+0xc0/0x130 [btrfs]
+ ? wait_for_completion+0xf2/0x100
+ close_ctree+0xea/0x2e0 [btrfs]
+ ? kthread_stop+0x161/0x260
+ generic_shutdown_super+0x6c/0x120
+ kill_anon_super+0xe/0x20
+ btrfs_kill_super+0x13/0x100 [btrfs]
+ deactivate_locked_super+0x3f/0x70
+ cleanup_mnt+0x3b/0x70
+ task_work_run+0x78/0x90
+ exit_to_usermode_loop+0x77/0xa6
+ do_syscall_64+0x1c5/0x1e0
+ entry_SYSCALL_64_after_hwframe+0x42/0xb7
+ RIP: 0033:0x7f1784f90827
+ RSP: 002b:00007ffdeeb03118 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
+ RAX: 0000000000000000 RBX: 0000556a60c62970 RCX: 00007f1784f90827
+ RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556a60c62b50
+ RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff
+ R10: 0000556a60c63900 R11: 0000000000000246 R12: 0000556a60c62b50
+ R13: 00007f17854a81c4 R14: 0000000000000000 R15: 0000000000000000
+ RIP: free_log_tree+0xd2/0x140 [btrfs] RSP: ffffa7cf40d7bd10
+ CR2: 0000000000000060
+
+Fixes: 681ae50917df9 ("Btrfs: cleanup reserved space when freeing tree log on error")
+CC: <stable@vger.kernel.org> # v3.13
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -3078,9 +3078,12 @@ static void free_log_tree(struct btrfs_t
+ };
+
+ ret = walk_log_tree(trans, log, &wc);
+- /* I don't think this can happen but just in case */
+- if (ret)
+- btrfs_abort_transaction(trans, ret);
++ if (ret) {
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(log->fs_info, ret, NULL);
++ }
+
+ while (1) {
+ ret = find_first_extent_bit(&log->dirty_log_pages,
--- /dev/null
+From f2d72f42d5fa3bf33761d9e47201745f624fcff5 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 8 Oct 2018 11:12:55 +0100
+Subject: Btrfs: fix warning when replaying log after fsync of a tmpfile
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit f2d72f42d5fa3bf33761d9e47201745f624fcff5 upstream.
+
+When replaying a log which contains a tmpfile (which necessarily has a
+link count of 0) we end up calling inc_nlink(), at
+fs/btrfs/tree-log.c:replay_one_buffer(), which produces a warning like
+the following:
+
+ [195191.943673] WARNING: CPU: 0 PID: 6924 at fs/inode.c:342 inc_nlink+0x33/0x40
+ [195191.943723] CPU: 0 PID: 6924 Comm: mount Not tainted 4.19.0-rc6-btrfs-next-38 #1
+ [195191.943724] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
+ [195191.943726] RIP: 0010:inc_nlink+0x33/0x40
+ [195191.943728] RSP: 0018:ffffb96e425e3870 EFLAGS: 00010246
+ [195191.943730] RAX: 0000000000000000 RBX: ffff8c0d1e6af4f0 RCX: 0000000000000006
+ [195191.943731] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8c0d1e6af4f0
+ [195191.943731] RBP: 0000000000000097 R08: 0000000000000001 R09: 0000000000000000
+ [195191.943732] R10: 0000000000000000 R11: 0000000000000000 R12: ffffb96e425e3a60
+ [195191.943733] R13: ffff8c0d10cff0c8 R14: ffff8c0d0d515348 R15: ffff8c0d78a1b3f8
+ [195191.943735] FS: 00007f570ee24480(0000) GS:ffff8c0dfb200000(0000) knlGS:0000000000000000
+ [195191.943736] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [195191.943737] CR2: 00005593286277c8 CR3: 00000000bb8f2006 CR4: 00000000003606f0
+ [195191.943739] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [195191.943740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [195191.943741] Call Trace:
+ [195191.943778] replay_one_buffer+0x797/0x7d0 [btrfs]
+ [195191.943802] walk_up_log_tree+0x1c1/0x250 [btrfs]
+ [195191.943809] ? rcu_read_lock_sched_held+0x3f/0x70
+ [195191.943825] walk_log_tree+0xae/0x1d0 [btrfs]
+ [195191.943840] btrfs_recover_log_trees+0x1d7/0x4d0 [btrfs]
+ [195191.943856] ? replay_dir_deletes+0x280/0x280 [btrfs]
+ [195191.943870] open_ctree+0x1c3b/0x22a0 [btrfs]
+ [195191.943887] btrfs_mount_root+0x6b4/0x800 [btrfs]
+ [195191.943894] ? rcu_read_lock_sched_held+0x3f/0x70
+ [195191.943899] ? pcpu_alloc+0x55b/0x7c0
+ [195191.943906] ? mount_fs+0x3b/0x140
+ [195191.943908] mount_fs+0x3b/0x140
+ [195191.943912] ? __init_waitqueue_head+0x36/0x50
+ [195191.943916] vfs_kern_mount+0x62/0x160
+ [195191.943927] btrfs_mount+0x134/0x890 [btrfs]
+ [195191.943936] ? rcu_read_lock_sched_held+0x3f/0x70
+ [195191.943938] ? pcpu_alloc+0x55b/0x7c0
+ [195191.943943] ? mount_fs+0x3b/0x140
+ [195191.943952] ? btrfs_remount+0x570/0x570 [btrfs]
+ [195191.943954] mount_fs+0x3b/0x140
+ [195191.943956] ? __init_waitqueue_head+0x36/0x50
+ [195191.943960] vfs_kern_mount+0x62/0x160
+ [195191.943963] do_mount+0x1f9/0xd40
+ [195191.943967] ? memdup_user+0x4b/0x70
+ [195191.943971] ksys_mount+0x7e/0xd0
+ [195191.943974] __x64_sys_mount+0x21/0x30
+ [195191.943977] do_syscall_64+0x60/0x1b0
+ [195191.943980] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [195191.943983] RIP: 0033:0x7f570e4e524a
+ [195191.943986] RSP: 002b:00007ffd83589478 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+ [195191.943989] RAX: ffffffffffffffda RBX: 0000563f335b2060 RCX: 00007f570e4e524a
+ [195191.943990] RDX: 0000563f335b2240 RSI: 0000563f335b2280 RDI: 0000563f335b2260
+ [195191.943992] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000020
+ [195191.943993] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000563f335b2260
+ [195191.943994] R13: 0000563f335b2240 R14: 0000000000000000 R15: 00000000ffffffff
+ [195191.944002] irq event stamp: 8688
+ [195191.944010] hardirqs last enabled at (8687): [<ffffffff9cb004c3>] console_unlock+0x503/0x640
+ [195191.944012] hardirqs last disabled at (8688): [<ffffffff9ca037dd>] trace_hardirqs_off_thunk+0x1a/0x1c
+ [195191.944018] softirqs last enabled at (8638): [<ffffffff9cc0a5d1>] __set_page_dirty_nobuffers+0x101/0x150
+ [195191.944020] softirqs last disabled at (8634): [<ffffffff9cc26bbe>] wb_wakeup_delayed+0x2e/0x60
+ [195191.944022] ---[ end trace 5d6e873a9a0b811a ]---
+
+This happens because the inode does not have the flag I_LINKABLE set,
+which is a runtime only flag, not meant to be persisted, set when the
+inode is created through open(2) if the flag O_EXCL is not passed to it.
+Except for the warning, there are no other consequences (like corruptions
+or metadata inconsistencies).
+
+Since it's pointless to replay a tmpfile as it would be deleted in a
+later phase of the log replay procedure (it has a link count of 0), fix
+this by not logging tmpfiles and if a tmpfile is found in a log (created
+by a kernel without this change), skip the replay of the inode.
+
+A test case for fstests follows soon.
+
+Fixes: 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay")
+CC: stable@vger.kernel.org # 4.18+
+Reported-by: Martin Steigerwald <martin@lichtvoll.de>
+Link: https://lore.kernel.org/linux-btrfs/3666619.NTnn27ZJZE@merkaba/
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 42 ++++++++++++++++++++++++++++++++----------
+ 1 file changed, 32 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -273,6 +273,13 @@ struct walk_control {
+ /* what stage of the replay code we're currently in */
+ int stage;
+
++ /*
++ * Ignore any items from the inode currently being processed. Needs
++ * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in
++ * the LOG_WALK_REPLAY_INODES stage.
++ */
++ bool ignore_cur_inode;
++
+ /* the root we are currently replaying */
+ struct btrfs_root *replay_dest;
+
+@@ -2363,6 +2370,20 @@ static int replay_one_buffer(struct btrf
+
+ inode_item = btrfs_item_ptr(eb, i,
+ struct btrfs_inode_item);
++ /*
++ * If we have a tmpfile (O_TMPFILE) that got fsync'ed
++ * and never got linked before the fsync, skip it, as
++ * replaying it is pointless since it would be deleted
++ * later. We skip logging tmpfiles, but it's always
++ * possible we are replaying a log created with a kernel
++ * that used to log tmpfiles.
++ */
++ if (btrfs_inode_nlink(eb, inode_item) == 0) {
++ wc->ignore_cur_inode = true;
++ continue;
++ } else {
++ wc->ignore_cur_inode = false;
++ }
+ ret = replay_xattr_deletes(wc->trans, root, log,
+ path, key.objectid);
+ if (ret)
+@@ -2400,16 +2421,8 @@ static int replay_one_buffer(struct btrf
+ root->fs_info->sectorsize);
+ ret = btrfs_drop_extents(wc->trans, root, inode,
+ from, (u64)-1, 1);
+- /*
+- * If the nlink count is zero here, the iput
+- * will free the inode. We bump it to make
+- * sure it doesn't get freed until the link
+- * count fixup is done.
+- */
+ if (!ret) {
+- if (inode->i_nlink == 0)
+- inc_nlink(inode);
+- /* Update link count and nbytes. */
++ /* Update the inode's nbytes. */
+ ret = btrfs_update_inode(wc->trans,
+ root, inode);
+ }
+@@ -2424,6 +2437,9 @@ static int replay_one_buffer(struct btrf
+ break;
+ }
+
++ if (wc->ignore_cur_inode)
++ continue;
++
+ if (key.type == BTRFS_DIR_INDEX_KEY &&
+ wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
+ ret = replay_one_dir_item(wc->trans, root, path,
+@@ -5644,7 +5660,13 @@ static int btrfs_log_inode_parent(struct
+ if (ret)
+ goto end_no_trans;
+
+- if (btrfs_inode_in_log(inode, trans->transid)) {
++ /*
++ * Skip already logged inodes or inodes corresponding to tmpfiles
++ * (since logging them is pointless, a link count of 0 means they
++ * will never be accessible).
++ */
++ if (btrfs_inode_in_log(inode, trans->transid) ||
++ inode->vfs_inode.i_nlink == 0) {
+ ret = BTRFS_NO_LOG_SYNC;
+ goto end_no_trans;
+ }
--- /dev/null
+From 0f375eed92b5a407657532637ed9652611a682f5 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 9 Oct 2018 15:05:29 +0100
+Subject: Btrfs: fix wrong dentries after fsync of file that got its parent replaced
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0f375eed92b5a407657532637ed9652611a682f5 upstream.
+
+In a scenario like the following:
+
+ mkdir /mnt/A # inode 258
+ mkdir /mnt/B # inode 259
+ touch /mnt/B/bar # inode 260
+
+ sync
+
+ mv /mnt/B/bar /mnt/A/bar
+ mv -T /mnt/A /mnt/B
+ fsync /mnt/B/bar
+
+ <power fail>
+
+After replaying the log we end up with file bar having 2 hard links, both
+with the name 'bar' and one in the directory with inode number 258 and the
+other in the directory with inode number 259. Also, we end up with the
+directory inode 259 still existing and with the directory inode 258 still
+named as 'A', instead of 'B'. In this scenario, file 'bar' should only
+have one hard link, located at directory inode 258, the directory inode
+259 should not exist anymore and the name for directory inode 258 should
+be 'B'.
+
+This incorrect behaviour happens because when attempting to log the old
+parents of an inode, we skip any parents that no longer exist. Fix this
+by forcing a full commit if an old parent no longer exists.
+
+A test case for fstests follows soon.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 30 +++++++++++++++++++++++++++---
+ 1 file changed, 27 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -5583,9 +5583,33 @@ static int btrfs_log_all_parents(struct
+
+ dir_inode = btrfs_iget(fs_info->sb, &inode_key,
+ root, NULL);
+- /* If parent inode was deleted, skip it. */
+- if (IS_ERR(dir_inode))
+- continue;
++ /*
++ * If the parent inode was deleted, return an error to
++ * fallback to a transaction commit. This is to prevent
++ * getting an inode that was moved from one parent A to
++ * a parent B, got its former parent A deleted and then
++ * it got fsync'ed, from existing at both parents after
++ * a log replay (and the old parent still existing).
++ * Example:
++ *
++ * mkdir /mnt/A
++ * mkdir /mnt/B
++ * touch /mnt/B/bar
++ * sync
++ * mv /mnt/B/bar /mnt/A/bar
++ * mv -T /mnt/A /mnt/B
++ * fsync /mnt/B/bar
++ * <power fail>
++ *
++ * If we ignore the old parent B which got deleted,
++ * after a log replay we would have file bar linked
++ * at both parents and the old parent B would still
++ * exist.
++ */
++ if (IS_ERR(dir_inode)) {
++ ret = PTR_ERR(dir_inode);
++ goto out;
++ }
+
+ if (ctx)
+ ctx->log_new_dentries = false;
--- /dev/null
+From 65c6e82becec33731f48786e5a30f98662c86b16 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 Aug 2018 09:42:03 +0800
+Subject: btrfs: Handle owner mismatch gracefully when walking up tree
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream.
+
+[BUG]
+When mounting a certain crafted image, btrfs will trigger a kernel BUG_ON()
+when trying to recover balance:
+
+ kernel BUG at fs/btrfs/extent-tree.c:8956!
+ invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10
+ RIP: 0010:walk_up_proc+0x336/0x480 [btrfs]
+ RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202
+ Call Trace:
+ walk_up_tree+0x172/0x1f0 [btrfs]
+ btrfs_drop_snapshot+0x3a4/0x830 [btrfs]
+ merge_reloc_roots+0xe1/0x1d0 [btrfs]
+ btrfs_recover_relocation+0x3ea/0x420 [btrfs]
+ open_ctree+0x1af3/0x1dd0 [btrfs]
+ btrfs_mount_root+0x66b/0x740 [btrfs]
+ mount_fs+0x3b/0x16a
+ vfs_kern_mount.part.9+0x54/0x140
+ btrfs_mount+0x16d/0x890 [btrfs]
+ mount_fs+0x3b/0x16a
+ vfs_kern_mount.part.9+0x54/0x140
+ do_mount+0x1fd/0xda0
+ ksys_mount+0xba/0xd0
+ __x64_sys_mount+0x21/0x30
+ do_syscall_64+0x60/0x210
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+[CAUSE]
+Extent tree corruption. In this particular case, reloc tree root's
+owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is
+corrupted and we failed the owner check in walk_up_tree().
+
+[FIX]
+It's pretty hard to take care of every extent tree corruption, but at
+least we can remove such BUG_ON() and exit more gracefully.
+
+And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share
+the same root (which is obviously invalid), we need to make
+__del_reloc_root() more robust to detect such invalid sharing to avoid
+possible NULL dereference as root->node can be NULL in this case.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 18 ++++++++++++------
+ fs/btrfs/relocation.c | 2 +-
+ 2 files changed, 13 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9028,15 +9028,14 @@ static noinline int walk_up_proc(struct
+ if (eb == root->node) {
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = eb->start;
+- else
+- BUG_ON(root->root_key.objectid !=
+- btrfs_header_owner(eb));
++ else if (root->root_key.objectid != btrfs_header_owner(eb))
++ goto owner_mismatch;
+ } else {
+ if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = path->nodes[level + 1]->start;
+- else
+- BUG_ON(root->root_key.objectid !=
+- btrfs_header_owner(path->nodes[level + 1]));
++ else if (root->root_key.objectid !=
++ btrfs_header_owner(path->nodes[level + 1]))
++ goto owner_mismatch;
+ }
+
+ btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+@@ -9044,6 +9043,11 @@ out:
+ wc->refs[level] = 0;
+ wc->flags[level] = 0;
+ return 0;
++
++owner_mismatch:
++ btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
++ btrfs_header_owner(eb), root->root_key.objectid);
++ return -EUCLEAN;
+ }
+
+ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+@@ -9097,6 +9101,8 @@ static noinline int walk_up_tree(struct
+ ret = walk_up_proc(trans, root, path, wc);
+ if (ret > 0)
+ return 0;
++ if (ret < 0)
++ return ret;
+
+ if (path->locks[level]) {
+ btrfs_tree_unlock_rw(path->nodes[level],
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1334,7 +1334,7 @@ static void __del_reloc_root(struct btrf
+ struct mapping_node *node = NULL;
+ struct reloc_control *rc = fs_info->reloc_ctl;
+
+- if (rc) {
++ if (rc && root->node) {
+ spin_lock(&rc->reloc_root_tree.lock);
+ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+ root->node->start);
--- /dev/null
+From d4e329de5e5e21594df2e0dd59da9acee71f133b Mon Sep 17 00:00:00 2001
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Thu, 6 Sep 2018 17:18:14 -0400
+Subject: btrfs: iterate all devices during trim, instead of fs_devices::alloc_list
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream.
+
+btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the
+device_list_mutex. The problem is that ->alloc_list is protected by the
+chunk mutex. We don't want to hold the chunk mutex over the trim of the
+entire file system. Fortunately, the ->dev_list list is protected by
+the dev_list mutex and while it will give us all devices, including
+read-only devices, we already just skip the read-only devices. Then we
+can continue to take and release the chunk mutex while scanning each
+device.
+
+Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -11107,8 +11107,8 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ "failed to trim %llu block group(s), last error %d",
+ bg_failed, bg_ret);
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+- devices = &fs_info->fs_devices->alloc_list;
+- list_for_each_entry(device, devices, dev_alloc_list) {
++ devices = &fs_info->fs_devices->devices;
++ list_for_each_entry(device, devices, dev_list) {
+ ret = btrfs_trim_free_extents(device, range->minlen,
+ &group_trimmed);
+ if (ret) {
--- /dev/null
+From b72c3aba09a53fc7c1824250d71180ca154517a7 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 Aug 2018 09:53:47 +0800
+Subject: btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream.
+
+[BUG]
+For a certain crafted image whose csum root leaf has a missing backref, if
+we try to trigger a write with data csum, it could cause a deadlock with the
+following kernel WARN_ON():
+
+ WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400
+ CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8
+ Workqueue: btrfs-endio-write btrfs_endio_write_helper
+ RIP: 0010:btrfs_tree_lock+0x3e2/0x400
+ Call Trace:
+ btrfs_alloc_tree_block+0x39f/0x770
+ __btrfs_cow_block+0x285/0x9e0
+ btrfs_cow_block+0x191/0x2e0
+ btrfs_search_slot+0x492/0x1160
+ btrfs_lookup_csum+0xec/0x280
+ btrfs_csum_file_blocks+0x2be/0xa60
+ add_pending_csums+0xaf/0xf0
+ btrfs_finish_ordered_io+0x74b/0xc90
+ finish_ordered_fn+0x15/0x20
+ normal_work_helper+0xf6/0x500
+ btrfs_endio_write_helper+0x12/0x20
+ process_one_work+0x302/0x770
+ worker_thread+0x81/0x6d0
+ kthread+0x180/0x1d0
+ ret_from_fork+0x35/0x40
+
+[CAUSE]
+That crafted image has a missing backref for the csum tree root leaf. And
+when we try to allocate a new tree block, since there is no
+EXTENT/METADATA_ITEM for the csum tree root, btrfs considers it a free slot
+and uses it.
+
+The extent tree of the image looks like:
+
+ Normal image | This fuzzed image
+ ----------------------------------+--------------------------------
+ BG 29360128 | BG 29360128
+ One empty slot | One empty slot
+ 29364224: backref to UUID tree | 29364224: backref to UUID tree
+ Two empty slots | Two empty slots
+ 29376512: backref to CSUM tree | One empty slot (bad type) <<<
+ 29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree
+ ... | ...
+
+Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs tries to
+allocate a tree block, it's a valid slot for btrfs.
+
+And for finish_ordered_write, when we need to insert csum, we try to CoW
+csum tree root.
+
+By accident, the empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K and
+BG_OFFSET + 12K are already used by tree block COW for other trees, so the
+next empty slot is BG_OFFSET + 16K, which should be the backref for the CSUM
+tree.
+
+But due to the bad type, btrfs can't recognize it and still considers it as
+an empty slot, and will try to use it for csum tree CoW.
+
+Then in the following call trace, we will try to lock the new tree
+block, which turns out to be the old csum tree root which is already
+locked:
+
+btrfs_search_slot() called on csum tree root, which is at 29376512
+|- btrfs_cow_block()
+ |- btrfs_set_lock_block()
+ | |- Now locks tree block 29376512 (old csum tree root)
+ |- __btrfs_cow_block()
+ |- btrfs_alloc_tree_block()
+ |- btrfs_reserve_extent()
+ | Now it returns tree block 29376512, which extent tree
+ | shows its empty slot, but it's already hold by csum tree
+ |- btrfs_init_new_buffer()
+ |- btrfs_tree_lock()
+ | Triggers WARN_ON(eb->lock_owner == current->pid)
+ |- wait_event()
+ Wait lock owner to release the lock, but it's
+ locked by ourself, so it will deadlock
+
+[FIX]
+This patch will do the lock_owner and current->pid check at
+btrfs_init_new_buffer(), so the above deadlock can be avoided.
+
+Since such a problem can only happen in a crafted image, we will still
+trigger a kernel warning for the later aborted transaction, but with a
+slightly more meaningful warning message.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -8398,6 +8398,19 @@ btrfs_init_new_buffer(struct btrfs_trans
+ if (IS_ERR(buf))
+ return buf;
+
++ /*
++ * Extra safety check in case the extent tree is corrupted and extent
++ * allocator chooses to use a tree block which is already used and
++ * locked.
++ */
++ if (buf->lock_owner == current->pid) {
++ btrfs_err_rl(fs_info,
++"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
++ buf->start, btrfs_header_owner(buf), current->pid);
++ free_extent_buffer(buf);
++ return ERR_PTR(-EUCLEAN);
++ }
++
+ btrfs_set_header_generation(buf, trans->transid);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
+ btrfs_tree_lock(buf);
--- /dev/null
+From 545e3366db823dc3342ca9d7fea803f829c9062f Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 28 Sep 2018 07:18:02 -0400
+Subject: btrfs: make sure we create all new block groups
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream.
+
+Allocating new chunks modifies both the extent and chunk tree, which can
+trigger new chunk allocations. So instead of doing list_for_each_safe,
+just do while (!list_empty()) so we make sure we don't exit with other
+pending bg's still on our list.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10270,7 +10270,7 @@ error:
+ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+ {
+- struct btrfs_block_group_cache *block_group, *tmp;
++ struct btrfs_block_group_cache *block_group;
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_block_group_item item;
+ struct btrfs_key key;
+@@ -10278,7 +10278,10 @@ void btrfs_create_pending_block_groups(s
+ bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+
+ trans->can_flush_pending_bgs = false;
+- list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
++ while (!list_empty(&trans->new_bgs)) {
++ block_group = list_first_entry(&trans->new_bgs,
++ struct btrfs_block_group_cache,
++ bg_list);
+ if (ret)
+ goto next;
+
--- /dev/null
+From 84de76a2fb217dc1b6bc2965cc397d1648aa1404 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 28 Sep 2018 07:17:49 -0400
+Subject: btrfs: protect space cache inode alloc with GFP_NOFS
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 84de76a2fb217dc1b6bc2965cc397d1648aa1404 upstream.
+
+If we're allocating a new space cache inode it's likely going to be
+under a transaction handle, so we need to use memalloc_nofs_save() in
+order to avoid deadlocks, and more importantly lockdep messages that
+make xfstests fail.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/free-space-cache.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -22,6 +22,7 @@
+ #include <linux/slab.h>
+ #include <linux/math64.h>
+ #include <linux/ratelimit.h>
++#include <linux/sched/mm.h>
+ #include "ctree.h"
+ #include "free-space-cache.h"
+ #include "transaction.h"
+@@ -59,6 +60,7 @@ static struct inode *__lookup_free_space
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode = NULL;
++ unsigned nofs_flag;
+ int ret;
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+@@ -80,7 +82,13 @@ static struct inode *__lookup_free_space
+ btrfs_disk_key_to_cpu(&location, &disk_key);
+ btrfs_release_path(path);
+
++ /*
++ * We are often under a trans handle at this point, so we need to make
++ * sure NOFS is set to keep us from deadlocking.
++ */
++ nofs_flag = memalloc_nofs_save();
+ inode = btrfs_iget(fs_info->sb, &location, root, NULL);
++ memalloc_nofs_restore(nofs_flag);
+ if (IS_ERR(inode))
+ return inode;
+ if (is_bad_inode(inode)) {
--- /dev/null
+From 3628b4ca64f24a4ec55055597d0cb1c814729f8b Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 9 Oct 2018 14:36:45 +0800
+Subject: btrfs: qgroup: Avoid calling qgroup functions if qgroup is not enabled
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 3628b4ca64f24a4ec55055597d0cb1c814729f8b upstream.
+
+Some qgroup trace events like btrfs_qgroup_release_data() and
+btrfs_qgroup_free_delayed_ref() can still be triggered even if qgroup is
+not enabled.
+
+This is caused by the lack of a qgroup status check before calling some
+qgroup functions. Thankfully the functions handle the quota-disabled
+case well and simply do nothing when qgroup is disabled.
+
+This patch will do earlier check before triggering related trace events.
+
+And for enabled <-> disabled race case:
+
+1) For enabled->disabled case
+ Disable will wipe out all qgroups data including reservation and
+ excl/rfer. Even if we leak some reservation or numbers, it will
+ still be cleared, so nothing will go wrong.
+
+2) For disabled -> enabled case
+ Current btrfs_qgroup_release_data() will use extent_io tree to ensure
+ we won't underflow reservation. And for delayed_ref we use
+ head->qgroup_reserved to record the reserved space, so in that case
+ head->qgroup_reserved should be 0 and we won't underflow.
+
+CC: stable@vger.kernel.org # 4.14+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+Link: https://lore.kernel.org/linux-btrfs/CAJCQCtQau7DtuUUeycCkZ36qjbKuxNzsgqJ7+sJ6W0dK_NLE3w@mail.gmail.com/
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 4 ++++
+ fs/btrfs/qgroup.h | 2 ++
+ 2 files changed, 6 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2972,6 +2972,10 @@ static int __btrfs_qgroup_release_data(s
+ int trace_op = QGROUP_RELEASE;
+ int ret;
+
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED,
++ &BTRFS_I(inode)->root->fs_info->flags))
++ return 0;
++
+ /* In release case, we shouldn't have @reserved */
+ WARN_ON(!free && reserved);
+ if (free && reserved)
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -232,6 +232,8 @@ void btrfs_qgroup_free_refroot(struct bt
+ static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
+ u64 ref_root, u64 num_bytes)
+ {
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
++ return;
+ trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
+ }
--- /dev/null
+From 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 10 Aug 2018 10:20:26 +0800
+Subject: btrfs: qgroup: Dirty all qgroups before rescan
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream.
+
+[BUG]
+In the following case, rescan won't zero out the number of qgroup 1/0:
+
+ $ mkfs.btrfs -fq $DEV
+ $ mount $DEV /mnt
+
+ $ btrfs quota enable /mnt
+ $ btrfs qgroup create 1/0 /mnt
+ $ btrfs sub create /mnt/sub
+ $ btrfs qgroup assign 0/257 1/0 /mnt
+
+ $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
+ $ btrfs sub snap /mnt/sub /mnt/snap
+ $ btrfs quota rescan -w /mnt
+ $ btrfs qgroup show -pcre /mnt
+ qgroupid rfer excl max_rfer max_excl parent child
+ -------- ---- ---- -------- -------- ------ -----
+ 0/5 16.00KiB 16.00KiB none none --- ---
+ 0/257 1016.00KiB 16.00KiB none none 1/0 ---
+ 0/258 1016.00KiB 16.00KiB none none --- ---
+ 1/0 1016.00KiB 16.00KiB none none --- 0/257
+
+So far so good, but:
+
+ $ btrfs qgroup remove 0/257 1/0 /mnt
+ WARNING: quotas may be inconsistent, rescan needed
+ $ btrfs quota rescan -w /mnt
+ $ btrfs qgroup show -pcre /mnt
+ qgroupid rfer excl max_rfer max_excl parent child
+ -------- ---- ---- -------- -------- ------ -----
+ 0/5 16.00KiB 16.00KiB none none --- ---
+ 0/257 1016.00KiB 16.00KiB none none --- ---
+ 0/258 1016.00KiB 16.00KiB none none --- ---
+ 1/0 1016.00KiB 16.00KiB none none --- ---
+ ^^^^^^^^^^ ^^^^^^^^ not cleared
+
+[CAUSE]
+Before rescan we call qgroup_rescan_zero_tracking() to zero out all
+qgroups' accounting numbers.
+
+However we don't mark all qgroups dirty, but rely on rescan to do so.
+
+If we have any high level qgroup without children, it won't be marked
+dirty during rescan, since we cannot reach that qgroup.
+
+This will cause the QGROUP_INFO items of childless qgroups to never get
+updated in the quota tree, thus their numbers will stay the same in
+"btrfs qgroup show" output.
+
+[FIX]
+Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if
+we have childless qgroups, their QGROUP_INFO items will still get
+updated during rescan.
+
+Reported-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Tested-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2763,6 +2763,7 @@ qgroup_rescan_zero_tracking(struct btrfs
+ qgroup->rfer_cmpr = 0;
+ qgroup->excl = 0;
+ qgroup->excl_cmpr = 0;
++ qgroup_dirty(fs_info, qgroup);
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ }
--- /dev/null
+From 553cceb49681d60975d00892877d4c871bf220f9 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 28 Sep 2018 07:18:00 -0400
+Subject: btrfs: reset max_extent_size on clear in a bitmap
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 553cceb49681d60975d00892877d4c871bf220f9 upstream.
+
+We need to clear the max_extent_size when we clear bits from a bitmap
+since it could have been from the range that contains the
+max_extent_size.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/free-space-cache.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -1710,6 +1710,8 @@ static inline void __bitmap_clear_bits(s
+ bitmap_clear(info->bitmap, start, count);
+
+ info->bytes -= bytes;
++ if (info->max_extent_size > ctl->unit)
++ info->max_extent_size = 0;
+ }
+
+ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
--- /dev/null
+From 3aa7c7a31c26321696b92841d5103461c6f3f517 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 12 Sep 2018 10:45:45 -0400
+Subject: btrfs: wait on caching when putting the bg cache
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream.
+
+While testing my backport I noticed there was a panic if I ran
+generic/416 generic/417 generic/418 all in a row. This just happened to
+uncover a race where we had outstanding IO after we destroy all of our
+workqueues, and then we'd go to queue the endio work on those free'd
+workqueues.
+
+This is because we aren't waiting for the caching threads to be done
+before freeing everything up, so to fix this make sure we wait on any
+outstanding caching that's being done before we free up the block group,
+so we're sure to be done with all IO by the time we get to
+btrfs_stop_all_workers(). This fixes the panic I was seeing
+consistently in testing.
+
+------------[ cut here ]------------
+kernel BUG at fs/btrfs/volumes.c:6112!
+SMP PTI
+Modules linked in:
+CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Workqueue: btrfs-cache btrfs_cache_helper
+RIP: 0010:btrfs_map_bio+0x346/0x370
+RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000
+RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000
+RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000
+R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000
+R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00
+FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ btree_submit_bio_hook+0x8a/0xd0
+ submit_one_bio+0x5d/0x80
+ read_extent_buffer_pages+0x18a/0x320
+ btree_read_extent_buffer_pages+0xbc/0x200
+ ? alloc_extent_buffer+0x359/0x3e0
+ read_tree_block+0x3d/0x60
+ read_block_for_search.isra.30+0x1a5/0x360
+ btrfs_search_slot+0x41b/0xa10
+ btrfs_next_old_leaf+0x212/0x470
+ caching_thread+0x323/0x490
+ normal_work_helper+0xc5/0x310
+ process_one_work+0x141/0x340
+ worker_thread+0x44/0x3c0
+ kthread+0xf8/0x130
+ ? process_one_work+0x340/0x340
+ ? kthread_bind+0x10/0x10
+ ret_from_fork+0x35/0x40
+RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0
+---[ end trace 827eb13e50846033 ]---
+Kernel panic - not syncing: Fatal exception
+Kernel Offset: disabled
+---[ end Kernel panic - not syncing: Fatal exception
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9881,6 +9881,7 @@ void btrfs_put_block_group_cache(struct
+
+ block_group = btrfs_lookup_first_block_group(info, last);
+ while (block_group) {
++ wait_block_group_cache_done(block_group);
+ spin_lock(&block_group->lock);
+ if (block_group->iref)
+ break;
arm64-dts-stratix10-correct-system-manager-register-size.patch
soc-tegra-pmc-fix-child-node-lookup.patch
selftests-powerpc-fix-ptrace-tm-failure.patch
+btrfs-qgroup-avoid-calling-qgroup-functions-if-qgroup-is-not-enabled.patch
+btrfs-handle-owner-mismatch-gracefully-when-walking-up-tree.patch
+btrfs-locking-add-extra-check-in-btrfs_init_new_buffer-to-avoid-deadlock.patch
+btrfs-fix-error-handling-in-free_log_tree.patch
+btrfs-enhance-btrfs_trim_fs-function-to-handle-error-better.patch
+btrfs-ensure-btrfs_trim_fs-can-trim-the-whole-filesystem.patch
+btrfs-iterate-all-devices-during-trim-instead-of-fs_devices-alloc_list.patch
+btrfs-don-t-attempt-to-trim-devices-that-don-t-support-it.patch
+btrfs-wait-on-caching-when-putting-the-bg-cache.patch
+btrfs-protect-space-cache-inode-alloc-with-gfp_nofs.patch
+btrfs-reset-max_extent_size-on-clear-in-a-bitmap.patch
+btrfs-make-sure-we-create-all-new-block-groups.patch
+btrfs-fix-warning-when-replaying-log-after-fsync-of-a-tmpfile.patch
+btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch
+btrfs-qgroup-dirty-all-qgroups-before-rescan.patch