From 1e0d9b5cb32dfafed46b90a39819f7db0230bd91 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Sun, 26 Jul 2020 12:04:27 -0400
Subject: [PATCH] Fixes for 4.9

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...t-failure-caused-by-race-with-umount.patch | 108 ++++++++++++++++++
 queue-4.9/series                              |   1 +
 2 files changed, 109 insertions(+)
 create mode 100644 queue-4.9/btrfs-fix-mount-failure-caused-by-race-with-umount.patch

diff --git a/queue-4.9/btrfs-fix-mount-failure-caused-by-race-with-umount.patch b/queue-4.9/btrfs-fix-mount-failure-caused-by-race-with-umount.patch
new file mode 100644
index 00000000000..69b7340ef57
--- /dev/null
+++ b/queue-4.9/btrfs-fix-mount-failure-caused-by-race-with-umount.patch
@@ -0,0 +1,108 @@
+From a7da6bdd02b026152d82425d4d62bc25adcebc91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jul 2020 13:29:46 -0700
+Subject: btrfs: fix mount failure caused by race with umount
+
+From: Boris Burkov <boris@bur.io>
+
+[ Upstream commit 48cfa61b58a1fee0bc49eef04f8ccf31493b7cdd ]
+
+It is possible to cause a btrfs mount to fail by racing it with a slow
+umount. The crux of the sequence is generic_shutdown_super not yet
+calling sop->put_super before btrfs_mount_root calls btrfs_open_devices.
+If that occurs, btrfs_open_devices will decide the opened counter is
+non-zero, increment it, and skip resetting fs_devices->total_rw_bytes to
+0. From here, mount will call sget which will result in grab_super
+trying to take the super block umount semaphore. That semaphore will be
+held by the slow umount, so mount will block. Before up-ing the
+semaphore, umount will delete the super block, resulting in mount's sget
+reliably allocating a new one, which causes the mount path to dutifully
+fill it out, and increment total_rw_bytes a second time, which causes
+the mount to fail, as we see double the expected bytes.
+
+Here is the sequence laid out in greater detail:
+
+CPU0                                                    CPU1
+down_write sb->s_umount
+btrfs_kill_super
+  kill_anon_super(sb)
+    generic_shutdown_super(sb);
+      shrink_dcache_for_umount(sb);
+      sync_filesystem(sb);
+      evict_inodes(sb); // SLOW
+
+                                              btrfs_mount_root
+                                                btrfs_scan_one_device
+                                                fs_devices = device->fs_devices
+                                                fs_info->fs_devices = fs_devices
+                                                // fs_devices-opened makes this a no-op
+                                                btrfs_open_devices(fs_devices, mode, fs_type)
+                                                s = sget(fs_type, test, set, flags, fs_info);
+                                                  find sb in s_instances
+                                                  grab_super(sb);
+                                                    down_write(&s->s_umount); // blocks
+
+      sop->put_super(sb)
+        // sb->fs_devices->opened == 2; no-op
+      spin_lock(&sb_lock);
+      hlist_del_init(&sb->s_instances);
+      spin_unlock(&sb_lock);
+      up_write(&sb->s_umount);
+                                                    return 0;
+                                                  retry lookup
+                                                  don't find sb in s_instances (deleted by CPU0)
+                                                  s = alloc_super
+                                                  return s;
+                                                btrfs_fill_super(s, fs_devices, data)
+                                                  open_ctree // fs_devices total_rw_bytes improperly set!
+                                                    btrfs_read_chunk_tree
+                                                      read_one_dev // increment total_rw_bytes again!!
+                                                      super_total_bytes < fs_devices->total_rw_bytes // ERROR!!!
+
+To fix this, we clear total_rw_bytes from within btrfs_read_chunk_tree
+before the calls to read_one_dev, while holding the sb umount semaphore
+and the uuid mutex.
+
+To reproduce, it is sufficient to dirty a decent number of inodes, then
+quickly umount and mount.
+
+  for i in $(seq 0 500)
+  do
+    dd if=/dev/zero of="/mnt/foo/$i" bs=1M count=1
+  done
+  umount /mnt/foo&
+  mount /mnt/foo
+
+does the trick for me.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 70aa22a8a9cce..bace03a546b2d 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6854,6 +6854,14 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
+ 	mutex_lock(&uuid_mutex);
+ 	lock_chunks(root);
+ 
++	/*
++	 * It is possible for mount and umount to race in such a way that
++	 * we execute this code path, but open_fs_devices failed to clear
++	 * total_rw_bytes. We certainly want it cleared before reading the
++	 * device items, so clear it here.
++	 */
++	root->fs_info->fs_devices->total_rw_bytes = 0;
++
+ 	/*
+ 	 * Read all device items, and then all the chunk items. All
+ 	 * device items are found before any chunk item (their object id
+-- 
+2.25.1
+
diff --git a/queue-4.9/series b/queue-4.9/series
index 0544bee822b..c04df4beb97 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -11,3 +11,4 @@ uprobes-change-handle_swbp-to-send-sigtrap-with-si_code-si_kernel-to-fix-gdb-reg
 alsa-info-drop-warn_on-from-buffer-null-sanity-check.patch
 asoc-rt5670-correct-rt5670_ldo_sel_mask.patch
 btrfs-fix-double-free-on-ulist-after-backref-resolution-failure.patch
+btrfs-fix-mount-failure-caused-by-race-with-umount.patch
-- 
2.47.3