git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.4
author Sasha Levin <sashal@kernel.org>
Sun, 26 Jul 2020 16:04:28 +0000 (12:04 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 26 Jul 2020 16:04:28 +0000 (12:04 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.4/btrfs-fix-mount-failure-caused-by-race-with-umount.patch [new file with mode: 0644]
queue-4.4/series

diff --git a/queue-4.4/btrfs-fix-mount-failure-caused-by-race-with-umount.patch b/queue-4.4/btrfs-fix-mount-failure-caused-by-race-with-umount.patch
new file mode 100644 (file)
index 0000000..bb81fd7
--- /dev/null
@@ -0,0 +1,108 @@
+From 45176e41cdbc8472f83ae45d58ad92846c218f95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jul 2020 13:29:46 -0700
+Subject: btrfs: fix mount failure caused by race with umount
+
+From: Boris Burkov <boris@bur.io>
+
+[ Upstream commit 48cfa61b58a1fee0bc49eef04f8ccf31493b7cdd ]
+
+It is possible to cause a btrfs mount to fail by racing it with a slow
+umount. The crux of the sequence is generic_shutdown_super not yet
+calling sop->put_super before btrfs_mount_root calls btrfs_open_devices.
+If that occurs, btrfs_open_devices will decide the opened counter is
+non-zero, increment it, and skip resetting fs_devices->total_rw_bytes to
+0. From here, mount will call sget which will result in grab_super
+trying to take the super block umount semaphore. That semaphore will be
+held by the slow umount, so mount will block. Before up-ing the
+semaphore, umount will delete the super block, resulting in mount's sget
+reliably allocating a new one, which causes the mount path to dutifully
+fill it out, and increment total_rw_bytes a second time, which causes
+the mount to fail, as we see double the expected bytes.
+
+Here is the sequence laid out in greater detail:
+
+CPU0                                                    CPU1
+down_write sb->s_umount
+btrfs_kill_super
+  kill_anon_super(sb)
+    generic_shutdown_super(sb);
+      shrink_dcache_for_umount(sb);
+      sync_filesystem(sb);
+      evict_inodes(sb); // SLOW
+
+                                              btrfs_mount_root
+                                                btrfs_scan_one_device
+                                                fs_devices = device->fs_devices
+                                                fs_info->fs_devices = fs_devices
+                                                // fs_devices->opened makes this a no-op
+                                                btrfs_open_devices(fs_devices, mode, fs_type)
+                                                s = sget(fs_type, test, set, flags, fs_info);
+                                                  find sb in s_instances
+                                                  grab_super(sb);
+                                                    down_write(&s->s_umount); // blocks
+
+      sop->put_super(sb)
+        // sb->fs_devices->opened == 2; no-op
+      spin_lock(&sb_lock);
+      hlist_del_init(&sb->s_instances);
+      spin_unlock(&sb_lock);
+      up_write(&sb->s_umount);
+                                                    return 0;
+                                                  retry lookup
+                                                  don't find sb in s_instances (deleted by CPU0)
+                                                  s = alloc_super
+                                                  return s;
+                                                btrfs_fill_super(s, fs_devices, data)
+                                                  open_ctree // fs_devices total_rw_bytes improperly set!
+                                                    btrfs_read_chunk_tree
+                                                      read_one_dev // increment total_rw_bytes again!!
+                                                      super_total_bytes < fs_devices->total_rw_bytes // ERROR!!!
+
+To fix this, we clear total_rw_bytes from within btrfs_read_chunk_tree
+before the calls to read_one_dev, while holding the sb umount semaphore
+and the uuid mutex.
+
+To reproduce, it is sufficient to dirty a decent number of inodes, then
+quickly umount and mount.
+
+  for i in $(seq 0 500)
+  do
+    dd if=/dev/zero of="/mnt/foo/$i" bs=1M count=1
+  done
+  umount /mnt/foo&
+  mount /mnt/foo
+
+does the trick for me.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 55ce6543050d9..dcae0cf4924b7 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6693,6 +6693,14 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
+       mutex_lock(&uuid_mutex);
+       lock_chunks(root);
++      /*
++       * It is possible for mount and umount to race in such a way that
++       * we execute this code path, but open_fs_devices failed to clear
++       * total_rw_bytes. We certainly want it cleared before reading the
++       * device items, so clear it here.
++       */
++      root->fs_info->fs_devices->total_rw_bytes = 0;
++
+       /*
+        * Read all device items, and then all the chunk items. All
+        * device items are found before any chunk item (their object id
+-- 
+2.25.1
+
index c3643afc9ad2e40e132b608cad5deeb53d4fb50c..cfe32fd723abaac82608c4651bc1cf0261156e36 100644 (file)
@@ -13,3 +13,4 @@ alsa-info-drop-warn_on-from-buffer-null-sanity-check.patch
 asoc-rt5670-correct-rt5670_ldo_sel_mask.patch
 btrfs-fix-double-free-on-ulist-after-backref-resolution-failure.patch
 x86-fpu-disable-bottom-halves-while-loading-fpu-regi.patch
+btrfs-fix-mount-failure-caused-by-race-with-umount.patch