--- /dev/null
+From 73b1c4acaad39877bfe0ee9001df2d87f9f2581e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Jul 2020 11:12:46 -0400
+Subject: btrfs: don't show full path of bind mounts in subvol=
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit 3ef3959b29c4a5bd65526ab310a1a18ae533172a ]
+
+Chris Murphy reported a problem where rpm ostree will bind mount a bunch
+of things for whatever voodoo it's doing. But when it does this
+/proc/mounts shows something like
+
+ /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0
+ /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo/bar 0 0
+
+Despite subvolid=256 being subvol=/foo. This is because we're just
+spitting out the dentry of the mount point, which in the case of bind
+mounts is the source path for the mountpoint. Instead we should spit
+out the path to the actual subvol. Fix this by looking up the name for
+the subvolid we have mounted. With this fix the same test looks like
+this
+
+ /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0
+ /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo 0 0
+
+Reported-by: Chris Murphy <chris@colorremedies.com>
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/super.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index ca95e57b60ee1..eb64d4b159e07 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -1221,6 +1221,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
+ {
+ struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
+ char *compress_type;
++ const char *subvol_name;
+
+ if (btrfs_test_opt(info, DEGRADED))
+ seq_puts(seq, ",degraded");
+@@ -1307,8 +1308,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
+ #endif
+ seq_printf(seq, ",subvolid=%llu",
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+- seq_puts(seq, ",subvol=");
+- seq_dentry(seq, dentry, " \t\n\\");
++ subvol_name = btrfs_get_subvol_name_from_objectid(info,
++ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
++ if (!IS_ERR(subvol_name)) {
++ seq_puts(seq, ",subvol=");
++ seq_escape(seq, subvol_name, " \t\n\\");
++ kfree(subvol_name);
++ }
+ return 0;
+ }
+
+--
+2.25.1
+
--- /dev/null
+From f84c46c31e2e89ddf8b0b7b6508a55d74fe34403 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Feb 2020 14:56:12 +0100
+Subject: btrfs: export helpers for subvolume name/id resolution
+
+From: Marcos Paulo de Souza <mpdesouza@suse.com>
+
+[ Upstream commit c0c907a47dccf2cf26251a8fb4a8e7a3bf79ce84 ]
+
+The functions will be used outside of export.c and super.c to allow
+resolving subvolume name from a given id, eg. for subvolume deletion by
+id ioctl.
+
+Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ split from the next patch ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h | 2 ++
+ fs/btrfs/export.c | 8 ++++----
+ fs/btrfs/export.h | 5 +++++
+ fs/btrfs/super.c | 8 ++++----
+ 4 files changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 5412b12491cb8..de951987fd23d 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3262,6 +3262,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ unsigned long new_flags);
+ int btrfs_sync_fs(struct super_block *sb, int wait);
++char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
++ u64 subvol_objectid);
+
+ static inline __printf(2, 3)
+ void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
+index 3aeb5770f8965..b6ce765aa7f33 100644
+--- a/fs/btrfs/export.c
++++ b/fs/btrfs/export.c
+@@ -56,9 +56,9 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ return type;
+ }
+
+-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+- u64 root_objectid, u32 generation,
+- int check_generation)
++struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
++ u64 root_objectid, u32 generation,
++ int check_generation)
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_root *root;
+@@ -151,7 +151,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
+ }
+
+-static struct dentry *btrfs_get_parent(struct dentry *child)
++struct dentry *btrfs_get_parent(struct dentry *child)
+ {
+ struct inode *dir = d_inode(child);
+ struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
+index 91b3908e7c549..15db024621414 100644
+--- a/fs/btrfs/export.h
++++ b/fs/btrfs/export.h
+@@ -17,4 +17,9 @@ struct btrfs_fid {
+ u64 parent_root_objectid;
+ } __attribute__ ((packed));
+
++struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
++ u64 root_objectid, u32 generation,
++ int check_generation);
++struct dentry *btrfs_get_parent(struct dentry *child);
++
+ #endif
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 17a8463ef35c1..ca95e57b60ee1 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -939,8 +939,8 @@ out:
+ return error;
+ }
+
+-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+- u64 subvol_objectid)
++char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
++ u64 subvol_objectid)
+ {
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_root *fs_root;
+@@ -1427,8 +1427,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
+ goto out;
+ }
+ }
+- subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+- subvol_objectid);
++ subvol_name = btrfs_get_subvol_name_from_objectid(
++ btrfs_sb(mnt->mnt_sb), subvol_objectid);
+ if (IS_ERR(subvol_name)) {
+ root = ERR_CAST(subvol_name);
+ subvol_name = NULL;
+--
+2.25.1
+
--- /dev/null
+From 100f0653e012f14174d1f7ea130107ebe07dd25b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jul 2020 16:39:26 +0800
+Subject: btrfs: inode: fix NULL pointer dereference if inode doesn't need
+ compression
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 1e6e238c3002ea3611465ce5f32777ddd6a40126 ]
+
+[BUG]
+There is a bug report of NULL pointer dereference caused in
+compress_file_extent():
+
+ Oops: Kernel access of bad area, sig: 11 [#1]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs]
+ NIP [c008000006dd4d34] compress_file_range.constprop.41+0x75c/0x8a0 [btrfs]
+ LR [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs]
+ Call Trace:
+ [c000000c69093b00] [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] (unreliable)
+ [c000000c69093bd0] [c008000006dd4ebc] async_cow_start+0x44/0xa0 [btrfs]
+ [c000000c69093c10] [c008000006e14824] normal_work_helper+0xdc/0x598 [btrfs]
+ [c000000c69093c80] [c0000000001608c0] process_one_work+0x2c0/0x5b0
+ [c000000c69093d10] [c000000000160c38] worker_thread+0x88/0x660
+ [c000000c69093db0] [c00000000016b55c] kthread+0x1ac/0x1c0
+ [c000000c69093e20] [c00000000000b660] ret_from_kernel_thread+0x5c/0x7c
+ ---[ end trace f16954aa20d822f6 ]---
+
+[CAUSE]
+For the following execution route of compress_file_range(), it's
+possible to hit NULL pointer dereference:
+
+ compress_file_range()
+ |- pages = NULL;
+ |- start = async_chunk->start = 0;
+ |- end = async_chunk->end = 4095;
+ |- nr_pages = 1;
+ |- inode_need_compress() == false; <<< Possible, see later explanation
+ | Now, we have nr_pages = 1, pages = NULL
+ |- cont:
+ |- ret = cow_file_range_inline();
+ |- if (ret <= 0) {
+ |- for (i = 0; i < nr_pages; i++) {
+ |- WARN_ON(pages[i]->mapping); <<< Crash
+
+To enter above call execution branch, we need the following race:
+
+ Thread 1 (chattr) | Thread 2 (writeback)
+--------------------------+------------------------------
+ | btrfs_run_delalloc_range
+ | |- inode_need_compress = true
+ | |- cow_file_range_async()
+btrfs_ioctl_set_flag() |
+|- binode_flags |= |
+ BTRFS_INODE_NOCOMPRESS |
+ | compress_file_range()
+ | |- inode_need_compress = false
+ | |- nr_pages = 1 while pages = NULL
+ | | Then hit the crash
+
+[FIX]
+This patch will fix it by checking @pages before accessing it.
+This patch is only designed as a hot fix and easy to backport.
+
+More elegant fix may make btrfs only check inode_need_compress() once to
+avoid such race, but that would be another story.
+
+Reported-by: Luciano Chavez <chavez@us.ibm.com>
+Fixes: 4d3a800ebb12 ("btrfs: merge nr_pages input and output parameter in compress_pages")
+CC: stable@vger.kernel.org # 4.14.x: cecc8d9038d16: btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index dc520749f51db..17856e92b93d1 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -630,11 +630,18 @@ cont:
+ start,
+ end - start + 1);
+
+- for (i = 0; i < nr_pages; i++) {
+- WARN_ON(pages[i]->mapping);
+- put_page(pages[i]);
++ /*
++ * Ensure we only free the compressed pages if we have
++ * them allocated, as we can still reach here with
++ * inode_need_compress() == false.
++ */
++ if (pages) {
++ for (i = 0; i < nr_pages; i++) {
++ WARN_ON(pages[i]->mapping);
++ put_page(pages[i]);
++ }
++ kfree(pages);
+ }
+- kfree(pages);
+
+ return;
+ }
+--
+2.25.1
+
--- /dev/null
+From 3f9d136ee7b1741f5b715179f3a9cd44de4efd84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jul 2019 14:41:45 +0300
+Subject: btrfs: Move free_pages_out label in inline extent handling branch in
+ compress_file_range
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+[ Upstream commit cecc8d9038d164eda61fbcd72520975a554ea63e ]
+
+This label is only executed if compress_file_range fails to create an
+inline extent. So move its code in the semantically related inline
+extent handling branch. No functional changes.
+
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 57908ee964a20..dc520749f51db 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -629,7 +629,14 @@ cont:
+ btrfs_free_reserved_data_space_noquota(inode,
+ start,
+ end - start + 1);
+- goto free_pages_out;
++
++ for (i = 0; i < nr_pages; i++) {
++ WARN_ON(pages[i]->mapping);
++ put_page(pages[i]);
++ }
++ kfree(pages);
++
++ return;
+ }
+ }
+
+@@ -708,13 +715,6 @@ cleanup_and_bail_uncompressed:
+ *num_added += 1;
+
+ return;
+-
+-free_pages_out:
+- for (i = 0; i < nr_pages; i++) {
+- WARN_ON(pages[i]->mapping);
+- put_page(pages[i]);
+- }
+- kfree(pages);
+ }
+
+ static void free_async_extent_pages(struct async_extent *async_extent)
+--
+2.25.1
+
--- /dev/null
+From 8e2073529525e69ef39ac62d70caa93777e132e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jul 2020 10:17:50 -0400
+Subject: btrfs: sysfs: use NOFS for device creation
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+Dave hit this splat during testing btrfs/078:
+
+ ======================================================
+ WARNING: possible circular locking dependency detected
+ 5.8.0-rc6-default+ #1191 Not tainted
+ ------------------------------------------------------
+ kswapd0/75 is trying to acquire lock:
+ ffffa040e9d04ff8 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+
+ but task is already holding lock:
+ ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+
+ -> #2 (fs_reclaim){+.+.}-{0:0}:
+ __lock_acquire+0x56f/0xaa0
+ lock_acquire+0xa3/0x440
+ fs_reclaim_acquire.part.0+0x25/0x30
+ __kmalloc_track_caller+0x49/0x330
+ kstrdup+0x2e/0x60
+ __kernfs_new_node.constprop.0+0x44/0x250
+ kernfs_new_node+0x25/0x50
+ kernfs_create_link+0x34/0xa0
+ sysfs_do_create_link_sd+0x5e/0xd0
+ btrfs_sysfs_add_devices_dir+0x65/0x100 [btrfs]
+ btrfs_init_new_device+0x44c/0x12b0 [btrfs]
+ btrfs_ioctl+0xc3c/0x25c0 [btrfs]
+ ksys_ioctl+0x68/0xa0
+ __x64_sys_ioctl+0x16/0x20
+ do_syscall_64+0x50/0xe0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}:
+ __lock_acquire+0x56f/0xaa0
+ lock_acquire+0xa3/0x440
+ __mutex_lock+0xa0/0xaf0
+ btrfs_chunk_alloc+0x137/0x3e0 [btrfs]
+ find_free_extent+0xb44/0xfb0 [btrfs]
+ btrfs_reserve_extent+0x9b/0x180 [btrfs]
+ btrfs_alloc_tree_block+0xc1/0x350 [btrfs]
+ alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs]
+ __btrfs_cow_block+0x143/0x7a0 [btrfs]
+ btrfs_cow_block+0x15f/0x310 [btrfs]
+ push_leaf_right+0x150/0x240 [btrfs]
+ split_leaf+0x3cd/0x6d0 [btrfs]
+ btrfs_search_slot+0xd14/0xf70 [btrfs]
+ btrfs_insert_empty_items+0x64/0xc0 [btrfs]
+ __btrfs_commit_inode_delayed_items+0xb2/0x840 [btrfs]
+ btrfs_async_run_delayed_root+0x10e/0x1d0 [btrfs]
+ btrfs_work_helper+0x2f9/0x650 [btrfs]
+ process_one_work+0x22c/0x600
+ worker_thread+0x50/0x3b0
+ kthread+0x137/0x150
+ ret_from_fork+0x1f/0x30
+
+ -> #0 (&delayed_node->mutex){+.+.}-{3:3}:
+ check_prev_add+0x98/0xa20
+ validate_chain+0xa8c/0x2a00
+ __lock_acquire+0x56f/0xaa0
+ lock_acquire+0xa3/0x440
+ __mutex_lock+0xa0/0xaf0
+ __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ btrfs_evict_inode+0x3bf/0x560 [btrfs]
+ evict+0xd6/0x1c0
+ dispose_list+0x48/0x70
+ prune_icache_sb+0x54/0x80
+ super_cache_scan+0x121/0x1a0
+ do_shrink_slab+0x175/0x420
+ shrink_slab+0xb1/0x2e0
+ shrink_node+0x192/0x600
+ balance_pgdat+0x31f/0x750
+ kswapd+0x206/0x510
+ kthread+0x137/0x150
+ ret_from_fork+0x1f/0x30
+
+ other info that might help us debug this:
+
+ Chain exists of:
+ &delayed_node->mutex --> &fs_info->chunk_mutex --> fs_reclaim
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(fs_reclaim);
+ lock(&fs_info->chunk_mutex);
+ lock(fs_reclaim);
+ lock(&delayed_node->mutex);
+
+ *** DEADLOCK ***
+
+ 3 locks held by kswapd0/75:
+ #0: ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+ #1: ffffffff8b0b50b8 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x54/0x2e0
+ #2: ffffa040e057c0e8 (&type->s_umount_key#26){++++}-{3:3}, at: trylock_super+0x16/0x50
+
+ stack backtrace:
+ CPU: 2 PID: 75 Comm: kswapd0 Not tainted 5.8.0-rc6-default+ #1191
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014
+ Call Trace:
+ dump_stack+0x78/0xa0
+ check_noncircular+0x16f/0x190
+ check_prev_add+0x98/0xa20
+ validate_chain+0xa8c/0x2a00
+ __lock_acquire+0x56f/0xaa0
+ lock_acquire+0xa3/0x440
+ ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ __mutex_lock+0xa0/0xaf0
+ ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ ? __lock_acquire+0x56f/0xaa0
+ ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ ? lock_acquire+0xa3/0x440
+ ? btrfs_evict_inode+0x138/0x560 [btrfs]
+ ? btrfs_evict_inode+0x2fe/0x560 [btrfs]
+ ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs]
+ btrfs_evict_inode+0x3bf/0x560 [btrfs]
+ evict+0xd6/0x1c0
+ dispose_list+0x48/0x70
+ prune_icache_sb+0x54/0x80
+ super_cache_scan+0x121/0x1a0
+ do_shrink_slab+0x175/0x420
+ shrink_slab+0xb1/0x2e0
+ shrink_node+0x192/0x600
+ balance_pgdat+0x31f/0x750
+ kswapd+0x206/0x510
+ ? _raw_spin_unlock_irqrestore+0x3e/0x50
+ ? finish_wait+0x90/0x90
+ ? balance_pgdat+0x750/0x750
+ kthread+0x137/0x150
+ ? kthread_stop+0x2a0/0x2a0
+ ret_from_fork+0x1f/0x30
+
+This is because we're holding the chunk_mutex while adding this device
+and adding its sysfs entries. We actually hold different locks in
+different places when calling this function, the dev_replace semaphore
+for instance in dev replace, so instead of moving this call around
+simply wrap its operations in NOFS.
+
+CC: stable@vger.kernel.org # 4.14+
+Reported-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/sysfs.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
+index f05341bda1d14..383546ff62f04 100644
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -25,6 +25,7 @@
+ #include <linux/bug.h>
+ #include <linux/genhd.h>
+ #include <linux/debugfs.h>
++#include <linux/sched/mm.h>
+
+ #include "ctree.h"
+ #include "disk-io.h"
+@@ -749,7 +750,9 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
+ {
+ int error = 0;
+ struct btrfs_device *dev;
++ unsigned int nofs_flag;
+
++ nofs_flag = memalloc_nofs_save();
+ list_for_each_entry(dev, &fs_devices->devices, dev_list) {
+ struct hd_struct *disk;
+ struct kobject *disk_kobj;
+@@ -768,6 +771,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
+ if (error)
+ break;
+ }
++ memalloc_nofs_restore(nofs_flag);
+
+ return error;
+ }
+--
+2.25.1
+
--- /dev/null
+From a3b3f77521585f5548f773354c2284ff238e5a98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jul 2020 16:49:11 +0100
+Subject: drm/vgem: Replace opencoded version of drm_gem_dumb_map_offset()
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit 119c53d2d4044c59c450c4f5a568d80b9d861856 ]
+
+drm_gem_dumb_map_offset() now exists and does everything
+vgem_gem_dumb_map does and *ought* to do.
+
+In particular, vgem_gem_dumb_map() was trying to reject mmapping an
+imported dmabuf by checking the existence of obj->filp. Unfortunately,
+we always allocated an obj->filp, even if unused for an imported dmabuf.
+Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192
+("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the
+obj->import_attach to reject such invalid mmaps.
+
+This prevents vgem from allowing userspace mmapping the dumb handle and
+attempting to incorrectly fault in remote pages belonging to another
+device, where there may not even be a struct page.
+
+v2: Use the default drm_gem_dumb_map_offset() callback
+
+Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: <stable@vger.kernel.org> # v4.13+
+Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@chris-wilson.co.uk
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/vgem/vgem_drv.c | 27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
+index aa592277d5108..67037eb9a80ee 100644
+--- a/drivers/gpu/drm/vgem/vgem_drv.c
++++ b/drivers/gpu/drm/vgem/vgem_drv.c
+@@ -220,32 +220,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
+ return 0;
+ }
+
+-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
+- uint32_t handle, uint64_t *offset)
+-{
+- struct drm_gem_object *obj;
+- int ret;
+-
+- obj = drm_gem_object_lookup(file, handle);
+- if (!obj)
+- return -ENOENT;
+-
+- if (!obj->filp) {
+- ret = -EINVAL;
+- goto unref;
+- }
+-
+- ret = drm_gem_create_mmap_offset(obj);
+- if (ret)
+- goto unref;
+-
+- *offset = drm_vma_node_offset_addr(&obj->vma_node);
+-unref:
+- drm_gem_object_put_unlocked(obj);
+-
+- return ret;
+-}
+-
+ static struct drm_ioctl_desc vgem_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+@@ -439,7 +413,6 @@ static struct drm_driver vgem_driver = {
+ .fops = &vgem_driver_fops,
+
+ .dumb_create = vgem_gem_dumb_create,
+- .dumb_map_offset = vgem_gem_dumb_map,
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+--
+2.25.1
+
--- /dev/null
+From fa056309ff894d8a3e5575094b72f3265834f25b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Aug 2020 17:42:02 -0700
+Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit f3f99d63a8156c7a4a6b20aac22b53c5579c7dc1 ]
+
+syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in
+__khugepaged_enter(): yes, when one thread is about to dump core, has set
+core_state, and is waiting for others, another might do something calling
+__khugepaged_enter(), which now crashes because I lumped the core_state
+test (known as "mmget_still_valid") into khugepaged_test_exit(). I still
+think it's best to lump them together, so just in this exceptional case,
+check mm->mm_users directly instead of khugepaged_test_exit().
+
+Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Yang Shi <shy828301@gmail.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Song Liu <songliubraving@fb.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: <stable@vger.kernel.org> [4.8+]
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index a1b7475c05d04..9dfe364d4c0d1 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -407,7 +407,7 @@ int __khugepaged_enter(struct mm_struct *mm)
+ return -ENOMEM;
+
+ /* __khugepaged_exit() must not run from under us */
+- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
++ VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
+ if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
+ free_mm_slot(mm_slot);
+ return 0;
+--
+2.25.1
+
--- /dev/null
+From 55df81fc13e7d52519d0b86d994fac5725102ade Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Aug 2020 23:26:25 -0700
+Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit bbe98f9cadff58cdd6a4acaeba0efa8565dabe65 ]
+
+Move collapse_huge_page()'s mmget_still_valid() check into
+khugepaged_test_exit() itself. collapse_huge_page() is used for anon THP
+only, and earned its mmget_still_valid() check because it inserts a huge
+pmd entry in place of the page table's pmd entry; whereas
+collapse_file()'s retract_page_tables() or collapse_pte_mapped_thp()
+merely clears the page table's pmd entry. But core dumping without mmap
+lock must have been as open to mistaking a racily cleared pmd entry for a
+page table at physical page 0, as exit_mmap() was. And we certainly have
+no interest in mapping as a THP once dumping core.
+
+Fixes: 59ea6d06cfa9 ("coredump: fix race condition between collapse_huge_page() and core dumping")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Song Liu <songliubraving@fb.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: <stable@vger.kernel.org> [4.8+]
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021217020.27773@eggly.anvils
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 04b4c38d0c184..a1b7475c05d04 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -394,7 +394,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
+
+ static inline int khugepaged_test_exit(struct mm_struct *mm)
+ {
+- return atomic_read(&mm->mm_users) == 0;
++ return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
+ }
+
+ int __khugepaged_enter(struct mm_struct *mm)
+@@ -1006,9 +1006,6 @@ static void collapse_huge_page(struct mm_struct *mm,
+ * handled by the anon_vma lock + PG_lock.
+ */
+ down_write(&mm->mmap_sem);
+- result = SCAN_ANY_PROCESS;
+- if (!mmget_still_valid(mm))
+- goto out;
+ result = hugepage_vma_revalidate(mm, address, &vma);
+ if (result)
+ goto out;
+--
+2.25.1
+
--- /dev/null
+From e3ae49bcd65f6ddea463635ed15ef04e050ef84c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jul 2020 22:11:23 +0900
+Subject: perf probe: Fix memory leakage when the probe point is not found
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit 12d572e785b15bc764e956caaa8a4c846fd15694 ]
+
+Fix the memory leakage in debuginfo__find_trace_events() when the probe
+point is not found in the debuginfo. If there is no probe point found in
+the debuginfo, debuginfo__find_probes() will NOT return -ENOENT, but 0.
+
+Thus the caller of debuginfo__find_probes() must check the tf.ntevs and
+release the allocated memory for the array of struct probe_trace_event.
+
+The current code releases the memory only if the debuginfo__find_probes()
+hits an error but not checks tf.ntevs. In the result, the memory allocated
+on *tevs are not released if tf.ntevs == 0.
+
+This fixes the memory leakage by checking tf.ntevs == 0 in addition to
+ret < 0.
+
+Fixes: ff741783506c ("perf probe: Introduce debuginfo to encapsulate dwarf information")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: stable@vger.kernel.org
+Link: http://lore.kernel.org/lkml/159438668346.62703.10887420400718492503.stgit@devnote2
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/probe-finder.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
+index 8f7f9d05f38c0..bfa6d9d215569 100644
+--- a/tools/perf/util/probe-finder.c
++++ b/tools/perf/util/probe-finder.c
+@@ -1354,7 +1354,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
+ tf.ntevs = 0;
+
+ ret = debuginfo__find_probes(dbg, &tf.pf);
+- if (ret < 0) {
++ if (ret < 0 || tf.ntevs == 0) {
+ for (i = 0; i < tf.ntevs; i++)
+ clear_probe_trace_event(&tf.tevs[i]);
+ zfree(tevs);
+--
+2.25.1
+
--- /dev/null
+From 37b26bb493c39a3d0798104ef907b24e3cc8c521 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jul 2020 19:25:25 +1000
+Subject: powerpc: Allow 4224 bytes of stack expansion for the signal frame
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit 63dee5df43a31f3844efabc58972f0a206ca4534 ]
+
+We have powerpc specific logic in our page fault handling to decide if
+an access to an unmapped address below the stack pointer should expand
+the stack VMA.
+
+The code was originally added in 2004 "ported from 2.4". The rough
+logic is that the stack is allowed to grow to 1MB with no extra
+checking. Over 1MB the access must be within 2048 bytes of the stack
+pointer, or be from a user instruction that updates the stack pointer.
+
+The 2048 byte allowance below the stack pointer is there to cover the
+288 byte "red zone" as well as the "about 1.5kB" needed by the signal
+delivery code.
+
+Unfortunately since then the signal frame has expanded, and is now
+4224 bytes on 64-bit kernels with transactional memory enabled. This
+means if a process has consumed more than 1MB of stack, and its stack
+pointer lies less than 4224 bytes from the next page boundary, signal
+delivery will fault when trying to expand the stack and the process
+will see a SEGV.
+
+The total size of the signal frame is the size of struct rt_sigframe
+(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
+64-bit).
+
+The 2048 byte allowance was correct until 2008 as the signal frame
+was:
+
+struct rt_sigframe {
+ struct ucontext uc; /* 0 1440 */
+ /* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
+ long unsigned int _unused[2]; /* 1440 16 */
+ unsigned int tramp[6]; /* 1456 24 */
+ struct siginfo * pinfo; /* 1480 8 */
+ void * puc; /* 1488 8 */
+ struct siginfo info; /* 1496 128 */
+ /* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
+ char abigap[288]; /* 1624 288 */
+
+ /* size: 1920, cachelines: 15, members: 7 */
+ /* padding: 8 */
+};
+
+1920 + 128 = 2048
+
+Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
+ptrace and signal support") (Jul 2008) the signal frame expanded to
+2304 bytes:
+
+struct rt_sigframe {
+ struct ucontext uc; /* 0 1696 */ <--
+ /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
+ long unsigned int _unused[2]; /* 1696 16 */
+ unsigned int tramp[6]; /* 1712 24 */
+ struct siginfo * pinfo; /* 1736 8 */
+ void * puc; /* 1744 8 */
+ struct siginfo info; /* 1752 128 */
+ /* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
+ char abigap[288]; /* 1880 288 */
+
+ /* size: 2176, cachelines: 17, members: 7 */
+ /* padding: 8 */
+};
+
+2176 + 128 = 2304
+
+At this point we should have been exposed to the bug, though as far as
+I know it was never reported. I no longer have a system old enough to
+easily test on.
+
+Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
+grow-down stack segment") caused our stack expansion code to never
+trigger, as there was always a VMA found for a write up to PAGE_SIZE
+below r1.
+
+That meant the bug was hidden as we continued to expand the signal
+frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
+state to the signal context") (Feb 2013):
+
+struct rt_sigframe {
+ struct ucontext uc; /* 0 1696 */
+ /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
+ struct ucontext uc_transact; /* 1696 1696 */ <--
+ /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
+ long unsigned int _unused[2]; /* 3392 16 */
+ unsigned int tramp[6]; /* 3408 24 */
+ struct siginfo * pinfo; /* 3432 8 */
+ void * puc; /* 3440 8 */
+ struct siginfo info; /* 3448 128 */
+ /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
+ char abigap[288]; /* 3576 288 */
+
+ /* size: 3872, cachelines: 31, members: 8 */
+ /* padding: 8 */
+ /* last cacheline: 32 bytes */
+};
+
+3872 + 128 = 4000
+
+And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
+userspace to 512 bytes") (Feb 2014):
+
+struct rt_sigframe {
+ struct ucontext uc; /* 0 1696 */
+ /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
+ struct ucontext uc_transact; /* 1696 1696 */
+ /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
+ long unsigned int _unused[2]; /* 3392 16 */
+ unsigned int tramp[6]; /* 3408 24 */
+ struct siginfo * pinfo; /* 3432 8 */
+ void * puc; /* 3440 8 */
+ struct siginfo info; /* 3448 128 */
+ /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
+ char abigap[512]; /* 3576 512 */ <--
+
+ /* size: 4096, cachelines: 32, members: 8 */
+ /* padding: 8 */
+};
+
+4096 + 128 = 4224
+
+Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
+gap, between vmas") exposed us to the existing bug, because it changed
+the stack VMA to be the correct/real size, meaning our stack expansion
+code is now triggered.
+
+Fix it by increasing the allowance to 4224 bytes.
+
+Hard-coding 4224 is obviously unsafe against future expansions of the
+signal frame in the same way as the existing code. We can't easily use
+sizeof() because the signal frame structure is not in a header. We
+will either fix that, or rip out all the custom stack expansion
+checking logic entirely.
+
+Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
+Cc: stable@vger.kernel.org # v2.6.27+
+Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
+Tested-by: Daniel Axtens <dja@axtens.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/fault.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
+index 998c77e600a43..ebe97e5500ee5 100644
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -224,6 +224,9 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
+ return is_exec || (address >= TASK_SIZE);
+ }
+
++// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
++#define SIGFRAME_MAX_SIZE (4096 + 128)
++
+ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ struct vm_area_struct *vma, unsigned int flags,
+ bool *must_retry)
+@@ -231,7 +234,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ /*
+ * N.B. The POWER/Open ABI allows programs to access up to
+ * 288 bytes below the stack pointer.
+- * The kernel signal delivery code writes up to about 1.5kB
++ * The kernel signal delivery code writes a bit over 4KB
+ * below the stack pointer (r1) before decrementing it.
+ * The exec code can write slightly over 640kB to the stack
+ * before setting the user r1. Thus we allow the stack to
+@@ -256,7 +259,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ * between the last mapped region and the stack will
+ * expand the stack rather than segfaulting.
+ */
+- if (address + 2048 >= uregs->gpr[1])
++ if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
+ return false;
+
+ if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
+--
+2.25.1
+
--- /dev/null
+From 678ba6516fac09d9fbb6f23d38b5659df1554622 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 May 2018 10:53:22 +0200
+Subject: powerpc/mm: Only read faulting instruction when necessary in
+ do_page_fault()
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+[ Upstream commit 0e36b0d12501e278686634712975b785bae11641 ]
+
+Commit a7a9dcd882a67 ("powerpc: Avoid taking a data miss on every
+userspace instruction miss") has shown that limiting the read of
+faulting instruction to likely cases improves performance.
+
+This patch goes further into this direction by limiting the read
+of the faulting instruction to the only cases where it is likely
+needed.
+
+On an MPC885, with the same benchmark app as in the commit referred
+above, we see a reduction of about 3900 dTLB misses (approx 3%):
+
+Before the patch:
+ Performance counter stats for './fault 500' (10 runs):
+
+ 683033312 cpu-cycles ( +- 0.03% )
+ 134538 dTLB-load-misses ( +- 0.03% )
+ 46099 iTLB-load-misses ( +- 0.02% )
+ 19681 faults ( +- 0.02% )
+
+ 5.389747878 seconds time elapsed ( +- 0.06% )
+
+With the patch:
+
+ Performance counter stats for './fault 500' (10 runs):
+
+ 682112862 cpu-cycles ( +- 0.03% )
+ 130619 dTLB-load-misses ( +- 0.03% )
+ 46073 iTLB-load-misses ( +- 0.05% )
+ 19681 faults ( +- 0.01% )
+
+ 5.381342641 seconds time elapsed ( +- 0.07% )
+
+The proper work of the huge stack expansion was tested with the
+following app:
+
+int main(int argc, char **argv)
+{
+ char buf[1024 * 1025];
+
+ sprintf(buf, "Hello world !\n");
+ printf(buf);
+
+ exit(0);
+}
+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
+[mpe: Add include of pagemap.h to fix build errors]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/fault.c | 50 ++++++++++++++++++++++++++++-------------
+ 1 file changed, 34 insertions(+), 16 deletions(-)
+
+diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
+index 5fc8a010fdf07..998c77e600a43 100644
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -22,6 +22,7 @@
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
++#include <linux/pagemap.h>
+ #include <linux/ptrace.h>
+ #include <linux/mman.h>
+ #include <linux/mm.h>
+@@ -66,15 +67,11 @@ static inline bool notify_page_fault(struct pt_regs *regs)
+ }
+
+ /*
+- * Check whether the instruction at regs->nip is a store using
++ * Check whether the instruction inst is a store using
+ * an update addressing form which will update r1.
+ */
+-static bool store_updates_sp(struct pt_regs *regs)
++static bool store_updates_sp(unsigned int inst)
+ {
+- unsigned int inst;
+-
+- if (get_user(inst, (unsigned int __user *)regs->nip))
+- return false;
+ /* check for 1 in the rA field */
+ if (((inst >> 16) & 0x1f) != 1)
+ return false;
+@@ -228,8 +225,8 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
+ }
+
+ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+- struct vm_area_struct *vma,
+- bool store_update_sp)
++ struct vm_area_struct *vma, unsigned int flags,
++ bool *must_retry)
+ {
+ /*
+ * N.B. The POWER/Open ABI allows programs to access up to
+@@ -241,6 +238,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ * expand to 1MB without further checks.
+ */
+ if (address + 0x100000 < vma->vm_end) {
++ unsigned int __user *nip = (unsigned int __user *)regs->nip;
+ /* get user regs even if this fault is in kernel mode */
+ struct pt_regs *uregs = current->thread.regs;
+ if (uregs == NULL)
+@@ -258,8 +256,22 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ * between the last mapped region and the stack will
+ * expand the stack rather than segfaulting.
+ */
+- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+- return true;
++ if (address + 2048 >= uregs->gpr[1])
++ return false;
++
++ if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
++ access_ok(VERIFY_READ, nip, sizeof(*nip))) {
++ unsigned int inst;
++ int res;
++
++ pagefault_disable();
++ res = __get_user_inatomic(inst, nip);
++ pagefault_enable();
++ if (!res)
++ return !store_updates_sp(inst);
++ *must_retry = true;
++ }
++ return true;
+ }
+ return false;
+ }
+@@ -392,7 +404,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+ int is_user = user_mode(regs);
+ int is_write = page_fault_is_write(error_code);
+ int fault, major = 0;
+- bool store_update_sp = false;
++ bool must_retry = false;
+
+ if (notify_page_fault(regs))
+ return 0;
+@@ -439,9 +451,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+ * can result in fault, which will cause a deadlock when called with
+ * mmap_sem held
+ */
+- if (is_write && is_user)
+- store_update_sp = store_updates_sp(regs);
+-
+ if (is_user)
+ flags |= FAULT_FLAG_USER;
+ if (is_write)
+@@ -488,8 +497,17 @@ retry:
+ return bad_area(regs, address);
+
+ /* The stack is being expanded, check if it's valid */
+- if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
+- return bad_area(regs, address);
++ if (unlikely(bad_stack_expansion(regs, address, vma, flags,
++ &must_retry))) {
++ if (!must_retry)
++ return bad_area(regs, address);
++
++ up_read(&mm->mmap_sem);
++ if (fault_in_pages_readable((const char __user *)regs->nip,
++ sizeof(unsigned int)))
++ return bad_area_nosemaphore(regs, address);
++ goto retry;
++ }
+
+ /* Try to expand it */
+ if (unlikely(expand_stack(vma, address)))
+--
+2.25.1
+
--- /dev/null
+drm-vgem-replace-opencoded-version-of-drm_gem_dumb_m.patch
+perf-probe-fix-memory-leakage-when-the-probe-point-i.patch
+khugepaged-khugepaged_test_exit-check-mmget_still_va.patch
+khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
+powerpc-mm-only-read-faulting-instruction-when-neces.patch
+powerpc-allow-4224-bytes-of-stack-expansion-for-the-.patch
+btrfs-export-helpers-for-subvolume-name-id-resolutio.patch
+btrfs-don-t-show-full-path-of-bind-mounts-in-subvol.patch
+btrfs-move-free_pages_out-label-in-inline-extent-han.patch
+btrfs-inode-fix-null-pointer-dereference-if-inode-do.patch
+btrfs-sysfs-use-nofs-for-device-creation.patch