From: Sasha Levin Date: Sun, 23 Aug 2020 01:16:36 +0000 (-0400) Subject: Fixes for 4.14 X-Git-Tag: v4.4.234~59 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=054ee671130b6ac84b96e561002f6179a5e6887d;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 4.14 Signed-off-by: Sasha Levin --- diff --git a/queue-4.14/btrfs-don-t-show-full-path-of-bind-mounts-in-subvol.patch b/queue-4.14/btrfs-don-t-show-full-path-of-bind-mounts-in-subvol.patch new file mode 100644 index 00000000000..fae2d58ea04 --- /dev/null +++ b/queue-4.14/btrfs-don-t-show-full-path-of-bind-mounts-in-subvol.patch @@ -0,0 +1,67 @@ +From 73b1c4acaad39877bfe0ee9001df2d87f9f2581e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 Jul 2020 11:12:46 -0400 +Subject: btrfs: don't show full path of bind mounts in subvol= + +From: Josef Bacik + +[ Upstream commit 3ef3959b29c4a5bd65526ab310a1a18ae533172a ] + +Chris Murphy reported a problem where rpm ostree will bind mount a bunch +of things for whatever voodoo it's doing. But when it does this +/proc/mounts shows something like + + /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 + /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo/bar 0 0 + +Despite subvolid=256 being subvol=/foo. This is because we're just +spitting out the dentry of the mount point, which in the case of bind +mounts is the source path for the mountpoint. Instead we should spit +out the path to the actual subvol. Fix this by looking up the name for +the subvolid we have mounted. With this fix the same test looks like +this + + /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 + /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 + +Reported-by: Chris Murphy +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/super.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index ca95e57b60ee1..eb64d4b159e07 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1221,6 +1221,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) + { + struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); + char *compress_type; ++ const char *subvol_name; + + if (btrfs_test_opt(info, DEGRADED)) + seq_puts(seq, ",degraded"); +@@ -1307,8 +1308,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) + #endif + seq_printf(seq, ",subvolid=%llu", + BTRFS_I(d_inode(dentry))->root->root_key.objectid); +- seq_puts(seq, ",subvol="); +- seq_dentry(seq, dentry, " \t\n\\"); ++ subvol_name = btrfs_get_subvol_name_from_objectid(info, ++ BTRFS_I(d_inode(dentry))->root->root_key.objectid); ++ if (!IS_ERR(subvol_name)) { ++ seq_puts(seq, ",subvol="); ++ seq_escape(seq, subvol_name, " \t\n\\"); ++ kfree(subvol_name); ++ } + return 0; + } + +-- +2.25.1 + diff --git a/queue-4.14/btrfs-export-helpers-for-subvolume-name-id-resolutio.patch b/queue-4.14/btrfs-export-helpers-for-subvolume-name-id-resolutio.patch new file mode 100644 index 00000000000..d63b9e05f98 --- /dev/null +++ b/queue-4.14/btrfs-export-helpers-for-subvolume-name-id-resolutio.patch @@ -0,0 +1,107 @@ +From f84c46c31e2e89ddf8b0b7b6508a55d74fe34403 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 21 Feb 2020 14:56:12 +0100 +Subject: btrfs: export helpers for subvolume name/id resolution + +From: Marcos Paulo de Souza + +[ Upstream commit c0c907a47dccf2cf26251a8fb4a8e7a3bf79ce84 ] + +The 
functions will be used outside of export.c and super.c to allow +resolving subvolume name from a given id, eg. for subvolume deletion by +id ioctl. + +Signed-off-by: Marcos Paulo de Souza +Reviewed-by: David Sterba +[ split from the next patch ] +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 2 ++ + fs/btrfs/export.c | 8 ++++---- + fs/btrfs/export.h | 5 +++++ + fs/btrfs/super.c | 8 ++++---- + 4 files changed, 15 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 5412b12491cb8..de951987fd23d 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3262,6 +3262,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); + int btrfs_parse_options(struct btrfs_fs_info *info, char *options, + unsigned long new_flags); + int btrfs_sync_fs(struct super_block *sb, int wait); ++char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, ++ u64 subvol_objectid); + + static inline __printf(2, 3) + void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) +diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c +index 3aeb5770f8965..b6ce765aa7f33 100644 +--- a/fs/btrfs/export.c ++++ b/fs/btrfs/export.c +@@ -56,9 +56,9 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + return type; + } + +-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, +- u64 root_objectid, u32 generation, +- int check_generation) ++struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, ++ u64 root_objectid, u32 generation, ++ int check_generation) + { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_root *root; +@@ -151,7 +151,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); + } + +-static struct dentry *btrfs_get_parent(struct dentry *child) ++struct dentry *btrfs_get_parent(struct dentry *child) + { + struct inode *dir = d_inode(child); + struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); +diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h +index 91b3908e7c549..15db024621414 100644 +--- a/fs/btrfs/export.h ++++ b/fs/btrfs/export.h +@@ -17,4 +17,9 @@ struct btrfs_fid { + u64 parent_root_objectid; + } __attribute__ ((packed)); + ++struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, ++ u64 root_objectid, u32 generation, ++ int check_generation); ++struct dentry *btrfs_get_parent(struct dentry *child); ++ + #endif +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 17a8463ef35c1..ca95e57b60ee1 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -939,8 +939,8 @@ out: + return error; + } + +-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, +- u64 subvol_objectid) ++char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, ++ u64 subvol_objectid) + { + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_root *fs_root; +@@ -1427,8 +1427,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, + goto out; + } + } +- subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb), +- subvol_objectid); ++ subvol_name = btrfs_get_subvol_name_from_objectid( ++ btrfs_sb(mnt->mnt_sb), subvol_objectid); + if (IS_ERR(subvol_name)) { + root = ERR_CAST(subvol_name); + subvol_name = NULL; +-- +2.25.1 + diff --git a/queue-4.14/btrfs-inode-fix-null-pointer-dereference-if-inode-do.patch 
b/queue-4.14/btrfs-inode-fix-null-pointer-dereference-if-inode-do.patch new file mode 100644 index 00000000000..e10e30c0638 --- /dev/null +++ b/queue-4.14/btrfs-inode-fix-null-pointer-dereference-if-inode-do.patch @@ -0,0 +1,109 @@ +From 100f0653e012f14174d1f7ea130107ebe07dd25b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Jul 2020 16:39:26 +0800 +Subject: btrfs: inode: fix NULL pointer dereference if inode doesn't need + compression + +From: Qu Wenruo + +[ Upstream commit 1e6e238c3002ea3611465ce5f32777ddd6a40126 ] + +[BUG] +There is a bug report of NULL pointer dereference caused in +compress_file_extent(): + + Oops: Kernel access of bad area, sig: 11 [#1] + LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries + Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs] + NIP [c008000006dd4d34] compress_file_range.constprop.41+0x75c/0x8a0 [btrfs] + LR [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] + Call Trace: + [c000000c69093b00] [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] (unreliable) + [c000000c69093bd0] [c008000006dd4ebc] async_cow_start+0x44/0xa0 [btrfs] + [c000000c69093c10] [c008000006e14824] normal_work_helper+0xdc/0x598 [btrfs] + [c000000c69093c80] [c0000000001608c0] process_one_work+0x2c0/0x5b0 + [c000000c69093d10] [c000000000160c38] worker_thread+0x88/0x660 + [c000000c69093db0] [c00000000016b55c] kthread+0x1ac/0x1c0 + [c000000c69093e20] [c00000000000b660] ret_from_kernel_thread+0x5c/0x7c + ---[ end trace f16954aa20d822f6 ]--- + +[CAUSE] +For the following execution route of compress_file_range(), it's +possible to hit NULL pointer dereference: + + compress_file_extent() + |- pages = NULL; + |- start = async_chunk->start = 0; + |- end = async_chunk = 4095; + |- nr_pages = 1; + |- inode_need_compress() == false; <<< Possible, see later explanation + | Now, we have nr_pages = 1, pages = NULL + |- cont: + |- ret = cow_file_range_inline(); + |- if (ret <= 0) { + |- for (i = 0; i < nr_pages; i++) { + |- WARN_ON(pages[i]->mapping); <<< Crash + +To enter above call execution branch, we need the following race: + + Thread 1 (chattr) | Thread 2 (writeback) +--------------------------+------------------------------ + | btrfs_run_delalloc_range + | |- inode_need_compress = true + | |- cow_file_range_async() +btrfs_ioctl_set_flag() | +|- binode_flags |= | + BTRFS_INODE_NOCOMPRESS | + | compress_file_range() + | |- inode_need_compress = false + | |- nr_page = 1 while pages = NULL + | | Then hit the crash + +[FIX] +This patch will fix it by checking @pages before doing accessing it. +This patch is only designed as a hot fix and easy to backport. + +More elegant fix may make btrfs only check inode_need_compress() once to +avoid such race, but that would be another story. 
+ +Reported-by: Luciano Chavez +Fixes: 4d3a800ebb12 ("btrfs: merge nr_pages input and output parameter in compress_pages") +CC: stable@vger.kernel.org # 4.14.x: cecc8d9038d16: btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range +CC: stable@vger.kernel.org # 4.14+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index dc520749f51db..17856e92b93d1 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -630,11 +630,18 @@ cont: + start, + end - start + 1); + +- for (i = 0; i < nr_pages; i++) { +- WARN_ON(pages[i]->mapping); +- put_page(pages[i]); ++ /* ++ * Ensure we only free the compressed pages if we have ++ * them allocated, as we can still reach here with ++ * inode_need_compress() == false. ++ */ ++ if (pages) { ++ for (i = 0; i < nr_pages; i++) { ++ WARN_ON(pages[i]->mapping); ++ put_page(pages[i]); ++ } ++ kfree(pages); + } +- kfree(pages); + + return; + } +-- +2.25.1 + diff --git a/queue-4.14/btrfs-move-free_pages_out-label-in-inline-extent-han.patch b/queue-4.14/btrfs-move-free_pages_out-label-in-inline-extent-han.patch new file mode 100644 index 00000000000..a3e548d75ac --- /dev/null +++ b/queue-4.14/btrfs-move-free_pages_out-label-in-inline-extent-han.patch @@ -0,0 +1,59 @@ +From 3f9d136ee7b1741f5b715179f3a9cd44de4efd84 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jul 2019 14:41:45 +0300 +Subject: btrfs: Move free_pages_out label in inline extent handling branch in + compress_file_range + +From: Nikolay Borisov + +[ Upstream commit cecc8d9038d164eda61fbcd72520975a554ea63e ] + +This label is only executed if compress_file_range fails to create an +inline extent. So move its code in the semantically related inline +extent handling branch. No functional changes. 
+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 57908ee964a20..dc520749f51db 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -629,7 +629,14 @@ cont: + btrfs_free_reserved_data_space_noquota(inode, + start, + end - start + 1); +- goto free_pages_out; ++ ++ for (i = 0; i < nr_pages; i++) { ++ WARN_ON(pages[i]->mapping); ++ put_page(pages[i]); ++ } ++ kfree(pages); ++ ++ return; + } + } + +@@ -708,13 +715,6 @@ cleanup_and_bail_uncompressed: + *num_added += 1; + + return; +- +-free_pages_out: +- for (i = 0; i < nr_pages; i++) { +- WARN_ON(pages[i]->mapping); +- put_page(pages[i]); +- } +- kfree(pages); + } + + static void free_async_extent_pages(struct async_extent *async_extent) +-- +2.25.1 + diff --git a/queue-4.14/btrfs-sysfs-use-nofs-for-device-creation.patch b/queue-4.14/btrfs-sysfs-use-nofs-for-device-creation.patch new file mode 100644 index 00000000000..8258daaa7d2 --- /dev/null +++ b/queue-4.14/btrfs-sysfs-use-nofs-for-device-creation.patch @@ -0,0 +1,190 @@ +From 8e2073529525e69ef39ac62d70caa93777e132e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Jul 2020 10:17:50 -0400 +Subject: btrfs: sysfs: use NOFS for device creation + +From: Josef Bacik + +Dave hit this splat during testing btrfs/078: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.8.0-rc6-default+ #1191 Not tainted + ------------------------------------------------------ + kswapd0/75 is trying to acquire lock: + ffffa040e9d04ff8 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + + but task is already holding lock: + ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #2 (fs_reclaim){+.+.}-{0:0}: + __lock_acquire+0x56f/0xaa0 + lock_acquire+0xa3/0x440 + fs_reclaim_acquire.part.0+0x25/0x30 + __kmalloc_track_caller+0x49/0x330 + kstrdup+0x2e/0x60 + __kernfs_new_node.constprop.0+0x44/0x250 + kernfs_new_node+0x25/0x50 + kernfs_create_link+0x34/0xa0 + sysfs_do_create_link_sd+0x5e/0xd0 + btrfs_sysfs_add_devices_dir+0x65/0x100 [btrfs] + btrfs_init_new_device+0x44c/0x12b0 [btrfs] + btrfs_ioctl+0xc3c/0x25c0 [btrfs] + ksys_ioctl+0x68/0xa0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0xe0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}: + __lock_acquire+0x56f/0xaa0 + lock_acquire+0xa3/0x440 + __mutex_lock+0xa0/0xaf0 + btrfs_chunk_alloc+0x137/0x3e0 [btrfs] + find_free_extent+0xb44/0xfb0 [btrfs] + btrfs_reserve_extent+0x9b/0x180 [btrfs] + btrfs_alloc_tree_block+0xc1/0x350 [btrfs] + alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs] + __btrfs_cow_block+0x143/0x7a0 [btrfs] + btrfs_cow_block+0x15f/0x310 [btrfs] + push_leaf_right+0x150/0x240 [btrfs] + split_leaf+0x3cd/0x6d0 [btrfs] + btrfs_search_slot+0xd14/0xf70 [btrfs] + btrfs_insert_empty_items+0x64/0xc0 [btrfs] + __btrfs_commit_inode_delayed_items+0xb2/0x840 [btrfs] + btrfs_async_run_delayed_root+0x10e/0x1d0 [btrfs] + btrfs_work_helper+0x2f9/0x650 [btrfs] + process_one_work+0x22c/0x600 + worker_thread+0x50/0x3b0 + kthread+0x137/0x150 + ret_from_fork+0x1f/0x30 + + -> #0 (&delayed_node->mutex){+.+.}-{3:3}: + check_prev_add+0x98/0xa20 + validate_chain+0xa8c/0x2a00 + __lock_acquire+0x56f/0xaa0 + lock_acquire+0xa3/0x440 + __mutex_lock+0xa0/0xaf0 + __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + btrfs_evict_inode+0x3bf/0x560 [btrfs] + evict+0xd6/0x1c0 + dispose_list+0x48/0x70 + prune_icache_sb+0x54/0x80 + super_cache_scan+0x121/0x1a0 + do_shrink_slab+0x175/0x420 + shrink_slab+0xb1/0x2e0 + shrink_node+0x192/0x600 + balance_pgdat+0x31f/0x750 + kswapd+0x206/0x510 + kthread+0x137/0x150 + ret_from_fork+0x1f/0x30 + + other info that might help us debug this: + + Chain exists of: + &delayed_node->mutex --> &fs_info->chunk_mutex --> fs_reclaim + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(fs_reclaim); + lock(&fs_info->chunk_mutex); + lock(fs_reclaim); + lock(&delayed_node->mutex); + + *** DEADLOCK *** + + 3 locks held by kswapd0/75: + #0: ffffffff8b0c8040 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 + #1: ffffffff8b0b50b8 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x54/0x2e0 + #2: ffffa040e057c0e8 (&type->s_umount_key#26){++++}-{3:3}, at: trylock_super+0x16/0x50 + + stack backtrace: + CPU: 2 PID: 75 Comm: kswapd0 Not tainted 5.8.0-rc6-default+ #1191 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014 + Call Trace: + dump_stack+0x78/0xa0 + check_noncircular+0x16f/0x190 + check_prev_add+0x98/0xa20 + validate_chain+0xa8c/0x2a00 + __lock_acquire+0x56f/0xaa0 + lock_acquire+0xa3/0x440 + ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + __mutex_lock+0xa0/0xaf0 + ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + ? __lock_acquire+0x56f/0xaa0 + ? __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + ? lock_acquire+0xa3/0x440 + ? btrfs_evict_inode+0x138/0x560 [btrfs] + ? btrfs_evict_inode+0x2fe/0x560 [btrfs] + ? 
__btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + __btrfs_release_delayed_node.part.0+0x3f/0x310 [btrfs] + btrfs_evict_inode+0x3bf/0x560 [btrfs] + evict+0xd6/0x1c0 + dispose_list+0x48/0x70 + prune_icache_sb+0x54/0x80 + super_cache_scan+0x121/0x1a0 + do_shrink_slab+0x175/0x420 + shrink_slab+0xb1/0x2e0 + shrink_node+0x192/0x600 + balance_pgdat+0x31f/0x750 + kswapd+0x206/0x510 + ? _raw_spin_unlock_irqrestore+0x3e/0x50 + ? finish_wait+0x90/0x90 + ? balance_pgdat+0x750/0x750 + kthread+0x137/0x150 + ? kthread_stop+0x2a0/0x2a0 + ret_from_fork+0x1f/0x30 + +This is because we're holding the chunk_mutex while adding this device +and adding its sysfs entries. We actually hold different locks in +different places when calling this function, the dev_replace semaphore +for instance in dev replace, so instead of moving this call around +simply wrap it's operations in NOFS. + +CC: stable@vger.kernel.org # 4.14+ +Reported-by: David Sterba +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +--- + fs/btrfs/sysfs.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index f05341bda1d14..383546ff62f04 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include "ctree.h" + #include "disk-io.h" +@@ -749,7 +750,9 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices, + { + int error = 0; + struct btrfs_device *dev; ++ unsigned int nofs_flag; + ++ nofs_flag = memalloc_nofs_save(); + list_for_each_entry(dev, &fs_devices->devices, dev_list) { + struct hd_struct *disk; + struct kobject *disk_kobj; +@@ -768,6 +771,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices, + if (error) + break; + } ++ memalloc_nofs_restore(nofs_flag); + + return error; + } +-- +2.25.1 + diff --git a/queue-4.14/drm-vgem-replace-opencoded-version-of-drm_gem_dumb_m.patch b/queue-4.14/drm-vgem-replace-opencoded-version-of-drm_gem_dumb_m.patch new file mode 100644 index 00000000000..f1d9376f71f --- /dev/null +++ b/queue-4.14/drm-vgem-replace-opencoded-version-of-drm_gem_dumb_m.patch @@ -0,0 +1,83 @@ +From a3b3f77521585f5548f773354c2284ff238e5a98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Jul 2020 16:49:11 +0100 +Subject: drm/vgem: Replace opencoded version of drm_gem_dumb_map_offset() + +From: Chris Wilson + +[ Upstream commit 119c53d2d4044c59c450c4f5a568d80b9d861856 ] + +drm_gem_dumb_map_offset() now exists and does everything +vgem_gem_dump_map does and *ought* to do. + +In particular, vgem_gem_dumb_map() was trying to reject mmapping an +imported dmabuf by checking the existence of obj->filp. Unfortunately, +we always allocated an obj->filp, even if unused for an imported dmabuf. +Instead, the drm_gem_dumb_map_offset(), since commit 90378e589192 +("drm/gem: drm_gem_dumb_map_offset(): reject dma-buf"), uses the +obj->import_attach to reject such invalid mmaps. + +This prevents vgem from allowing userspace mmapping the dumb handle and +attempting to incorrectly fault in remote pages belonging to another +device, where there may not even be a struct page. 
+ +v2: Use the default drm_gem_dumb_map_offset() callback + +Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces") +Signed-off-by: Chris Wilson +Reviewed-by: Daniel Vetter +Cc: # v4.13+ +Link: https://patchwork.freedesktop.org/patch/msgid/20200708154911.21236-1-chris@chris-wilson.co.uk +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vgem/vgem_drv.c | 27 --------------------------- + 1 file changed, 27 deletions(-) + +diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c +index aa592277d5108..67037eb9a80ee 100644 +--- a/drivers/gpu/drm/vgem/vgem_drv.c ++++ b/drivers/gpu/drm/vgem/vgem_drv.c +@@ -220,32 +220,6 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, + return 0; + } + +-static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev, +- uint32_t handle, uint64_t *offset) +-{ +- struct drm_gem_object *obj; +- int ret; +- +- obj = drm_gem_object_lookup(file, handle); +- if (!obj) +- return -ENOENT; +- +- if (!obj->filp) { +- ret = -EINVAL; +- goto unref; +- } +- +- ret = drm_gem_create_mmap_offset(obj); +- if (ret) +- goto unref; +- +- *offset = drm_vma_node_offset_addr(&obj->vma_node); +-unref: +- drm_gem_object_put_unlocked(obj); +- +- return ret; +-} +- + static struct drm_ioctl_desc vgem_ioctls[] = { + DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +@@ -439,7 +413,6 @@ static struct drm_driver vgem_driver = { + .fops = &vgem_driver_fops, + + .dumb_create = vgem_gem_dumb_create, +- .dumb_map_offset = vgem_gem_dumb_map, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, +-- +2.25.1 + diff --git a/queue-4.14/khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch b/queue-4.14/khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch new file mode 100644 index 00000000000..76068e43e93 --- /dev/null +++ b/queue-4.14/khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch @@ -0,0 +1,51 @@ +From fa056309ff894d8a3e5575094b72f3265834f25b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Aug 2020 17:42:02 -0700 +Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter() + +From: Hugh Dickins + +[ Upstream commit f3f99d63a8156c7a4a6b20aac22b53c5579c7dc1 ] + +syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in +__khugepaged_enter(): yes, when one thread is about to dump core, has set +core_state, and is waiting for others, another might do something calling +__khugepaged_enter(), which now crashes because I lumped the core_state +test (known as "mmget_still_valid") into khugepaged_test_exit(). I still +think it's best to lump them together, so just in this exceptional case, +check mm->mm_users directly instead of khugepaged_test_exit(). + +Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()") +Reported-by: syzbot +Signed-off-by: Hugh Dickins +Signed-off-by: Andrew Morton +Acked-by: Yang Shi +Cc: "Kirill A. 
Shutemov" +Cc: Andrea Arcangeli +Cc: Song Liu +Cc: Mike Kravetz +Cc: Eric Dumazet +Cc: [4.8+] +Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/khugepaged.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index a1b7475c05d04..9dfe364d4c0d1 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -407,7 +407,7 @@ int __khugepaged_enter(struct mm_struct *mm) + return -ENOMEM; + + /* __khugepaged_exit() must not run from under us */ +- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm); ++ VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm); + if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) { + free_mm_slot(mm_slot); + return 0; +-- +2.25.1 + diff --git a/queue-4.14/khugepaged-khugepaged_test_exit-check-mmget_still_va.patch b/queue-4.14/khugepaged-khugepaged_test_exit-check-mmget_still_va.patch new file mode 100644 index 00000000000..d8fe346b8a0 --- /dev/null +++ b/queue-4.14/khugepaged-khugepaged_test_exit-check-mmget_still_va.patch @@ -0,0 +1,60 @@ +From 55df81fc13e7d52519d0b86d994fac5725102ade Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Aug 2020 23:26:25 -0700 +Subject: khugepaged: khugepaged_test_exit() check mmget_still_valid() + +From: Hugh Dickins + +[ Upstream commit bbe98f9cadff58cdd6a4acaeba0efa8565dabe65 ] + +Move collapse_huge_page()'s mmget_still_valid() check into +khugepaged_test_exit() itself. collapse_huge_page() is used for anon THP +only, and earned its mmget_still_valid() check because it inserts a huge +pmd entry in place of the page table's pmd entry; whereas +collapse_file()'s retract_page_tables() or collapse_pte_mapped_thp() +merely clears the page table's pmd entry. But core dumping without mmap +lock must have been as open to mistaking a racily cleared pmd entry for a +page table at physical page 0, as exit_mmap() was. And we certainly have +no interest in mapping as a THP once dumping core. + +Fixes: 59ea6d06cfa9 ("coredump: fix race condition between collapse_huge_page() and core dumping") +Signed-off-by: Hugh Dickins +Signed-off-by: Andrew Morton +Cc: Andrea Arcangeli +Cc: Song Liu +Cc: Mike Kravetz +Cc: Kirill A. Shutemov +Cc: [4.8+] +Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021217020.27773@eggly.anvils +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/khugepaged.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index 04b4c38d0c184..a1b7475c05d04 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -394,7 +394,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm, + + static inline int khugepaged_test_exit(struct mm_struct *mm) + { +- return atomic_read(&mm->mm_users) == 0; ++ return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm); + } + + int __khugepaged_enter(struct mm_struct *mm) +@@ -1006,9 +1006,6 @@ static void collapse_huge_page(struct mm_struct *mm, + * handled by the anon_vma lock + PG_lock. 
+ */ + down_write(&mm->mmap_sem); +- result = SCAN_ANY_PROCESS; +- if (!mmget_still_valid(mm)) +- goto out; + result = hugepage_vma_revalidate(mm, address, &vma); + if (result) + goto out; +-- +2.25.1 + diff --git a/queue-4.14/perf-probe-fix-memory-leakage-when-the-probe-point-i.patch b/queue-4.14/perf-probe-fix-memory-leakage-when-the-probe-point-i.patch new file mode 100644 index 00000000000..9af9415e032 --- /dev/null +++ b/queue-4.14/perf-probe-fix-memory-leakage-when-the-probe-point-i.patch @@ -0,0 +1,52 @@ +From e3ae49bcd65f6ddea463635ed15ef04e050ef84c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Jul 2020 22:11:23 +0900 +Subject: perf probe: Fix memory leakage when the probe point is not found + +From: Masami Hiramatsu + +[ Upstream commit 12d572e785b15bc764e956caaa8a4c846fd15694 ] + +Fix the memory leakage in debuginfo__find_trace_events() when the probe +point is not found in the debuginfo. If there is no probe point found in +the debuginfo, debuginfo__find_probes() will NOT return -ENOENT, but 0. + +Thus the caller of debuginfo__find_probes() must check the tf.ntevs and +release the allocated memory for the array of struct probe_trace_event. + +The current code releases the memory only if the debuginfo__find_probes() +hits an error but not checks tf.ntevs. In the result, the memory allocated +on *tevs are not released if tf.ntevs == 0. + +This fixes the memory leakage by checking tf.ntevs == 0 in addition to +ret < 0. + +Fixes: ff741783506c ("perf probe: Introduce debuginfo to encapsulate dwarf information") +Signed-off-by: Masami Hiramatsu +Reviewed-by: Srikar Dronamraju +Cc: Andi Kleen +Cc: Oleg Nesterov +Cc: stable@vger.kernel.org +Link: http://lore.kernel.org/lkml/159438668346.62703.10887420400718492503.stgit@devnote2 +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/probe-finder.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c +index 8f7f9d05f38c0..bfa6d9d215569 100644 +--- a/tools/perf/util/probe-finder.c ++++ b/tools/perf/util/probe-finder.c +@@ -1354,7 +1354,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, + tf.ntevs = 0; + + ret = debuginfo__find_probes(dbg, &tf.pf); +- if (ret < 0) { ++ if (ret < 0 || tf.ntevs == 0) { + for (i = 0; i < tf.ntevs; i++) + clear_probe_trace_event(&tf.tevs[i]); + zfree(tevs); +-- +2.25.1 + diff --git a/queue-4.14/powerpc-allow-4224-bytes-of-stack-expansion-for-the-.patch b/queue-4.14/powerpc-allow-4224-bytes-of-stack-expansion-for-the-.patch new file mode 100644 index 00000000000..8e9bd001a03 --- /dev/null +++ b/queue-4.14/powerpc-allow-4224-bytes-of-stack-expansion-for-the-.patch @@ -0,0 +1,188 @@ +From 37b26bb493c39a3d0798104ef907b24e3cc8c521 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Jul 2020 19:25:25 +1000 +Subject: powerpc: Allow 4224 bytes of stack expansion for the signal frame + +From: Michael Ellerman + +[ Upstream commit 63dee5df43a31f3844efabc58972f0a206ca4534 ] + +We have powerpc specific logic in our page fault handling to decide if +an access to an unmapped address below the stack pointer should expand +the stack VMA. + +The code was originally added in 2004 "ported from 2.4". The rough +logic is that the stack is allowed to grow to 1MB with no extra +checking. Over 1MB the access must be within 2048 bytes of the stack +pointer, or be from a user instruction that updates the stack pointer. 
+ +The 2048 byte allowance below the stack pointer is there to cover the +288 byte "red zone" as well as the "about 1.5kB" needed by the signal +delivery code. + +Unfortunately since then the signal frame has expanded, and is now +4224 bytes on 64-bit kernels with transactional memory enabled. This +means if a process has consumed more than 1MB of stack, and its stack +pointer lies less than 4224 bytes from the next page boundary, signal +delivery will fault when trying to expand the stack and the process +will see a SEGV. + +The total size of the signal frame is the size of struct rt_sigframe +(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on +64-bit). + +The 2048 byte allowance was correct until 2008 as the signal frame +was: + +struct rt_sigframe { + struct ucontext uc; /* 0 1440 */ + /* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */ + long unsigned int _unused[2]; /* 1440 16 */ + unsigned int tramp[6]; /* 1456 24 */ + struct siginfo * pinfo; /* 1480 8 */ + void * puc; /* 1488 8 */ + struct siginfo info; /* 1496 128 */ + /* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */ + char abigap[288]; /* 1624 288 */ + + /* size: 1920, cachelines: 15, members: 7 */ + /* padding: 8 */ +}; + +1920 + 128 = 2048 + +Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore, +ptrace and signal support") (Jul 2008) the signal frame expanded to +2304 bytes: + +struct rt_sigframe { + struct ucontext uc; /* 0 1696 */ <-- + /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ + long unsigned int _unused[2]; /* 1696 16 */ + unsigned int tramp[6]; /* 1712 24 */ + struct siginfo * pinfo; /* 1736 8 */ + void * puc; /* 1744 8 */ + struct siginfo info; /* 1752 128 */ + /* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */ + char abigap[288]; /* 1880 288 */ + + /* size: 2176, cachelines: 17, members: 7 */ + /* padding: 8 */ +}; + +2176 + 128 = 2304 + +At this point we should have been exposed to the bug, though as far as +I know it was never reported. I no longer have a system old enough to +easily test on. + +Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a +grow-down stack segment") caused our stack expansion code to never +trigger, as there was always a VMA found for a write up to PAGE_SIZE +below r1. 
+ +That meant the bug was hidden as we continued to expand the signal +frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory +state to the signal context") (Feb 2013): + +struct rt_sigframe { + struct ucontext uc; /* 0 1696 */ + /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ + struct ucontext uc_transact; /* 1696 1696 */ <-- + /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */ + long unsigned int _unused[2]; /* 3392 16 */ + unsigned int tramp[6]; /* 3408 24 */ + struct siginfo * pinfo; /* 3432 8 */ + void * puc; /* 3440 8 */ + struct siginfo info; /* 3448 128 */ + /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */ + char abigap[288]; /* 3576 288 */ + + /* size: 3872, cachelines: 31, members: 8 */ + /* padding: 8 */ + /* last cacheline: 32 bytes */ +}; + +3872 + 128 = 4000 + +And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit +userspace to 512 bytes") (Feb 2014): + +struct rt_sigframe { + struct ucontext uc; /* 0 1696 */ + /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ + struct ucontext uc_transact; /* 1696 1696 */ + /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */ + long unsigned int _unused[2]; /* 3392 16 */ + unsigned int tramp[6]; /* 3408 24 */ + struct siginfo * pinfo; /* 3432 8 */ + void * puc; /* 3440 8 */ + struct siginfo info; /* 3448 128 */ + /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */ + char abigap[512]; /* 3576 512 */ <-- + + /* size: 4096, cachelines: 32, members: 8 */ + /* padding: 8 */ +}; + +4096 + 128 = 4224 + +Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard +gap, between vmas") exposed us to the existing bug, because it changed +the stack VMA to be the correct/real size, meaning our stack expansion +code is now triggered. + +Fix it by increasing the allowance to 4224 bytes. + +Hard-coding 4224 is obviously unsafe against future expansions of the +signal frame in the same way as the existing code. We can't easily use +sizeof() because the signal frame structure is not in a header. We +will either fix that, or rip out all the custom stack expansion +checking logic entirely. + +Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support") +Cc: stable@vger.kernel.org # v2.6.27+ +Reported-by: Tom Lane +Tested-by: Daniel Axtens +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au +Signed-off-by: Sasha Levin +--- + arch/powerpc/mm/fault.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +index 998c77e600a43..ebe97e5500ee5 100644 +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -224,6 +224,9 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code, + return is_exec || (address >= TASK_SIZE); + } + ++// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE ++#define SIGFRAME_MAX_SIZE (4096 + 128) ++ + static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + struct vm_area_struct *vma, unsigned int flags, + bool *must_retry) +@@ -231,7 +234,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + /* + * N.B. The POWER/Open ABI allows programs to access up to + * 288 bytes below the stack pointer. +- * The kernel signal delivery code writes up to about 1.5kB ++ * The kernel signal delivery code writes a bit over 4KB + * below the stack pointer (r1) before decrementing it. 
+ * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to +@@ -256,7 +259,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ +- if (address + 2048 >= uregs->gpr[1]) ++ if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1]) + return false; + + if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && +-- +2.25.1 + diff --git a/queue-4.14/powerpc-mm-only-read-faulting-instruction-when-neces.patch b/queue-4.14/powerpc-mm-only-read-faulting-instruction-when-neces.patch new file mode 100644 index 00000000000..afe0dd9a1e6 --- /dev/null +++ b/queue-4.14/powerpc-mm-only-read-faulting-instruction-when-neces.patch @@ -0,0 +1,180 @@ +From 678ba6516fac09d9fbb6f23d38b5659df1554622 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 May 2018 10:53:22 +0200 +Subject: powerpc/mm: Only read faulting instruction when necessary in + do_page_fault() + +From: Christophe Leroy + +[ Upstream commit 0e36b0d12501e278686634712975b785bae11641 ] + +Commit a7a9dcd882a67 ("powerpc: Avoid taking a data miss on every +userspace instruction miss") has shown that limiting the read of +faulting instruction to likely cases improves performance. + +This patch goes further into this direction by limiting the read +of the faulting instruction to the only cases where it is likely +needed. + +On an MPC885, with the same benchmark app as in the commit referred +above, we see a reduction of about 3900 dTLB misses (approx 3%): + +Before the patch: + Performance counter stats for './fault 500' (10 runs): + + 683033312 cpu-cycles ( +- 0.03% ) + 134538 dTLB-load-misses ( +- 0.03% ) + 46099 iTLB-load-misses ( +- 0.02% ) + 19681 faults ( +- 0.02% ) + + 5.389747878 seconds time elapsed ( +- 0.06% ) + +With the patch: + + Performance counter stats for './fault 500' (10 runs): + + 682112862 cpu-cycles ( +- 0.03% ) + 130619 dTLB-load-misses ( +- 0.03% ) + 46073 iTLB-load-misses ( +- 0.05% ) + 19681 faults ( +- 0.01% ) + + 5.381342641 seconds time elapsed ( +- 0.07% ) + +The proper work of the huge stack expansion was tested with the +following app: + +int main(int argc, char **argv) +{ + char buf[1024 * 1025]; + + sprintf(buf, "Hello world !\n"); + printf(buf); + + exit(0); +} + +Signed-off-by: Christophe Leroy +Reviewed-by: Nicholas Piggin +[mpe: Add include of pagemap.h to fix build errors] +Signed-off-by: Michael Ellerman +Signed-off-by: Sasha Levin +--- + arch/powerpc/mm/fault.c | 50 ++++++++++++++++++++++++++++------------- + 1 file changed, 34 insertions(+), 16 deletions(-) + +diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +index 5fc8a010fdf07..998c77e600a43 100644 +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -66,15 +67,11 @@ static inline bool notify_page_fault(struct pt_regs *regs) + } + + /* +- * Check whether the instruction at regs->nip is a store using ++ * Check whether the instruction inst is a store using + * an update addressing form which will update r1. 
+ */ +-static bool store_updates_sp(struct pt_regs *regs) ++static bool store_updates_sp(unsigned int inst) + { +- unsigned int inst; +- +- if (get_user(inst, (unsigned int __user *)regs->nip)) +- return false; + /* check for 1 in the rA field */ + if (((inst >> 16) & 0x1f) != 1) + return false; +@@ -228,8 +225,8 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code, + } + + static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, +- struct vm_area_struct *vma, +- bool store_update_sp) ++ struct vm_area_struct *vma, unsigned int flags, ++ bool *must_retry) + { + /* + * N.B. The POWER/Open ABI allows programs to access up to +@@ -241,6 +238,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { ++ unsigned int __user *nip = (unsigned int __user *)regs->nip; + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) +@@ -258,8 +256,22 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ +- if (address + 2048 < uregs->gpr[1] && !store_update_sp) +- return true; ++ if (address + 2048 >= uregs->gpr[1]) ++ return false; ++ ++ if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && ++ access_ok(VERIFY_READ, nip, sizeof(*nip))) { ++ unsigned int inst; ++ int res; ++ ++ pagefault_disable(); ++ res = __get_user_inatomic(inst, nip); ++ pagefault_enable(); ++ if (!res) ++ return !store_updates_sp(inst); ++ *must_retry = true; ++ } ++ return true; + } + return false; + } +@@ -392,7 +404,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, + int is_user = user_mode(regs); + int is_write = page_fault_is_write(error_code); + int fault, major = 0; +- bool store_update_sp = false; ++ bool must_retry = false; + + if (notify_page_fault(regs)) + return 0; +@@ -439,9 +451,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, + * can result in fault, which will cause a deadlock when called with + * mmap_sem held + */ +- if (is_write && is_user) +- store_update_sp = store_updates_sp(regs); +- + if (is_user) + flags |= FAULT_FLAG_USER; + if (is_write) +@@ -488,8 +497,17 @@ retry: + return bad_area(regs, address); + + /* The stack is being expanded, check if it's valid */ +- if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp))) +- return bad_area(regs, address); ++ if (unlikely(bad_stack_expansion(regs, address, vma, flags, ++ &must_retry))) { ++ if (!must_retry) ++ return bad_area(regs, address); ++ ++ up_read(&mm->mmap_sem); ++ if (fault_in_pages_readable((const char __user *)regs->nip, ++ sizeof(unsigned int))) ++ return bad_area_nosemaphore(regs, address); ++ goto retry; ++ } + + /* Try to expand it */ + if (unlikely(expand_stack(vma, address))) +-- +2.25.1 + diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..74f8034aa03 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,11 @@ +drm-vgem-replace-opencoded-version-of-drm_gem_dumb_m.patch +perf-probe-fix-memory-leakage-when-the-probe-point-i.patch +khugepaged-khugepaged_test_exit-check-mmget_still_va.patch +khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch +powerpc-mm-only-read-faulting-instruction-when-neces.patch +powerpc-allow-4224-bytes-of-stack-expansion-for-the-.patch 
+btrfs-export-helpers-for-subvolume-name-id-resolutio.patch +btrfs-don-t-show-full-path-of-bind-mounts-in-subvol.patch +btrfs-move-free_pages_out-label-in-inline-extent-han.patch +btrfs-inode-fix-null-pointer-dereference-if-inode-do.patch +btrfs-sysfs-use-nofs-for-device-creation.patch