--- /dev/null
+From 69af7e23a6870df2ea6fa79ca16493d59b3eebeb Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:03:54 +0100
+Subject: ARM: 8769/1: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 69af7e23a6870df2ea6fa79ca16493d59b3eebeb upstream.
+
+Since get_kprobe_ctlblk() uses smp_processor_id() to access a
+per-cpu variable, it hits the smp_processor_id() sanity check below.
+
+[ 7.006928] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
+[ 7.007859] caller is debug_smp_processor_id+0x20/0x24
+[ 7.008438] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1-00192-g4eb17253e4b5 #1
+[ 7.008890] Hardware name: Generic DT based system
+[ 7.009917] [<c0313f0c>] (unwind_backtrace) from [<c030e6d8>] (show_stack+0x20/0x24)
+[ 7.010473] [<c030e6d8>] (show_stack) from [<c0c64694>] (dump_stack+0x84/0x98)
+[ 7.010990] [<c0c64694>] (dump_stack) from [<c071ca5c>] (check_preemption_disabled+0x138/0x13c)
+[ 7.011592] [<c071ca5c>] (check_preemption_disabled) from [<c071ca80>] (debug_smp_processor_id+0x20/0x24)
+[ 7.012214] [<c071ca80>] (debug_smp_processor_id) from [<c03335e0>] (optimized_callback+0x2c/0xe4)
+[ 7.013077] [<c03335e0>] (optimized_callback) from [<bf0021b0>] (0xbf0021b0)
+
+To fix this issue, call get_kprobe_ctlblk() right after disabling
+interrupts, since that also disables preemption.
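+
+For reference, a minimal sketch of why the ordering matters, based on
+the generic get_kprobe_ctlblk() helper in include/linux/kprobes.h
+(quoted for illustration only; it is not touched by this patch):
+
+  static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
+  {
+          /* this_cpu_ptr() resolves the current CPU via
+           * smp_processor_id(), which is only reliable while
+           * preemption (or interrupts) is disabled. */
+          return this_cpu_ptr(&kprobe_ctlblk);
+  }
+
+Calling it before local_irq_save() therefore trips the preemption
+debug check above; calling it afterwards does not.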
+
+Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/probes/kprobes/opt-arm.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/probes/kprobes/opt-arm.c
++++ b/arch/arm/probes/kprobes/opt-arm.c
+@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kpro
+ {
+ unsigned long flags;
+ struct kprobe *p = &op->kp;
+- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
++ struct kprobe_ctlblk *kcb;
+
+ /* Save skipped registers */
+ regs->ARM_pc = (unsigned long)op->kp.addr;
+ regs->ARM_ORIG_r0 = ~0UL;
+
+ local_irq_save(flags);
++ kcb = get_kprobe_ctlblk();
+
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
--- /dev/null
+From 70948c05fdde0aac32f9667856a88725c192fa40 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:10 +0100
+Subject: ARM: 8770/1: kprobes: Prohibit probing on optimized_callback
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 70948c05fdde0aac32f9667856a88725c192fa40 upstream.
+
+Prohibit probing on optimized_callback() because it is called from
+kprobes itself. If we put a kprobe on it, that will cause a
+recursive call loop.
+Mark it with NOKPROBE_SYMBOL().
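+
+As a rough sketch of what that annotation does (based on the generic
+definition in include/asm-generic/kprobes.h, not something added by
+this patch), NOKPROBE_SYMBOL() records the function address in a
+dedicated section that the kprobes core treats as a blacklist:
+
+  #define NOKPROBE_SYMBOL(fname)                                     \
+  static unsigned long __used                                        \
+          __attribute__((__section__("_kprobe_blacklist")))          \
+          _kbl_addr_##fname = (unsigned long)fname;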
+
+Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/probes/kprobes/opt-arm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/probes/kprobes/opt-arm.c
++++ b/arch/arm/probes/kprobes/opt-arm.c
+@@ -192,6 +192,7 @@ optimized_callback(struct optimized_kpro
+
+ local_irq_restore(flags);
+ }
++NOKPROBE_SYMBOL(optimized_callback)
+
+ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
+ {
--- /dev/null
+From eb0146daefdde65665b7f076fbff7b49dade95b9 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:16 +0100
+Subject: ARM: 8771/1: kprobes: Prohibit kprobes on do_undefinstr
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit eb0146daefdde65665b7f076fbff7b49dade95b9 upstream.
+
+Prohibit kprobes on do_undefinstr() because kprobes on
+ARM is implemented using an undefined instruction. This means
+that if we probe do_undefinstr(), it can cause an infinite
+recursive exception.
+
+Fixes: 24ba613c9d6c ("ARM kprobes: core code")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kernel/traps.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/kernel/traps.c
++++ b/arch/arm/kernel/traps.c
+@@ -19,6 +19,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/hardirq.h>
+ #include <linux/kdebug.h>
++#include <linux/kprobes.h>
+ #include <linux/module.h>
+ #include <linux/kexec.h>
+ #include <linux/bug.h>
+@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_
+ raw_spin_unlock_irqrestore(&undef_lock, flags);
+ }
+
+-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
++static nokprobe_inline
++int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+ {
+ struct undef_hook *hook;
+ unsigned long flags;
+@@ -490,6 +492,7 @@ die_sig:
+
+ arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
+ }
++NOKPROBE_SYMBOL(do_undefinstr)
+
+ /*
+ * Handle FIQ similarly to NMI on x86 systems.
--- /dev/null
+From 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:29 +0100
+Subject: ARM: 8772/1: kprobes: Prohibit kprobes on get_user functions
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c upstream.
+
+Since do_undefinstr() uses get_user to fetch the undefined
+instruction, it can be called before kprobes runs its
+recursion check. This can cause an infinite recursive
+exception.
+Prohibit probing on the get_user functions.
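+
+For context, a simplified sketch of how addresses recorded by
+_ASM_NOKPROBE() (and NOKPROBE_SYMBOL()) end up rejecting probe
+registration. This is loosely modelled on kernel/kprobes.c; the
+helper below is illustrative only, and the real code expands each
+entry to the whole function range:
+
+  extern unsigned long __start_kprobe_blacklist[];
+  extern unsigned long __stop_kprobe_blacklist[];
+
+  /* Walk the "_kprobe_blacklist" section and refuse a probe whose
+   * address matches a recorded entry. */
+  static bool is_blacklisted(unsigned long addr)
+  {
+          unsigned long *entry;
+
+          for (entry = __start_kprobe_blacklist;
+               entry < __stop_kprobe_blacklist; entry++) {
+                  if (addr == *entry)
+                          return true;
+          }
+          return false;
+  }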
+
+Fixes: 24ba613c9d6c ("ARM kprobes: core code")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/assembler.h | 10 ++++++++++
+ arch/arm/lib/getuser.S | 10 ++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
+ #endif
+ .endm
+
++#ifdef CONFIG_KPROBES
++#define _ASM_NOKPROBE(entry) \
++ .pushsection "_kprobe_blacklist", "aw" ; \
++ .balign 4 ; \
++ .long entry; \
++ .popsection
++#else
++#define _ASM_NOKPROBE(entry)
++#endif
++
+ #endif /* __ASM_ASSEMBLER_H__ */
+--- a/arch/arm/lib/getuser.S
++++ b/arch/arm/lib/getuser.S
+@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_1)
++_ASM_NOKPROBE(__get_user_1)
+
+ ENTRY(__get_user_2)
+ check_uaccess r0, 2, r1, r2, __get_user_bad
+@@ -58,6 +59,7 @@ rb .req r0
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_2)
++_ASM_NOKPROBE(__get_user_2)
+
+ ENTRY(__get_user_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad
+@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_4)
++_ASM_NOKPROBE(__get_user_4)
+
+ ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad8
+@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_8)
++_ASM_NOKPROBE(__get_user_8)
+
+ #ifdef __ARMEB__
+ ENTRY(__get_user_32t_8)
+@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_32t_8)
++_ASM_NOKPROBE(__get_user_32t_8)
+
+ ENTRY(__get_user_64t_1)
+ check_uaccess r0, 1, r1, r2, __get_user_bad8
+@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_64t_1)
++_ASM_NOKPROBE(__get_user_64t_1)
+
+ ENTRY(__get_user_64t_2)
+ check_uaccess r0, 2, r1, r2, __get_user_bad8
+@@ -114,6 +120,7 @@ rb .req r0
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_64t_2)
++_ASM_NOKPROBE(__get_user_64t_2)
+
+ ENTRY(__get_user_64t_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad8
+@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
+ mov r0, #0
+ ret lr
+ ENDPROC(__get_user_64t_4)
++_ASM_NOKPROBE(__get_user_64t_4)
+ #endif
+
+ __get_user_bad8:
+@@ -131,6 +139,8 @@ __get_user_bad:
+ ret lr
+ ENDPROC(__get_user_bad)
+ ENDPROC(__get_user_bad8)
++_ASM_NOKPROBE(__get_user_bad)
++_ASM_NOKPROBE(__get_user_bad8)
+
+ .pushsection __ex_table, "a"
+ .long 1b, __get_user_bad
--- /dev/null
+From 02ee654d3a04563c67bfe658a05384548b9bb105 Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Thu, 17 May 2018 15:16:51 +0800
+Subject: btrfs: fix crash when trying to resume balance without the resume flag
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 02ee654d3a04563c67bfe658a05384548b9bb105 upstream.
+
+We set the BTRFS_BALANCE_RESUME flag only in btrfs_recover_balance(),
+which isn't called during a remount. So when resuming from a paused
+balance we hit the bug:
+
+ kernel: kernel BUG at fs/btrfs/volumes.c:3890!
+ ::
+ kernel: balance_kthread+0x51/0x60 [btrfs]
+ kernel: kthread+0x111/0x130
+ ::
+ kernel: RIP: btrfs_balance+0x12e1/0x1570 [btrfs] RSP: ffffba7d0090bde8
+
+Reproducer:
+ On a mounted filesystem:
+
+ btrfs balance start --full-balance /btrfs
+ btrfs balance pause /btrfs
+ mount -o remount,ro /dev/sdb /btrfs
+ mount -o remount,rw /dev/sdb /btrfs
+
+To fix this, set the BTRFS_BALANCE_RESUME flag in
+btrfs_resume_balance_async().
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4046,6 +4046,15 @@ int btrfs_resume_balance_async(struct bt
+ return 0;
+ }
+
++ /*
++ * A ro->rw remount sequence should continue with the paused balance
++ * regardless of who pauses it, system or the user as of now, so set
++ * the resume flag.
++ */
++ spin_lock(&fs_info->balance_lock);
++ fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
++ spin_unlock(&fs_info->balance_lock);
++
+ tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+ return PTR_ERR_OR_ZERO(tsk);
+ }
--- /dev/null
+From fe816d0f1d4c31c4c31d42ca78a87660565fc800 Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Fri, 27 Apr 2018 12:21:53 +0300
+Subject: btrfs: Fix delalloc inodes invalidation during transaction abort
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit fe816d0f1d4c31c4c31d42ca78a87660565fc800 upstream.
+
+When a transaction is aborted btrfs_cleanup_transaction is called to
+clean up all the various in-flight bits and pieces which might be
+active. One of those is delalloc inodes - inodes which have dirty
+pages which haven't been persisted yet. Currently the process of
+freeing such delalloc inodes in exceptional circumstances such as
+transaction abort boils down to calling btrfs_invalidate_inodes, whose
+sole job is to invalidate the dentries for all inodes related to a
+root. This is in fact wrong and insufficient since such delalloc inodes
+will likely have pending pages or ordered extents and will be linked to
+the sb->s_inode_list. This means that unmounting a btrfs instance with
+an aborted transaction could potentially leave inodes and their pages
+visible to the system long after their superblock has been freed. This
+in turn leads to a "use-after-free" situation once page shrinking is
+triggered. This situation can be simulated by running generic/019,
+which causes such inodes to be left hanging, followed by generic/176,
+which causes memory pressure and page eviction, which leads to
+touching the freed super block instance. This situation is
+additionally detected by the VFS unmount code with the following
+message:
+
+"VFS: Busy inodes after unmount of Self-destruct in 5 seconds. Have a nice day..."
+
+Additionally btrfs hits WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+in free_fs_root for the same reason.
+
+This patch aims to rectify the situation by doing the following:
+
+1. Change btrfs_destroy_delalloc_inodes so that it calls
+invalidate_inode_pages2 for every inode on the delalloc list; this
+ensures that all the pages of the inode are released. This function
+boils down to calling btrfs_releasepage. During testing I observed
+cases where inodes on the delalloc list had an i_count of 0, so this
+necessitates using igrab to be sure we are working on a non-freed inode.
+
+2. Since calling btrfs_releasepage might queue delayed iputs, move the
+btrfs_cleanup_transaction call in btrfs_error_commit_super so that it
+runs before the last call to run_delayed_iputs. This is necessary to
+ensure that the delayed iputs are run.
+
+Note: this patch is tagged for 4.14 stable, but the fix applies to older
+versions too; it needs to be backported manually due to conflicts.
+
+CC: stable@vger.kernel.org # 4.14.x: 2b8773313494: btrfs: Split btrfs_del_delalloc_inode into 2 functions
+CC: stable@vger.kernel.org # 4.14.x
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add comment to igrab ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3744,6 +3744,7 @@ void close_ctree(struct btrfs_fs_info *f
+ set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
+
+ btrfs_free_qgroup_config(fs_info);
++ ASSERT(list_empty(&fs_info->delalloc_roots));
+
+ if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
+ btrfs_info(fs_info, "at unmount delalloc count %lld",
+@@ -4049,15 +4050,15 @@ static int btrfs_check_super_valid(struc
+
+ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
+ {
++ /* cleanup FS via transaction */
++ btrfs_cleanup_transaction(fs_info);
++
+ mutex_lock(&fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(fs_info);
+ mutex_unlock(&fs_info->cleaner_mutex);
+
+ down_write(&fs_info->cleanup_work_sem);
+ up_write(&fs_info->cleanup_work_sem);
+-
+- /* cleanup FS via transaction */
+- btrfs_cleanup_transaction(fs_info);
+ }
+
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
+@@ -4182,19 +4183,23 @@ static void btrfs_destroy_delalloc_inode
+ list_splice_init(&root->delalloc_inodes, &splice);
+
+ while (!list_empty(&splice)) {
++ struct inode *inode = NULL;
+ btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
+ delalloc_inodes);
+-
+- list_del_init(&btrfs_inode->delalloc_inodes);
+- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+- &btrfs_inode->runtime_flags);
++ __btrfs_del_delalloc_inode(root, btrfs_inode);
+ spin_unlock(&root->delalloc_lock);
+
+- btrfs_invalidate_inodes(btrfs_inode->root);
+-
++ /*
++ * Make sure we get a live inode and that it'll not disappear
++ * meanwhile.
++ */
++ inode = igrab(&btrfs_inode->vfs_inode);
++ if (inode) {
++ invalidate_inode_pages2(inode->i_mapping);
++ iput(inode);
++ }
+ spin_lock(&root->delalloc_lock);
+ }
+-
+ spin_unlock(&root->delalloc_lock);
+ }
+
+@@ -4210,7 +4215,6 @@ static void btrfs_destroy_all_delalloc_i
+ while (!list_empty(&splice)) {
+ root = list_first_entry(&splice, struct btrfs_root,
+ delalloc_root);
+- list_del_init(&root->delalloc_root);
+ root = btrfs_grab_fs_root(root);
+ BUG_ON(!root);
+ spin_unlock(&fs_info->delalloc_root_lock);
--- /dev/null
+From 9a8fca62aacc1599fea8e813d01e1955513e4fad Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 11 May 2018 16:42:42 +0100
+Subject: Btrfs: fix xattr loss after power failure
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 9a8fca62aacc1599fea8e813d01e1955513e4fad upstream.
+
+If a file has xattrs and we fsync it (which clears the flags
+BTRFS_INODE_NEEDS_FULL_SYNC and BTRFS_INODE_COPY_EVERYTHING from its
+inode), and then, after the current transaction commits, we fsync it
+again (without either of those bits being set in its inode), we end up
+not logging all its xattrs. This results in deleting all xattrs when
+replaying the log after a power failure.
+
+Trivial reproducer
+
+ $ mkfs.btrfs -f /dev/sdb
+ $ mount /dev/sdb /mnt
+
+ $ touch /mnt/foobar
+ $ setfattr -n user.xa -v qwerty /mnt/foobar
+ $ xfs_io -c "fsync" /mnt/foobar
+
+ $ sync
+
+ $ xfs_io -c "pwrite -S 0xab 0 64K" /mnt/foobar
+ $ xfs_io -c "fsync" /mnt/foobar
+ <power failure>
+
+ $ mount /dev/sdb /mnt
+ $ getfattr --absolute-names --dump /mnt/foobar
+ <empty output>
+ $
+
+So fix this by making sure all xattrs are logged if we log a file's inode
+item and neither the flags BTRFS_INODE_NEEDS_FULL_SYNC nor
+BTRFS_INODE_COPY_EVERYTHING were set in the inode.
+
+Fixes: 36283bf777d9 ("Btrfs: fix fsync xattr loss in the fast fsync path")
+Cc: <stable@vger.kernel.org> # 4.2+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4749,6 +4749,7 @@ static int btrfs_log_inode(struct btrfs_
+ struct extent_map_tree *em_tree = &inode->extent_tree;
+ u64 logged_isize = 0;
+ bool need_log_inode_item = true;
++ bool xattrs_logged = false;
+
+ path = btrfs_alloc_path();
+ if (!path)
+@@ -5050,6 +5051,7 @@ next_key:
+ err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+ if (err)
+ goto out_unlock;
++ xattrs_logged = true;
+ if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+@@ -5062,6 +5064,11 @@ log_extents:
+ btrfs_release_path(dst_path);
+ if (need_log_inode_item) {
+ err = log_inode_item(trans, log, dst_path, inode);
++ if (!err && !xattrs_logged) {
++ err = btrfs_log_all_xattrs(trans, root, inode, path,
++ dst_path);
++ btrfs_release_path(path);
++ }
+ if (err)
+ goto out_unlock;
+ }
--- /dev/null
+From 1a63c198ddb810c790101d693c7071cca703b3c7 Mon Sep 17 00:00:00 2001
+From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Date: Tue, 15 May 2018 16:51:26 +0900
+Subject: btrfs: property: Set incompat flag if lzo/zstd compression is set
+
+From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+
+commit 1a63c198ddb810c790101d693c7071cca703b3c7 upstream.
+
+The incompat flag for LZO/ZSTD compression should be set at:
+
+ 1. mount time (-o compress/compress-force)
+ 2. when defrag is done
+ 3. when property is set
+
+Currently 3. is missing and this commit adds it.
+
+This could lead to a filesystem that uses ZSTD but is not marked as
+such. If a kernel without ZSTD support encounters a ZSTD compressed
+extent, it will handle that, but this could be confusing to the user.
+
+Typically the filesystem is mounted with the ZSTD option, but the
+discrepancy can arise when a filesystem is never mounted with ZSTD and
+then the property on some file is set (and some new extents are
+written). A simple mount with -o compress=zstd will fix that up on an
+unpatched kernel.
+
+Same goes for LZO, but this has been around for a very long time
+(2.6.37) so it's unlikely that a pre-LZO kernel would be used.
+
+Fixes: 5c1aab1dd544 ("btrfs: Add zstd support")
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add user visible impact ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/props.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/props.c
++++ b/fs/btrfs/props.c
+@@ -393,6 +393,7 @@ static int prop_compression_apply(struct
+ const char *value,
+ size_t len)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ int type;
+
+ if (len == 0) {
+@@ -403,14 +404,17 @@ static int prop_compression_apply(struct
+ return 0;
+ }
+
+- if (!strncmp("lzo", value, 3))
++ if (!strncmp("lzo", value, 3)) {
+ type = BTRFS_COMPRESS_LZO;
+- else if (!strncmp("zlib", value, 4))
++ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
++ } else if (!strncmp("zlib", value, 4)) {
+ type = BTRFS_COMPRESS_ZLIB;
+- else if (!strncmp("zstd", value, len))
++ } else if (!strncmp("zstd", value, len)) {
+ type = BTRFS_COMPRESS_ZSTD;
+- else
++ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
++ } else {
+ return -EINVAL;
++ }
+
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
--- /dev/null
+From 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 Mon Sep 17 00:00:00 2001
+From: Robbie Ko <robbieko@synology.com>
+Date: Mon, 14 May 2018 10:51:34 +0800
+Subject: Btrfs: send, fix invalid access to commit roots due to concurrent snapshotting
+
+From: Robbie Ko <robbieko@synology.com>
+
+commit 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 upstream.
+
+[BUG]
+A btrfs incremental send BUG happens when creating a snapshot of a
+snapshot that is being used by send.
+
+[REASON]
+The problem can happen if, while we are doing a send, one of the
+snapshots used (parent or send) is snapshotted, because snapshotting
+implies COWing the root of the source subvolume/snapshot.
+
+1. When doing an incremental send, the send process will get the commit
+ roots from the parent and send snapshots, and add references to them
+ through extent_buffer_get().
+
+2. When a snapshot/subvolume is snapshotted, its root node is COWed
+ (transaction.c:create_pending_snapshot()).
+
+3. COWing releases the space used by the node immediately, through:
+
+ __btrfs_cow_block()
+ --btrfs_free_tree_block()
+ ----btrfs_add_free_space(bytenr of node)
+
+4. Because send doesn't hold a transaction open, it's possible that
+ the transaction used to create the snapshot commits, switches the
+ commit root and the old space used by the previous root node gets
+ assigned to some other node allocation. Allocation of a new node will
+ use the existing extent buffer found in memory, which we previously
+ got a reference through extent_buffer_get(), and allow the extent
+ buffer's content (pages) to be modified:
+
+ btrfs_alloc_tree_block
+ --btrfs_reserve_extent
+ ----find_free_extent (get bytenr of old node)
+ --btrfs_init_new_buffer (use bytenr of old node)
+ ----btrfs_find_create_tree_block
+ ------alloc_extent_buffer
+ --------find_extent_buffer (get old node)
+
+5. So send can access invalid memory content and have unpredictable
+ behaviour.
+
+[FIX]
+So we fix the problem by copying the commit roots of the send and
+parent snapshots and using those copies.
+
+The call trace looks like this:
+ ------------[ cut here ]------------
+ kernel BUG at fs/btrfs/ctree.c:1861!
+ invalid opcode: 0000 [#1] SMP
+ CPU: 6 PID: 24235 Comm: btrfs Tainted: P O 3.10.105 #23721
+ task: ffff88046652d680 ti: ffff88041b720000 task.ti: ffff88041b720000
+ RIP: 0010:[<ffffffffa08dd0e8>] read_node_slot+0x108/0x110 [btrfs]
+ RSP: 0018:ffff88041b723b68 EFLAGS: 00010246
+ RAX: ffff88043ca6b000 RBX: ffff88041b723c50 RCX: ffff880000000000
+ RDX: 000000000000004c RSI: ffff880314b133f8 RDI: ffff880458b24000
+ RBP: 0000000000000000 R08: 0000000000000001 R09: ffff88041b723c66
+ R10: 0000000000000001 R11: 0000000000001000 R12: ffff8803f3e48890
+ R13: ffff8803f3e48880 R14: ffff880466351800 R15: 0000000000000001
+ FS: 00007f8c321dc8c0(0000) GS:ffff88047fcc0000(0000)
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007efd1006d000 CR3: 0000000213a24000 CR4: 00000000003407e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Stack:
+ ffff88041b723c50 ffff8803f3e48880 ffff8803f3e48890 ffff8803f3e48880
+ ffff880466351800 0000000000000001 ffffffffa08dd9d7 ffff88041b723c50
+ ffff8803f3e48880 ffff88041b723c66 ffffffffa08dde85 a9ff88042d2c4400
+ Call Trace:
+ [<ffffffffa08dd9d7>] ? tree_move_down.isra.33+0x27/0x50 [btrfs]
+ [<ffffffffa08dde85>] ? tree_advance+0xb5/0xc0 [btrfs]
+ [<ffffffffa08e83d4>] ? btrfs_compare_trees+0x2d4/0x760 [btrfs]
+ [<ffffffffa0982050>] ? finish_inode_if_needed+0x870/0x870 [btrfs]
+ [<ffffffffa09841ea>] ? btrfs_ioctl_send+0xeda/0x1050 [btrfs]
+ [<ffffffffa094bd3d>] ? btrfs_ioctl+0x1e3d/0x33f0 [btrfs]
+ [<ffffffff81111133>] ? handle_pte_fault+0x373/0x990
+ [<ffffffff8153a096>] ? atomic_notifier_call_chain+0x16/0x20
+ [<ffffffff81063256>] ? set_task_cpu+0xb6/0x1d0
+ [<ffffffff811122c3>] ? handle_mm_fault+0x143/0x2a0
+ [<ffffffff81539cc0>] ? __do_page_fault+0x1d0/0x500
+ [<ffffffff81062f07>] ? check_preempt_curr+0x57/0x90
+ [<ffffffff8115075a>] ? do_vfs_ioctl+0x4aa/0x990
+ [<ffffffff81034f83>] ? do_fork+0x113/0x3b0
+ [<ffffffff812dd7d7>] ? trace_hardirqs_off_thunk+0x3a/0x6c
+ [<ffffffff81150cc8>] ? SyS_ioctl+0x88/0xa0
+ [<ffffffff8153e422>] ? system_call_fastpath+0x16/0x1b
+ ---[ end trace 29576629ee80b2e1 ]---
+
+Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function")
+CC: stable@vger.kernel.org # 3.6+
+Signed-off-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -5460,12 +5460,24 @@ int btrfs_compare_trees(struct btrfs_roo
+ down_read(&fs_info->commit_root_sem);
+ left_level = btrfs_header_level(left_root->commit_root);
+ left_root_level = left_level;
+- left_path->nodes[left_level] = left_root->commit_root;
++ left_path->nodes[left_level] =
++ btrfs_clone_extent_buffer(left_root->commit_root);
++ if (!left_path->nodes[left_level]) {
++ up_read(&fs_info->commit_root_sem);
++ ret = -ENOMEM;
++ goto out;
++ }
+ extent_buffer_get(left_path->nodes[left_level]);
+
+ right_level = btrfs_header_level(right_root->commit_root);
+ right_root_level = right_level;
+- right_path->nodes[right_level] = right_root->commit_root;
++ right_path->nodes[right_level] =
++ btrfs_clone_extent_buffer(right_root->commit_root);
++ if (!right_path->nodes[right_level]) {
++ up_read(&fs_info->commit_root_sem);
++ ret = -ENOMEM;
++ goto out;
++ }
+ extent_buffer_get(right_path->nodes[right_level]);
+ up_read(&fs_info->commit_root_sem);
+
--- /dev/null
+From 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Fri, 4 May 2018 07:59:58 +0200
+Subject: efi: Avoid potential crashes, fix the 'struct efi_pci_io_protocol_32' definition for mixed mode
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 upstream.
+
+Mixed mode allows a kernel built for x86_64 to interact with 32-bit
+EFI firmware, but requires all struct definitions to be laid out
+carefully when it comes to pointer sizes.
+
+'struct efi_pci_io_protocol_32' currently uses a 'void *' for the
+'romimage' field, which will be interpreted as a 64-bit field
+on such kernels, potentially resulting in bogus memory references
+and subsequent crashes.
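+
+A small user-space sketch of the layout problem (the struct and field
+names below only mirror the idea, not the exact EFI structs; this is
+illustrative only):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* What 32-bit firmware actually stores ... */
+  struct proto32_fw {
+          uint64_t romsize;
+          uint32_t romimage;      /* 32-bit firmware pointer */
+  };
+
+  /* ... versus what a 64-bit kernel build saw with 'void *' */
+  struct proto32_wrong {
+          uint64_t romsize;
+          void *romimage;         /* 8 bytes on x86_64 */
+  };
+
+  int main(void)
+  {
+          /* On x86_64 the two layouts differ: reading romimage
+           * through the 'void *' member pulls in 8 bytes where the
+           * firmware only stored 4, yielding a bogus pointer. */
+          printf("firmware view: %zu bytes, kernel view: %zu bytes\n",
+                 sizeof(struct proto32_fw), sizeof(struct proto32_wrong));
+          return 0;
+  }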
+
+Tested-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: <stable@vger.kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-efi@vger.kernel.org
+Link: http://lkml.kernel.org/r/20180504060003.19618-13-ard.biesheuvel@linaro.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/boot/compressed/eboot.c | 6 ++++--
+ include/linux/efi.h | 8 ++++----
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/boot/compressed/eboot.c
++++ b/arch/x86/boot/compressed/eboot.c
+@@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+- memcpy(rom->romdata, pci->romimage, pci->romsize);
++ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
++ pci->romsize);
+ return status;
+
+ free_struct:
+@@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+- memcpy(rom->romdata, pci->romimage, pci->romsize);
++ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
++ pci->romsize);
+ return status;
+
+ free_struct:
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -395,8 +395,8 @@ typedef struct {
+ u32 attributes;
+ u32 get_bar_attributes;
+ u32 set_bar_attributes;
+- uint64_t romsize;
+- void *romimage;
++ u64 romsize;
++ u32 romimage;
+ } efi_pci_io_protocol_32;
+
+ typedef struct {
+@@ -415,8 +415,8 @@ typedef struct {
+ u64 attributes;
+ u64 get_bar_attributes;
+ u64 set_bar_attributes;
+- uint64_t romsize;
+- void *romimage;
++ u64 romsize;
++ u64 romimage;
+ } efi_pci_io_protocol_64;
+
+ typedef struct {
s390-qdio-don-t-release-memory-in-qdio_setup_irq.patch
s390-remove-indirect-branch-from-do_softirq_own_stack.patch
bcache-return-0-from-bch_debug_init-if-config_debug_fs-n.patch
+x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch
+x86-pkeys-do-not-special-case-protection-key-0.patch
+efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch
+arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch
+x86-apic-x2apic-initialize-cluster-id-properly.patch
+x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch
+tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch
+arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch
+arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch
+arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch
+btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch
+btrfs-fix-xattr-loss-after-power-failure.patch
+btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch
+btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch
+btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch
--- /dev/null
+From 5596fe34495cf0f645f417eb928ef224df3e3cb4 Mon Sep 17 00:00:00 2001
+From: Dexuan Cui <decui@microsoft.com>
+Date: Tue, 15 May 2018 19:52:50 +0000
+Subject: tick/broadcast: Use for_each_cpu() specially on UP kernels
+
+From: Dexuan Cui <decui@microsoft.com>
+
+commit 5596fe34495cf0f645f417eb928ef224df3e3cb4 upstream.
+
+for_each_cpu() unintuitively reports CPU0 as set independent of the actual
+cpumask content on UP kernels. This causes an unexpected PIT interrupt
+storm on a UP kernel running in an SMP virtual machine on Hyper-V, and as
+a result, the virtual machine can suffer from a strange random delay of 1~20
+minutes during boot-up, and sometimes it can hang forever.
+
+Protect it by checking whether the cpumask is empty before entering the
+for_each_cpu() loop.
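+
+For reference, on NR_CPUS=1 kernels for_each_cpu() boils down to
+roughly the following (paraphrased from include/linux/cpumask.h; it is
+not changed by this patch):
+
+  #define for_each_cpu(cpu, mask)                          \
+          for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+
+i.e. the mask is ignored and CPU0 is always visited once, which is why
+the explicit empty-mask check is needed on UP.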
+
+[ tglx: Use !IS_ENABLED(CONFIG_SMP) instead of #ifdeffery ]
+
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Josh Poulson <jopoulso@microsoft.com>
+Cc: "Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: Rakib Mullick <rakib.mullick@gmail.com>
+Cc: Jork Loeser <Jork.Loeser@microsoft.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: KY Srinivasan <kys@microsoft.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Alexey Dobriyan <adobriyan@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Link: https://lkml.kernel.org/r/KL1P15301MB000678289FE55BA365B3279ABF990@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM
+Link: https://lkml.kernel.org/r/KL1P15301MB0006FA63BC22BEB64902EAA0BF930@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/tick-broadcast.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcas
+ now = ktime_get();
+ /* Find all expired events */
+ for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
++ /*
++ * Required for !SMP because for_each_cpu() reports
++ * unconditionally CPU0 as set on UP kernels.
++ */
++ if (!IS_ENABLED(CONFIG_SMP) &&
++ cpumask_empty(tick_broadcast_oneshot_mask))
++ break;
++
+ td = &per_cpu(tick_cpu_device, cpu);
+ if (td->evtdev->next_event <= now) {
+ cpumask_set_cpu(cpu, tmpmask);
--- /dev/null
+From fed71f7d98795ed0fa1d431910787f0f4a68324f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 17 May 2018 14:36:39 +0200
+Subject: x86/apic/x2apic: Initialize cluster ID properly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit fed71f7d98795ed0fa1d431910787f0f4a68324f upstream.
+
+Rick bisected a regression on large systems which use the x2apic cluster
+mode for interrupt delivery to the commit which reworked the cluster
+management.
+
+The problem is caused by a missing initialization of the clusterid field
+in the shared cluster data structures. So all structures end up with
+cluster ID 0 which only allows sharing between all CPUs which belong to
+cluster 0. All other CPUs with a cluster ID > 0 cannot share the data
+structure because they cannot find existing data with their cluster
+ID. This causes malfunction with IPIs because IPIs are sent to the wrong
+cluster and the caller waits forever for the target CPU to handle the IPI.
+
+Add the missing initialization when an upcoming CPU is the first in a
+cluster so that the later booting CPUs can find the data and share it for
+proper operation.
+
+Fixes: 023a611748fd ("x86/apic/x2apic: Simplify cluster management")
+Reported-by: Rick Warner <rick@microway.com>
+Bisected-by: Rick Warner <rick@microway.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Rick Warner <rick@microway.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1805171418210.1947@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/x2apic_cluster.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/apic/x2apic_cluster.c
++++ b/arch/x86/kernel/apic/x2apic_cluster.c
+@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
+ goto update;
+ }
+ cmsk = cluster_hotplug_mask;
++ cmsk->clusterid = cluster;
+ cluster_hotplug_mask = NULL;
+ update:
+ this_cpu_write(cluster_masks, cmsk);
--- /dev/null
+From acf46020012ccbca1172e9c7aeab399c950d9212 Mon Sep 17 00:00:00 2001
+From: Dmitry Safonov <dima@arista.com>
+Date: Fri, 18 May 2018 00:35:10 +0100
+Subject: x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
+
+From: Dmitry Safonov <dima@arista.com>
+
+commit acf46020012ccbca1172e9c7aeab399c950d9212 upstream.
+
+The x86 mmap() code selects the mmap base for an allocation depending on
+the bitness of the syscall. For 64-bit syscalls it selects mm->mmap_base
+and for 32-bit syscalls mm->mmap_compat_base.
+
+exec() calls mmap() which in turn uses in_compat_syscall() to check whether
+the mapping is for a 32bit or a 64bit task. The decision is made on the
+following criteria:
+
+ ia32 child->thread.status & TS_COMPAT
+ x32 child->pt_regs.orig_ax & __X32_SYSCALL_BIT
+ 64-bit !ia32 && !x32
+
+__set_personality_x32() was dropping the TS_COMPAT flag, but
+set_personality_64bit() kept the compat syscall flag, making
+in_compat_syscall() return true during the first exec() syscall.
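+
+For completeness, on x86 the check boils down to roughly the following
+(paraphrased from the kernel headers of that era; not modified by this
+patch):
+
+  static inline bool in_compat_syscall(void)
+  {
+          /* ia32: TS_COMPAT set on the task; x32: __X32_SYSCALL_BIT
+           * set in orig_ax. Neither -> native 64-bit syscall. */
+          return in_ia32_syscall() || in_x32_syscall();
+  }
+
+With TS_COMPAT still set during the 64-bit execve(), in_compat_syscall()
+reports a compat context and mmap() picks mm->mmap_compat_base.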
+
+This in turn has user-visible effects, mentioned by Alexey:
+1) It breaks ASAN
+$ gcc -fsanitize=address wrap.c -o wrap-asan
+$ ./wrap32 ./wrap-asan true
+==1217==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
+==1217==ASan shadow was supposed to be located in the [0x00007fff7000-0x10007fff7fff] range.
+==1217==Process memory map follows:
+ 0x000000400000-0x000000401000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+ 0x000000600000-0x000000601000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+ 0x000000601000-0x000000602000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+ 0x0000f7dbd000-0x0000f7de2000 /lib64/ld-2.27.so
+ 0x0000f7fe2000-0x0000f7fe3000 /lib64/ld-2.27.so
+ 0x0000f7fe3000-0x0000f7fe4000 /lib64/ld-2.27.so
+ 0x0000f7fe4000-0x0000f7fe5000
+ 0x7fed9abff000-0x7fed9af54000
+ 0x7fed9af54000-0x7fed9af6b000 /lib64/libgcc_s.so.1
+[snip]
+
+2) It doesn't seem to be great for security if an attacker always knows
+that ld.so is going to be mapped into the first 4GB in this case
+(the same thing happens for PIEs as well).
+
+The testcase:
+$ cat wrap.c
+
+int main(int argc, char *argv[]) {
+ execvp(argv[1], &argv[1]);
+ return 127;
+}
+
+$ gcc wrap.c -o wrap
+$ LD_SHOW_AUXV=1 ./wrap ./wrap true |& grep AT_BASE
+AT_BASE: 0x7f63b8309000
+AT_BASE: 0x7faec143c000
+AT_BASE: 0x7fbdb25fa000
+
+$ gcc -m32 wrap.c -o wrap32
+$ LD_SHOW_AUXV=1 ./wrap32 ./wrap true |& grep AT_BASE
+AT_BASE: 0xf7eff000
+AT_BASE: 0xf7cee000
+AT_BASE: 0x7f8b9774e000
+
+Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()")
+Fixes: ada26481dfe6 ("x86/mm: Make in_compat_syscall() work during exec")
+Reported-by: Alexey Izbyshev <izbyshev@ispras.ru>
+Bisected-by: Alexander Monakov <amonakov@ispras.ru>
+Investigated-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Alexander Monakov <amonakov@ispras.ru>
+Cc: Dmitry Safonov <0x7f454c46@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: linux-mm@kvack.org
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180517233510.24996-1-dima@arista.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/process_64.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -528,6 +528,7 @@ void set_personality_64bit(void)
+ clear_thread_flag(TIF_X32);
+ /* Pretend that this comes from a 64bit execve */
+ task_pt_regs(current)->orig_ax = __NR_execve;
++ current_thread_info()->status &= ~TS_COMPAT;
+
+ /* Ensure the corresponding mm is not marked. */
+ if (current->mm)
--- /dev/null
+From 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 9 May 2018 10:13:58 -0700
+Subject: x86/pkeys: Do not special case protection key 0
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 upstream.
+
+mm_pkey_is_allocated() treats pkey 0 as unallocated. That is
+inconsistent with the manpages, and also inconsistent with
+mm->context.pkey_allocation_map. Stop special casing it and only
+disallow values that are actually bad (< 0).
+
+The end-user visible effect of this is that you can now use
+mprotect_pkey() to set pkey=0.
+
+This is a bit nicer than what Ram proposed[1] because it is simpler
+and removes special-casing for pkey 0. On the other hand, it does
+allow applications to pkey_free() pkey-0, but that's just a silly
+thing to do, so we are not going to protect against it.
+
+The scenario that could happen is similar to what happens if you free
+any other pkey that is in use: it might get reallocated later and used
+to protect some other data. The most likely scenario is that pkey-0
+comes back from pkey_alloc(), an access-disable or write-disable bit
+is set in PKRU for it, and the next stack access will SIGSEGV. It's
+not horribly different from if you mprotect()'d your stack or heap to
+be unreadable or unwritable, which is generally very foolish, but also
+not explicitly prevented by the kernel.
+
+1. http://lkml.kernel.org/r/1522112702-27853-1-git-send-email-linuxram@us.ibm.com
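+
+As a user-space illustration of the visible change (a hypothetical
+snippet assuming the glibc pkey_mprotect() wrapper; it is not part of
+this patch):
+
+  #define _GNU_SOURCE
+  #include <sys/mman.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+          if (p == MAP_FAILED)
+                  return 1;
+          /* Explicitly passing the default key 0 used to fail with
+           * EINVAL; with this change it is accepted. */
+          if (pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, 0))
+                  perror("pkey_mprotect");
+          return 0;
+  }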
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michael Ellermen <mpe@ellerman.id.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ram Pai <linuxram@us.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: stable@vger.kernel.org
+Fixes: 58ab9a088dda ("x86/pkeys: Check against max pkey to avoid overflows")
+Link: http://lkml.kernel.org/r/20180509171358.47FD785E@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h | 2 +-
+ arch/x86/include/asm/pkeys.h | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -192,7 +192,7 @@ static inline int init_new_context(struc
+
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+- /* pkey 0 is the default and always allocated */
++ /* pkey 0 is the default and allocated implicitly */
+ mm->context.pkey_allocation_map = 0x1;
+ /* -1 means unallocated or invalid */
+ mm->context.execute_only_pkey = -1;
+--- a/arch/x86/include/asm/pkeys.h
++++ b/arch/x86/include/asm/pkeys.h
+@@ -51,10 +51,10 @@ bool mm_pkey_is_allocated(struct mm_stru
+ {
+ /*
+ * "Allocated" pkeys are those that have been returned
+- * from pkey_alloc(). pkey 0 is special, and never
+- * returned from pkey_alloc().
++ * from pkey_alloc() or pkey 0 which is allocated
++ * implicitly when the mm is created.
+ */
+- if (pkey <= 0)
++ if (pkey < 0)
+ return false;
+ if (pkey >= arch_max_pkey())
+ return false;
--- /dev/null
+From 0a0b152083cfc44ec1bb599b57b7aab41327f998 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 9 May 2018 10:13:51 -0700
+Subject: x86/pkeys: Override pkey when moving away from PROT_EXEC
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 0a0b152083cfc44ec1bb599b57b7aab41327f998 upstream.
+
+I got a bug report that the following code (roughly) was
+causing a SIGSEGV:
+
+ mprotect(ptr, size, PROT_EXEC);
+ mprotect(ptr, size, PROT_NONE);
+ mprotect(ptr, size, PROT_READ);
+ *ptr = 100;
+
+The problem is hit when the mprotect(PROT_EXEC) implicitly
+assigns a protection key to the VMA and makes
+that key ACCESS_DENY|WRITE_DENY. The PROT_NONE mprotect()
+fails to remove the protection key, and the PROT_NONE->
+PROT_READ transition leaves the PTE usable, but with the pkey
+still in place, leaving the memory inaccessible.
+
+To fix this, we ensure that we always "override" the pkey
+at mprotect() if the VMA does not have execute-only
+permissions, but the VMA has the execute-only pkey.
+
+We had a check for PROT_READ/WRITE, but it did not work
+for PROT_NONE. This entirely removes the PROT_* checks,
+which ensures that PROT_NONE now works.
+
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michael Ellermen <mpe@ellerman.id.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ram Pai <linuxram@us.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: stable@vger.kernel.org
+Fixes: 62b5f7d013f ("mm/core, x86/mm/pkeys: Add execute-only protection keys support")
+Link: http://lkml.kernel.org/r/20180509171351.084C5A71@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pkeys.h | 12 +++++++++++-
+ arch/x86/mm/pkeys.c | 21 +++++++++++----------
+ 2 files changed, 22 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/pkeys.h
++++ b/arch/x86/include/asm/pkeys.h
+@@ -2,6 +2,8 @@
+ #ifndef _ASM_X86_PKEYS_H
+ #define _ASM_X86_PKEYS_H
+
++#define ARCH_DEFAULT_PKEY 0
++
+ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
+
+ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm
+ static inline int execute_only_pkey(struct mm_struct *mm)
+ {
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+- return 0;
++ return ARCH_DEFAULT_PKEY;
+
+ return __execute_only_pkey(mm);
+ }
+@@ -56,6 +58,14 @@ bool mm_pkey_is_allocated(struct mm_stru
+ return false;
+ if (pkey >= arch_max_pkey())
+ return false;
++ /*
++ * The exec-only pkey is set in the allocation map, but
++ * is not available to any of the user interfaces like
++ * mprotect_pkey().
++ */
++ if (pkey == mm->context.execute_only_pkey)
++ return false;
++
+ return mm_pkey_allocation_map(mm) & (1U << pkey);
+ }
+
+--- a/arch/x86/mm/pkeys.c
++++ b/arch/x86/mm/pkeys.c
+@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct
+ */
+ if (pkey != -1)
+ return pkey;
+- /*
+- * Look for a protection-key-drive execute-only mapping
+- * which is now being given permissions that are not
+- * execute-only. Move it back to the default pkey.
+- */
+- if (vma_is_pkey_exec_only(vma) &&
+- (prot & (PROT_READ|PROT_WRITE))) {
+- return 0;
+- }
++
+ /*
+ * The mapping is execute-only. Go try to get the
+ * execute-only protection key. If we fail to do that,
+ * fall through as if we do not have execute-only
+- * support.
++ * support in this mm.
+ */
+ if (prot == PROT_EXEC) {
+ pkey = execute_only_pkey(vma->vm_mm);
+ if (pkey > 0)
+ return pkey;
++ } else if (vma_is_pkey_exec_only(vma)) {
++ /*
++ * Protections are *not* PROT_EXEC, but the mapping
++ * is using the exec-only pkey. This mapping was
++ * PROT_EXEC and will no longer be. Move back to
++ * the default pkey.
++ */
++ return ARCH_DEFAULT_PKEY;
+ }
++
+ /*
+ * This is a vanilla, non-pkey mprotect (or we failed to
+ * setup execute-only), inherit the pkey from the VMA we