]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.16-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 21 May 2018 06:41:50 +0000 (08:41 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 21 May 2018 06:41:50 +0000 (08:41 +0200)
added patches:
arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch
arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch
arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch
arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch
btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch
btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch
btrfs-fix-xattr-loss-after-power-failure.patch
btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch
btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch
efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch
tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch
x86-apic-x2apic-initialize-cluster-id-properly.patch
x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch
x86-pkeys-do-not-special-case-protection-key-0.patch
x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch

16 files changed:
queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch [new file with mode: 0644]
queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch [new file with mode: 0644]
queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch [new file with mode: 0644]
queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch [new file with mode: 0644]
queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch [new file with mode: 0644]
queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch [new file with mode: 0644]
queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch [new file with mode: 0644]
queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch [new file with mode: 0644]
queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch [new file with mode: 0644]
queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch [new file with mode: 0644]
queue-4.16/series
queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch [new file with mode: 0644]
queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch [new file with mode: 0644]
queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch [new file with mode: 0644]
queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch [new file with mode: 0644]
queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch [new file with mode: 0644]

diff --git a/queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch b/queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch
new file mode 100644 (file)
index 0000000..42e72ed
--- /dev/null
@@ -0,0 +1,54 @@
+From 69af7e23a6870df2ea6fa79ca16493d59b3eebeb Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:03:54 +0100
+Subject: ARM: 8769/1: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 69af7e23a6870df2ea6fa79ca16493d59b3eebeb upstream.
+
+Since get_kprobe_ctlblk() uses smp_processor_id() to access
+per-cpu variable, it hits smp_processor_id sanity check as below.
+
+[    7.006928] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
+[    7.007859] caller is debug_smp_processor_id+0x20/0x24
+[    7.008438] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1-00192-g4eb17253e4b5 #1
+[    7.008890] Hardware name: Generic DT based system
+[    7.009917] [<c0313f0c>] (unwind_backtrace) from [<c030e6d8>] (show_stack+0x20/0x24)
+[    7.010473] [<c030e6d8>] (show_stack) from [<c0c64694>] (dump_stack+0x84/0x98)
+[    7.010990] [<c0c64694>] (dump_stack) from [<c071ca5c>] (check_preemption_disabled+0x138/0x13c)
+[    7.011592] [<c071ca5c>] (check_preemption_disabled) from [<c071ca80>] (debug_smp_processor_id+0x20/0x24)
+[    7.012214] [<c071ca80>] (debug_smp_processor_id) from [<c03335e0>] (optimized_callback+0x2c/0xe4)
+[    7.013077] [<c03335e0>] (optimized_callback) from [<bf0021b0>] (0xbf0021b0)
+
+To fix this issue, call get_kprobe_ctlblk() right after
+IRQs are disabled, since that also disables preemption.
+
+Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/probes/kprobes/opt-arm.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/probes/kprobes/opt-arm.c
++++ b/arch/arm/probes/kprobes/opt-arm.c
+@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kpro
+ {
+       unsigned long flags;
+       struct kprobe *p = &op->kp;
+-      struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
++      struct kprobe_ctlblk *kcb;
+       /* Save skipped registers */
+       regs->ARM_pc = (unsigned long)op->kp.addr;
+       regs->ARM_ORIG_r0 = ~0UL;
+       local_irq_save(flags);
++      kcb = get_kprobe_ctlblk();
+       if (kprobe_running()) {
+               kprobes_inc_nmissed_count(&op->kp);
diff --git a/queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch b/queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch
new file mode 100644 (file)
index 0000000..f3afa83
--- /dev/null
@@ -0,0 +1,34 @@
+From 70948c05fdde0aac32f9667856a88725c192fa40 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:10 +0100
+Subject: ARM: 8770/1: kprobes: Prohibit probing on optimized_callback
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 70948c05fdde0aac32f9667856a88725c192fa40 upstream.
+
+Prohibit probing on optimized_callback() because
+it is called from kprobes itself. If we put a kprobes
+on it, that will cause a recursive call loop.
+Mark it NOKPROBE_SYMBOL.
+
+Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/probes/kprobes/opt-arm.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/probes/kprobes/opt-arm.c
++++ b/arch/arm/probes/kprobes/opt-arm.c
+@@ -192,6 +192,7 @@ optimized_callback(struct optimized_kpro
+       local_irq_restore(flags);
+ }
++NOKPROBE_SYMBOL(optimized_callback)
+ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
+ {
diff --git a/queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch b/queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch
new file mode 100644 (file)
index 0000000..061bd3f
--- /dev/null
@@ -0,0 +1,52 @@
+From eb0146daefdde65665b7f076fbff7b49dade95b9 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:16 +0100
+Subject: ARM: 8771/1: kprobes: Prohibit kprobes on do_undefinstr
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit eb0146daefdde65665b7f076fbff7b49dade95b9 upstream.
+
+Prohibit kprobes on do_undefinstr because kprobes on
+arm is implemented by undefined instruction. This means
+if we probe do_undefinstr(), it can cause an infinite
+recursive exception.
+
+Fixes: 24ba613c9d6c ("ARM kprobes: core code")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kernel/traps.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/kernel/traps.c
++++ b/arch/arm/kernel/traps.c
+@@ -19,6 +19,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/hardirq.h>
+ #include <linux/kdebug.h>
++#include <linux/kprobes.h>
+ #include <linux/module.h>
+ #include <linux/kexec.h>
+ #include <linux/bug.h>
+@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_
+       raw_spin_unlock_irqrestore(&undef_lock, flags);
+ }
+-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
++static nokprobe_inline
++int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+ {
+       struct undef_hook *hook;
+       unsigned long flags;
+@@ -490,6 +492,7 @@ die_sig:
+       arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
+ }
++NOKPROBE_SYMBOL(do_undefinstr)
+ /*
+  * Handle FIQ similarly to NMI on x86 systems.
diff --git a/queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch b/queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch
new file mode 100644 (file)
index 0000000..a5bae2c
--- /dev/null
@@ -0,0 +1,118 @@
+From 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Sun, 13 May 2018 05:04:29 +0100
+Subject: ARM: 8772/1: kprobes: Prohibit kprobes on get_user functions
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c upstream.
+
+Since do_undefinstr() uses get_user to get the undefined
+instruction, it can be called before kprobes processes
+recursive check. This can cause an infinite recursive
+exception.
+Prohibit probing on get_user functions.
+
+Fixes: 24ba613c9d6c ("ARM kprobes: core code")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/assembler.h |   10 ++++++++++
+ arch/arm/lib/getuser.S           |   10 ++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -536,4 +536,14 @@ THUMB(    orr     \reg , \reg , #PSR_T_BIT        )
+ #endif
+       .endm
++#ifdef CONFIG_KPROBES
++#define _ASM_NOKPROBE(entry)                          \
++      .pushsection "_kprobe_blacklist", "aw" ;        \
++      .balign 4 ;                                     \
++      .long entry;                                    \
++      .popsection
++#else
++#define _ASM_NOKPROBE(entry)
++#endif
++
+ #endif /* __ASM_ASSEMBLER_H__ */
+--- a/arch/arm/lib/getuser.S
++++ b/arch/arm/lib/getuser.S
+@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_1)
++_ASM_NOKPROBE(__get_user_1)
+ ENTRY(__get_user_2)
+       check_uaccess r0, 2, r1, r2, __get_user_bad
+@@ -58,6 +59,7 @@ rb   .req    r0
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_2)
++_ASM_NOKPROBE(__get_user_2)
+ ENTRY(__get_user_4)
+       check_uaccess r0, 4, r1, r2, __get_user_bad
+@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_4)
++_ASM_NOKPROBE(__get_user_4)
+ ENTRY(__get_user_8)
+       check_uaccess r0, 8, r1, r2, __get_user_bad8
+@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_8)
++_ASM_NOKPROBE(__get_user_8)
+ #ifdef __ARMEB__
+ ENTRY(__get_user_32t_8)
+@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_32t_8)
++_ASM_NOKPROBE(__get_user_32t_8)
+ ENTRY(__get_user_64t_1)
+       check_uaccess r0, 1, r1, r2, __get_user_bad8
+@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_64t_1)
++_ASM_NOKPROBE(__get_user_64t_1)
+ ENTRY(__get_user_64t_2)
+       check_uaccess r0, 2, r1, r2, __get_user_bad8
+@@ -114,6 +120,7 @@ rb .req    r0
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_64t_2)
++_ASM_NOKPROBE(__get_user_64t_2)
+ ENTRY(__get_user_64t_4)
+       check_uaccess r0, 4, r1, r2, __get_user_bad8
+@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
+       mov     r0, #0
+       ret     lr
+ ENDPROC(__get_user_64t_4)
++_ASM_NOKPROBE(__get_user_64t_4)
+ #endif
+ __get_user_bad8:
+@@ -131,6 +139,8 @@ __get_user_bad:
+       ret     lr
+ ENDPROC(__get_user_bad)
+ ENDPROC(__get_user_bad8)
++_ASM_NOKPROBE(__get_user_bad)
++_ASM_NOKPROBE(__get_user_bad8)
+ .pushsection __ex_table, "a"
+       .long   1b, __get_user_bad
diff --git a/queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch b/queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch
new file mode 100644 (file)
index 0000000..7e75796
--- /dev/null
@@ -0,0 +1,59 @@
+From 02ee654d3a04563c67bfe658a05384548b9bb105 Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Thu, 17 May 2018 15:16:51 +0800
+Subject: btrfs: fix crash when trying to resume balance without the resume flag
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 02ee654d3a04563c67bfe658a05384548b9bb105 upstream.
+
+We set the BTRFS_BALANCE_RESUME flag in the btrfs_recover_balance()
+only, which isn't called during the remount. So when resuming from
+the paused balance we hit the bug:
+
+ kernel: kernel BUG at fs/btrfs/volumes.c:3890!
+ ::
+ kernel:  balance_kthread+0x51/0x60 [btrfs]
+ kernel:  kthread+0x111/0x130
+ ::
+ kernel: RIP: btrfs_balance+0x12e1/0x1570 [btrfs] RSP: ffffba7d0090bde8
+
+Reproducer:
+  On a mounted filesystem:
+
+  btrfs balance start --full-balance /btrfs
+  btrfs balance pause /btrfs
+  mount -o remount,ro /dev/sdb /btrfs
+  mount -o remount,rw /dev/sdb /btrfs
+
+To fix this set the BTRFS_BALANCE_RESUME flag in
+btrfs_resume_balance_async().
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4046,6 +4046,15 @@ int btrfs_resume_balance_async(struct bt
+               return 0;
+       }
++      /*
++       * A ro->rw remount sequence should continue with the paused balance
++       * regardless of who pauses it, system or the user as of now, so set
++       * the resume flag.
++       */
++      spin_lock(&fs_info->balance_lock);
++      fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
++      spin_unlock(&fs_info->balance_lock);
++
+       tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+       return PTR_ERR_OR_ZERO(tsk);
+ }
diff --git a/queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch b/queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch
new file mode 100644 (file)
index 0000000..939bf88
--- /dev/null
@@ -0,0 +1,131 @@
+From fe816d0f1d4c31c4c31d42ca78a87660565fc800 Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Fri, 27 Apr 2018 12:21:53 +0300
+Subject: btrfs: Fix delalloc inodes invalidation during transaction abort
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit fe816d0f1d4c31c4c31d42ca78a87660565fc800 upstream.
+
+When a transaction is aborted btrfs_cleanup_transaction is called to
+clean up all the various in-flight bits and pieces which might be
+active. One of those is delalloc inodes - inodes which have dirty
+pages which haven't been persisted yet. Currently the process of
+freeing such delalloc inodes in exceptional circumstances such as
+transaction abort boiled down to calling btrfs_invalidate_inodes whose
+sole job is to invalidate the dentries for all inodes related to a
+root. This is in fact wrong and insufficient since such delalloc inodes
+will likely have pending pages or ordered-extents and will be linked to
+the sb->s_inode_list. This means that unmounting a btrfs instance with
+an aborted transaction could potentially leave inodes/their pages
+visible to the system long after their superblock has been freed. This
+in turn leads to a "use-after-free" situation once page shrink is
+triggered. This situation could be simulated by running generic/019
+which would cause such inodes to be left hanging, followed by
+generic/176 which causes memory pressure and page eviction which lead
+to touching the freed super block instance. This situation is
+additionally detected by the unmount code of VFS with the following
+message:
+
+"VFS: Busy inodes after unmount of Self-destruct in 5 seconds.  Have a nice day..."
+
+Additionally btrfs hits WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+in free_fs_root for the same reason.
+
+This patch aims to rectify the situation by doing the following:
+
+1. Change btrfs_destroy_delalloc_inodes so that it calls
+invalidate_inode_pages2 for every inode on the delalloc list, this
+ensures that all the pages of the inode are released. This function
+boils down to calling btrfs_releasepage. During test I observed cases
+where inodes on the delalloc list were having an i_count of 0, so this
+necessitates using igrab to be sure we are working on a non-freed inode.
+
+2. Since calling btrfs_releasepage might queue delayed iputs move the
+call out to btrfs_cleanup_transaction in btrfs_error_commit_super before
+calling run_delayed_iputs for the last time. This is necessary to ensure
+that delayed iputs are run.
+
+Note: this patch is tagged for 4.14 stable but the fix applies to older
+versions too but needs to be backported manually due to conflicts.
+
+CC: stable@vger.kernel.org # 4.14.x: 2b8773313494: btrfs: Split btrfs_del_delalloc_inode into 2 functions
+CC: stable@vger.kernel.org # 4.14.x
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add comment to igrab ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3744,6 +3744,7 @@ void close_ctree(struct btrfs_fs_info *f
+       set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
+       btrfs_free_qgroup_config(fs_info);
++      ASSERT(list_empty(&fs_info->delalloc_roots));
+       if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
+               btrfs_info(fs_info, "at unmount delalloc count %lld",
+@@ -4049,15 +4050,15 @@ static int btrfs_check_super_valid(struc
+ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
+ {
++      /* cleanup FS via transaction */
++      btrfs_cleanup_transaction(fs_info);
++
+       mutex_lock(&fs_info->cleaner_mutex);
+       btrfs_run_delayed_iputs(fs_info);
+       mutex_unlock(&fs_info->cleaner_mutex);
+       down_write(&fs_info->cleanup_work_sem);
+       up_write(&fs_info->cleanup_work_sem);
+-
+-      /* cleanup FS via transaction */
+-      btrfs_cleanup_transaction(fs_info);
+ }
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
+@@ -4182,19 +4183,23 @@ static void btrfs_destroy_delalloc_inode
+       list_splice_init(&root->delalloc_inodes, &splice);
+       while (!list_empty(&splice)) {
++              struct inode *inode = NULL;
+               btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
+                                              delalloc_inodes);
+-
+-              list_del_init(&btrfs_inode->delalloc_inodes);
+-              clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+-                        &btrfs_inode->runtime_flags);
++              __btrfs_del_delalloc_inode(root, btrfs_inode);
+               spin_unlock(&root->delalloc_lock);
+-              btrfs_invalidate_inodes(btrfs_inode->root);
+-
++              /*
++               * Make sure we get a live inode and that it'll not disappear
++               * meanwhile.
++               */
++              inode = igrab(&btrfs_inode->vfs_inode);
++              if (inode) {
++                      invalidate_inode_pages2(inode->i_mapping);
++                      iput(inode);
++              }
+               spin_lock(&root->delalloc_lock);
+       }
+-
+       spin_unlock(&root->delalloc_lock);
+ }
+@@ -4210,7 +4215,6 @@ static void btrfs_destroy_all_delalloc_i
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                        delalloc_root);
+-              list_del_init(&root->delalloc_root);
+               root = btrfs_grab_fs_root(root);
+               BUG_ON(!root);
+               spin_unlock(&fs_info->delalloc_root_lock);
diff --git a/queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch b/queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch
new file mode 100644 (file)
index 0000000..b31c751
--- /dev/null
@@ -0,0 +1,80 @@
+From 9a8fca62aacc1599fea8e813d01e1955513e4fad Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 11 May 2018 16:42:42 +0100
+Subject: Btrfs: fix xattr loss after power failure
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 9a8fca62aacc1599fea8e813d01e1955513e4fad upstream.
+
+If a file has xattrs, we fsync it, to ensure we clear the flags
+BTRFS_INODE_NEEDS_FULL_SYNC and BTRFS_INODE_COPY_EVERYTHING from its
+inode, the current transaction commits and then we fsync it (without
+either of those bits being set in its inode), we end up not logging
+all its xattrs. This results in deleting all xattrs when replaying the
+log after a power failure.
+
+Trivial reproducer
+
+  $ mkfs.btrfs -f /dev/sdb
+  $ mount /dev/sdb /mnt
+
+  $ touch /mnt/foobar
+  $ setfattr -n user.xa -v qwerty /mnt/foobar
+  $ xfs_io -c "fsync" /mnt/foobar
+
+  $ sync
+
+  $ xfs_io -c "pwrite -S 0xab 0 64K" /mnt/foobar
+  $ xfs_io -c "fsync" /mnt/foobar
+  <power failure>
+
+  $ mount /dev/sdb /mnt
+  $ getfattr --absolute-names --dump /mnt/foobar
+  <empty output>
+  $
+
+So fix this by making sure all xattrs are logged if we log a file's inode
+item and neither the flags BTRFS_INODE_NEEDS_FULL_SYNC nor
+BTRFS_INODE_COPY_EVERYTHING were set in the inode.
+
+Fixes: 36283bf777d9 ("Btrfs: fix fsync xattr loss in the fast fsync path")
+Cc: <stable@vger.kernel.org> # 4.2+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4749,6 +4749,7 @@ static int btrfs_log_inode(struct btrfs_
+       struct extent_map_tree *em_tree = &inode->extent_tree;
+       u64 logged_isize = 0;
+       bool need_log_inode_item = true;
++      bool xattrs_logged = false;
+       path = btrfs_alloc_path();
+       if (!path)
+@@ -5050,6 +5051,7 @@ next_key:
+       err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+       if (err)
+               goto out_unlock;
++      xattrs_logged = true;
+       if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+               btrfs_release_path(path);
+               btrfs_release_path(dst_path);
+@@ -5062,6 +5064,11 @@ log_extents:
+       btrfs_release_path(dst_path);
+       if (need_log_inode_item) {
+               err = log_inode_item(trans, log, dst_path, inode);
++              if (!err && !xattrs_logged) {
++                      err = btrfs_log_all_xattrs(trans, root, inode, path,
++                                                 dst_path);
++                      btrfs_release_path(path);
++              }
+               if (err)
+                       goto out_unlock;
+       }
diff --git a/queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch b/queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch
new file mode 100644 (file)
index 0000000..0be6b44
--- /dev/null
@@ -0,0 +1,75 @@
+From 1a63c198ddb810c790101d693c7071cca703b3c7 Mon Sep 17 00:00:00 2001
+From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Date: Tue, 15 May 2018 16:51:26 +0900
+Subject: btrfs: property: Set incompat flag if lzo/zstd compression is set
+
+From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
+
+commit 1a63c198ddb810c790101d693c7071cca703b3c7 upstream.
+
+Incompat flag of LZO/ZSTD compression should be set at:
+
+ 1. mount time (-o compress/compress-force)
+ 2. when defrag is done
+ 3. when property is set
+
+Currently 3. is missing and this commit adds this.
+
+This could lead to a filesystem that uses ZSTD but is not marked as
+such. If a kernel without ZSTD support encounters a ZSTD compressed
+extent, it will handle that but this could be confusing to the user.
+
+Typically the filesystem is mounted with the ZSTD option, but the
+discrepancy can arise when a filesystem is never mounted with ZSTD and
+then the property on some file is set (and some new extents are
+written). A simple mount with -o compress=zstd will fix that up on an
+unpatched kernel.
+
+Same goes for LZO, but this has been around for a very long time
+(2.6.37) so it's unlikely that a pre-LZO kernel would be used.
+
+Fixes: 5c1aab1dd544 ("btrfs: Add zstd support")
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ add user visible impact ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/props.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/props.c
++++ b/fs/btrfs/props.c
+@@ -393,6 +393,7 @@ static int prop_compression_apply(struct
+                                 const char *value,
+                                 size_t len)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       int type;
+       if (len == 0) {
+@@ -403,14 +404,17 @@ static int prop_compression_apply(struct
+               return 0;
+       }
+-      if (!strncmp("lzo", value, 3))
++      if (!strncmp("lzo", value, 3)) {
+               type = BTRFS_COMPRESS_LZO;
+-      else if (!strncmp("zlib", value, 4))
++              btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
++      } else if (!strncmp("zlib", value, 4)) {
+               type = BTRFS_COMPRESS_ZLIB;
+-      else if (!strncmp("zstd", value, len))
++      } else if (!strncmp("zstd", value, len)) {
+               type = BTRFS_COMPRESS_ZSTD;
+-      else
++              btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
++      } else {
+               return -EINVAL;
++      }
+       BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+       BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
diff --git a/queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch b/queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch
new file mode 100644 (file)
index 0000000..b91f0b6
--- /dev/null
@@ -0,0 +1,136 @@
+From 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 Mon Sep 17 00:00:00 2001
+From: Robbie Ko <robbieko@synology.com>
+Date: Mon, 14 May 2018 10:51:34 +0800
+Subject: Btrfs: send, fix invalid access to commit roots due to concurrent snapshotting
+
+From: Robbie Ko <robbieko@synology.com>
+
+commit 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 upstream.
+
+[BUG]
+btrfs incremental send BUG happens when creating a snapshot of snapshot
+that is being used by send.
+
+[REASON]
+The problem can happen if while we are doing a send one of the snapshots
+used (parent or send) is snapshotted, because snapshotting implies COWing
+the root of the source subvolume/snapshot.
+
+1. When doing an incremental send, the send process will get the commit
+   roots from the parent and send snapshots, and add references to them
+   through extent_buffer_get().
+
+2. When a snapshot/subvolume is snapshotted, its root node is COWed
+   (transaction.c:create_pending_snapshot()).
+
+3. COWing releases the space used by the node immediately, through:
+
+   __btrfs_cow_block()
+   --btrfs_free_tree_block()
+   ----btrfs_add_free_space(bytenr of node)
+
+4. Because send doesn't hold a transaction open, it's possible that
+   the transaction used to create the snapshot commits, switches the
+   commit root and the old space used by the previous root node gets
+   assigned to some other node allocation. Allocation of a new node will
+   use the existing extent buffer found in memory, which we previously
+   got a reference through extent_buffer_get(), and allow the extent
+   buffer's content (pages) to be modified:
+
+   btrfs_alloc_tree_block
+   --btrfs_reserve_extent
+   ----find_free_extent (get bytenr of old node)
+   --btrfs_init_new_buffer (use bytenr of old node)
+   ----btrfs_find_create_tree_block
+   ------alloc_extent_buffer
+   --------find_extent_buffer (get old node)
+
+5. So send can access invalid memory content and have unpredictable
+   behaviour.
+
+[FIX]
+So we fix the problem by copying the commit roots of the send and
+parent snapshots and use those copies.
+
+CallTrace looks like this:
+ ------------[ cut here ]------------
+ kernel BUG at fs/btrfs/ctree.c:1861!
+ invalid opcode: 0000 [#1] SMP
+ CPU: 6 PID: 24235 Comm: btrfs Tainted: P           O 3.10.105 #23721
+ ffff88046652d680 ti: ffff88041b720000 task.ti: ffff88041b720000
+ RIP: 0010:[<ffffffffa08dd0e8>] read_node_slot+0x108/0x110 [btrfs]
+ RSP: 0018:ffff88041b723b68  EFLAGS: 00010246
+ RAX: ffff88043ca6b000 RBX: ffff88041b723c50 RCX: ffff880000000000
+ RDX: 000000000000004c RSI: ffff880314b133f8 RDI: ffff880458b24000
+ RBP: 0000000000000000 R08: 0000000000000001 R09: ffff88041b723c66
+ R10: 0000000000000001 R11: 0000000000001000 R12: ffff8803f3e48890
+ R13: ffff8803f3e48880 R14: ffff880466351800 R15: 0000000000000001
+ FS:  00007f8c321dc8c0(0000) GS:ffff88047fcc0000(0000)
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ R2: 00007efd1006d000 CR3: 0000000213a24000 CR4: 00000000003407e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Stack:
+ ffff88041b723c50 ffff8803f3e48880 ffff8803f3e48890 ffff8803f3e48880
+ ffff880466351800 0000000000000001 ffffffffa08dd9d7 ffff88041b723c50
+ ffff8803f3e48880 ffff88041b723c66 ffffffffa08dde85 a9ff88042d2c4400
+ Call Trace:
+ [<ffffffffa08dd9d7>] ? tree_move_down.isra.33+0x27/0x50 [btrfs]
+ [<ffffffffa08dde85>] ? tree_advance+0xb5/0xc0 [btrfs]
+ [<ffffffffa08e83d4>] ? btrfs_compare_trees+0x2d4/0x760 [btrfs]
+ [<ffffffffa0982050>] ? finish_inode_if_needed+0x870/0x870 [btrfs]
+ [<ffffffffa09841ea>] ? btrfs_ioctl_send+0xeda/0x1050 [btrfs]
+ [<ffffffffa094bd3d>] ? btrfs_ioctl+0x1e3d/0x33f0 [btrfs]
+ [<ffffffff81111133>] ? handle_pte_fault+0x373/0x990
+ [<ffffffff8153a096>] ? atomic_notifier_call_chain+0x16/0x20
+ [<ffffffff81063256>] ? set_task_cpu+0xb6/0x1d0
+ [<ffffffff811122c3>] ? handle_mm_fault+0x143/0x2a0
+ [<ffffffff81539cc0>] ? __do_page_fault+0x1d0/0x500
+ [<ffffffff81062f07>] ? check_preempt_curr+0x57/0x90
+ [<ffffffff8115075a>] ? do_vfs_ioctl+0x4aa/0x990
+ [<ffffffff81034f83>] ? do_fork+0x113/0x3b0
+ [<ffffffff812dd7d7>] ? trace_hardirqs_off_thunk+0x3a/0x6c
+ [<ffffffff81150cc8>] ? SyS_ioctl+0x88/0xa0
+ [<ffffffff8153e422>] ? system_call_fastpath+0x16/0x1b
+ ---[ end trace 29576629ee80b2e1 ]---
+
+Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function")
+CC: stable@vger.kernel.org # 3.6+
+Signed-off-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -5460,12 +5460,24 @@ int btrfs_compare_trees(struct btrfs_roo
+       down_read(&fs_info->commit_root_sem);
+       left_level = btrfs_header_level(left_root->commit_root);
+       left_root_level = left_level;
+-      left_path->nodes[left_level] = left_root->commit_root;
++      left_path->nodes[left_level] =
++                      btrfs_clone_extent_buffer(left_root->commit_root);
++      if (!left_path->nodes[left_level]) {
++              up_read(&fs_info->commit_root_sem);
++              ret = -ENOMEM;
++              goto out;
++      }
+       extent_buffer_get(left_path->nodes[left_level]);
+       right_level = btrfs_header_level(right_root->commit_root);
+       right_root_level = right_level;
+-      right_path->nodes[right_level] = right_root->commit_root;
++      right_path->nodes[right_level] =
++                      btrfs_clone_extent_buffer(right_root->commit_root);
++      if (!right_path->nodes[right_level]) {
++              up_read(&fs_info->commit_root_sem);
++              ret = -ENOMEM;
++              goto out;
++      }
+       extent_buffer_get(right_path->nodes[right_level]);
+       up_read(&fs_info->commit_root_sem);
diff --git a/queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch b/queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch
new file mode 100644 (file)
index 0000000..e9e05e4
--- /dev/null
@@ -0,0 +1,81 @@
+From 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Fri, 4 May 2018 07:59:58 +0200
+Subject: efi: Avoid potential crashes, fix the 'struct efi_pci_io_protocol_32' definition for mixed mode
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 upstream.
+
+Mixed mode allows a kernel built for x86_64 to interact with 32-bit
+EFI firmware, but requires us to define all struct definitions carefully
+when it comes to pointer sizes.
+
+'struct efi_pci_io_protocol_32' currently uses a 'void *' for the
+'romimage' field, which will be interpreted as a 64-bit field
+on such kernels, potentially resulting in bogus memory references
+and subsequent crashes.
+
+Tested-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: <stable@vger.kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-efi@vger.kernel.org
+Link: http://lkml.kernel.org/r/20180504060003.19618-13-ard.biesheuvel@linaro.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/boot/compressed/eboot.c |    6 ++++--
+ include/linux/efi.h              |    8 ++++----
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/boot/compressed/eboot.c
++++ b/arch/x86/boot/compressed/eboot.c
+@@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32
+       if (status != EFI_SUCCESS)
+               goto free_struct;
+-      memcpy(rom->romdata, pci->romimage, pci->romsize);
++      memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
++             pci->romsize);
+       return status;
+ free_struct:
+@@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64
+       if (status != EFI_SUCCESS)
+               goto free_struct;
+-      memcpy(rom->romdata, pci->romimage, pci->romsize);
++      memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
++             pci->romsize);
+       return status;
+ free_struct:
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -395,8 +395,8 @@ typedef struct {
+       u32 attributes;
+       u32 get_bar_attributes;
+       u32 set_bar_attributes;
+-      uint64_t romsize;
+-      void *romimage;
++      u64 romsize;
++      u32 romimage;
+ } efi_pci_io_protocol_32;
+ typedef struct {
+@@ -415,8 +415,8 @@ typedef struct {
+       u64 attributes;
+       u64 get_bar_attributes;
+       u64 set_bar_attributes;
+-      uint64_t romsize;
+-      void *romimage;
++      u64 romsize;
++      u64 romimage;
+ } efi_pci_io_protocol_64;
+ typedef struct {
index 4e31f8578b223c00298d07fd44316caae654895f..b72f836aa0a1a5e5604c337104f73c1cbfb183e5 100644 (file)
@@ -42,3 +42,18 @@ s390-cpum_sf-ensure-sample-frequency-of-perf-event-attributes-is-non-zero.patch
 s390-qdio-don-t-release-memory-in-qdio_setup_irq.patch
 s390-remove-indirect-branch-from-do_softirq_own_stack.patch
 bcache-return-0-from-bch_debug_init-if-config_debug_fs-n.patch
+x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch
+x86-pkeys-do-not-special-case-protection-key-0.patch
+efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch
+arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch
+x86-apic-x2apic-initialize-cluster-id-properly.patch
+x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch
+tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch
+arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch
+arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch
+arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch
+btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch
+btrfs-fix-xattr-loss-after-power-failure.patch
+btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch
+btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch
+btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch
diff --git a/queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch b/queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch
new file mode 100644 (file)
index 0000000..8691c69
--- /dev/null
@@ -0,0 +1,60 @@
+From 5596fe34495cf0f645f417eb928ef224df3e3cb4 Mon Sep 17 00:00:00 2001
+From: Dexuan Cui <decui@microsoft.com>
+Date: Tue, 15 May 2018 19:52:50 +0000
+Subject: tick/broadcast: Use for_each_cpu() specially on UP kernels
+
+From: Dexuan Cui <decui@microsoft.com>
+
+commit 5596fe34495cf0f645f417eb928ef224df3e3cb4 upstream.
+
+for_each_cpu() unintuitively reports CPU0 as set independent of the actual
+cpumask content on UP kernels. This causes an unexpected PIT interrupt
+storm on a UP kernel running in an SMP virtual machine on Hyper-V, and as
+a result, the virtual machine can suffer from a strange random delay of 1~20
+minutes during boot-up, and sometimes it can hang forever.
+
+Protect it by checking whether the cpumask is empty before entering the
+for_each_cpu() loop.
+
+[ tglx: Use !IS_ENABLED(CONFIG_SMP) instead of #ifdeffery ]
+
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Josh Poulson <jopoulso@microsoft.com>
+Cc: "Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: Rakib Mullick <rakib.mullick@gmail.com>
+Cc: Jork Loeser <Jork.Loeser@microsoft.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: KY Srinivasan <kys@microsoft.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Alexey Dobriyan <adobriyan@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Link: https://lkml.kernel.org/r/KL1P15301MB000678289FE55BA365B3279ABF990@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM
+Link: https://lkml.kernel.org/r/KL1P15301MB0006FA63BC22BEB64902EAA0BF930@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/tick-broadcast.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcas
+       now = ktime_get();
+       /* Find all expired events */
+       for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
++              /*
++               * Required for !SMP because for_each_cpu() reports
++               * unconditionally CPU0 as set on UP kernels.
++               */
++              if (!IS_ENABLED(CONFIG_SMP) &&
++                  cpumask_empty(tick_broadcast_oneshot_mask))
++                      break;
++
+               td = &per_cpu(tick_cpu_device, cpu);
+               if (td->evtdev->next_event <= now) {
+                       cpumask_set_cpu(cpu, tmpmask);
diff --git a/queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch b/queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch
new file mode 100644 (file)
index 0000000..f9dee58
--- /dev/null
@@ -0,0 +1,48 @@
+From fed71f7d98795ed0fa1d431910787f0f4a68324f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 17 May 2018 14:36:39 +0200
+Subject: x86/apic/x2apic: Initialize cluster ID properly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit fed71f7d98795ed0fa1d431910787f0f4a68324f upstream.
+
+Rick bisected a regression on large systems which use the x2apic cluster
+mode for interrupt delivery to the commit which reworked the cluster
+management.
+
+The problem is caused by a missing initialization of the clusterid field
+in the shared cluster data structures. So all structures end up with
+cluster ID 0 which only allows sharing between all CPUs which belong to
+cluster 0. All other CPUs with a cluster ID > 0 cannot share the data
+structure because they cannot find existing data with their cluster
+ID. This causes malfunction with IPIs because IPIs are sent to the wrong
+cluster and the caller waits forever for the target CPU to handle the IPI.
+
+Add the missing initialization when an upcoming CPU is the first in a
+cluster so that the later booting CPUs can find the data and share it for
+proper operation.
+
+Fixes: 023a611748fd ("x86/apic/x2apic: Simplify cluster management")
+Reported-by: Rick Warner <rick@microway.com>
+Bisected-by: Rick Warner <rick@microway.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Rick Warner <rick@microway.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1805171418210.1947@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/x2apic_cluster.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/apic/x2apic_cluster.c
++++ b/arch/x86/kernel/apic/x2apic_cluster.c
+@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
+                       goto update;
+       }
+       cmsk = cluster_hotplug_mask;
++      cmsk->clusterid = cluster;
+       cluster_hotplug_mask = NULL;
+ update:
+       this_cpu_write(cluster_masks, cmsk);
diff --git a/queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch b/queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch
new file mode 100644 (file)
index 0000000..b70fd4a
--- /dev/null
@@ -0,0 +1,101 @@
+From acf46020012ccbca1172e9c7aeab399c950d9212 Mon Sep 17 00:00:00 2001
+From: Dmitry Safonov <dima@arista.com>
+Date: Fri, 18 May 2018 00:35:10 +0100
+Subject: x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
+
+From: Dmitry Safonov <dima@arista.com>
+
+commit acf46020012ccbca1172e9c7aeab399c950d9212 upstream.
+
+The x86 mmap() code selects the mmap base for an allocation depending on
+the bitness of the syscall. For 64bit syscalls it selects mm->mmap_base and
+for 32bit mm->mmap_compat_base.
+
+exec() calls mmap() which in turn uses in_compat_syscall() to check whether
+the mapping is for a 32bit or a 64bit task. The decision is made on the
+following criteria:
+
+  ia32    child->thread.status & TS_COMPAT
+   x32    child->pt_regs.orig_ax & __X32_SYSCALL_BIT
+  ia64    !ia32 && !x32
+
+__set_personality_x32() was dropping TS_COMPAT flag, but
+set_personality_64bit() has kept compat syscall flag making
+in_compat_syscall() return true during the first exec() syscall.
+
+As a result, this has user-visible effects, mentioned by Alexey:
+1) It breaks ASAN
+$ gcc -fsanitize=address wrap.c -o wrap-asan
+$ ./wrap32 ./wrap-asan true
+==1217==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
+==1217==ASan shadow was supposed to be located in the [0x00007fff7000-0x10007fff7fff] range.
+==1217==Process memory map follows:
+        0x000000400000-0x000000401000   /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+        0x000000600000-0x000000601000   /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+        0x000000601000-0x000000602000   /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan
+        0x0000f7dbd000-0x0000f7de2000   /lib64/ld-2.27.so
+        0x0000f7fe2000-0x0000f7fe3000   /lib64/ld-2.27.so
+        0x0000f7fe3000-0x0000f7fe4000   /lib64/ld-2.27.so
+        0x0000f7fe4000-0x0000f7fe5000
+        0x7fed9abff000-0x7fed9af54000
+        0x7fed9af54000-0x7fed9af6b000   /lib64/libgcc_s.so.1
+[snip]
+
+2) It doesn't seem to be great for security if an attacker always knows
+that ld.so is going to be mapped into the first 4GB in this case
+(the same thing happens for PIEs as well).
+
+The testcase:
+$ cat wrap.c
+
+int main(int argc, char *argv[]) {
+  execvp(argv[1], &argv[1]);
+  return 127;
+}
+
+$ gcc wrap.c -o wrap
+$ LD_SHOW_AUXV=1 ./wrap ./wrap true |& grep AT_BASE
+AT_BASE:         0x7f63b8309000
+AT_BASE:         0x7faec143c000
+AT_BASE:         0x7fbdb25fa000
+
+$ gcc -m32 wrap.c -o wrap32
+$ LD_SHOW_AUXV=1 ./wrap32 ./wrap true |& grep AT_BASE
+AT_BASE:         0xf7eff000
+AT_BASE:         0xf7cee000
+AT_BASE:         0x7f8b9774e000
+
+Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()")
+Fixes: ada26481dfe6 ("x86/mm: Make in_compat_syscall() work during exec")
+Reported-by: Alexey Izbyshev <izbyshev@ispras.ru>
+Bisected-by: Alexander Monakov <amonakov@ispras.ru>
+Investigated-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Alexander Monakov <amonakov@ispras.ru>
+Cc: Dmitry Safonov <0x7f454c46@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: linux-mm@kvack.org
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180517233510.24996-1-dima@arista.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/process_64.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -528,6 +528,7 @@ void set_personality_64bit(void)
+       clear_thread_flag(TIF_X32);
+       /* Pretend that this comes from a 64bit execve */
+       task_pt_regs(current)->orig_ax = __NR_execve;
++      current_thread_info()->status &= ~TS_COMPAT;
+       /* Ensure the corresponding mm is not marked. */
+       if (current->mm)
diff --git a/queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch b/queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch
new file mode 100644 (file)
index 0000000..53a0aac
--- /dev/null
@@ -0,0 +1,81 @@
+From 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 9 May 2018 10:13:58 -0700
+Subject: x86/pkeys: Do not special case protection key 0
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 upstream.
+
+mm_pkey_is_allocated() treats pkey 0 as unallocated.  That is
+inconsistent with the manpages, and also inconsistent with
+mm->context.pkey_allocation_map.  Stop special casing it and only
+disallow values that are actually bad (< 0).
+
+The end-user visible effect of this is that you can now use
+mprotect_pkey() to set pkey=0.
+
+This is a bit nicer than what Ram proposed[1] because it is simpler
+and removes special-casing for pkey 0.  On the other hand, it does
+allow applications to pkey_free() pkey-0, but that's just a silly
+thing to do, so we are not going to protect against it.
+
+The scenario that could happen is similar to what happens if you free
+any other pkey that is in use: it might get reallocated later and used
+to protect some other data.  The most likely scenario is that pkey-0
+comes back from pkey_alloc(), an access-disable or write-disable bit
+is set in PKRU for it, and the next stack access will SIGSEGV.  It's
+not horribly different from if you mprotect()'d your stack or heap to
+be unreadable or unwritable, which is generally very foolish, but also
+not explicitly prevented by the kernel.
+
+1. http://lkml.kernel.org/r/1522112702-27853-1-git-send-email-linuxram@us.ibm.com
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ram Pai <linuxram@us.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: stable@vger.kernel.org
+Fixes: 58ab9a088dda ("x86/pkeys: Check against max pkey to avoid overflows")
+Link: http://lkml.kernel.org/r/20180509171358.47FD785E@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h |    2 +-
+ arch/x86/include/asm/pkeys.h       |    6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -192,7 +192,7 @@ static inline int init_new_context(struc
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+-              /* pkey 0 is the default and always allocated */
++              /* pkey 0 is the default and allocated implicitly */
+               mm->context.pkey_allocation_map = 0x1;
+               /* -1 means unallocated or invalid */
+               mm->context.execute_only_pkey = -1;
+--- a/arch/x86/include/asm/pkeys.h
++++ b/arch/x86/include/asm/pkeys.h
+@@ -51,10 +51,10 @@ bool mm_pkey_is_allocated(struct mm_stru
+ {
+       /*
+        * "Allocated" pkeys are those that have been returned
+-       * from pkey_alloc().  pkey 0 is special, and never
+-       * returned from pkey_alloc().
++       * from pkey_alloc() or pkey 0 which is allocated
++       * implicitly when the mm is created.
+        */
+-      if (pkey <= 0)
++      if (pkey < 0)
+               return false;
+       if (pkey >= arch_max_pkey())
+               return false;
diff --git a/queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch b/queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch
new file mode 100644 (file)
index 0000000..51dec04
--- /dev/null
@@ -0,0 +1,129 @@
+From 0a0b152083cfc44ec1bb599b57b7aab41327f998 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 9 May 2018 10:13:51 -0700
+Subject: x86/pkeys: Override pkey when moving away from PROT_EXEC
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 0a0b152083cfc44ec1bb599b57b7aab41327f998 upstream.
+
+I got a bug report that the following code (roughly) was
+causing a SIGSEGV:
+
+       mprotect(ptr, size, PROT_EXEC);
+       mprotect(ptr, size, PROT_NONE);
+       mprotect(ptr, size, PROT_READ);
+       *ptr = 100;
+
+The problem is hit when the mprotect(PROT_EXEC)
+implicitly assigned a protection key to the VMA, and made
+that key ACCESS_DENY|WRITE_DENY.  The PROT_NONE mprotect()
+failed to remove the protection key, and the PROT_NONE->
+PROT_READ left the PTE usable, but the pkey still in place
+and left the memory inaccessible.
+
+To fix this, we ensure that we always "override" the pkey
+at mprotect() if the VMA does not have execute-only
+permissions, but the VMA has the execute-only pkey.
+
+We had a check for PROT_READ/WRITE, but it did not work
+for PROT_NONE.  This entirely removes the PROT_* checks,
+which ensures that PROT_NONE now works.
+
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ram Pai <linuxram@us.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Cc: stable@vger.kernel.org
+Fixes: 62b5f7d013f ("mm/core, x86/mm/pkeys: Add execute-only protection keys support")
+Link: http://lkml.kernel.org/r/20180509171351.084C5A71@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pkeys.h |   12 +++++++++++-
+ arch/x86/mm/pkeys.c          |   21 +++++++++++----------
+ 2 files changed, 22 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/pkeys.h
++++ b/arch/x86/include/asm/pkeys.h
+@@ -2,6 +2,8 @@
+ #ifndef _ASM_X86_PKEYS_H
+ #define _ASM_X86_PKEYS_H
++#define ARCH_DEFAULT_PKEY     0
++
+ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
+ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm
+ static inline int execute_only_pkey(struct mm_struct *mm)
+ {
+       if (!boot_cpu_has(X86_FEATURE_OSPKE))
+-              return 0;
++              return ARCH_DEFAULT_PKEY;
+       return __execute_only_pkey(mm);
+ }
+@@ -56,6 +58,14 @@ bool mm_pkey_is_allocated(struct mm_stru
+               return false;
+       if (pkey >= arch_max_pkey())
+               return false;
++      /*
++       * The exec-only pkey is set in the allocation map, but
++       * is not available to any of the user interfaces like
++       * mprotect_pkey().
++       */
++      if (pkey == mm->context.execute_only_pkey)
++              return false;
++
+       return mm_pkey_allocation_map(mm) & (1U << pkey);
+ }
+--- a/arch/x86/mm/pkeys.c
++++ b/arch/x86/mm/pkeys.c
+@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct
+        */
+       if (pkey != -1)
+               return pkey;
+-      /*
+-       * Look for a protection-key-drive execute-only mapping
+-       * which is now being given permissions that are not
+-       * execute-only.  Move it back to the default pkey.
+-       */
+-      if (vma_is_pkey_exec_only(vma) &&
+-          (prot & (PROT_READ|PROT_WRITE))) {
+-              return 0;
+-      }
++
+       /*
+        * The mapping is execute-only.  Go try to get the
+        * execute-only protection key.  If we fail to do that,
+        * fall through as if we do not have execute-only
+-       * support.
++       * support in this mm.
+        */
+       if (prot == PROT_EXEC) {
+               pkey = execute_only_pkey(vma->vm_mm);
+               if (pkey > 0)
+                       return pkey;
++      } else if (vma_is_pkey_exec_only(vma)) {
++              /*
++               * Protections are *not* PROT_EXEC, but the mapping
++               * is using the exec-only pkey.  This mapping was
++               * PROT_EXEC and will no longer be.  Move back to
++               * the default pkey.
++               */
++              return ARCH_DEFAULT_PKEY;
+       }
++
+       /*
+        * This is a vanilla, non-pkey mprotect (or we failed to
+        * setup execute-only), inherit the pkey from the VMA we