From 2f926872f4f85f3548c809a2996da7cdd9d859f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 May 2018 08:41:50 +0200 Subject: [PATCH] 4.16-stable patches added patches: arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch btrfs-fix-xattr-loss-after-power-failure.patch btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch x86-apic-x2apic-initialize-cluster-id-properly.patch x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch x86-pkeys-do-not-special-case-protection-key-0.patch x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch --- ...-get_kprobe_ctlblk-after-irq-disabed.patch | 54 +++++++ ...ohibit-probing-on-optimized_callback.patch | 34 +++++ ...es-prohibit-kprobes-on-do_undefinstr.patch | 52 +++++++ ...ohibit-kprobes-on-get_user-functions.patch | 118 +++++++++++++++ ...sume-balance-without-the-resume-flag.patch | 59 ++++++++ ...nvalidation-during-transaction-abort.patch | 131 +++++++++++++++++ ...s-fix-xattr-loss-after-power-failure.patch | 80 +++++++++++ ...-flag-if-lzo-zstd-compression-is-set.patch | 75 ++++++++++ ...roots-due-to-concurrent-snapshotting.patch | 136 ++++++++++++++++++ ...rotocol_32-definition-for-mixed-mode.patch | 81 +++++++++++ queue-4.16/series | 15 ++ ...for_each_cpu-specially-on-up-kernels.patch | 60 ++++++++ ...2apic-initialize-cluster-id-properly.patch | 48 +++++++ ...rop-ts_compat-on-64-bit-exec-syscall.patch | 
101 +++++++++++++ ...do-not-special-case-protection-key-0.patch | 81 +++++++++++ ...pkey-when-moving-away-from-prot_exec.patch | 129 +++++++++++++++++ 16 files changed, 1254 insertions(+) create mode 100644 queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch create mode 100644 queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch create mode 100644 queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch create mode 100644 queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch create mode 100644 queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch create mode 100644 queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch create mode 100644 queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch create mode 100644 queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch create mode 100644 queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch create mode 100644 queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch create mode 100644 queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch create mode 100644 queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch create mode 100644 queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch create mode 100644 queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch create mode 100644 queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch diff --git a/queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch b/queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch new file mode 100644 index 00000000000..42e72edff7f --- /dev/null +++ b/queue-4.16/arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch @@ -0,0 +1,54 @@ +From 
69af7e23a6870df2ea6fa79ca16493d59b3eebeb Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sun, 13 May 2018 05:03:54 +0100 +Subject: ARM: 8769/1: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed + +From: Masami Hiramatsu + +commit 69af7e23a6870df2ea6fa79ca16493d59b3eebeb upstream. + +Since get_kprobe_ctlblk() uses smp_processor_id() to access +per-cpu variable, it hits smp_processor_id sanity check as below. + +[ 7.006928] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 +[ 7.007859] caller is debug_smp_processor_id+0x20/0x24 +[ 7.008438] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1-00192-g4eb17253e4b5 #1 +[ 7.008890] Hardware name: Generic DT based system +[ 7.009917] [] (unwind_backtrace) from [] (show_stack+0x20/0x24) +[ 7.010473] [] (show_stack) from [] (dump_stack+0x84/0x98) +[ 7.010990] [] (dump_stack) from [] (check_preemption_disabled+0x138/0x13c) +[ 7.011592] [] (check_preemption_disabled) from [] (debug_smp_processor_id+0x20/0x24) +[ 7.012214] [] (debug_smp_processor_id) from [] (optimized_callback+0x2c/0xe4) +[ 7.013077] [] (optimized_callback) from [] (0xbf0021b0) + +To fix this issue, call get_kprobe_ctlblk() right after +irq-disabled since that disables preemption. 
+ +Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32") +Signed-off-by: Masami Hiramatsu +Cc: stable@vger.kernel.org +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/probes/kprobes/opt-arm.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/arm/probes/kprobes/opt-arm.c ++++ b/arch/arm/probes/kprobes/opt-arm.c +@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kpro + { + unsigned long flags; + struct kprobe *p = &op->kp; +- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); ++ struct kprobe_ctlblk *kcb; + + /* Save skipped registers */ + regs->ARM_pc = (unsigned long)op->kp.addr; + regs->ARM_ORIG_r0 = ~0UL; + + local_irq_save(flags); ++ kcb = get_kprobe_ctlblk(); + + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); diff --git a/queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch b/queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch new file mode 100644 index 00000000000..f3afa83dd04 --- /dev/null +++ b/queue-4.16/arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch @@ -0,0 +1,34 @@ +From 70948c05fdde0aac32f9667856a88725c192fa40 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sun, 13 May 2018 05:04:10 +0100 +Subject: ARM: 8770/1: kprobes: Prohibit probing on optimized_callback + +From: Masami Hiramatsu + +commit 70948c05fdde0aac32f9667856a88725c192fa40 upstream. + +Prohibit probing on optimized_callback() because +it is called from kprobes itself. If we put a kprobes +on it, that will cause a recursive call loop. +Mark it NOKPROBE_SYMBOL. 
+ +Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32") +Signed-off-by: Masami Hiramatsu +Cc: stable@vger.kernel.org +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/probes/kprobes/opt-arm.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm/probes/kprobes/opt-arm.c ++++ b/arch/arm/probes/kprobes/opt-arm.c +@@ -192,6 +192,7 @@ optimized_callback(struct optimized_kpro + + local_irq_restore(flags); + } ++NOKPROBE_SYMBOL(optimized_callback) + + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) + { diff --git a/queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch b/queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch new file mode 100644 index 00000000000..061bd3f4cd4 --- /dev/null +++ b/queue-4.16/arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch @@ -0,0 +1,52 @@ +From eb0146daefdde65665b7f076fbff7b49dade95b9 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sun, 13 May 2018 05:04:16 +0100 +Subject: ARM: 8771/1: kprobes: Prohibit kprobes on do_undefinstr + +From: Masami Hiramatsu + +commit eb0146daefdde65665b7f076fbff7b49dade95b9 upstream. + +Prohibit kprobes on do_undefinstr because kprobes on +arm is implemented by undefined instruction. This means +if we probe do_undefinstr(), it can cause infinite +recursive exception. 
+ +Fixes: 24ba613c9d6c ("ARM kprobes: core code") +Signed-off-by: Masami Hiramatsu +Cc: stable@vger.kernel.org +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/kernel/traps.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_ + raw_spin_unlock_irqrestore(&undef_lock, flags); + } + +-static int call_undef_hook(struct pt_regs *regs, unsigned int instr) ++static nokprobe_inline ++int call_undef_hook(struct pt_regs *regs, unsigned int instr) + { + struct undef_hook *hook; + unsigned long flags; +@@ -490,6 +492,7 @@ die_sig: + + arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); + } ++NOKPROBE_SYMBOL(do_undefinstr) + + /* + * Handle FIQ similarly to NMI on x86 systems. diff --git a/queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch b/queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch new file mode 100644 index 00000000000..a5bae2ce19f --- /dev/null +++ b/queue-4.16/arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch @@ -0,0 +1,118 @@ +From 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Sun, 13 May 2018 05:04:29 +0100 +Subject: ARM: 8772/1: kprobes: Prohibit kprobes on get_user functions + +From: Masami Hiramatsu + +commit 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c upstream. + +Since do_undefinstr() uses get_user to get the undefined +instruction, it can be called before kprobes processes +recursive check. This can cause an infinite recursive +exception. +Prohibit probing on get_user functions. 
+ +Fixes: 24ba613c9d6c ("ARM kprobes: core code") +Signed-off-by: Masami Hiramatsu +Cc: stable@vger.kernel.org +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/assembler.h | 10 ++++++++++ + arch/arm/lib/getuser.S | 10 ++++++++++ + 2 files changed, 20 insertions(+) + +--- a/arch/arm/include/asm/assembler.h ++++ b/arch/arm/include/asm/assembler.h +@@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) + #endif + .endm + ++#ifdef CONFIG_KPROBES ++#define _ASM_NOKPROBE(entry) \ ++ .pushsection "_kprobe_blacklist", "aw" ; \ ++ .balign 4 ; \ ++ .long entry; \ ++ .popsection ++#else ++#define _ASM_NOKPROBE(entry) ++#endif ++ + #endif /* __ASM_ASSEMBLER_H__ */ +--- a/arch/arm/lib/getuser.S ++++ b/arch/arm/lib/getuser.S +@@ -38,6 +38,7 @@ ENTRY(__get_user_1) + mov r0, #0 + ret lr + ENDPROC(__get_user_1) ++_ASM_NOKPROBE(__get_user_1) + + ENTRY(__get_user_2) + check_uaccess r0, 2, r1, r2, __get_user_bad +@@ -58,6 +59,7 @@ rb .req r0 + mov r0, #0 + ret lr + ENDPROC(__get_user_2) ++_ASM_NOKPROBE(__get_user_2) + + ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad +@@ -65,6 +67,7 @@ ENTRY(__get_user_4) + mov r0, #0 + ret lr + ENDPROC(__get_user_4) ++_ASM_NOKPROBE(__get_user_4) + + ENTRY(__get_user_8) + check_uaccess r0, 8, r1, r2, __get_user_bad8 +@@ -78,6 +81,7 @@ ENTRY(__get_user_8) + mov r0, #0 + ret lr + ENDPROC(__get_user_8) ++_ASM_NOKPROBE(__get_user_8) + + #ifdef __ARMEB__ + ENTRY(__get_user_32t_8) +@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8) + mov r0, #0 + ret lr + ENDPROC(__get_user_32t_8) ++_ASM_NOKPROBE(__get_user_32t_8) + + ENTRY(__get_user_64t_1) + check_uaccess r0, 1, r1, r2, __get_user_bad8 +@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1) + mov r0, #0 + ret lr + ENDPROC(__get_user_64t_1) ++_ASM_NOKPROBE(__get_user_64t_1) + + ENTRY(__get_user_64t_2) + check_uaccess r0, 2, r1, r2, __get_user_bad8 +@@ -114,6 +120,7 @@ rb .req r0 + mov r0, #0 + ret lr + ENDPROC(__get_user_64t_2) ++_ASM_NOKPROBE(__get_user_64t_2) 
+ + ENTRY(__get_user_64t_4) + check_uaccess r0, 4, r1, r2, __get_user_bad8 +@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4) + mov r0, #0 + ret lr + ENDPROC(__get_user_64t_4) ++_ASM_NOKPROBE(__get_user_64t_4) + #endif + + __get_user_bad8: +@@ -131,6 +139,8 @@ __get_user_bad: + ret lr + ENDPROC(__get_user_bad) + ENDPROC(__get_user_bad8) ++_ASM_NOKPROBE(__get_user_bad) ++_ASM_NOKPROBE(__get_user_bad8) + + .pushsection __ex_table, "a" + .long 1b, __get_user_bad diff --git a/queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch b/queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch new file mode 100644 index 00000000000..7e7579641f8 --- /dev/null +++ b/queue-4.16/btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch @@ -0,0 +1,59 @@ +From 02ee654d3a04563c67bfe658a05384548b9bb105 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Thu, 17 May 2018 15:16:51 +0800 +Subject: btrfs: fix crash when trying to resume balance without the resume flag + +From: Anand Jain + +commit 02ee654d3a04563c67bfe658a05384548b9bb105 upstream. + +We set the BTRFS_BALANCE_RESUME flag in the btrfs_recover_balance() +only, which isn't called during the remount. So when resuming from +the paused balance we hit the bug: + + kernel: kernel BUG at fs/btrfs/volumes.c:3890! + :: + kernel: balance_kthread+0x51/0x60 [btrfs] + kernel: kthread+0x111/0x130 + :: + kernel: RIP: btrfs_balance+0x12e1/0x1570 [btrfs] RSP: ffffba7d0090bde8 + +Reproducer: + On a mounted filesystem: + + btrfs balance start --full-balance /btrfs + btrfs balance pause /btrfs + mount -o remount,ro /dev/sdb /btrfs + mount -o remount,rw /dev/sdb /btrfs + +To fix this set the BTRFS_BALANCE_RESUME flag in +btrfs_resume_balance_async(). 
+ +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/volumes.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4046,6 +4046,15 @@ int btrfs_resume_balance_async(struct bt + return 0; + } + ++ /* ++ * A ro->rw remount sequence should continue with the paused balance ++ * regardless of who pauses it, system or the user as of now, so set ++ * the resume flag. ++ */ ++ spin_lock(&fs_info->balance_lock); ++ fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME; ++ spin_unlock(&fs_info->balance_lock); ++ + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); + return PTR_ERR_OR_ZERO(tsk); + } diff --git a/queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch b/queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch new file mode 100644 index 00000000000..939bf88b487 --- /dev/null +++ b/queue-4.16/btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch @@ -0,0 +1,131 @@ +From fe816d0f1d4c31c4c31d42ca78a87660565fc800 Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Fri, 27 Apr 2018 12:21:53 +0300 +Subject: btrfs: Fix delalloc inodes invalidation during transaction abort + +From: Nikolay Borisov + +commit fe816d0f1d4c31c4c31d42ca78a87660565fc800 upstream. + +When a transaction is aborted btrfs_cleanup_transaction is called to +cleanup all the various in-flight bits and pieces which migth be +active. One of those is delalloc inodes - inodes which have dirty +pages which haven't been persisted yet. Currently the process of +freeing such delalloc inodes in exceptional circumstances such as +transaction abort boiled down to calling btrfs_invalidate_inodes whose +sole job is to invalidate the dentries for all inodes related to a +root. 
This is in fact wrong and insufficient since such delalloc inodes +will likely have pending pages or ordered-extents and will be linked to +the sb->s_inode_list. This means that unmounting a btrfs instance with +an aborted transaction could potentially lead inodes/their pages +visible to the system long after their superblock has been freed. This +in turn leads to a "use-after-free" situation once page shrink is +triggered. This situation could be simulated by running generic/019 +which would cause such inodes to be left hanging, followed by +generic/176 which causes memory pressure and page eviction which lead +to touching the freed super block instance. This situation is +additionally detected by the unmount code of VFS with the following +message: + +"VFS: Busy inodes after unmount of Self-destruct in 5 seconds. Have a nice day..." + +Additionally btrfs hits WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); +in free_fs_root for the same reason. + +This patch aims to rectify the sitaution by doing the following: + +1. Change btrfs_destroy_delalloc_inodes so that it calls +invalidate_inode_pages2 for every inode on the delalloc list, this +ensures that all the pages of the inode are released. This function +boils down to calling btrfs_releasepage. During test I observed cases +where inodes on the delalloc list were having an i_count of 0, so this +necessitates using igrab to be sure we are working on a non-freed inode. + +2. Since calling btrfs_releasepage might queue delayed iputs move the +call out to btrfs_cleanup_transaction in btrfs_error_commit_super before +calling run_delayed_iputs for the last time. This is necessary to ensure +that delayed iputs are run. + +Note: this patch is tagged for 4.14 stable but the fix applies to older +versions too but needs to be backported manually due to conflicts. 
+ +CC: stable@vger.kernel.org # 4.14.x: 2b8773313494: btrfs: Split btrfs_del_delalloc_inode into 2 functions +CC: stable@vger.kernel.org # 4.14.x +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +[ add comment to igrab ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3744,6 +3744,7 @@ void close_ctree(struct btrfs_fs_info *f + set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); + + btrfs_free_qgroup_config(fs_info); ++ ASSERT(list_empty(&fs_info->delalloc_roots)); + + if (percpu_counter_sum(&fs_info->delalloc_bytes)) { + btrfs_info(fs_info, "at unmount delalloc count %lld", +@@ -4049,15 +4050,15 @@ static int btrfs_check_super_valid(struc + + static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) + { ++ /* cleanup FS via transaction */ ++ btrfs_cleanup_transaction(fs_info); ++ + mutex_lock(&fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_mutex); + + down_write(&fs_info->cleanup_work_sem); + up_write(&fs_info->cleanup_work_sem); +- +- /* cleanup FS via transaction */ +- btrfs_cleanup_transaction(fs_info); + } + + static void btrfs_destroy_ordered_extents(struct btrfs_root *root) +@@ -4182,19 +4183,23 @@ static void btrfs_destroy_delalloc_inode + list_splice_init(&root->delalloc_inodes, &splice); + + while (!list_empty(&splice)) { ++ struct inode *inode = NULL; + btrfs_inode = list_first_entry(&splice, struct btrfs_inode, + delalloc_inodes); +- +- list_del_init(&btrfs_inode->delalloc_inodes); +- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, +- &btrfs_inode->runtime_flags); ++ __btrfs_del_delalloc_inode(root, btrfs_inode); + spin_unlock(&root->delalloc_lock); + +- btrfs_invalidate_inodes(btrfs_inode->root); +- ++ /* ++ * Make sure we get a live inode and that it'll not disappear ++ * meanwhile. 
++ */ ++ inode = igrab(&btrfs_inode->vfs_inode); ++ if (inode) { ++ invalidate_inode_pages2(inode->i_mapping); ++ iput(inode); ++ } + spin_lock(&root->delalloc_lock); + } +- + spin_unlock(&root->delalloc_lock); + } + +@@ -4210,7 +4215,6 @@ static void btrfs_destroy_all_delalloc_i + while (!list_empty(&splice)) { + root = list_first_entry(&splice, struct btrfs_root, + delalloc_root); +- list_del_init(&root->delalloc_root); + root = btrfs_grab_fs_root(root); + BUG_ON(!root); + spin_unlock(&fs_info->delalloc_root_lock); diff --git a/queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch b/queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch new file mode 100644 index 00000000000..b31c751b482 --- /dev/null +++ b/queue-4.16/btrfs-fix-xattr-loss-after-power-failure.patch @@ -0,0 +1,80 @@ +From 9a8fca62aacc1599fea8e813d01e1955513e4fad Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 11 May 2018 16:42:42 +0100 +Subject: Btrfs: fix xattr loss after power failure + +From: Filipe Manana + +commit 9a8fca62aacc1599fea8e813d01e1955513e4fad upstream. + +If a file has xattrs, we fsync it, to ensure we clear the flags +BTRFS_INODE_NEEDS_FULL_SYNC and BTRFS_INODE_COPY_EVERYTHING from its +inode, the current transaction commits and then we fsync it (without +either of those bits being set in its inode), we end up not logging +all its xattrs. This results in deleting all xattrs when replying the +log after a power failure. + +Trivial reproducer + + $ mkfs.btrfs -f /dev/sdb + $ mount /dev/sdb /mnt + + $ touch /mnt/foobar + $ setfattr -n user.xa -v qwerty /mnt/foobar + $ xfs_io -c "fsync" /mnt/foobar + + $ sync + + $ xfs_io -c "pwrite -S 0xab 0 64K" /mnt/foobar + $ xfs_io -c "fsync" /mnt/foobar + + + $ mount /dev/sdb /mnt + $ getfattr --absolute-names --dump /mnt/foobar + + $ + +So fix this by making sure all xattrs are logged if we log a file's inode +item and neither the flags BTRFS_INODE_NEEDS_FULL_SYNC nor +BTRFS_INODE_COPY_EVERYTHING were set in the inode. 
+ +Fixes: 36283bf777d9 ("Btrfs: fix fsync xattr loss in the fast fsync path") +Cc: # 4.2+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4749,6 +4749,7 @@ static int btrfs_log_inode(struct btrfs_ + struct extent_map_tree *em_tree = &inode->extent_tree; + u64 logged_isize = 0; + bool need_log_inode_item = true; ++ bool xattrs_logged = false; + + path = btrfs_alloc_path(); + if (!path) +@@ -5050,6 +5051,7 @@ next_key: + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + if (err) + goto out_unlock; ++ xattrs_logged = true; + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); +@@ -5062,6 +5064,11 @@ log_extents: + btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); ++ if (!err && !xattrs_logged) { ++ err = btrfs_log_all_xattrs(trans, root, inode, path, ++ dst_path); ++ btrfs_release_path(path); ++ } + if (err) + goto out_unlock; + } diff --git a/queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch b/queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch new file mode 100644 index 00000000000..0be6b44188a --- /dev/null +++ b/queue-4.16/btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch @@ -0,0 +1,75 @@ +From 1a63c198ddb810c790101d693c7071cca703b3c7 Mon Sep 17 00:00:00 2001 +From: Misono Tomohiro +Date: Tue, 15 May 2018 16:51:26 +0900 +Subject: btrfs: property: Set incompat flag if lzo/zstd compression is set + +From: Misono Tomohiro + +commit 1a63c198ddb810c790101d693c7071cca703b3c7 upstream. + +Incompat flag of LZO/ZSTD compression should be set at: + + 1. mount time (-o compress/compress-force) + 2. when defrag is done + 3. when property is set + +Currently 3. 
is missing and this commit adds this. + +This could lead to a filesystem that uses ZSTD but is not marked as +such. If a kernel without a ZSTD support encounteres a ZSTD compressed +extent, it will handle that but this could be confusing to the user. + +Typically the filesystem is mounted with the ZSTD option, but the +discrepancy can arise when a filesystem is never mounted with ZSTD and +then the property on some file is set (and some new extents are +written). A simple mount with -o compress=zstd will fix that up on an +unpatched kernel. + +Same goes for LZO, but this has been around for a very long time +(2.6.37) so it's unlikely that a pre-LZO kernel would be used. + +Fixes: 5c1aab1dd544 ("btrfs: Add zstd support") +CC: stable@vger.kernel.org # 4.14+ +Signed-off-by: Tomohiro Misono +Reviewed-by: Anand Jain +Reviewed-by: David Sterba +[ add user visible impact ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/props.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -393,6 +393,7 @@ static int prop_compression_apply(struct + const char *value, + size_t len) + { ++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + int type; + + if (len == 0) { +@@ -403,14 +404,17 @@ static int prop_compression_apply(struct + return 0; + } + +- if (!strncmp("lzo", value, 3)) ++ if (!strncmp("lzo", value, 3)) { + type = BTRFS_COMPRESS_LZO; +- else if (!strncmp("zlib", value, 4)) ++ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); ++ } else if (!strncmp("zlib", value, 4)) { + type = BTRFS_COMPRESS_ZLIB; +- else if (!strncmp("zstd", value, len)) ++ } else if (!strncmp("zstd", value, len)) { + type = BTRFS_COMPRESS_ZSTD; +- else ++ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); ++ } else { + return -EINVAL; ++ } + + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; diff --git 
a/queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch b/queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch new file mode 100644 index 00000000000..b91f0b6a8b6 --- /dev/null +++ b/queue-4.16/btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch @@ -0,0 +1,136 @@ +From 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 Mon Sep 17 00:00:00 2001 +From: Robbie Ko +Date: Mon, 14 May 2018 10:51:34 +0800 +Subject: Btrfs: send, fix invalid access to commit roots due to concurrent snapshotting + +From: Robbie Ko + +commit 6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 upstream. + +[BUG] +btrfs incremental send BUG happens when creating a snapshot of snapshot +that is being used by send. + +[REASON] +The problem can happen if while we are doing a send one of the snapshots +used (parent or send) is snapshotted, because snapshoting implies COWing +the root of the source subvolume/snapshot. + +1. When doing an incremental send, the send process will get the commit + roots from the parent and send snapshots, and add references to them + through extent_buffer_get(). + +2. When a snapshot/subvolume is snapshotted, its root node is COWed + (transaction.c:create_pending_snapshot()). + +3. COWing releases the space used by the node immediately, through: + + __btrfs_cow_block() + --btrfs_free_tree_block() + ----btrfs_add_free_space(bytenr of node) + +4. Because send doesn't hold a transaction open, it's possible that + the transaction used to create the snapshot commits, switches the + commit root and the old space used by the previous root node gets + assigned to some other node allocation. 
Allocation of a new node will + use the existing extent buffer found in memory, which we previously + got a reference through extent_buffer_get(), and allow the extent + buffer's content (pages) to be modified: + + btrfs_alloc_tree_block + --btrfs_reserve_extent + ----find_free_extent (get bytenr of old node) + --btrfs_init_new_buffer (use bytenr of old node) + ----btrfs_find_create_tree_block + ------alloc_extent_buffer + --------find_extent_buffer (get old node) + +5. So send can access invalid memory content and have unpredictable + behaviour. + +[FIX] +So we fix the problem by copying the commit roots of the send and +parent snapshots and use those copies. + +CallTrace looks like this: + ------------[ cut here ]------------ + kernel BUG at fs/btrfs/ctree.c:1861! + invalid opcode: 0000 [#1] SMP + CPU: 6 PID: 24235 Comm: btrfs Tainted: P O 3.10.105 #23721 + ffff88046652d680 ti: ffff88041b720000 task.ti: ffff88041b720000 + RIP: 0010:[] read_node_slot+0x108/0x110 [btrfs] + RSP: 0018:ffff88041b723b68 EFLAGS: 00010246 + RAX: ffff88043ca6b000 RBX: ffff88041b723c50 RCX: ffff880000000000 + RDX: 000000000000004c RSI: ffff880314b133f8 RDI: ffff880458b24000 + RBP: 0000000000000000 R08: 0000000000000001 R09: ffff88041b723c66 + R10: 0000000000000001 R11: 0000000000001000 R12: ffff8803f3e48890 + R13: ffff8803f3e48880 R14: ffff880466351800 R15: 0000000000000001 + FS: 00007f8c321dc8c0(0000) GS:ffff88047fcc0000(0000) + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + R2: 00007efd1006d000 CR3: 0000000213a24000 CR4: 00000000003407e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Stack: + ffff88041b723c50 ffff8803f3e48880 ffff8803f3e48890 ffff8803f3e48880 + ffff880466351800 0000000000000001 ffffffffa08dd9d7 ffff88041b723c50 + ffff8803f3e48880 ffff88041b723c66 ffffffffa08dde85 a9ff88042d2c4400 + Call Trace: + [] ? tree_move_down.isra.33+0x27/0x50 [btrfs] + [] ? 
tree_advance+0xb5/0xc0 [btrfs] + [] ? btrfs_compare_trees+0x2d4/0x760 [btrfs] + [] ? finish_inode_if_needed+0x870/0x870 [btrfs] + [] ? btrfs_ioctl_send+0xeda/0x1050 [btrfs] + [] ? btrfs_ioctl+0x1e3d/0x33f0 [btrfs] + [] ? handle_pte_fault+0x373/0x990 + [] ? atomic_notifier_call_chain+0x16/0x20 + [] ? set_task_cpu+0xb6/0x1d0 + [] ? handle_mm_fault+0x143/0x2a0 + [] ? __do_page_fault+0x1d0/0x500 + [] ? check_preempt_curr+0x57/0x90 + [] ? do_vfs_ioctl+0x4aa/0x990 + [] ? do_fork+0x113/0x3b0 + [] ? trace_hardirqs_off_thunk+0x3a/0x6c + [] ? SyS_ioctl+0x88/0xa0 + [] ? system_call_fastpath+0x16/0x1b + ---[ end trace 29576629ee80b2e1 ]--- + +Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function") +CC: stable@vger.kernel.org # 3.6+ +Signed-off-by: Robbie Ko +Reviewed-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -5460,12 +5460,24 @@ int btrfs_compare_trees(struct btrfs_roo + down_read(&fs_info->commit_root_sem); + left_level = btrfs_header_level(left_root->commit_root); + left_root_level = left_level; +- left_path->nodes[left_level] = left_root->commit_root; ++ left_path->nodes[left_level] = ++ btrfs_clone_extent_buffer(left_root->commit_root); ++ if (!left_path->nodes[left_level]) { ++ up_read(&fs_info->commit_root_sem); ++ ret = -ENOMEM; ++ goto out; ++ } + extent_buffer_get(left_path->nodes[left_level]); + + right_level = btrfs_header_level(right_root->commit_root); + right_root_level = right_level; +- right_path->nodes[right_level] = right_root->commit_root; ++ right_path->nodes[right_level] = ++ btrfs_clone_extent_buffer(right_root->commit_root); ++ if (!right_path->nodes[right_level]) { ++ up_read(&fs_info->commit_root_sem); ++ ret = -ENOMEM; ++ goto out; ++ } + extent_buffer_get(right_path->nodes[right_level]); + up_read(&fs_info->commit_root_sem); + diff --git 
a/queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch b/queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch new file mode 100644 index 00000000000..e9e05e4ba81 --- /dev/null +++ b/queue-4.16/efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch @@ -0,0 +1,81 @@ +From 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Fri, 4 May 2018 07:59:58 +0200 +Subject: efi: Avoid potential crashes, fix the 'struct efi_pci_io_protocol_32' definition for mixed mode + +From: Ard Biesheuvel + +commit 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 upstream. + +Mixed mode allows a kernel built for x86_64 to interact with 32-bit +EFI firmware, but requires us to define all struct definitions carefully +when it comes to pointer sizes. + +'struct efi_pci_io_protocol_32' currently uses a 'void *' for the +'romimage' field, which will be interpreted as a 64-bit field +on such kernels, potentially resulting in bogus memory references +and subsequent crashes. 
+ +Tested-by: Hans de Goede +Signed-off-by: Ard Biesheuvel +Cc: +Cc: Linus Torvalds +Cc: Matt Fleming +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/20180504060003.19618-13-ard.biesheuvel@linaro.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/boot/compressed/eboot.c | 6 ++++-- + include/linux/efi.h | 8 ++++---- + 2 files changed, 8 insertions(+), 6 deletions(-) + +--- a/arch/x86/boot/compressed/eboot.c ++++ b/arch/x86/boot/compressed/eboot.c +@@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 + if (status != EFI_SUCCESS) + goto free_struct; + +- memcpy(rom->romdata, pci->romimage, pci->romsize); ++ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, ++ pci->romsize); + return status; + + free_struct: +@@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 + if (status != EFI_SUCCESS) + goto free_struct; + +- memcpy(rom->romdata, pci->romimage, pci->romsize); ++ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, ++ pci->romsize); + return status; + + free_struct: +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -395,8 +395,8 @@ typedef struct { + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; +- uint64_t romsize; +- void *romimage; ++ u64 romsize; ++ u32 romimage; + } efi_pci_io_protocol_32; + + typedef struct { +@@ -415,8 +415,8 @@ typedef struct { + u64 attributes; + u64 get_bar_attributes; + u64 set_bar_attributes; +- uint64_t romsize; +- void *romimage; ++ u64 romsize; ++ u64 romimage; + } efi_pci_io_protocol_64; + + typedef struct { diff --git a/queue-4.16/series b/queue-4.16/series index 4e31f8578b2..b72f836aa0a 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -42,3 +42,18 @@ s390-cpum_sf-ensure-sample-frequency-of-perf-event-attributes-is-non-zero.patch s390-qdio-don-t-release-memory-in-qdio_setup_irq.patch s390-remove-indirect-branch-from-do_softirq_own_stack.patch 
bcache-return-0-from-bch_debug_init-if-config_debug_fs-n.patch +x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch +x86-pkeys-do-not-special-case-protection-key-0.patch +efi-avoid-potential-crashes-fix-the-struct-efi_pci_io_protocol_32-definition-for-mixed-mode.patch +arm-8771-1-kprobes-prohibit-kprobes-on-do_undefinstr.patch +x86-apic-x2apic-initialize-cluster-id-properly.patch +x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch +tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch +arm-8769-1-kprobes-fix-to-use-get_kprobe_ctlblk-after-irq-disabed.patch +arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch +arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch +btrfs-fix-delalloc-inodes-invalidation-during-transaction-abort.patch +btrfs-fix-xattr-loss-after-power-failure.patch +btrfs-send-fix-invalid-access-to-commit-roots-due-to-concurrent-snapshotting.patch +btrfs-property-set-incompat-flag-if-lzo-zstd-compression-is-set.patch +btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch diff --git a/queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch b/queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch new file mode 100644 index 00000000000..8691c6924d1 --- /dev/null +++ b/queue-4.16/tick-broadcast-use-for_each_cpu-specially-on-up-kernels.patch @@ -0,0 +1,60 @@ +From 5596fe34495cf0f645f417eb928ef224df3e3cb4 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Tue, 15 May 2018 19:52:50 +0000 +Subject: tick/broadcast: Use for_each_cpu() specially on UP kernels + +From: Dexuan Cui + +commit 5596fe34495cf0f645f417eb928ef224df3e3cb4 upstream. + +for_each_cpu() unintuitively reports CPU0 as set independent of the actual +cpumask content on UP kernels. 
This causes an unexpected PIT interrupt +storm on a UP kernel running in an SMP virtual machine on Hyper-V, and as +a result, the virtual machine can suffer from a strange random delay of 1~20 +minutes during boot-up, and sometimes it can hang forever. + +Protect it by checking whether the cpumask is empty before entering the +for_each_cpu() loop. + +[ tglx: Use !IS_ENABLED(CONFIG_SMP) instead of #ifdeffery ] + +Signed-off-by: Dexuan Cui +Signed-off-by: Thomas Gleixner +Cc: Josh Poulson +Cc: "Michael Kelley (EOSG)" +Cc: Peter Zijlstra +Cc: Frederic Weisbecker +Cc: stable@vger.kernel.org +Cc: Rakib Mullick +Cc: Jork Loeser +Cc: Greg Kroah-Hartman +Cc: Andrew Morton +Cc: KY Srinivasan +Cc: Linus Torvalds +Cc: Alexey Dobriyan +Cc: Dmitry Vyukov +Link: https://lkml.kernel.org/r/KL1P15301MB000678289FE55BA365B3279ABF990@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM +Link: https://lkml.kernel.org/r/KL1P15301MB0006FA63BC22BEB64902EAA0BF930@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/tick-broadcast.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/kernel/time/tick-broadcast.c ++++ b/kernel/time/tick-broadcast.c +@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcas + now = ktime_get(); + /* Find all expired events */ + for_each_cpu(cpu, tick_broadcast_oneshot_mask) { ++ /* ++ * Required for !SMP because for_each_cpu() reports ++ * unconditionally CPU0 as set on UP kernels.
++ */ ++ if (!IS_ENABLED(CONFIG_SMP) && ++ cpumask_empty(tick_broadcast_oneshot_mask)) ++ break; ++ + td = &per_cpu(tick_cpu_device, cpu); + if (td->evtdev->next_event <= now) { + cpumask_set_cpu(cpu, tmpmask); diff --git a/queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch b/queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch new file mode 100644 index 00000000000..f9dee582e62 --- /dev/null +++ b/queue-4.16/x86-apic-x2apic-initialize-cluster-id-properly.patch @@ -0,0 +1,48 @@ +From fed71f7d98795ed0fa1d431910787f0f4a68324f Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 17 May 2018 14:36:39 +0200 +Subject: x86/apic/x2apic: Initialize cluster ID properly + +From: Thomas Gleixner + +commit fed71f7d98795ed0fa1d431910787f0f4a68324f upstream. + +Rick bisected a regression on large systems which use the x2apic cluster +mode for interrupt delivery to the commit which reworked the cluster +management. + +The problem is caused by a missing initialization of the clusterid field +in the shared cluster data structures. So all structures end up with +cluster ID 0 which only allows sharing between all CPUs which belong to +cluster 0. All other CPUs with a cluster ID > 0 cannot share the data +structure because they cannot find existing data with their cluster +ID. This causes malfunction with IPIs because IPIs are sent to the wrong +cluster and the caller waits forever for the target CPU to handle the IPI. + +Add the missing initialization when an upcoming CPU is the first in a +cluster so that the later booting CPUs can find the data and share it for +proper operation. 
+ +Fixes: 023a611748fd ("x86/apic/x2apic: Simplify cluster management") +Reported-by: Rick Warner +Bisected-by: Rick Warner +Signed-off-by: Thomas Gleixner +Tested-by: Rick Warner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1805171418210.1947@nanos.tec.linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/x2apic_cluster.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/apic/x2apic_cluster.c ++++ b/arch/x86/kernel/apic/x2apic_cluster.c +@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void) + goto update; + } + cmsk = cluster_hotplug_mask; ++ cmsk->clusterid = cluster; + cluster_hotplug_mask = NULL; + update: + this_cpu_write(cluster_masks, cmsk); diff --git a/queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch b/queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch new file mode 100644 index 00000000000..b70fd4abde6 --- /dev/null +++ b/queue-4.16/x86-mm-drop-ts_compat-on-64-bit-exec-syscall.patch @@ -0,0 +1,101 @@ +From acf46020012ccbca1172e9c7aeab399c950d9212 Mon Sep 17 00:00:00 2001 +From: Dmitry Safonov +Date: Fri, 18 May 2018 00:35:10 +0100 +Subject: x86/mm: Drop TS_COMPAT on 64-bit exec() syscall + +From: Dmitry Safonov + +commit acf46020012ccbca1172e9c7aeab399c950d9212 upstream. + +The x86 mmap() code selects the mmap base for an allocation depending on +the bitness of the syscall. For 64bit syscalls it selects mm->mmap_base and +for 32bit mm->mmap_compat_base. + +exec() calls mmap() which in turn uses in_compat_syscall() to check whether +the mapping is for a 32bit or a 64bit task. The decision is made on the +following criteria: + + ia32 child->thread.status & TS_COMPAT + x32 child->pt_regs.orig_ax & __X32_SYSCALL_BIT + ia64 !ia32 && !x32 + +__set_personality_x32() was dropping TS_COMPAT flag, but +set_personality_64bit() has kept compat syscall flag making +in_compat_syscall() return true during the first exec() syscall. 
+ +Which in result has user-visible effects, mentioned by Alexey: +1) It breaks ASAN +$ gcc -fsanitize=address wrap.c -o wrap-asan +$ ./wrap32 ./wrap-asan true +==1217==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING. +==1217==ASan shadow was supposed to be located in the [0x00007fff7000-0x10007fff7fff] range. +==1217==Process memory map follows: + 0x000000400000-0x000000401000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan + 0x000000600000-0x000000601000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan + 0x000000601000-0x000000602000 /home/izbyshev/test/gcc/asan-exec-from-32bit/wrap-asan + 0x0000f7dbd000-0x0000f7de2000 /lib64/ld-2.27.so + 0x0000f7fe2000-0x0000f7fe3000 /lib64/ld-2.27.so + 0x0000f7fe3000-0x0000f7fe4000 /lib64/ld-2.27.so + 0x0000f7fe4000-0x0000f7fe5000 + 0x7fed9abff000-0x7fed9af54000 + 0x7fed9af54000-0x7fed9af6b000 /lib64/libgcc_s.so.1 +[snip] + +2) It doesn't seem to be great for security if an attacker always knows +that ld.so is going to be mapped into the first 4GB in this case +(the same thing happens for PIEs as well). 
+ +The testcase: +$ cat wrap.c + +int main(int argc, char *argv[]) { + execvp(argv[1], &argv[1]); + return 127; +} + +$ gcc wrap.c -o wrap +$ LD_SHOW_AUXV=1 ./wrap ./wrap true |& grep AT_BASE +AT_BASE: 0x7f63b8309000 +AT_BASE: 0x7faec143c000 +AT_BASE: 0x7fbdb25fa000 + +$ gcc -m32 wrap.c -o wrap32 +$ LD_SHOW_AUXV=1 ./wrap32 ./wrap true |& grep AT_BASE +AT_BASE: 0xf7eff000 +AT_BASE: 0xf7cee000 +AT_BASE: 0x7f8b9774e000 + +Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()") +Fixes: ada26481dfe6 ("x86/mm: Make in_compat_syscall() work during exec") +Reported-by: Alexey Izbyshev +Bisected-by: Alexander Monakov +Investigated-by: Andy Lutomirski +Signed-off-by: Dmitry Safonov +Signed-off-by: Thomas Gleixner +Reviewed-by: Cyrill Gorcunov +Cc: Borislav Petkov +Cc: Alexander Monakov +Cc: Dmitry Safonov <0x7f454c46@gmail.com> +Cc: stable@vger.kernel.org +Cc: linux-mm@kvack.org +Cc: Andy Lutomirski +Cc: "H. Peter Anvin" +Cc: Cyrill Gorcunov +Cc: "Kirill A. Shutemov" +Link: https://lkml.kernel.org/r/20180517233510.24996-1-dima@arista.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/process_64.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -528,6 +528,7 @@ void set_personality_64bit(void) + clear_thread_flag(TIF_X32); + /* Pretend that this comes from a 64bit execve */ + task_pt_regs(current)->orig_ax = __NR_execve; ++ current_thread_info()->status &= ~TS_COMPAT; + + /* Ensure the corresponding mm is not marked. 
*/ + if (current->mm) diff --git a/queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch b/queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch new file mode 100644 index 00000000000..53a0aacc116 --- /dev/null +++ b/queue-4.16/x86-pkeys-do-not-special-case-protection-key-0.patch @@ -0,0 +1,81 @@ +From 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Wed, 9 May 2018 10:13:58 -0700 +Subject: x86/pkeys: Do not special case protection key 0 + +From: Dave Hansen + +commit 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 upstream. + +mm_pkey_is_allocated() treats pkey 0 as unallocated. That is +inconsistent with the manpages, and also inconsistent with +mm->context.pkey_allocation_map. Stop special casing it and only +disallow values that are actually bad (< 0). + +The end-user visible effect of this is that you can now use +mprotect_pkey() to set pkey=0. + +This is a bit nicer than what Ram proposed[1] because it is simpler +and removes special-casing for pkey 0. On the other hand, it does +allow applications to pkey_free() pkey-0, but that's just a silly +thing to do, so we are not going to protect against it. + +The scenario that could happen is similar to what happens if you free +any other pkey that is in use: it might get reallocated later and used +to protect some other data. The most likely scenario is that pkey-0 +comes back from pkey_alloc(), an access-disable or write-disable bit +is set in PKRU for it, and the next stack access will SIGSEGV. It's +not horribly different from if you mprotect()'d your stack or heap to +be unreadable or unwritable, which is generally very foolish, but also +not explicitly prevented by the kernel. + +1. 
http://lkml.kernel.org/r/1522112702-27853-1-git-send-email-linuxram@us.ibm.com + +Signed-off-by: Dave Hansen +Cc: Andrew Morton +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Michael Ellerman +Cc: Peter Zijlstra +Cc: Ram Pai +Cc: Shuah Khan +Cc: Thomas Gleixner +Cc: linux-mm@kvack.org +Cc: stable@vger.kernel.org +Fixes: 58ab9a088dda ("x86/pkeys: Check against max pkey to avoid overflows") +Link: http://lkml.kernel.org/r/20180509171358.47FD785E@viggo.jf.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mmu_context.h | 2 +- + arch/x86/include/asm/pkeys.h | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -192,7 +192,7 @@ static inline int init_new_context(struc + + #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { +- /* pkey 0 is the default and always allocated */ ++ /* pkey 0 is the default and allocated implicitly */ + mm->context.pkey_allocation_map = 0x1; + /* -1 means unallocated or invalid */ + mm->context.execute_only_pkey = -1; +--- a/arch/x86/include/asm/pkeys.h ++++ b/arch/x86/include/asm/pkeys.h +@@ -51,10 +51,10 @@ bool mm_pkey_is_allocated(struct mm_stru + { + /* + * "Allocated" pkeys are those that have been returned +- * from pkey_alloc(). pkey 0 is special, and never +- * returned from pkey_alloc(). ++ * from pkey_alloc() or pkey 0 which is allocated ++ * implicitly when the mm is created. 
+ */ +- if (pkey <= 0) ++ if (pkey < 0) + return false; + if (pkey >= arch_max_pkey()) + return false; diff --git a/queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch b/queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch new file mode 100644 index 00000000000..51dec0432b0 --- /dev/null +++ b/queue-4.16/x86-pkeys-override-pkey-when-moving-away-from-prot_exec.patch @@ -0,0 +1,129 @@ +From 0a0b152083cfc44ec1bb599b57b7aab41327f998 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Wed, 9 May 2018 10:13:51 -0700 +Subject: x86/pkeys: Override pkey when moving away from PROT_EXEC + +From: Dave Hansen + +commit 0a0b152083cfc44ec1bb599b57b7aab41327f998 upstream. + +I got a bug report that the following code (roughly) was +causing a SIGSEGV: + + mprotect(ptr, size, PROT_EXEC); + mprotect(ptr, size, PROT_NONE); + mprotect(ptr, size, PROT_READ); + *ptr = 100; + +The problem is hit when the mprotect(PROT_EXEC) +is implicitly assigned a protection key to the VMA, and made +that key ACCESS_DENY|WRITE_DENY. The PROT_NONE mprotect() +failed to remove the protection key, and the PROT_NONE-> +PROT_READ left the PTE usable, but the pkey still in place +and left the memory inaccessible. + +To fix this, we ensure that we always "override" the pkey +at mprotect() if the VMA does not have execute-only +permissions, but the VMA has the execute-only pkey. + +We had a check for PROT_READ/WRITE, but it did not work +for PROT_NONE. This entirely removes the PROT_* checks, +which ensures that PROT_NONE now works. 
+ +Reported-by: Shakeel Butt +Signed-off-by: Dave Hansen +Cc: Andrew Morton +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Michael Ellerman +Cc: Peter Zijlstra +Cc: Ram Pai +Cc: Shuah Khan +Cc: Thomas Gleixner +Cc: linux-mm@kvack.org +Cc: stable@vger.kernel.org +Fixes: 62b5f7d013f ("mm/core, x86/mm/pkeys: Add execute-only protection keys support") +Link: http://lkml.kernel.org/r/20180509171351.084C5A71@viggo.jf.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pkeys.h | 12 +++++++++++- + arch/x86/mm/pkeys.c | 21 +++++++++++---------- + 2 files changed, 22 insertions(+), 11 deletions(-) + +--- a/arch/x86/include/asm/pkeys.h ++++ b/arch/x86/include/asm/pkeys.h +@@ -2,6 +2,8 @@ + #ifndef _ASM_X86_PKEYS_H + #define _ASM_X86_PKEYS_H + ++#define ARCH_DEFAULT_PKEY 0 ++ + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) + + extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, +@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm + static inline int execute_only_pkey(struct mm_struct *mm) + { + if (!boot_cpu_has(X86_FEATURE_OSPKE)) +- return 0; ++ return ARCH_DEFAULT_PKEY; + + return __execute_only_pkey(mm); + } +@@ -56,6 +58,14 @@ bool mm_pkey_is_allocated(struct mm_stru + return false; + if (pkey >= arch_max_pkey()) + return false; ++ /* ++ * The exec-only pkey is set in the allocation map, but ++ * is not available to any of the user interfaces like ++ * mprotect_pkey(). ++ */ ++ if (pkey == mm->context.execute_only_pkey) ++ return false; ++ + return mm_pkey_allocation_map(mm) & (1U << pkey); + } + +--- a/arch/x86/mm/pkeys.c ++++ b/arch/x86/mm/pkeys.c +@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct + */ + if (pkey != -1) + return pkey; +- /* +- * Look for a protection-key-drive execute-only mapping +- * which is now being given permissions that are not +- * execute-only. Move it back to the default pkey. 
+- */ +- if (vma_is_pkey_exec_only(vma) && +- (prot & (PROT_READ|PROT_WRITE))) { +- return 0; +- } ++ + /* + * The mapping is execute-only. Go try to get the + * execute-only protection key. If we fail to do that, + * fall through as if we do not have execute-only +- * support. ++ * support in this mm. + */ + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(vma->vm_mm); + if (pkey > 0) + return pkey; ++ } else if (vma_is_pkey_exec_only(vma)) { ++ /* ++ * Protections are *not* PROT_EXEC, but the mapping ++ * is using the exec-only pkey. This mapping was ++ * PROT_EXEC and will no longer be. Move back to ++ * the default pkey. ++ */ ++ return ARCH_DEFAULT_PKEY; + } ++ + /* + * This is a vanilla, non-pkey mprotect (or we failed to + * setup execute-only), inherit the pkey from the VMA we -- 2.47.2