From: Greg Kroah-Hartman Date: Thu, 27 Feb 2020 09:00:40 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.4.215~46 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8a280a2d58e42596b034bcb0d3a280d7bdd98df8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: ext4-add-cond_resched-to-__ext4_find_entry.patch ext4-fix-a-data-race-in-ext4_i-inode-i_disksize.patch kvm-nvmx-check-io-instruction-vm-exit-conditions.patch kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch kvm-x86-emulate-rdpid.patch x86-vdso-use-rdpid-in-preference-to-lsl-when-available.patch --- diff --git a/queue-4.4/ext4-add-cond_resched-to-__ext4_find_entry.patch b/queue-4.4/ext4-add-cond_resched-to-__ext4_find_entry.patch new file mode 100644 index 00000000000..980c7253a0d --- /dev/null +++ b/queue-4.4/ext4-add-cond_resched-to-__ext4_find_entry.patch @@ -0,0 +1,71 @@ +From 9424ef56e13a1f14c57ea161eed3ecfdc7b2770e Mon Sep 17 00:00:00 2001 +From: Shijie Luo +Date: Sat, 15 Feb 2020 03:02:06 -0500 +Subject: ext4: add cond_resched() to __ext4_find_entry() + +From: Shijie Luo + +commit 9424ef56e13a1f14c57ea161eed3ecfdc7b2770e upstream. + +We tested a soft lockup problem in linux 4.19 which could also +be found in linux 5.x. + +When dir inode takes up a large number of blocks, and if the +directory is growing when we are searching, it's possible the +restart branch could be called many times, and the do while loop +could hold cpu a long time. + +Here is the call trace in linux 4.19. + +[ 473.756186] Call trace: +[ 473.756196] dump_backtrace+0x0/0x198 +[ 473.756199] show_stack+0x24/0x30 +[ 473.756205] dump_stack+0xa4/0xcc +[ 473.756210] watchdog_timer_fn+0x300/0x3e8 +[ 473.756215] __hrtimer_run_queues+0x114/0x358 +[ 473.756217] hrtimer_interrupt+0x104/0x2d8 +[ 473.756222] arch_timer_handler_virt+0x38/0x58 +[ 473.756226] handle_percpu_devid_irq+0x90/0x248 +[ 473.756231] generic_handle_irq+0x34/0x50 +[ 473.756234] __handle_domain_irq+0x68/0xc0 +[ 473.756236] gic_handle_irq+0x6c/0x150 +[ 473.756238] el1_irq+0xb8/0x140 +[ 473.756286] ext4_es_lookup_extent+0xdc/0x258 [ext4] +[ 473.756310] ext4_map_blocks+0x64/0x5c0 [ext4] +[ 473.756333] ext4_getblk+0x6c/0x1d0 [ext4] +[ 473.756356] ext4_bread_batch+0x7c/0x1f8 [ext4] +[ 473.756379] ext4_find_entry+0x124/0x3f8 [ext4] +[ 473.756402] ext4_lookup+0x8c/0x258 [ext4] +[ 473.756407] __lookup_hash+0x8c/0xe8 +[ 473.756411] filename_create+0xa0/0x170 +[ 473.756413] do_mkdirat+0x6c/0x140 +[ 473.756415] __arm64_sys_mkdirat+0x28/0x38 +[ 473.756419] el0_svc_common+0x78/0x130 +[ 473.756421] el0_svc_handler+0x38/0x78 +[ 473.756423] el0_svc+0x8/0xc +[ 485.755156] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [tmp:5149] + +Add cond_resched() to avoid soft lockup and to provide a better +system responding. + +Link: https://lore.kernel.org/r/20200215080206.13293-1-luoshijie1@huawei.com +Signed-off-by: Shijie Luo +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/namei.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1418,6 +1418,7 @@ restart: + /* + * We deal with the read-ahead logic here. 
+ */ ++ cond_resched(); + if (ra_ptr >= ra_max) { + /* Refill the readahead buffer */ + ra_ptr = 0; diff --git a/queue-4.4/ext4-fix-a-data-race-in-ext4_i-inode-i_disksize.patch b/queue-4.4/ext4-fix-a-data-race-in-ext4_i-inode-i_disksize.patch new file mode 100644 index 00000000000..a5e4c688670 --- /dev/null +++ b/queue-4.4/ext4-fix-a-data-race-in-ext4_i-inode-i_disksize.patch @@ -0,0 +1,87 @@ +From 35df4299a6487f323b0aca120ea3f485dfee2ae3 Mon Sep 17 00:00:00 2001 +From: Qian Cai +Date: Fri, 7 Feb 2020 09:29:11 -0500 +Subject: ext4: fix a data race in EXT4_I(inode)->i_disksize + +From: Qian Cai + +commit 35df4299a6487f323b0aca120ea3f485dfee2ae3 upstream. + +EXT4_I(inode)->i_disksize could be accessed concurrently as noticed by +KCSAN, + + BUG: KCSAN: data-race in ext4_write_end [ext4] / ext4_writepages [ext4] + + write to 0xffff91c6713b00f8 of 8 bytes by task 49268 on cpu 127: + ext4_write_end+0x4e3/0x750 [ext4] + ext4_update_i_disksize at fs/ext4/ext4.h:3032 + (inlined by) ext4_update_inode_size at fs/ext4/ext4.h:3046 + (inlined by) ext4_write_end at fs/ext4/inode.c:1287 + generic_perform_write+0x208/0x2a0 + ext4_buffered_write_iter+0x11f/0x210 [ext4] + ext4_file_write_iter+0xce/0x9e0 [ext4] + new_sync_write+0x29c/0x3b0 + __vfs_write+0x92/0xa0 + vfs_write+0x103/0x260 + ksys_write+0x9d/0x130 + __x64_sys_write+0x4c/0x60 + do_syscall_64+0x91/0xb47 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + + read to 0xffff91c6713b00f8 of 8 bytes by task 24872 on cpu 37: + ext4_writepages+0x10ac/0x1d00 [ext4] + mpage_map_and_submit_extent at fs/ext4/inode.c:2468 + (inlined by) ext4_writepages at fs/ext4/inode.c:2772 + do_writepages+0x5e/0x130 + __writeback_single_inode+0xeb/0xb20 + writeback_sb_inodes+0x429/0x900 + __writeback_inodes_wb+0xc4/0x150 + wb_writeback+0x4bd/0x870 + wb_workfn+0x6b4/0x960 + process_one_work+0x54c/0xbe0 + worker_thread+0x80/0x650 + kthread+0x1e0/0x200 + ret_from_fork+0x27/0x50 + + Reported by Kernel Concurrency Sanitizer on: + CPU: 37 PID: 24872 Comm: kworker/u261:2 Tainted: G W O L 5.5.0-next-20200204+ #5 + Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 + Workqueue: writeback wb_workfn (flush-7:0) + +Since only the read is operating as lockless (outside of the +"i_data_sem"), load tearing could introduce a logic bug. Fix it by +adding READ_ONCE() for the read and WRITE_ONCE() for the write. + +Signed-off-by: Qian Cai +Link: https://lore.kernel.org/r/1581085751-31793-1-git-send-email-cai@lca.pw +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/inode.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -2851,7 +2851,7 @@ static inline void ext4_update_i_disksiz + !mutex_is_locked(&inode->i_mutex)); + down_write(&EXT4_I(inode)->i_data_sem); + if (newsize > EXT4_I(inode)->i_disksize) +- EXT4_I(inode)->i_disksize = newsize; ++ WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); + up_write(&EXT4_I(inode)->i_data_sem); + } + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2320,7 +2320,7 @@ update_disksize: + * truncate are avoided by checking i_size under i_data_sem. 
+ */ + disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; +- if (disksize > EXT4_I(inode)->i_disksize) { ++ if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { + int err2; + loff_t i_size; + diff --git a/queue-4.4/kvm-nvmx-check-io-instruction-vm-exit-conditions.patch b/queue-4.4/kvm-nvmx-check-io-instruction-vm-exit-conditions.patch new file mode 100644 index 00000000000..d4b91b57142 --- /dev/null +++ b/queue-4.4/kvm-nvmx-check-io-instruction-vm-exit-conditions.patch @@ -0,0 +1,110 @@ +From 35a571346a94fb93b5b3b6a599675ef3384bc75c Mon Sep 17 00:00:00 2001 +From: Oliver Upton +Date: Tue, 4 Feb 2020 15:26:31 -0800 +Subject: KVM: nVMX: Check IO instruction VM-exit conditions + +From: Oliver Upton + +commit 35a571346a94fb93b5b3b6a599675ef3384bc75c upstream. + +Consult the 'unconditional IO exiting' and 'use IO bitmaps' VM-execution +controls when checking instruction interception. If the 'use IO bitmaps' +VM-execution control is 1, check the instruction access against the IO +bitmaps to determine if the instruction causes a VM-exit. + +Signed-off-by: Oliver Upton +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 52 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4281,7 +4281,7 @@ static bool nested_vmx_exit_handled_io(s + struct vmcs12 *vmcs12) + { + unsigned long exit_qualification; +- unsigned int port; ++ unsigned short port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) +@@ -10754,6 +10754,39 @@ static void nested_vmx_entry_failure(str + to_vmx(vcpu)->nested.sync_shadow_vmcs = true; + } + ++static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned short port; ++ bool intercept; ++ int size; ++ ++ if (info->intercept == x86_intercept_in || ++ info->intercept == x86_intercept_ins) { ++ port = info->src_val; ++ size = info->dst_bytes; ++ } else { ++ port = info->dst_val; ++ size = info->src_bytes; ++ } ++ ++ /* ++ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction ++ * VM-exits depend on the 'unconditional IO exiting' VM-execution ++ * control. ++ * ++ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. ++ */ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ intercept = nested_cpu_has(vmcs12, ++ CPU_BASED_UNCOND_IO_EXITING); ++ else ++ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); ++ ++ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; ++} ++ + static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +@@ -10761,18 +10794,30 @@ static int vmx_check_intercept(struct kv + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + ++ switch (info->intercept) { + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. 
+ */ +- if (info->intercept == x86_intercept_rdtscp && +- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { +- ctxt->exception.vector = UD_VECTOR; +- ctxt->exception.error_code_valid = false; +- return X86EMUL_PROPAGATE_FAULT; +- } ++ case x86_intercept_rdtscp: ++ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { ++ ctxt->exception.vector = UD_VECTOR; ++ ctxt->exception.error_code_valid = false; ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ break; ++ ++ case x86_intercept_in: ++ case x86_intercept_ins: ++ case x86_intercept_out: ++ case x86_intercept_outs: ++ return vmx_check_intercept_io(vcpu, info); + + /* TODO: check more intercepts... */ ++ default: ++ break; ++ } ++ + return X86EMUL_UNHANDLEABLE; + } + diff --git a/queue-4.4/kvm-nvmx-don-t-emulate-instructions-in-guest-mode.patch b/queue-4.4/kvm-nvmx-don-t-emulate-instructions-in-guest-mode.patch index 3fde74d9c79..f09fccd0e15 100644 --- a/queue-4.4/kvm-nvmx-don-t-emulate-instructions-in-guest-mode.patch +++ b/queue-4.4/kvm-nvmx-don-t-emulate-instructions-in-guest-mode.patch @@ -17,22 +17,17 @@ Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- - arch/x86/kvm/vmx.c | 2 +- + arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 6c2b45f5d5016..be7f4c018943b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c -@@ -10744,7 +10744,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage) - { +@@ -10759,7 +10759,7 @@ static int vmx_check_intercept(struct kv + } + + /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + return X86EMUL_UNHANDLEABLE; } static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) --- -2.20.1 - diff --git a/queue-4.4/kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch b/queue-4.4/kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch new file mode 100644 index 00000000000..a18f9c80f09 --- /dev/null +++ b/queue-4.4/kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch @@ -0,0 +1,82 @@ +From e71237d3ff1abf9f3388337cfebf53b96df2020d Mon Sep 17 00:00:00 2001 +From: Oliver Upton +Date: Tue, 4 Feb 2020 15:26:30 -0800 +Subject: KVM: nVMX: Refactor IO bitmap checks into helper function + +From: Oliver Upton + +commit e71237d3ff1abf9f3388337cfebf53b96df2020d upstream. + +Checks against the IO bitmap are useful for both instruction emulation +and VM-exit reflection. Refactor the IO bitmap checks into a helper +function. 
+ +Signed-off-by: Oliver Upton +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++------------- + 1 file changed, 27 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4275,6 +4275,26 @@ static bool cs_ss_rpl_check(struct kvm_v + (ss.selector & SEGMENT_RPL_MASK)); + } + ++static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, ++ unsigned int port, int size); ++static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ unsigned long exit_qualification; ++ unsigned int port; ++ int size; ++ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ port = exit_qualification >> 16; ++ size = (exit_qualification & 7) + 1; ++ ++ return nested_vmx_check_io_bitmaps(vcpu, port, size); ++} ++ + /* + * Check if guest state is valid. Returns true if valid, false if + * not. +@@ -7624,23 +7644,17 @@ static int (*const kvm_vmx_exit_handlers + static const int kvm_vmx_max_exit_handlers = + ARRAY_SIZE(kvm_vmx_exit_handlers); + +-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, +- struct vmcs12 *vmcs12) ++/* ++ * Return true if an IO instruction with the specified port and size should cause ++ * a VM-exit into L1. ++ */ ++bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, ++ int size) + { +- unsigned long exit_qualification; ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gpa_t bitmap, last_bitmap; +- unsigned int port; +- int size; + u8 b; + +- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) +- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); +- +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION); +- +- port = exit_qualification >> 16; +- size = (exit_qualification & 7) + 1; +- + last_bitmap = (gpa_t)-1; + b = -1; + diff --git a/queue-4.4/kvm-x86-emulate-rdpid.patch b/queue-4.4/kvm-x86-emulate-rdpid.patch new file mode 100644 index 00000000000..a715959ae82 --- /dev/null +++ b/queue-4.4/kvm-x86-emulate-rdpid.patch @@ -0,0 +1,110 @@ +From fb6d4d340e0532032c808a9933eaaa7b8de435ab Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 12 Jul 2016 11:04:26 +0200 +Subject: KVM: x86: emulate RDPID + +From: Paolo Bonzini + +commit fb6d4d340e0532032c808a9933eaaa7b8de435ab upstream. + +This is encoded as F3 0F C7 /7 with a register argument. The register +argument is the second array in the group9 GroupDual, while F3 is the +fourth element of a Prefix. 
+ +Reviewed-by: Wanpeng Li +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/cpuid.c | 7 ++++++- + arch/x86/kvm/emulate.c | 22 +++++++++++++++++++++- + arch/x86/kvm/vmx.c | 15 +++++++++++++++ + 3 files changed, 42 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -267,13 +267,18 @@ static int __do_cpuid_ent_emulated(struc + { + switch (func) { + case 0: +- entry->eax = 1; /* only one leaf currently */ ++ entry->eax = 7; + ++*nent; + break; + case 1: + entry->ecx = F(MOVBE); + ++*nent; + break; ++ case 7: ++ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++ if (index == 0) ++ entry->ecx = F(RDPID); ++ ++*nent; + default: + break; + } +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -3519,6 +3519,16 @@ static int em_cwd(struct x86_emulate_ctx + return X86EMUL_CONTINUE; + } + ++static int em_rdpid(struct x86_emulate_ctxt *ctxt) ++{ ++ u64 tsc_aux = 0; ++ ++ if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) ++ return emulate_gp(ctxt, 0); ++ ctxt->dst.val = tsc_aux; ++ return X86EMUL_CONTINUE; ++} ++ + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) + { + u64 tsc = 0; +@@ -4379,10 +4389,20 @@ static const struct opcode group8[] = { + F(DstMem | SrcImmByte | Lock | PageTable, em_btc), + }; + ++/* ++ * The "memory" destination is actually always a register, since we come ++ * from the register case of group9. ++ */ ++static const struct gprefix pfx_0f_c7_7 = { ++ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), ++}; ++ ++ + static const struct group_dual group9 = { { + N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, + }, { +- N, N, N, N, N, N, N, N, ++ N, N, N, N, N, N, N, ++ GP(0, &pfx_0f_c7_7), + } }; + + static const struct opcode group11[] = { +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -10744,6 +10744,21 @@ static int vmx_check_intercept(struct kv + struct x86_instruction_info *info, + enum x86_intercept_stage stage) + { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ ++ /* ++ * RDPID causes #UD if disabled through secondary execution controls. ++ * Because it is marked as EmulateOnUD, we need to intercept it here. ++ */ ++ if (info->intercept == x86_intercept_rdtscp && ++ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { ++ ctxt->exception.vector = UD_VECTOR; ++ ctxt->exception.error_code_valid = false; ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ ++ /* TODO: check more intercepts... 
*/ + return X86EMUL_CONTINUE; + } + diff --git a/queue-4.4/series b/queue-4.4/series index 047ec82efdb..1677b06db41 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -1,3 +1,5 @@ +x86-vdso-use-rdpid-in-preference-to-lsl-when-available.patch +kvm-x86-emulate-rdpid.patch alsa-hda-use-scnprintf-for-printing-texts-for-sysfs-procfs.patch ecryptfs-fix-a-memory-leak-bug-in-parse_tag_1_packet.patch ecryptfs-fix-a-memory-leak-bug-in-ecryptfs_init_messaging.patch @@ -96,3 +98,7 @@ vt_resizex-get-rid-of-field-by-field-copyin.patch vt-vt_ioctl-fix-race-in-vt_resizex.patch kvm-nvmx-don-t-emulate-instructions-in-guest-mode.patch netfilter-xt_bpf-add-overflow-checks.patch +ext4-fix-a-data-race-in-ext4_i-inode-i_disksize.patch +ext4-add-cond_resched-to-__ext4_find_entry.patch +kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch +kvm-nvmx-check-io-instruction-vm-exit-conditions.patch diff --git a/queue-4.4/x86-vdso-use-rdpid-in-preference-to-lsl-when-available.patch b/queue-4.4/x86-vdso-use-rdpid-in-preference-to-lsl-when-available.patch new file mode 100644 index 00000000000..6b5c58f1d6f --- /dev/null +++ b/queue-4.4/x86-vdso-use-rdpid-in-preference-to-lsl-when-available.patch @@ -0,0 +1,59 @@ +From a582c540ac1b10f0a7d37415e04c4af42409fd08 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Wed, 16 Nov 2016 10:23:27 -0800 +Subject: x86/vdso: Use RDPID in preference to LSL when available + +From: Andy Lutomirski + +commit a582c540ac1b10f0a7d37415e04c4af42409fd08 upstream. + +RDPID is a new instruction that reads MSR_TSC_AUX quickly. This +should be considerably faster than reading the GDT. Add a +cpufeature for it and use it from __vdso_getcpu() when available. + +Tested-by: Megha Dey +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/4f6c3a22012d10f1c65b9ca15800e01b42c7d39d.1479320367.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/vgtod.h | 7 ++++++- + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -301,6 +301,7 @@ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ + #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ + #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ ++#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ + + /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ + #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +--- a/arch/x86/include/asm/vgtod.h ++++ b/arch/x86/include/asm/vgtod.h +@@ -83,8 +83,13 @@ static inline unsigned int __getcpu(void + * works on all CPUs. This is volatile so that it orders + * correctly wrt barrier() and to keep gcc from cleverly + * hoisting it out of the calling function. ++ * ++ * If RDPID is available, use it. + */ +- asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); ++ alternative_io ("lsl %[p],%[seg]", ++ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ ++ X86_FEATURE_RDPID, ++ [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); + + return p; + }
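
A minimal user-space sketch of the same RDPID-versus-fallback idea the vDSO patch implements (illustrative only, not part of the queued series): it probes CPUID.(EAX=7,ECX=0):ECX[22], the bit X86_FEATURE_RDPID maps to, and issues the same F3 0F C7 F8 byte sequence the alternative_io() site uses. The helper names (have_rdpid, getcpu_fast) and the assumption that the CPU number sits in the low 12 bits of TSC_AUX are choices of this sketch, with sched_getcpu() as the portable fallback.

	/*
	 * Hedged sketch: detect RDPID and read the CPU number from TSC_AUX,
	 * otherwise fall back to sched_getcpu().  Build with gcc on x86-64.
	 */
	#define _GNU_SOURCE
	#include <cpuid.h>
	#include <sched.h>
	#include <stdio.h>

	static int have_rdpid(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID.(EAX=7,ECX=0):ECX bit 22 advertises RDPID. */
		if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
			return 0;
		return (ecx >> 22) & 1;
	}

	static unsigned int getcpu_fast(void)
	{
		unsigned long p;

		if (have_rdpid()) {
			/* RDPID %rax -- same bytes the patch feeds to alternative_io(). */
			asm volatile (".byte 0xf3,0x0f,0xc7,0xf8" : "=a" (p));
			return p & 0xfff;	/* assumed getcpu encoding: CPU in low 12 bits */
		}
		return sched_getcpu();		/* portable fallback when RDPID is absent */
	}

	int main(void)
	{
		printf("cpu %u (rdpid %s)\n",
		       getcpu_fast(), have_rdpid() ? "yes" : "no");
		return 0;
	}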