From c7b10754bb0c772f5ccb9018242fadb9c5b53d5c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Sep 2018 21:41:53 +0200 Subject: [PATCH] 4.9-stable patches added patches: x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch --- queue-4.9/series | 1 + ...unction-in-native_ptep_get_and_clear.patch | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 queue-4.9/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch diff --git a/queue-4.9/series b/queue-4.9/series index e5e01f01fef..5e43942f370 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -58,3 +58,4 @@ arm-rockchip-force-config_pm-on-rockchip-systems.patch drm-edid-add-6-bpc-quirk-for-sdc-panel-in-lenovo-b50-80.patch tcp-revert-tcp-tcp_probe-use-spin_lock_bh.patch debugobjects-make-stack-check-warning-more-informative.patch +x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch diff --git a/queue-4.9/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch b/queue-4.9/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch new file mode 100644 index 00000000000..e161caef090 --- /dev/null +++ b/queue-4.9/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch @@ -0,0 +1,62 @@ +From b2d7a075a1ccef2fb321d595802190c8e9b39004 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 21 Aug 2018 17:37:55 +0200 +Subject: x86/pae: use 64 bit atomic xchg function in native_ptep_get_and_clear + +From: Juergen Gross + +commit b2d7a075a1ccef2fb321d595802190c8e9b39004 upstream. + +Using only 32-bit writes for the pte will result in an intermediate +L1TF vulnerable PTE. When running as a Xen PV guest this will at once +switch the guest to shadow mode resulting in a loss of performance. + +Use arch_atomic64_xchg() instead which will perform the requested +operation atomically with all 64 bits. 
+ +Some performance considerations according to: + +https://software.intel.com/sites/default/files/managed/ad/dc/Intel-Xeon-Scalable-Processor-throughput-latency.pdf + +The main number should be the latency, as there is no tight loop around +native_ptep_get_and_clear(). + +"lock cmpxchg8b" has a latency of 20 cycles, while "lock xchg" (with a +memory operand) isn't mentioned in that document. "lock xadd" (with xadd +having 3 cycles less latency than xchg) has a latency of 11, so we can +assume a latency of 14 for "lock xchg". + +Signed-off-by: Juergen Gross +Reviewed-by: Thomas Gleixner +Reviewed-by: Jan Beulich +Tested-by: Jason Andryuk +Signed-off-by: Boris Ostrovsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pgtable-3level.h | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/pgtable-3level.h ++++ b/arch/x86/include/asm/pgtable-3level.h +@@ -1,6 +1,8 @@ + #ifndef _ASM_X86_PGTABLE_3LEVEL_H + #define _ASM_X86_PGTABLE_3LEVEL_H + ++#include <asm/atomic64_32.h> ++ + /* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. +@@ -142,10 +144,7 @@ static inline pte_t native_ptep_get_and_ + { + pte_t res; + +- /* xchg acts as a barrier before the setting of the high bits */ +- res.pte_low = xchg(&ptep->pte_low, 0); +- res.pte_high = ptep->pte_high; +- ptep->pte_high = 0; ++ res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); + + return res; + } -- 2.47.2