From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 12 Sep 2018 19:41:36 +0000 (+0200)
Subject: 4.4-stable patches
X-Git-Tag: v4.4.156~14
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0f7382f470b198a9d1ec709a33a680c95c46ac25;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch
---

diff --git a/queue-4.4/series b/queue-4.4/series
index 9efe99b4342..d21b13fb104 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -33,3 +33,4 @@ btrfs-replace-reset-on-disk-dev-stats-value-after-replace.patch
 btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has-been-initialized.patch
 btrfs-don-t-remove-block-group-that-still-has-pinned-down-bytes.patch
 debugobjects-make-stack-check-warning-more-informative.patch
+x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch
diff --git a/queue-4.4/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch b/queue-4.4/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch
new file mode 100644
index 00000000000..e161caef090
--- /dev/null
+++ b/queue-4.4/x86-pae-use-64-bit-atomic-xchg-function-in-native_ptep_get_and_clear.patch
@@ -0,0 +1,62 @@
+From b2d7a075a1ccef2fb321d595802190c8e9b39004 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 21 Aug 2018 17:37:55 +0200
+Subject: x86/pae: use 64 bit atomic xchg function in native_ptep_get_and_clear
+
+From: Juergen Gross <jgross@suse.com>
+
+commit b2d7a075a1ccef2fb321d595802190c8e9b39004 upstream.
+
+Using only 32-bit writes for the pte will result in an intermediate
+L1TF vulnerable PTE. When running as a Xen PV guest this will at once
+switch the guest to shadow mode resulting in a loss of performance.
+
+Use arch_atomic64_xchg() instead which will perform the requested
+operation atomically with all 64 bits.
+
+Some performance considerations according to:
+
+https://software.intel.com/sites/default/files/managed/ad/dc/Intel-Xeon-Scalable-Processor-throughput-latency.pdf
+
+The main number should be the latency, as there is no tight loop around
+native_ptep_get_and_clear().
+
+"lock cmpxchg8b" has a latency of 20 cycles, while "lock xchg" (with a
+memory operand) isn't mentioned in that document. "lock xadd" (with xadd
+having 3 cycles less latency than xchg) has a latency of 11, so we can
+assume a latency of 14 for "lock xchg".
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Jason Andryuk <jandryuk@gmail.com>
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable-3level.h |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -1,6 +1,8 @@
+ #ifndef _ASM_X86_PGTABLE_3LEVEL_H
+ #define _ASM_X86_PGTABLE_3LEVEL_H
+ 
++#include <asm/atomic64_32.h>
++
+ /*
+  * Intel Physical Address Extension (PAE) Mode - three-level page
+  * tables on PPro+ CPUs.
+@@ -142,10 +144,7 @@ static inline pte_t native_ptep_get_and_
+ {
+ 	pte_t res;
+ 
+-	/* xchg acts as a barrier before the setting of the high bits */
+-	res.pte_low = xchg(&ptep->pte_low, 0);
+-	res.pte_high = ptep->pte_high;
+-	ptep->pte_high = 0;
++	res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
+ 
+ 	return res;
+ }