From 629c0ddde6af9e6250a29467a2a1cf5e273d3939 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 20 Feb 2018 09:47:05 +0100
Subject: [PATCH] 4.4-stable patches

added patches:
	kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch
---
 ...ays-inlining-iterator-helper-methods.patch | 100 ++++++++++++++++++
 queue-4.4/series                              |   1 +
 2 files changed, 101 insertions(+)
 create mode 100644 queue-4.4/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch

diff --git a/queue-4.4/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch b/queue-4.4/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch
new file mode 100644
index 00000000000..98f886277fa
--- /dev/null
+++ b/queue-4.4/kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch
@@ -0,0 +1,100 @@
+From 928a4c39484281f8ca366f53a1db79330d058401 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 10 Feb 2018 23:39:24 +0000
+Subject: KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 928a4c39484281f8ca366f53a1db79330d058401 upstream.
+
+With retpoline, tight loops of "call this function for every XXX" are
+very much pessimised by taking a prediction miss *every* time. This one
+is by far the biggest contributor to the guest launch time with retpoline.
+
+By marking the iterator slot_handle_â¦() functions always_inline, we can
+ensure that the indirect function call can be optimised away into a
+direct call and it actually generates slightly smaller code because
+some of the other conditionals can get optimised away too.
+
+Performance is now pretty close to what we see with nospectre_v2 on
+the command line.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Tested-by: Filippo Sironi <sironi@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Reviewed-by: Filippo Sironi <sironi@amazon.de>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: arjan.van.de.ven@intel.com
+Cc: dave.hansen@intel.com
+Cc: jmattson@google.com
+Cc: karahmed@amazon.de
+Cc: kvm@vger.kernel.org
+Cc: rkrcmar@redhat.com
+Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/mmu.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -4503,7 +4503,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu
+ typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap);
+ 
+ /* The caller should hold mmu-lock before calling this function. */
+-static bool
++static __always_inline bool
+ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 			slot_level_handler fn, int start_level, int end_level,
+ 			gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+@@ -4533,7 +4533,7 @@ slot_handle_level_range(struct kvm *kvm,
+ 	return flush;
+ }
+ 
+-static bool
++static __always_inline bool
+ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 		  slot_level_handler fn, int start_level, int end_level,
+ 		  bool lock_flush_tlb)
+@@ -4544,7 +4544,7 @@ slot_handle_level(struct kvm *kvm, struc
+ 			lock_flush_tlb);
+ }
+ 
+-static bool
++static __always_inline bool
+ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 		      slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4552,7 +4552,7 @@ slot_handle_all_level(struct kvm *kvm, s
+ 				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+ 
+-static bool
++static __always_inline bool
+ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 			slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4560,7 +4560,7 @@ slot_handle_large_level(struct kvm *kvm,
+ 				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+ 
+-static bool
++static __always_inline bool
+ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 		 slot_level_handler fn, bool lock_flush_tlb)
+ {
diff --git a/queue-4.4/series b/queue-4.4/series
index db4d29f1cd3..f6baf2ffe35 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -6,3 +6,4 @@ arm-spear600-add-missing-interrupt-parent-of-rtc.patch
 arm-spear13xx-fix-dmas-cells.patch
 arm-spear13xx-fix-spics-gpio-controller-s-warning.patch
 alsa-seq-fix-regression-by-incorrect-ioctl_mutex-usages.patch
+kvm-x86-reduce-retpoline-performance-impact-in-slot_handle_level_range-by-always-inlining-iterator-helper-methods.patch
-- 
2.47.3