--- /dev/null
+From 1ddf0b1b11aa8a90cef6706e935fc31c75c406ba Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@amacapital.net>
+Date: Sun, 21 Dec 2014 08:57:46 -0800
+Subject: x86, vdso: Use asm volatile in __getcpu
+
+From: Andy Lutomirski <luto@amacapital.net>
+
+commit 1ddf0b1b11aa8a90cef6706e935fc31c75c406ba upstream.
+
+In Linux 3.18 and below, GCC hoists the lsl instructions in the
+pvclock code all the way to the beginning of __vdso_clock_gettime,
+slowing the non-paravirt case significantly. For unknown reasons,
+presumably related to the removal of a branch, the performance issue
+is gone as of
+
+e76b027e6408 ("x86,vdso: Use LSL unconditionally for vgetcpu")
+
+but I don't trust GCC enough to expect the problem to stay fixed.
+
+There should be no correctness issue, because the __getcpu calls in
+__vdso_clock_gettime were never necessary in the first place.
+
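+For illustration only (this is a hypothetical user-space sketch, not
+kernel code, and the helper names are made up): GCC may treat a plain
+asm that only produces register outputs as a pure function of its
+inputs, so identical uses can be merged or hoisted, while asm volatile
+marks the statement as having side effects so the optimizers do not
+merge or discard it:
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	static inline uint64_t tsc_plain(void)
+	{
+		uint32_t lo, hi;
+		/* Plain asm: with no inputs, GCC may merge two identical
+		 * uses into a single rdtsc. */
+		asm("rdtsc" : "=a" (lo), "=d" (hi));
+		return ((uint64_t)hi << 32) | lo;
+	}
+
+	static inline uint64_t tsc_volatile(void)
+	{
+		uint32_t lo, hi;
+		/* asm volatile: treated as having side effects, so each
+		 * use is emitted separately. */
+		asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+		return ((uint64_t)hi << 32) | lo;
+	}
+
+	int main(void)
+	{
+		/* At -O2 the first pair may come from one rdtsc; the
+		 * second pair always comes from two. */
+		printf("plain:    %llu %llu\n",
+		       (unsigned long long)tsc_plain(),
+		       (unsigned long long)tsc_plain());
+		printf("volatile: %llu %llu\n",
+		       (unsigned long long)tsc_volatile(),
+		       (unsigned long long)tsc_volatile());
+		return 0;
+	}
+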
+Note to stable maintainers: In 3.18 and below, depending on
+configuration, gcc 4.9.2 generates code like this:
+
+ 9c3: 44 0f 03 e8 lsl %ax,%r13d
+ 9c7: 45 89 eb mov %r13d,%r11d
+ 9ca: 0f 03 d8 lsl %ax,%ebx
+
+This patch won't apply as is to any released kernel, but I'll send a
+trivial backported version if needed.
+
+[
+ Backported by Andy Lutomirski. Should apply to all affected
+ versions. This fixes a functionality bug as well as a performance
+ bug: buggy kernels can enter an infinite loop in __vdso_clock_gettime
+ on affected compilers. See, for example:
+
+ https://bugzilla.redhat.com/show_bug.cgi?id=1178975
+]
+
+Fixes: 51c19b4f5927 ("x86: vdso: pvclock gettime support")
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/vsyscall.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -34,7 +34,7 @@ static inline unsigned int __getcpu(void
+ native_read_tscp(&p);
+ } else {
+ /* Load per CPU data from GDT */
+- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
++ asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ }
+
+ return p;
--- /dev/null
+From 394f56fe480140877304d342dec46d50dc823d46 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@amacapital.net>
+Date: Fri, 19 Dec 2014 16:04:11 -0800
+Subject: x86_64, vdso: Fix the vdso address randomization algorithm
+
+From: Andy Lutomirski <luto@amacapital.net>
+
+commit 394f56fe480140877304d342dec46d50dc823d46 upstream.
+
+The theory behind vdso randomization is that it's mapped at a random
+offset above the top of the stack. To avoid wasting a page of
+memory for an extra page table, the vdso isn't supposed to extend
+past the lowest PMD into which it can fit. Other than that, the
+address should be a uniformly distributed address that meets all of
+the alignment requirements.
+
+The current algorithm is buggy: the vdso has about a 50% probability
+of being at the very end of a PMD. The current algorithm also has a
+decent chance of failing outright due to incorrect handling of the
+case where the top of the stack is near the top of its PMD.
+
+This fixes the implementation. The paxtest estimate of vdso
+"randomisation" improves from 11 bits to 18 bits. (Disclaimer: I
+don't know what the paxtest code is actually calculating.)
+
+It's worth noting that this algorithm is inherently biased: the vdso
+is more likely to end up near the end of its PMD than near the
+beginning. Ideally we would either nix the PMD sharing requirement
+or jointly randomize the vdso and the stack to reduce the bias.
+
+In the meantime, this is a considerable improvement with basically
+no risk of compatibility issues, since the allowed outputs of the
+algorithm are unchanged.
+
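+To see why the old code put the vdso at the very end of its PMD so
+often, here is a rough user-space model of the old and new offset
+calculations (the helpers are made up, the constants are hard-coded to
+their x86_64 values, and the model ignores TASK_SIZE_MAX and the final
+alignment step; it is not the kernel code):
+
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	#define PAGE_SHIFT	12
+	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+	#define PMD_SIZE	(1UL << 21)
+	#define PMD_MASK	(~(PMD_SIZE - 1))
+	#define PTRS_PER_PTE	512
+
+	/* Old scheme: always pick one of 512 page offsets, then clamp.
+	 * Every offset that overshoots collapses onto the single address
+	 * "end", so when the stack top sits halfway into its PMD roughly
+	 * half of the random choices land on the same final address. */
+	static unsigned long old_pick(unsigned long start, unsigned long len)
+	{
+		unsigned long end = (start + PMD_SIZE - 1) & PMD_MASK;
+		unsigned long addr;
+
+		end -= len;
+		addr = start + ((rand() & (PTRS_PER_PTE - 1)) << PAGE_SHIFT);
+		return addr >= end ? end : addr;
+	}
+
+	/* New scheme: count how many page-aligned start addresses really
+	 * fit below the PMD boundary and pick uniformly among them. */
+	static unsigned long new_pick(unsigned long start, unsigned long len)
+	{
+		unsigned long end, slots;
+
+		start = (start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+		end = ((start + len + PMD_SIZE - 1) & PMD_MASK) - len;
+		if (end <= start)
+			return start;
+		slots = ((end - start) >> PAGE_SHIFT) + 1;
+		return start + ((rand() % slots) << PAGE_SHIFT);
+	}
+
+	int main(void)
+	{
+		/* A stack top 300 pages into a PMD, vdso two pages long. */
+		unsigned long start = 0x7fffe0000000UL + 300 * PAGE_SIZE;
+		unsigned long len = 2 * PAGE_SIZE;
+		int i;
+
+		for (i = 0; i < 5; i++)
+			printf("old %lx  new %lx\n",
+			       old_pick(start, len), new_pick(start, len));
+		return 0;
+	}
+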
+As an easy test, doing this:
+
+for i in `seq 10000`
+ do grep -P vdso /proc/self/maps |cut -d- -f1
+done |sort |uniq -d
+
+used to produce lots of output (1445 lines on my most recent run).
+A tiny subset looks like this:
+
+7fffdfffe000
+7fffe01fe000
+7fffe05fe000
+7fffe07fe000
+7fffe09fe000
+7fffe0bfe000
+7fffe0dfe000
+
+Note the suspicious fe000 endings. With the fix, I get a much more
+palatable 76 repeated addresses.
+
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/vdso/vma.c | 43 +++++++++++++++++++++++++++++--------------
+ 1 file changed, 29 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/vdso/vma.c
++++ b/arch/x86/vdso/vma.c
+@@ -117,30 +117,45 @@ subsys_initcall(init_vdso);
+
+ struct linux_binprm;
+
+-/* Put the vdso above the (randomized) stack with another randomized offset.
+- This way there is no hole in the middle of address space.
+- To save memory make sure it is still in the same PTE as the stack top.
+- This doesn't give that many random bits */
++/*
++ * Put the vdso above the (randomized) stack with another randomized
++ * offset. This way there is no hole in the middle of address space.
++ * To save memory make sure it is still in the same PTE as the stack
++ * top. This doesn't give that many random bits.
++ *
++ * Note that this algorithm is imperfect: the distribution of the vdso
++ * start address within a PMD is biased toward the end.
++ *
++ * Only used for the 64-bit and x32 vdsos.
++ */
+ static unsigned long vdso_addr(unsigned long start, unsigned len)
+ {
+ unsigned long addr, end;
+ unsigned offset;
+- end = (start + PMD_SIZE - 1) & PMD_MASK;
++
++ /*
++ * Round up the start address. It can start out unaligned as a result
++ * of stack start randomization.
++ */
++ start = PAGE_ALIGN(start);
++
++ /* Round the lowest possible end address up to a PMD boundary. */
++ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+ if (end >= TASK_SIZE_MAX)
+ end = TASK_SIZE_MAX;
+ end -= len;
+- /* This loses some more bits than a modulo, but is cheaper */
+- offset = get_random_int() & (PTRS_PER_PTE - 1);
+- addr = start + (offset << PAGE_SHIFT);
+- if (addr >= end)
+- addr = end;
++
++ if (end > start) {
++ offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
++ addr = start + (offset << PAGE_SHIFT);
++ } else {
++ addr = start;
++ }
+
+ /*
+- * page-align it here so that get_unmapped_area doesn't
+- * align it wrongfully again to the next page. addr can come in 4K
+- * unaligned here as a result of stack start randomization.
++ * Forcibly align the final address in case we have a hardware
++ * issue that requires alignment for performance reasons.
+ */
+- addr = PAGE_ALIGN(addr);
+ addr = align_vdso_addr(addr);
+
+ return addr;