git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Jan 2018 13:17:39 +0000 (14:17 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Jan 2018 13:17:39 +0000 (14:17 +0100)
added patches:
x86-vdso-get-pvclock-data-from-the-vvar-vma-instead-of-the-fixmap.patch
x86-vdso-pvclock-simplify-and-speed-up-the-vdso-pvclock-reader.patch

queue-4.4/series
queue-4.4/x86-vdso-get-pvclock-data-from-the-vvar-vma-instead-of-the-fixmap.patch [new file with mode: 0644]
queue-4.4/x86-vdso-pvclock-simplify-and-speed-up-the-vdso-pvclock-reader.patch [new file with mode: 0644]

index 1c42ce4b35cbeafc4161c15f9de20d77cb202f4b..68e5135c7ed43920169c4ccc10e41ae61dd93747 100644 (file)
@@ -35,3 +35,5 @@ kaiser-disabled-on-xen-pv.patch
 x86-kaiser-move-feature-detection-up.patch
 kpti-rename-to-page_table_isolation.patch
 kpti-report-when-enabled.patch
+x86-vdso-pvclock-simplify-and-speed-up-the-vdso-pvclock-reader.patch
+x86-vdso-get-pvclock-data-from-the-vvar-vma-instead-of-the-fixmap.patch
diff --git a/queue-4.4/x86-vdso-get-pvclock-data-from-the-vvar-vma-instead-of-the-fixmap.patch b/queue-4.4/x86-vdso-get-pvclock-data-from-the-vvar-vma-instead-of-the-fixmap.patch
new file mode 100644 (file)
index 0000000..8625be3
--- /dev/null
@@ -0,0 +1,192 @@
+From dac16fba6fc590fa7239676b35ed75dae4c4cd2b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 10 Dec 2015 19:20:20 -0800
+Subject: x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit dac16fba6fc590fa7239676b35ed75dae4c4cd2b upstream.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/9d37826fdc7e2d2809efe31d5345f97186859284.1449702533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Jamie Iles <jamie.iles@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vdso/vclock_gettime.c  |   20 ++++++++------------
+ arch/x86/entry/vdso/vdso-layout.lds.S |    3 ++-
+ arch/x86/entry/vdso/vdso2c.c          |    3 +++
+ arch/x86/entry/vdso/vma.c             |   13 +++++++++++++
+ arch/x86/include/asm/pvclock.h        |    9 +++++++++
+ arch/x86/include/asm/vdso.h           |    1 +
+ arch/x86/kernel/kvmclock.c            |    5 +++++
+ 7 files changed, 41 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/entry/vdso/vclock_gettime.c
++++ b/arch/x86/entry/vdso/vclock_gettime.c
+@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)
+ }
+ #endif
++#ifdef CONFIG_PARAVIRT_CLOCK
++extern u8 pvclock_page
++      __attribute__((visibility("hidden")));
++#endif
++
+ #ifndef BUILD_VDSO32
+ #include <linux/kernel.h>
+@@ -62,23 +67,14 @@ notrace static long vdso_fallback_gtod(s
+ #ifdef CONFIG_PARAVIRT_CLOCK
+-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
++static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
+ {
+-      const struct pvclock_vsyscall_time_info *pvti_base;
+-      int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+-      int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+-
+-      BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+-
+-      pvti_base = (struct pvclock_vsyscall_time_info *)
+-                  __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+-
+-      return &pvti_base[offset];
++      return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
+ }
+ static notrace cycle_t vread_pvclock(int *mode)
+ {
+-      const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti;
++      const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
+       cycle_t ret;
+       u64 tsc, pvti_tsc;
+       u64 last, delta, pvti_system_time;
+--- a/arch/x86/entry/vdso/vdso-layout.lds.S
++++ b/arch/x86/entry/vdso/vdso-layout.lds.S
+@@ -25,7 +25,7 @@ SECTIONS
+        * segment.
+        */
+-      vvar_start = . - 2 * PAGE_SIZE;
++      vvar_start = . - 3 * PAGE_SIZE;
+       vvar_page = vvar_start;
+       /* Place all vvars at the offsets in asm/vvar.h. */
+@@ -36,6 +36,7 @@ SECTIONS
+ #undef EMIT_VVAR
+       hpet_page = vvar_start + PAGE_SIZE;
++      pvclock_page = vvar_start + 2 * PAGE_SIZE;
+       . = SIZEOF_HEADERS;
+--- a/arch/x86/entry/vdso/vdso2c.c
++++ b/arch/x86/entry/vdso/vdso2c.c
+@@ -73,6 +73,7 @@ enum {
+       sym_vvar_start,
+       sym_vvar_page,
+       sym_hpet_page,
++      sym_pvclock_page,
+       sym_VDSO_FAKE_SECTION_TABLE_START,
+       sym_VDSO_FAKE_SECTION_TABLE_END,
+ };
+@@ -80,6 +81,7 @@ enum {
+ const int special_pages[] = {
+       sym_vvar_page,
+       sym_hpet_page,
++      sym_pvclock_page,
+ };
+ struct vdso_sym {
+@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
+       [sym_vvar_start] = {"vvar_start", true},
+       [sym_vvar_page] = {"vvar_page", true},
+       [sym_hpet_page] = {"hpet_page", true},
++      [sym_pvclock_page] = {"pvclock_page", true},
+       [sym_VDSO_FAKE_SECTION_TABLE_START] = {
+               "VDSO_FAKE_SECTION_TABLE_START", false
+       },
+--- a/arch/x86/entry/vdso/vma.c
++++ b/arch/x86/entry/vdso/vma.c
+@@ -100,6 +100,7 @@ static int map_vdso(const struct vdso_im
+               .name = "[vvar]",
+               .pages = no_pages,
+       };
++      struct pvclock_vsyscall_time_info *pvti;
+       if (calculate_addr) {
+               addr = vdso_addr(current->mm->start_stack,
+@@ -169,6 +170,18 @@ static int map_vdso(const struct vdso_im
+       }
+ #endif
++      pvti = pvclock_pvti_cpu0_va();
++      if (pvti && image->sym_pvclock_page) {
++              ret = remap_pfn_range(vma,
++                                    text_start + image->sym_pvclock_page,
++                                    __pa(pvti) >> PAGE_SHIFT,
++                                    PAGE_SIZE,
++                                    PAGE_READONLY);
++
++              if (ret)
++                      goto up_fail;
++      }
++
+ up_fail:
+       if (ret)
+               current->mm->context.vdso = NULL;
+--- a/arch/x86/include/asm/pvclock.h
++++ b/arch/x86/include/asm/pvclock.h
+@@ -4,6 +4,15 @@
+ #include <linux/clocksource.h>
+ #include <asm/pvclock-abi.h>
++#ifdef CONFIG_PARAVIRT_CLOCK
++extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
++#else
++static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
++{
++      return NULL;
++}
++#endif
++
+ /* some helper functions for xen and kvm pv clock sources */
+ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
+--- a/arch/x86/include/asm/vdso.h
++++ b/arch/x86/include/asm/vdso.h
+@@ -22,6 +22,7 @@ struct vdso_image {
+       long sym_vvar_page;
+       long sym_hpet_page;
++      long sym_pvclock_page;
+       long sym_VDSO32_NOTE_MASK;
+       long sym___kernel_sigreturn;
+       long sym___kernel_rt_sigreturn;
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmc
+ static struct pvclock_vsyscall_time_info *hv_clock;
+ static struct pvclock_wall_clock wall_clock;
++struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
++{
++      return hv_clock;
++}
++
+ /*
+  * The wallclock is the time of day when we booted. Since then, some time may
+  * have elapsed since the hypervisor wrote the data. So we try to account for
diff --git a/queue-4.4/x86-vdso-pvclock-simplify-and-speed-up-the-vdso-pvclock-reader.patch b/queue-4.4/x86-vdso-pvclock-simplify-and-speed-up-the-vdso-pvclock-reader.patch
new file mode 100644 (file)
index 0000000..ecc210a
--- /dev/null
@@ -0,0 +1,133 @@
+From 6b078f5de7fc0851af4102493c7b5bb07e49c4cb Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@amacapital.net>
+Date: Thu, 10 Dec 2015 19:20:19 -0800
+Subject: x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
+
+From: Andy Lutomirski <luto@amacapital.net>
+
+commit 6b078f5de7fc0851af4102493c7b5bb07e49c4cb upstream.
+
+The pvclock vdso code was too abstracted to understand easily
+and excessively paranoid.  Simplify it for a huge speedup.
+
+This opens the door for additional simplifications, as the vdso
+no longer accesses the pvti for any vcpu other than vcpu 0.
+
+Before, vclock_gettime using kvm-clock took about 45ns on my
+machine. With this change, it takes 29ns, which is almost as
+fast as the pure TSC implementation.
+
+Signed-off-by: Andy Lutomirski <luto@amacapital.net>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/6b51dcc41f1b101f963945c5ec7093d72bdac429.1449702533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Jamie Iles <jamie.iles@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vdso/vclock_gettime.c |   79 +++++++++++++++++++----------------
+ 1 file changed, 45 insertions(+), 34 deletions(-)
+
+--- a/arch/x86/entry/vdso/vclock_gettime.c
++++ b/arch/x86/entry/vdso/vclock_gettime.c
+@@ -78,47 +78,58 @@ static notrace const struct pvclock_vsys
+ static notrace cycle_t vread_pvclock(int *mode)
+ {
+-      const struct pvclock_vsyscall_time_info *pvti;
++      const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti;
+       cycle_t ret;
+-      u64 last;
+-      u32 version;
+-      u8 flags;
+-      unsigned cpu, cpu1;
+-
++      u64 tsc, pvti_tsc;
++      u64 last, delta, pvti_system_time;
++      u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+       /*
+-       * Note: hypervisor must guarantee that:
+-       * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+-       * 2. that per-CPU pvclock time info is updated if the
+-       *    underlying CPU changes.
+-       * 3. that version is increased whenever underlying CPU
+-       *    changes.
++       * Note: The kernel and hypervisor must guarantee that cpu ID
++       * number maps 1:1 to per-CPU pvclock time info.
++       *
++       * Because the hypervisor is entirely unaware of guest userspace
++       * preemption, it cannot guarantee that per-CPU pvclock time
++       * info is updated if the underlying CPU changes or that that
++       * version is increased whenever underlying CPU changes.
++       *
++       * On KVM, we are guaranteed that pvti updates for any vCPU are
++       * atomic as seen by *all* vCPUs.  This is an even stronger
++       * guarantee than we get with a normal seqlock.
+        *
++       * On Xen, we don't appear to have that guarantee, but Xen still
++       * supplies a valid seqlock using the version field.
++
++       * We only do pvclock vdso timing at all if
++       * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
++       * mean that all vCPUs have matching pvti and that the TSC is
++       * synced, so we can just look at vCPU 0's pvti.
+        */
+-      do {
+-              cpu = __getcpu() & VGETCPU_CPU_MASK;
+-              /* TODO: We can put vcpu id into higher bits of pvti.version.
+-               * This will save a couple of cycles by getting rid of
+-               * __getcpu() calls (Gleb).
+-               */
+-
+-              pvti = get_pvti(cpu);
+-
+-              version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+-
+-              /*
+-               * Test we're still on the cpu as well as the version.
+-               * We could have been migrated just after the first
+-               * vgetcpu but before fetching the version, so we
+-               * wouldn't notice a version change.
+-               */
+-              cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+-      } while (unlikely(cpu != cpu1 ||
+-                        (pvti->pvti.version & 1) ||
+-                        pvti->pvti.version != version));
+-      if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
++      if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
+               *mode = VCLOCK_NONE;
++              return 0;
++      }
++
++      do {
++              version = pvti->version;
++
++              /* This is also a read barrier, so we'll read version first. */
++              tsc = rdtsc_ordered();
++
++              pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
++              pvti_tsc_shift = pvti->tsc_shift;
++              pvti_system_time = pvti->system_time;
++              pvti_tsc = pvti->tsc_timestamp;
++
++              /* Make sure that the version double-check is last. */
++              smp_rmb();
++      } while (unlikely((version & 1) || version != pvti->version));
++
++      delta = tsc - pvti_tsc;
++      ret = pvti_system_time +
++              pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
++                                  pvti_tsc_shift);
+       /* refer to tsc.c read_tsc() comment for rationale */
+       last = gtod->cycle_last;