git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
drop a bunch of 4.9 patches that were not quite ready yet :(
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Dec 2017 08:36:28 +0000 (09:36 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Dec 2017 08:36:28 +0000 (09:36 +0100)
20 files changed:
queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch [deleted file]
queue-4.9/kvm-vmx-fix-enable-vpid-conditions.patch
queue-4.9/kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch
queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch [deleted file]
queue-4.9/series
queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch [deleted file]
queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch [deleted file]
queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch [deleted file]
queue-4.9/x86-kvm-vmx-simplify-segment_base.patch [deleted file]
queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch [deleted file]
queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch [deleted file]
queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch [deleted file]
queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch [deleted file]
queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch [deleted file]
queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch [deleted file]
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch [deleted file]
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch [deleted file]
queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch [deleted file]
queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch [deleted file]
queue-4.9/x86-unify-tss_struct.patch [deleted file]

diff --git a/queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch b/queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
deleted file mode 100644 (file)
index 9228412..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-From 44889942b6eb356eab27ce25fe10701adfec7776 Mon Sep 17 00:00:00 2001
-From: Ladi Prosek <lprosek@redhat.com>
-Date: Fri, 22 Sep 2017 07:53:15 +0200
-Subject: KVM: nVMX: fix HOST_CR3/HOST_CR4 cache
-
-From: Ladi Prosek <lprosek@redhat.com>
-
-commit 44889942b6eb356eab27ce25fe10701adfec7776 upstream.
-
-For nested virt we maintain multiple VMCS that can run on a vCPU. So it is
-incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching
-the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in
-vCPU-wide data structures.
-
-Hyper-V nested on KVM runs into this consistently for me with PCID enabled.
-CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3)
-fires, and the currently loaded VMCS is updated. Then we switch from L2 to
-L1 and the next exit reverts CR3 to its old value.
-
-Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")
-Signed-off-by: Ladi Prosek <lprosek@redhat.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/kvm/vmx.c |   16 ++++++++--------
- 1 file changed, 8 insertions(+), 8 deletions(-)
-
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -191,6 +191,8 @@ struct loaded_vmcs {
-       struct vmcs *shadow_vmcs;
-       int cpu;
-       int launched;
-+      unsigned long vmcs_host_cr3;    /* May not match real cr3 */
-+      unsigned long vmcs_host_cr4;    /* May not match real cr4 */
-       struct list_head loaded_vmcss_on_cpu_link;
- };
-@@ -573,8 +575,6 @@ struct vcpu_vmx {
-               int           gs_ldt_reload_needed;
-               int           fs_reload_needed;
-               u64           msr_host_bndcfgs;
--              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
--              unsigned long vmcs_host_cr4;    /* May not match real cr4 */
-       } host_state;
-       struct {
-               int vm86_active;
-@@ -4871,12 +4871,12 @@ static void vmx_set_constant_host_state(
-        */
-       cr3 = read_cr3();
-       vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
--      vmx->host_state.vmcs_host_cr3 = cr3;
-+      vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
-       /* Save the most likely value for this task's CR4 in the VMCS. */
-       cr4 = cr4_read_shadow();
-       vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
--      vmx->host_state.vmcs_host_cr4 = cr4;
-+      vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
-       vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
- #ifdef CONFIG_X86_64
-@@ -8874,15 +8874,15 @@ static void __noclone vmx_vcpu_run(struc
-               vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-       cr3 = __get_current_cr3_fast();
--      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
-+      if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
-               vmcs_writel(HOST_CR3, cr3);
--              vmx->host_state.vmcs_host_cr3 = cr3;
-+              vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
-       }
-       cr4 = cr4_read_shadow();
--      if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
-+      if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
-               vmcs_writel(HOST_CR4, cr4);
--              vmx->host_state.vmcs_host_cr4 = cr4;
-+              vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
-       }
-       /* When single-stepping over STI and MOV SS, we must clear the
index 54a821ee9c2c906c410aa41f8f9dde49fbbbf557..37f92f78a6e22211ed56fdebb31ec061444bed45 100644 (file)
@@ -59,7 +59,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
-@@ -1201,6 +1201,11 @@ static inline bool cpu_has_vmx_invvpid_g
+@@ -1199,6 +1199,11 @@ static inline bool cpu_has_vmx_invvpid_g
        return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
  }
  
@@ -71,7 +71,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  static inline bool cpu_has_vmx_ept(void)
  {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
-@@ -6445,8 +6450,10 @@ static __init int hardware_setup(void)
+@@ -6434,8 +6439,10 @@ static __init int hardware_setup(void)
        if (boot_cpu_has(X86_FEATURE_NX))
                kvm_enable_efer_bits(EFER_NX);
  
index 3ef7d4186b38e8058e853eab3129225aa28733e1..08e8bc930a201db2f488ea63618ef3d6113c4fc9 100644 (file)
@@ -32,7 +32,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
-@@ -3818,6 +3818,12 @@ static void vmx_flush_tlb(struct kvm_vcp
+@@ -3816,6 +3816,12 @@ static void vmx_flush_tlb(struct kvm_vcp
        __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
  }
  
@@ -45,7 +45,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
  {
        ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
-@@ -8505,6 +8511,7 @@ static void vmx_set_virtual_x2apic_mode(
+@@ -8494,6 +8500,7 @@ static void vmx_set_virtual_x2apic_mode(
        } else {
                sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
                sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
@@ -53,7 +53,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
  
-@@ -8530,8 +8537,10 @@ static void vmx_set_apic_access_page_add
+@@ -8519,8 +8526,10 @@ static void vmx_set_apic_access_page_add
         */
        if (!is_guest_mode(vcpu) ||
            !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
@@ -65,7 +65,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  }
  
  static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
-@@ -10110,6 +10119,9 @@ static void prepare_vmcs02(struct kvm_vc
+@@ -10093,6 +10102,9 @@ static void prepare_vmcs02(struct kvm_vc
        if (nested_cpu_has_ept(vmcs12)) {
                kvm_mmu_unload(vcpu);
                nested_ept_init_mmu_context(vcpu);
@@ -75,7 +75,7 @@ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
        }
  
        if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
-@@ -10850,6 +10862,10 @@ static void nested_vmx_vmexit(struct kvm
+@@ -10833,6 +10845,10 @@ static void nested_vmx_vmexit(struct kvm
                vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
                vmx_set_virtual_x2apic_mode(vcpu,
                                vcpu->arch.apic_base & X2APIC_ENABLE);
diff --git a/queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch b/queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch
deleted file mode 100644 (file)
index ed23e79..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-From e73ad5ff2f76da25390e9607cb549691639330c3 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 22 May 2017 15:30:03 -0700
-Subject: mm, x86/mm: Make the batched unmap TLB flush API more generic
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit e73ad5ff2f76da25390e9607cb549691639330c3 upstream.
-
-try_to_unmap_flush() used to open-code a rather x86-centric flush
-sequence: local_flush_tlb() + flush_tlb_others().  Rearrange the
-code so that the arch (only x86 for now) provides
-arch_tlbbatch_add_mm() and arch_tlbbatch_flush() and the core code
-calls those functions instead.
-
-I'll want this for x86 because, to enable address space ids, I can't
-support the flush_tlb_others() mode used by existing
-try_to_unmap_flush() implementation with good performance.  I can
-support the new API fairly easily, though.
-
-I imagine that other architectures may be in a similar position.
-Architectures with strong remote flush primitives (arm64?) may have
-even worse performance problems with flush_tlb_others() the way that
-try_to_unmap_flush() uses it.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Acked-by: Kees Cook <keescook@chromium.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Sasha Levin <sasha.levin@oracle.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/19f25a8581f9fb77876b7ff3b001f89835e34ea3.1495492063.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/tlbbatch.h |   16 ++++++++++++++++
- arch/x86/include/asm/tlbflush.h |    8 ++++++++
- arch/x86/mm/tlb.c               |   17 +++++++++++++++++
- include/linux/sched.h           |   15 +++++++++++----
- mm/rmap.c                       |   16 ++--------------
- 5 files changed, 54 insertions(+), 18 deletions(-)
- create mode 100644 arch/x86/include/asm/tlbbatch.h
-
---- /dev/null
-+++ b/arch/x86/include/asm/tlbbatch.h
-@@ -0,0 +1,16 @@
-+#ifndef _ARCH_X86_TLBBATCH_H
-+#define _ARCH_X86_TLBBATCH_H
-+
-+#include <linux/cpumask.h>
-+
-+#ifdef CONFIG_SMP
-+struct arch_tlbflush_unmap_batch {
-+      /*
-+       * Each bit set is a CPU that potentially has a TLB entry for one of
-+       * the PFNs being flushed..
-+       */
-+      struct cpumask cpumask;
-+};
-+#endif
-+
-+#endif /* _ARCH_X86_TLBBATCH_H */
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -327,6 +327,14 @@ static inline void reset_lazy_tlbstate(v
-       this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
- }
-+static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-+                                      struct mm_struct *mm)
-+{
-+      cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
-+}
-+
-+extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
-+
- #endif        /* SMP */
- #ifndef CONFIG_PARAVIRT
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -410,6 +410,23 @@ void flush_tlb_kernel_range(unsigned lon
-       }
- }
-+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
-+{
-+      int cpu = get_cpu();
-+
-+      if (cpumask_test_cpu(cpu, &batch->cpumask)) {
-+              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-+              local_flush_tlb();
-+              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
-+      }
-+
-+      if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-+              flush_tlb_others(&batch->cpumask, NULL, 0, TLB_FLUSH_ALL);
-+      cpumask_clear(&batch->cpumask);
-+
-+      put_cpu();
-+}
-+
- static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
-                            size_t count, loff_t *ppos)
- {
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -1463,15 +1463,22 @@ enum perf_event_task_context {
-       perf_nr_task_contexts,
- };
-+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-+#include <asm/tlbbatch.h>
-+#endif
-+
- /* Track pages that require TLB flushes */
- struct tlbflush_unmap_batch {
-       /*
--       * Each bit set is a CPU that potentially has a TLB entry for one of
--       * the PFNs being flushed. See set_tlb_ubc_flush_pending().
-+       * The arch code makes the following promise: generic code can modify a
-+       * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
-+       * needed barriers), then call arch_tlbbatch_flush(), and the entries
-+       * will be flushed on all CPUs by the time that arch_tlbbatch_flush()
-+       * returns.
-        */
--      struct cpumask cpumask;
-+      struct arch_tlbflush_unmap_batch arch;
--      /* True if any bit in cpumask is set */
-+      /* True if a flush is needed. */
-       bool flush_required;
-       /*
---- a/mm/rmap.c
-+++ b/mm/rmap.c
-@@ -578,25 +578,13 @@ void page_unlock_anon_vma_read(struct an
- void try_to_unmap_flush(void)
- {
-       struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
--      int cpu;
-       if (!tlb_ubc->flush_required)
-               return;
--      cpu = get_cpu();
--
--      if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
--              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--              local_flush_tlb();
--              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
--      }
--
--      if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
--              flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
--      cpumask_clear(&tlb_ubc->cpumask);
-+      arch_tlbbatch_flush(&tlb_ubc->arch);
-       tlb_ubc->flush_required = false;
-       tlb_ubc->writable = false;
--      put_cpu();
- }
- /* Flush iff there are potentially writable TLB entries that can race with IO */
-@@ -613,7 +601,7 @@ static void set_tlb_ubc_flush_pending(st
- {
-       struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
--      cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
-+      arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
-       tlb_ubc->flush_required = true;
-       /*
index d50054c16a5c762daf5a20eb3cf750e12b23d35a..091a8d047ba14c04e0742bc4dfcedd87614f1277 100644 (file)
@@ -1,18 +1,5 @@
 cxl-check-if-vphb-exists-before-iterating-over-afu-devices.patch
 arm64-initialise-high_memory-global-variable-earlier.patch
-x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch
-x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
-x86-mm-reduce-indentation-in-flush_tlb_func.patch
-mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch
-x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch
-x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch
-x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch
-x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch
-x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
-x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch
-x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch
-x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch
-kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
 alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch
 alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch
 kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch
@@ -107,10 +94,6 @@ clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch
 tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch
 rtc-set-the-alarm-to-the-next-expiring-timer.patch
 cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch
-x86-kvm-vmx-simplify-segment_base.patch
-x86-unify-tss_struct.patch
-x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch
-x86-kvm-vmx-remove-unused-variable-in-segment_base.patch
 thermal-hisilicon-handle-return-value-of-clk_prepare_enable.patch
 thermal-drivers-hisi-fix-missing-interrupt-enablement.patch
 thermal-drivers-hisi-fix-kernel-panic-on-alarm-interrupt.patch
diff --git a/queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch b/queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch
deleted file mode 100644 (file)
index 16962a0..0000000
+++ /dev/null
@@ -1,430 +0,0 @@
-From 946c191161cef10c667b5ee3179db1714fa5b7c0 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Thu, 20 Oct 2016 11:34:40 -0500
-Subject: x86/entry/unwind: Create stack frames for saved interrupt registers
-
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-
-commit 946c191161cef10c667b5ee3179db1714fa5b7c0 upstream.
-
-With frame pointers, when a task is interrupted, its stack is no longer
-completely reliable because the function could have been interrupted
-before it had a chance to save the previous frame pointer on the stack.
-So the caller of the interrupted function could get skipped by a stack
-trace.
-
-This is problematic for live patching, which needs to know whether a
-stack trace of a sleeping task can be relied upon.  There's currently no
-way to detect if a sleeping task was interrupted by a page fault
-exception or preemption before it went to sleep.
-
-Another issue is that when dumping the stack of an interrupted task, the
-unwinder has no way of knowing where the saved pt_regs registers are, so
-it can't print them.
-
-This solves those issues by encoding the pt_regs pointer in the frame
-pointer on entry from an interrupt or an exception.
-
-This patch also updates the unwinder to be able to decode it, because
-otherwise the unwinder would be broken by this change.
-
-Note that this causes a change in the behavior of the unwinder: each
-instance of a pt_regs on the stack is now considered a "frame".  So
-callers of unwind_get_return_address() will now get an occasional
-'regs->ip' address that would have previously been skipped over.
-
-Suggested-by: Andy Lutomirski <luto@amacapital.net>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/8b9f84a21e39d249049e0547b559ff8da0df0988.1476973742.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/entry/calling.h       |   20 ++++++++++
- arch/x86/entry/entry_32.S      |   33 +++++++++++++++--
- arch/x86/entry/entry_64.S      |   10 +++--
- arch/x86/include/asm/unwind.h  |   16 ++++++++
- arch/x86/kernel/unwind_frame.c |   76 ++++++++++++++++++++++++++++++++++++-----
- 5 files changed, 139 insertions(+), 16 deletions(-)
-
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -201,6 +201,26 @@ For 32-bit we have the following convent
-       .byte 0xf1
-       .endm
-+/*
-+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
-+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
-+ * is just setting the LSB, which makes it an invalid stack address and is also
-+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
-+ *
-+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
-+ * the original rbp.
-+ */
-+.macro ENCODE_FRAME_POINTER ptregs_offset=0
-+#ifdef CONFIG_FRAME_POINTER
-+      .if \ptregs_offset
-+              leaq \ptregs_offset(%rsp), %rbp
-+      .else
-+              mov %rsp, %rbp
-+      .endif
-+      orq     $0x1, %rbp
-+#endif
-+.endm
-+
- #endif /* CONFIG_X86_64 */
- /*
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -175,6 +175,22 @@
-       SET_KERNEL_GS %edx
- .endm
-+/*
-+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
-+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
-+ * is just setting the LSB, which makes it an invalid stack address and is also
-+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
-+ *
-+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
-+ * original rbp.
-+ */
-+.macro ENCODE_FRAME_POINTER
-+#ifdef CONFIG_FRAME_POINTER
-+      mov %esp, %ebp
-+      orl $0x1, %ebp
-+#endif
-+.endm
-+
- .macro RESTORE_INT_REGS
-       popl    %ebx
-       popl    %ecx
-@@ -624,6 +640,7 @@ common_interrupt:
-       ASM_CLAC
-       addl    $-0x80, (%esp)                  /* Adjust vector into the [-256, -1] range */
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       TRACE_IRQS_OFF
-       movl    %esp, %eax
-       call    do_IRQ
-@@ -635,6 +652,7 @@ ENTRY(name)                                \
-       ASM_CLAC;                       \
-       pushl   $~(nr);                 \
-       SAVE_ALL;                       \
-+      ENCODE_FRAME_POINTER;           \
-       TRACE_IRQS_OFF                  \
-       movl    %esp, %eax;             \
-       call    fn;                     \
-@@ -769,6 +787,7 @@ END(spurious_interrupt_bug)
- ENTRY(xen_hypervisor_callback)
-       pushl   $-1                             /* orig_ax = -1 => not a system call */
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       TRACE_IRQS_OFF
-       /*
-@@ -823,6 +842,7 @@ ENTRY(xen_failsafe_callback)
-       jmp     iret_exc
- 5:    pushl   $-1                             /* orig_ax = -1 => not a system call */
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       jmp     ret_from_exception
- .section .fixup, "ax"
-@@ -1047,6 +1067,7 @@ error_code:
-       pushl   %edx
-       pushl   %ecx
-       pushl   %ebx
-+      ENCODE_FRAME_POINTER
-       cld
-       movl    $(__KERNEL_PERCPU), %ecx
-       movl    %ecx, %fs
-@@ -1079,6 +1100,7 @@ ENTRY(debug)
-       ASM_CLAC
-       pushl   $-1                             # mark this as an int
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       xorl    %edx, %edx                      # error code 0
-       movl    %esp, %eax                      # pt_regs pointer
-@@ -1094,11 +1116,11 @@ ENTRY(debug)
- .Ldebug_from_sysenter_stack:
-       /* We're on the SYSENTER stack.  Switch off. */
--      movl    %esp, %ebp
-+      movl    %esp, %ebx
-       movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
-       TRACE_IRQS_OFF
-       call    do_debug
--      movl    %ebp, %esp
-+      movl    %ebx, %esp
-       jmp     ret_from_exception
- END(debug)
-@@ -1121,6 +1143,7 @@ ENTRY(nmi)
-       pushl   %eax                            # pt_regs->orig_ax
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       xorl    %edx, %edx                      # zero error code
-       movl    %esp, %eax                      # pt_regs pointer
-@@ -1139,10 +1162,10 @@ ENTRY(nmi)
-        * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
-        * is using the thread stack right now, so it's safe for us to use it.
-        */
--      movl    %esp, %ebp
-+      movl    %esp, %ebx
-       movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
-       call    do_nmi
--      movl    %ebp, %esp
-+      movl    %ebx, %esp
-       jmp     restore_all_notrace
- #ifdef CONFIG_X86_ESPFIX32
-@@ -1159,6 +1182,7 @@ nmi_espfix_stack:
-       .endr
-       pushl   %eax
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       FIXUP_ESPFIX_STACK                      # %eax == %esp
-       xorl    %edx, %edx                      # zero error code
-       call    do_nmi
-@@ -1172,6 +1196,7 @@ ENTRY(int3)
-       ASM_CLAC
-       pushl   $-1                             # mark this as an int
-       SAVE_ALL
-+      ENCODE_FRAME_POINTER
-       TRACE_IRQS_OFF
-       xorl    %edx, %edx                      # zero error code
-       movl    %esp, %eax                      # pt_regs pointer
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -469,6 +469,7 @@ END(irq_entries_start)
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
-+      ENCODE_FRAME_POINTER
-       testb   $3, CS(%rsp)
-       jz      1f
-@@ -985,6 +986,7 @@ ENTRY(xen_failsafe_callback)
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
-+      ENCODE_FRAME_POINTER
-       jmp     error_exit
- END(xen_failsafe_callback)
-@@ -1028,6 +1030,7 @@ ENTRY(paranoid_entry)
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-+      ENCODE_FRAME_POINTER 8
-       movl    $1, %ebx
-       movl    $MSR_GS_BASE, %ecx
-       rdmsr
-@@ -1075,6 +1078,7 @@ ENTRY(error_entry)
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-+      ENCODE_FRAME_POINTER 8
-       xorl    %ebx, %ebx
-       testb   $3, CS+8(%rsp)
-       jz      .Lerror_kernelspace
-@@ -1259,6 +1263,7 @@ ENTRY(nmi)
-       pushq   %r13            /* pt_regs->r13 */
-       pushq   %r14            /* pt_regs->r14 */
-       pushq   %r15            /* pt_regs->r15 */
-+      ENCODE_FRAME_POINTER
-       /*
-        * At this point we no longer need to worry about stack damage
-@@ -1272,11 +1277,10 @@ ENTRY(nmi)
-       /*
-        * Return back to user mode.  We must *not* do the normal exit
--       * work, because we don't want to enable interrupts.  Fortunately,
--       * do_nmi doesn't modify pt_regs.
-+       * work, because we don't want to enable interrupts.
-        */
-       SWAPGS
--      jmp     restore_c_regs_and_iret
-+      jmp     restore_regs_and_iret
- .Lnmi_from_kernel:
-       /*
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -13,6 +13,7 @@ struct unwind_state {
-       int graph_idx;
- #ifdef CONFIG_FRAME_POINTER
-       unsigned long *bp;
-+      struct pt_regs *regs;
- #else
-       unsigned long *sp;
- #endif
-@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address
-       if (unwind_done(state))
-               return NULL;
--      return state->bp + 1;
-+      return state->regs ? &state->regs->ip : state->bp + 1;
-+}
-+
-+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
-+{
-+      if (unwind_done(state))
-+              return NULL;
-+
-+      return state->regs;
- }
- #else /* !CONFIG_FRAME_POINTER */
-@@ -57,6 +66,11 @@ unsigned long *unwind_get_return_address
- {
-       return NULL;
- }
-+
-+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
-+{
-+      return NULL;
-+}
- #endif /* CONFIG_FRAME_POINTER */
---- a/arch/x86/kernel/unwind_frame.c
-+++ b/arch/x86/kernel/unwind_frame.c
-@@ -14,6 +14,9 @@ unsigned long unwind_get_return_address(
-       if (unwind_done(state))
-               return 0;
-+      if (state->regs && user_mode(state->regs))
-+              return 0;
-+
-       addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
-                                    addr_p);
-@@ -21,6 +24,20 @@ unsigned long unwind_get_return_address(
- }
- EXPORT_SYMBOL_GPL(unwind_get_return_address);
-+/*
-+ * This determines if the frame pointer actually contains an encoded pointer to
-+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
-+ */
-+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
-+{
-+      unsigned long regs = (unsigned long)bp;
-+
-+      if (!(regs & 0x1))
-+              return NULL;
-+
-+      return (struct pt_regs *)(regs & ~0x1);
-+}
-+
- static bool update_stack_state(struct unwind_state *state, void *addr,
-                              size_t len)
- {
-@@ -43,26 +60,59 @@ static bool update_stack_state(struct un
- bool unwind_next_frame(struct unwind_state *state)
- {
--      unsigned long *next_bp;
-+      struct pt_regs *regs;
-+      unsigned long *next_bp, *next_frame;
-+      size_t next_len;
-       if (unwind_done(state))
-               return false;
--      next_bp = (unsigned long *)*state->bp;
-+      /* have we reached the end? */
-+      if (state->regs && user_mode(state->regs))
-+              goto the_end;
-+
-+      /* get the next frame pointer */
-+      if (state->regs)
-+              next_bp = (unsigned long *)state->regs->bp;
-+      else
-+              next_bp = (unsigned long *)*state->bp;
-+
-+      /* is the next frame pointer an encoded pointer to pt_regs? */
-+      regs = decode_frame_pointer(next_bp);
-+      if (regs) {
-+              next_frame = (unsigned long *)regs;
-+              next_len = sizeof(*regs);
-+      } else {
-+              next_frame = next_bp;
-+              next_len = FRAME_HEADER_SIZE;
-+      }
-       /* make sure the next frame's data is accessible */
--      if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
-+      if (!update_stack_state(state, next_frame, next_len))
-               return false;
--
-       /* move to the next frame */
--      state->bp = next_bp;
-+      if (regs) {
-+              state->regs = regs;
-+              state->bp = NULL;
-+      } else {
-+              state->bp = next_bp;
-+              state->regs = NULL;
-+      }
-+
-       return true;
-+
-+the_end:
-+      state->stack_info.type = STACK_TYPE_UNKNOWN;
-+      return false;
- }
- EXPORT_SYMBOL_GPL(unwind_next_frame);
- void __unwind_start(struct unwind_state *state, struct task_struct *task,
-                   struct pt_regs *regs, unsigned long *first_frame)
- {
-+      unsigned long *bp, *frame;
-+      size_t len;
-+
-       memset(state, 0, sizeof(*state));
-       state->task = task;
-@@ -73,12 +123,22 @@ void __unwind_start(struct unwind_state
-       }
-       /* set up the starting stack frame */
--      state->bp = get_frame_pointer(task, regs);
-+      bp = get_frame_pointer(task, regs);
-+      regs = decode_frame_pointer(bp);
-+      if (regs) {
-+              state->regs = regs;
-+              frame = (unsigned long *)regs;
-+              len = sizeof(*regs);
-+      } else {
-+              state->bp = bp;
-+              frame = bp;
-+              len = FRAME_HEADER_SIZE;
-+      }
-       /* initialize stack info and make sure the frame data is accessible */
--      get_stack_info(state->bp, state->task, &state->stack_info,
-+      get_stack_info(frame, state->task, &state->stack_info,
-                      &state->stack_mask);
--      update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
-+      update_stack_state(state, frame, len);
-       /*
-        * The caller can provide the address of the first frame directly
diff --git a/queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch b/queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch
deleted file mode 100644 (file)
index 340ab94..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-From b7ffc44d5b2ea163899d09289ca7743d5c32e926 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 20 Feb 2017 08:56:14 -0800
-Subject: x86/kvm/vmx: Defer TR reload after VM exit
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit b7ffc44d5b2ea163899d09289ca7743d5c32e926 upstream.
-
-Intel's VMX is daft and resets the hidden TSS limit register to 0x67
-on VMX reload, and the 0x67 is not configurable.  KVM currently
-reloads TR using the LTR instruction on every exit, but this is quite
-slow because LTR is serializing.
-
-The 0x67 limit is entirely harmless unless ioperm() is in use, so
-defer the reload until a task using ioperm() is actually running.
-
-Here's some poorly done benchmarking using kvm-unit-tests:
-
-Before:
-
-cpuid 1313
-vmcall 1195
-mov_from_cr8 11
-mov_to_cr8 17
-inl_from_pmtimer 6770
-inl_from_qemu 6856
-inl_from_kernel 2435
-outl_to_kernel 1402
-
-After:
-
-cpuid 1291
-vmcall 1181
-mov_from_cr8 11
-mov_to_cr8 16
-inl_from_pmtimer 6457
-inl_from_qemu 6209
-inl_from_kernel 2339
-outl_to_kernel 1391
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-[Force-reload TR in invalidate_tss_limit. - Paolo]
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/desc.h |   48 ++++++++++++++++++++++++++++++++++++++++++++
- arch/x86/kernel/ioport.c    |    5 ++++
- arch/x86/kernel/process.c   |   10 +++++++++
- arch/x86/kvm/vmx.c          |   23 ++++++++-------------
- 4 files changed, 72 insertions(+), 14 deletions(-)
-
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -213,6 +213,54 @@ static inline void native_load_tr_desc(v
-       asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
- }
-+static inline void force_reload_TR(void)
-+{
-+      struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
-+      tss_desc tss;
-+
-+      memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
-+
-+      /*
-+       * LTR requires an available TSS, and the TSS is currently
-+       * busy.  Make it be available so that LTR will work.
-+       */
-+      tss.type = DESC_TSS;
-+      write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
-+
-+      load_TR_desc();
-+}
-+
-+DECLARE_PER_CPU(bool, need_tr_refresh);
-+
-+static inline void refresh_TR(void)
-+{
-+      WARN_ON(preemptible());
-+
-+      if (unlikely(this_cpu_read(need_tr_refresh))) {
-+              force_reload_TR();
-+              this_cpu_write(need_tr_refresh, false);
-+      }
-+}
-+
-+/*
-+ * If you do something evil that corrupts the cached TSS limit (I'm looking
-+ * at you, VMX exits), call this function.
-+ *
-+ * The optimization here is that the TSS limit only matters for Linux if the
-+ * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
-+ * everything works except that IO bitmap will be ignored and all CPL 3 IO
-+ * instructions will #GP, which is exactly what we want for normal tasks.
-+ */
-+static inline void invalidate_tss_limit(void)
-+{
-+      WARN_ON(preemptible());
-+
-+      if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
-+              force_reload_TR();
-+      else
-+              this_cpu_write(need_tr_refresh, true);
-+}
-+
- static inline void native_load_gdt(const struct desc_ptr *dtr)
- {
-       asm volatile("lgdt %0"::"m" (*dtr));
---- a/arch/x86/kernel/ioport.c
-+++ b/arch/x86/kernel/ioport.c
-@@ -16,6 +16,7 @@
- #include <linux/syscalls.h>
- #include <linux/bitmap.h>
- #include <asm/syscalls.h>
-+#include <asm/desc.h>
- /*
-  * this changes the io permissions bitmap in the current task.
-@@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long
-               memset(bitmap, 0xff, IO_BITMAP_BYTES);
-               t->io_bitmap_ptr = bitmap;
-               set_thread_flag(TIF_IO_BITMAP);
-+
-+              preempt_disable();
-+              refresh_TR();
-+              preempt_enable();
-       }
-       /*
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -33,6 +33,7 @@
- #include <asm/mce.h>
- #include <asm/vm86.h>
- #include <asm/switch_to.h>
-+#include <asm/desc.h>
- /*
-  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
-@@ -82,6 +83,9 @@ void idle_notifier_unregister(struct not
- EXPORT_SYMBOL_GPL(idle_notifier_unregister);
- #endif
-+DEFINE_PER_CPU(bool, need_tr_refresh);
-+EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
-+
- /*
-  * this gets called so that we can store lazy state into memory and copy the
-  * current task into the new thread.
-@@ -227,6 +231,12 @@ void __switch_to_xtra(struct task_struct
-                */
-               memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-                      max(prev->io_bitmap_max, next->io_bitmap_max));
-+
-+              /*
-+               * Make sure that the TSS limit is correct for the CPU
-+               * to notice the IO bitmap.
-+               */
-+              refresh_TR();
-       } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-               /*
-                * Clear any possible leftover bits:
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -1959,19 +1959,6 @@ static void add_atomic_switch_msr(struct
-       m->host[i].value = host_val;
- }
--static void reload_tss(void)
--{
--      /*
--       * VT restores TR but not its size.  Useless.
--       */
--      struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
--      struct desc_struct *descs;
--
--      descs = (void *)gdt->address;
--      descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
--      load_TR_desc();
--}
--
- static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
- {
-       u64 guest_efer = vmx->vcpu.arch.efer;
-@@ -2141,7 +2128,7 @@ static void __vmx_load_host_state(struct
-               loadsegment(es, vmx->host_state.es_sel);
-       }
- #endif
--      reload_tss();
-+      invalidate_tss_limit();
- #ifdef CONFIG_X86_64
-       wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
- #endif
-@@ -2265,6 +2252,14 @@ static void vmx_vcpu_load(struct kvm_vcp
-               vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-               vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
-+              /*
-+               * VM exits change the host TR limit to 0x67 after a VM
-+               * exit.  This is okay, since 0x67 covers everything except
-+               * the IO bitmap and we have code to handle the IO bitmap
-+               * being lost after a VM exit.
-+               */
-+              BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
-+
-               rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
-               vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
diff --git a/queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch b/queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch
deleted file mode 100644 (file)
index cfcf512..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-From 0fce546f9f07b94ccc9de09cf48d35e18946d2fa Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= <jeremy.lefaure@lse.epita.fr>
-Date: Sat, 25 Feb 2017 17:46:53 -0500
-Subject: x86/kvm/vmx: remove unused variable in segment_base()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
-
-commit 0fce546f9f07b94ccc9de09cf48d35e18946d2fa upstream.
-
-The pointer 'struct desc_struct *d' is unused since commit 8c2e41f7ae12
-("x86/kvm/vmx: Simplify segment_base()") so let's remove it.
-
-Signed-off-by: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
-Reviewed-by: David Hildenbrand <david@redhat.com>
-Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/kvm/vmx.c |    1 -
- 1 file changed, 1 deletion(-)
-
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -2016,7 +2016,6 @@ static bool update_transition_efer(struc
- static unsigned long segment_base(u16 selector)
- {
-       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
--      struct desc_struct *d;
-       struct desc_struct *table;
-       unsigned long v;
diff --git a/queue-4.9/x86-kvm-vmx-simplify-segment_base.patch b/queue-4.9/x86-kvm-vmx-simplify-segment_base.patch
deleted file mode 100644 (file)
index ab2b7b7..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-From 8c2e41f7ae1234c192ef497472ad306227c77c03 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 20 Feb 2017 08:56:12 -0800
-Subject: x86/kvm/vmx: Simplify segment_base()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 8c2e41f7ae1234c192ef497472ad306227c77c03 upstream.
-
-Use actual pointer types for pointers (instead of unsigned long) and
-replace hardcoded constants with the appropriate self-documenting
-macros.
-
-The function is still a bit messy, but this seems a lot better than
-before to me.
-
-This is mostly borrowed from a patch by Thomas Garnier.
-
-Cc: Thomas Garnier <thgarnie@google.com>
-Cc: Jim Mattson <jmattson@google.com>
-Cc: Radim Krčmář <rkrcmar@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/kvm/vmx.c |   19 +++++++------------
- 1 file changed, 7 insertions(+), 12 deletions(-)
-
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -2030,28 +2030,23 @@ static unsigned long segment_base(u16 se
- {
-       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
-       struct desc_struct *d;
--      unsigned long table_base;
-+      struct desc_struct *table;
-       unsigned long v;
--      if (!(selector & ~3))
-+      if (!(selector & ~SEGMENT_RPL_MASK))
-               return 0;
--      table_base = gdt->address;
-+      table = (struct desc_struct *)gdt->address;
--      if (selector & 4) {           /* from ldt */
-+      if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
-               u16 ldt_selector = kvm_read_ldt();
--              if (!(ldt_selector & ~3))
-+              if (!(ldt_selector & ~SEGMENT_RPL_MASK))
-                       return 0;
--              table_base = segment_base(ldt_selector);
-+              table = (struct desc_struct *)segment_base(ldt_selector);
-       }
--      d = (struct desc_struct *)(table_base + (selector & ~7));
--      v = get_desc_base(d);
--#ifdef CONFIG_X86_64
--       if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
--               v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
--#endif
-+      v = get_desc_base(&table[selector >> 3]);
-       return v;
- }
diff --git a/queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch b/queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch
deleted file mode 100644 (file)
index 472a6d5..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-From be4ffc0d787fafb22b89a2f29e71fea3b119205e Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:16 -0700
-Subject: x86/mm: Be more consistent wrt PAGE_SHIFT vs PAGE_SIZE in tlb flush code
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit be4ffc0d787fafb22b89a2f29e71fea3b119205e upstream.
-
-Nadav pointed out that some code used PAGE_SIZE and other code used
-PAGE_SHIFT.  Use PAGE_SHIFT instead of multiplying or dividing by
-PAGE_SIZE.
-
-Requested-by: Nadav Amit <nadav.amit@gmail.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/mm/tlb.c |    5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -220,8 +220,7 @@ static void flush_tlb_func_common(const
-               trace_tlb_flush(reason, TLB_FLUSH_ALL);
-       } else {
-               unsigned long addr;
--              unsigned long nr_pages =
--                      (f->end - f->start) / PAGE_SIZE;
-+              unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
-               addr = f->start;
-               while (addr < f->end) {
-                       __flush_tlb_single(addr);
-@@ -374,7 +373,7 @@ void flush_tlb_kernel_range(unsigned lon
-       /* Balance as user space task's flush, a bit conservative */
-       if (end == TLB_FLUSH_ALL ||
--          (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
-+          (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
-               on_each_cpu(do_flush_tlb_all, NULL, 1);
-       } else {
-               struct flush_tlb_info info;
diff --git a/queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch b/queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch
deleted file mode 100644 (file)
index dbdfdf6..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-From 59f537c1dea04287165bb11407921e095250dc80 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:11 -0700
-Subject: x86/mm: Change the leave_mm() condition for local TLB flushes
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 59f537c1dea04287165bb11407921e095250dc80 upstream.
-
-On a remote TLB flush, we leave_mm() if we're TLBSTATE_LAZY.  For a
-local flush_tlb_mm_range(), we leave_mm() if !current->mm.  These
-are approximately the same condition -- the scheduler sets lazy TLB
-mode when switching to a thread with no mm.
-
-I'm about to merge the local and remote flush code, but for ease of
-verifying and bisecting the patch, I want the local and remote flush
-behavior to match first.  This patch changes the local code to match
-the remote code.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Acked-by: Rik van Riel <riel@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/mm/tlb.c |    2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -328,7 +328,7 @@ void flush_tlb_mm_range(struct mm_struct
-               goto out;
-       }
--      if (!current->mm) {
-+      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-               leave_mm(smp_processor_id());
-               /* Synchronize with switch_mm. */
diff --git a/queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch b/queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch
deleted file mode 100644 (file)
index 55191ad..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-From d6e41f1151feeb118eee776c09323aceb4a415d9 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:17 -0700
-Subject: x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit d6e41f1151feeb118eee776c09323aceb4a415d9 upstream.
-
-When PCID is enabled, CR3's PCID bits can change during context
-switches, so KVM won't be able to treat CR3 as a per-mm constant any
-more.
-
-I structured this like the existing CR4 handling.  Under ordinary
-circumstances (PCID disabled or if the current PCID and the value
-that's already in the VMCS match), then we won't do an extra VMCS
-write, and we'll never do an extra direct CR3 read.  The overhead
-should be minimal.
-
-I disallowed using the new helper in non-atomic context because
-PCID support will cause CR3 to stop being constant in non-atomic
-process context.
-
-(Frankly, it also scares me a bit that KVM ever treated CR3 as
-constant, but it looks like it was okay before.)
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Radim Krčmář <rkrcmar@redhat.com>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: kvm@vger.kernel.org
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/mmu_context.h |   19 +++++++++++++++++++
- arch/x86/kvm/vmx.c                 |   25 +++++++++++++++++++++----
- 2 files changed, 40 insertions(+), 4 deletions(-)
-
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -268,4 +268,23 @@ static inline bool arch_pte_access_permi
- {
-       return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
- }
-+
-+/*
-+ * This can be used from process context to figure out what the value of
-+ * CR3 is without needing to do a (slow) read_cr3().
-+ *
-+ * It's intended to be used for code like KVM that sneakily changes CR3
-+ * and needs to restore it.  It needs to be used very carefully.
-+ */
-+static inline unsigned long __get_current_cr3_fast(void)
-+{
-+      unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-+
-+      /* For now, be very restrictive about when this can be called. */
-+      VM_WARN_ON(in_nmi() || !in_atomic());
-+
-+      VM_BUG_ON(cr3 != read_cr3());
-+      return cr3;
-+}
-+
- #endif /* _ASM_X86_MMU_CONTEXT_H */
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -48,6 +48,7 @@
- #include <asm/kexec.h>
- #include <asm/apic.h>
- #include <asm/irq_remapping.h>
-+#include <asm/mmu_context.h>
- #include "trace.h"
- #include "pmu.h"
-@@ -572,6 +573,7 @@ struct vcpu_vmx {
-               int           gs_ldt_reload_needed;
-               int           fs_reload_needed;
-               u64           msr_host_bndcfgs;
-+              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
-               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
-       } host_state;
-       struct {
-@@ -4857,10 +4859,19 @@ static void vmx_set_constant_host_state(
-       u32 low32, high32;
-       unsigned long tmpl;
-       struct desc_ptr dt;
--      unsigned long cr4;
-+      unsigned long cr0, cr3, cr4;
--      vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
--      vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
-+      cr0 = read_cr0();
-+      WARN_ON(cr0 & X86_CR0_TS);
-+      vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
-+
-+      /*
-+       * Save the most likely value for this task's CR3 in the VMCS.
-+       * We can't use __get_current_cr3_fast() because we're not atomic.
-+       */
-+      cr3 = read_cr3();
-+      vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
-+      vmx->host_state.vmcs_host_cr3 = cr3;
-       /* Save the most likely value for this task's CR4 in the VMCS. */
-       cr4 = cr4_read_shadow();
-@@ -8836,7 +8847,7 @@ void vmx_arm_hv_timer(struct kvm_vcpu *v
- static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
- {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
--      unsigned long debugctlmsr, cr4;
-+      unsigned long debugctlmsr, cr3, cr4;
-       /* Record the guest's net vcpu time for enforced NMI injections. */
-       if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
-@@ -8862,6 +8873,12 @@ static void __noclone vmx_vcpu_run(struc
-       if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
-               vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-+      cr3 = __get_current_cr3_fast();
-+      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
-+              vmcs_writel(HOST_CR3, cr3);
-+              vmx->host_state.vmcs_host_cr3 = cr3;
-+      }
-+
-       cr4 = cr4_read_shadow();
-       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
-               vmcs_writel(HOST_CR4, cr4);
diff --git a/queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch b/queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch
deleted file mode 100644 (file)
index ca4bea7..0000000
+++ /dev/null
@@ -1,422 +0,0 @@
-From a2055abe9c6789cedef29abbdaa488a087faccc3 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:10 -0700
-Subject: x86/mm: Pass flush_tlb_info to flush_tlb_others() etc
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit a2055abe9c6789cedef29abbdaa488a087faccc3 upstream.
-
-Rather than passing all the contents of flush_tlb_info to
-flush_tlb_others(), pass a pointer to the structure directly. For
-consistency, this also removes the unnecessary cpu parameter from
-uv_flush_tlb_others() to make its signature match the other
-*flush_tlb_others() functions.
-
-This serves two purposes:
-
- - It will dramatically simplify future patches that change struct
-   flush_tlb_info, which I'm planning to do.
-
- - struct flush_tlb_info is an adequate description of what to do
-   for a local flush, too, so by reusing it we can remove duplicated
-   code between local and remote flushes in a future patch.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Acked-by: Rik van Riel <riel@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-[ Fix build warning. ]
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/paravirt.h       |    6 --
- arch/x86/include/asm/paravirt_types.h |    5 --
- arch/x86/include/asm/tlbflush.h       |   19 +++++---
- arch/x86/include/asm/uv/uv.h          |   11 ++---
- arch/x86/mm/tlb.c                     |   72 ++++++++++++++++++----------------
- arch/x86/platform/uv/tlb_uv.c         |   10 +---
- arch/x86/xen/mmu.c                    |   10 ++--
- 7 files changed, 68 insertions(+), 65 deletions(-)
-
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -317,11 +317,9 @@ static inline void __flush_tlb_single(un
- }
- static inline void flush_tlb_others(const struct cpumask *cpumask,
--                                  struct mm_struct *mm,
--                                  unsigned long start,
--                                  unsigned long end)
-+                                  const struct flush_tlb_info *info)
- {
--      PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
-+      PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
- }
- static inline int paravirt_pgd_alloc(struct mm_struct *mm)
---- a/arch/x86/include/asm/paravirt_types.h
-+++ b/arch/x86/include/asm/paravirt_types.h
-@@ -51,6 +51,7 @@ struct mm_struct;
- struct desc_struct;
- struct task_struct;
- struct cpumask;
-+struct flush_tlb_info;
- /*
-  * Wrapper type for pointers to code which uses the non-standard
-@@ -225,9 +226,7 @@ struct pv_mmu_ops {
-       void (*flush_tlb_kernel)(void);
-       void (*flush_tlb_single)(unsigned long addr);
-       void (*flush_tlb_others)(const struct cpumask *cpus,
--                               struct mm_struct *mm,
--                               unsigned long start,
--                               unsigned long end);
-+                               const struct flush_tlb_info *info);
-       /* Hooks for allocating and freeing a pagetable top-level */
-       int  (*pgd_alloc)(struct mm_struct *mm);
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -211,12 +211,18 @@ static inline void __flush_tlb_one(unsig
-  *  - flush_tlb_page(vma, vmaddr) flushes one page
-  *  - flush_tlb_range(vma, start, end) flushes a range of pages
-  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
-- *  - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
-+ *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
-  *
-  * ..but the i386 has somewhat limited tlb flushing capabilities,
-  * and page-granular flushes are available only on i486 and up.
-  */
-+struct flush_tlb_info {
-+      struct mm_struct *mm;
-+      unsigned long start;
-+      unsigned long end;
-+};
-+
- #ifndef CONFIG_SMP
- /* "_up" is for UniProcessor.
-@@ -275,9 +281,7 @@ static inline void flush_tlb_mm_range(st
- }
- static inline void native_flush_tlb_others(const struct cpumask *cpumask,
--                                         struct mm_struct *mm,
--                                         unsigned long start,
--                                         unsigned long end)
-+                                         const struct flush_tlb_info *info)
- {
- }
-@@ -315,8 +319,7 @@ static inline void flush_tlb_page(struct
- }
- void native_flush_tlb_others(const struct cpumask *cpumask,
--                              struct mm_struct *mm,
--                              unsigned long start, unsigned long end);
-+                           const struct flush_tlb_info *info);
- #define TLBSTATE_OK   1
- #define TLBSTATE_LAZY 2
-@@ -338,8 +341,8 @@ extern void arch_tlbbatch_flush(struct a
- #endif        /* SMP */
- #ifndef CONFIG_PARAVIRT
--#define flush_tlb_others(mask, mm, start, end)        \
--      native_flush_tlb_others(mask, mm, start, end)
-+#define flush_tlb_others(mask, info)  \
-+      native_flush_tlb_others(mask, info)
- #endif
- #endif /* _ASM_X86_TLBFLUSH_H */
---- a/arch/x86/include/asm/uv/uv.h
-+++ b/arch/x86/include/asm/uv/uv.h
-@@ -1,6 +1,8 @@
- #ifndef _ASM_X86_UV_UV_H
- #define _ASM_X86_UV_UV_H
-+#include <asm/tlbflush.h>
-+
- enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
- struct cpumask;
-@@ -14,10 +16,7 @@ extern void uv_cpu_init(void);
- extern void uv_nmi_init(void);
- extern void uv_system_init(void);
- extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
--                                               struct mm_struct *mm,
--                                               unsigned long start,
--                                               unsigned long end,
--                                               unsigned int cpu);
-+                                               const struct flush_tlb_info *info);
- #else /* X86_UV */
-@@ -26,8 +25,8 @@ static inline int is_uv_system(void) { r
- static inline void uv_cpu_init(void)  { }
- static inline void uv_system_init(void)       { }
- static inline const struct cpumask *
--uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
--                  unsigned long start, unsigned long end, unsigned int cpu)
-+uv_flush_tlb_others(const struct cpumask *cpumask,
-+                  const struct flush_tlb_info *info)
- { return cpumask; }
- #endif        /* X86_UV */
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -30,12 +30,6 @@
- #ifdef CONFIG_SMP
--struct flush_tlb_info {
--      struct mm_struct *flush_mm;
--      unsigned long flush_start;
--      unsigned long flush_end;
--};
--
- /*
-  * We cannot call mmdrop() because we are in interrupt context,
-  * instead update mm->cpu_vm_mask.
-@@ -229,11 +223,11 @@ void switch_mm_irqs_off(struct mm_struct
-  */
- static void flush_tlb_func(void *info)
- {
--      struct flush_tlb_info *f = info;
-+      const struct flush_tlb_info *f = info;
-       inc_irq_stat(irq_tlb_count);
--      if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
-+      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
-               return;
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -243,15 +237,15 @@ static void flush_tlb_func(void *info)
-               return;
-       }
--      if (f->flush_end == TLB_FLUSH_ALL) {
-+      if (f->end == TLB_FLUSH_ALL) {
-               local_flush_tlb();
-               trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
-       } else {
-               unsigned long addr;
-               unsigned long nr_pages =
--                      (f->flush_end - f->flush_start) / PAGE_SIZE;
--              addr = f->flush_start;
--              while (addr < f->flush_end) {
-+                      (f->end - f->start) / PAGE_SIZE;
-+              addr = f->start;
-+              while (addr < f->end) {
-                       __flush_tlb_single(addr);
-                       addr += PAGE_SIZE;
-               }
-@@ -260,38 +254,38 @@ static void flush_tlb_func(void *info)
- }
- void native_flush_tlb_others(const struct cpumask *cpumask,
--                               struct mm_struct *mm, unsigned long start,
--                               unsigned long end)
-+                           const struct flush_tlb_info *info)
- {
--      struct flush_tlb_info info;
--
--      info.flush_mm = mm;
--      info.flush_start = start;
--      info.flush_end = end;
--
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
--      if (end == TLB_FLUSH_ALL)
-+      if (info->end == TLB_FLUSH_ALL)
-               trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
-       else
-               trace_tlb_flush(TLB_REMOTE_SEND_IPI,
--                              (end - start) >> PAGE_SHIFT);
-+                              (info->end - info->start) >> PAGE_SHIFT);
-       if (is_uv_system()) {
-               unsigned int cpu;
-               cpu = smp_processor_id();
--              cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
-+              cpumask = uv_flush_tlb_others(cpumask, info);
-               if (cpumask)
-                       smp_call_function_many(cpumask, flush_tlb_func,
--                                                              &info, 1);
-+                                             (void *)info, 1);
-               return;
-       }
--      smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
-+      smp_call_function_many(cpumask, flush_tlb_func,
-+                             (void *)info, 1);
- }
- void flush_tlb_current_task(void)
- {
-       struct mm_struct *mm = current->mm;
-+      struct flush_tlb_info info = {
-+              .mm = mm,
-+              .start = 0UL,
-+              .end = TLB_FLUSH_ALL,
-+      };
-+
-       preempt_disable();
-@@ -302,7 +296,7 @@ void flush_tlb_current_task(void)
-       trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
-       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
--              flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-+              flush_tlb_others(mm_cpumask(mm), &info);
-       preempt_enable();
- }
-@@ -322,6 +316,7 @@ void flush_tlb_mm_range(struct mm_struct
-                               unsigned long end, unsigned long vmflag)
- {
-       unsigned long addr;
-+      struct flush_tlb_info info;
-       /* do a global flush by default */
-       unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
-@@ -362,15 +357,20 @@ void flush_tlb_mm_range(struct mm_struct
-       }
-       trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
- out:
-+      info.mm = mm;
-       if (base_pages_to_flush == TLB_FLUSH_ALL) {
--              start = 0UL;
--              end = TLB_FLUSH_ALL;
-+              info.start = 0UL;
-+              info.end = TLB_FLUSH_ALL;
-+      } else {
-+              info.start = start;
-+              info.end = end;
-       }
-       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
--              flush_tlb_others(mm_cpumask(mm), mm, start, end);
-+              flush_tlb_others(mm_cpumask(mm), &info);
-       preempt_enable();
- }
-+
- static void do_flush_tlb_all(void *info)
- {
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -391,7 +391,7 @@ static void do_kernel_range_flush(void *
-       unsigned long addr;
-       /* flush range by one by one 'invlpg' */
--      for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
-+      for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_single(addr);
- }
-@@ -404,14 +404,20 @@ void flush_tlb_kernel_range(unsigned lon
-               on_each_cpu(do_flush_tlb_all, NULL, 1);
-       } else {
-               struct flush_tlb_info info;
--              info.flush_start = start;
--              info.flush_end = end;
-+              info.start = start;
-+              info.end = end;
-               on_each_cpu(do_kernel_range_flush, &info, 1);
-       }
- }
- void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
- {
-+      struct flush_tlb_info info = {
-+              .mm = NULL,
-+              .start = 0UL,
-+              .end = TLB_FLUSH_ALL,
-+      };
-+
-       int cpu = get_cpu();
-       if (cpumask_test_cpu(cpu, &batch->cpumask)) {
-@@ -421,7 +427,7 @@ void arch_tlbbatch_flush(struct arch_tlb
-       }
-       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
--              flush_tlb_others(&batch->cpumask, NULL, 0, TLB_FLUSH_ALL);
-+              flush_tlb_others(&batch->cpumask, &info);
-       cpumask_clear(&batch->cpumask);
-       put_cpu();
---- a/arch/x86/platform/uv/tlb_uv.c
-+++ b/arch/x86/platform/uv/tlb_uv.c
-@@ -1110,11 +1110,9 @@ static int set_distrib_bits(struct cpuma
-  * done.  The returned pointer is valid till preemption is re-enabled.
-  */
- const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
--                                              struct mm_struct *mm,
--                                              unsigned long start,
--                                              unsigned long end,
--                                              unsigned int cpu)
-+                                        const struct flush_tlb_info *info)
- {
-+      unsigned int cpu = smp_processor_id();
-       int locals = 0;
-       int remotes = 0;
-       int hubs = 0;
-@@ -1171,8 +1169,8 @@ const struct cpumask *uv_flush_tlb_other
-       record_send_statistics(stat, locals, hubs, remotes, bau_desc);
--      if (!end || (end - start) <= PAGE_SIZE)
--              bau_desc->payload.address = start;
-+      if (!info->end || (info->end - info->start) <= PAGE_SIZE)
-+              bau_desc->payload.address = info->start;
-       else
-               bau_desc->payload.address = TLB_FLUSH_ALL;
-       bau_desc->payload.sending_cpu = cpu;
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -1372,8 +1372,7 @@ static void xen_flush_tlb_single(unsigne
- }
- static void xen_flush_tlb_others(const struct cpumask *cpus,
--                               struct mm_struct *mm, unsigned long start,
--                               unsigned long end)
-+                               const struct flush_tlb_info *info)
- {
-       struct {
-               struct mmuext_op op;
-@@ -1385,7 +1384,7 @@ static void xen_flush_tlb_others(const s
-       } *args;
-       struct multicall_space mcs;
--      trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
-+      trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
-       if (cpumask_empty(cpus))
-               return;         /* nothing to do */
-@@ -1399,9 +1398,10 @@ static void xen_flush_tlb_others(const s
-       cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
-       args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
--      if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
-+      if (info->end != TLB_FLUSH_ALL &&
-+          (info->end - info->start) <= PAGE_SIZE) {
-               args->op.cmd = MMUEXT_INVLPG_MULTI;
--              args->op.arg1.linear_addr = start;
-+              args->op.arg1.linear_addr = info->start;
-       }
-       MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
diff --git a/queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch b/queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch
deleted file mode 100644 (file)
index 737f6dc..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-From b3b90e5af7976e46541f5029a369c9c38c5e4cea Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 22 May 2017 15:30:02 -0700
-Subject: x86/mm: Reduce indentation in flush_tlb_func()
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit b3b90e5af7976e46541f5029a369c9c38c5e4cea upstream.
-
-The leave_mm() case can just exit the function early so we don't
-need to indent the entire remainder of the function.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Acked-by: Kees Cook <keescook@chromium.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/97901ddcc9821d7bc7b296d2918d1179f08aaf22.1495492063.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/mm/tlb.c |   34 ++++++++++++++++++----------------
- 1 file changed, 18 insertions(+), 16 deletions(-)
-
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -237,24 +237,26 @@ static void flush_tlb_func(void *info)
-               return;
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
--      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
--              if (f->flush_end == TLB_FLUSH_ALL) {
--                      local_flush_tlb();
--                      trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
--              } else {
--                      unsigned long addr;
--                      unsigned long nr_pages =
--                              (f->flush_end - f->flush_start) / PAGE_SIZE;
--                      addr = f->flush_start;
--                      while (addr < f->flush_end) {
--                              __flush_tlb_single(addr);
--                              addr += PAGE_SIZE;
--                      }
--                      trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
--              }
--      } else
-+
-+      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-               leave_mm(smp_processor_id());
-+              return;
-+      }
-+      if (f->flush_end == TLB_FLUSH_ALL) {
-+              local_flush_tlb();
-+              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
-+      } else {
-+              unsigned long addr;
-+              unsigned long nr_pages =
-+                      (f->flush_end - f->flush_start) / PAGE_SIZE;
-+              addr = f->flush_start;
-+              while (addr < f->flush_end) {
-+                      __flush_tlb_single(addr);
-+                      addr += PAGE_SIZE;
-+              }
-+              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
-+      }
- }
- void native_flush_tlb_others(const struct cpumask *cpumask,
diff --git a/queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch b/queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch
deleted file mode 100644 (file)
index da217d4..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-From 454bbad9793f59f5656ce5971ee473a8be736ef5 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:12 -0700
-Subject: x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 454bbad9793f59f5656ce5971ee473a8be736ef5 upstream.
-
-The local flush path is very similar to the remote flush path.
-Merge them.
-
-This is intended to make no difference to behavior whatsoever.  It
-removes some code and will make future changes to the flushing
-mechanics simpler.
-
-This patch does remove one small optimization: flush_tlb_mm_range()
-now has an unconditional smp_mb() instead of using MOV to CR3 or
-INVLPG as a full barrier when applicable.  I think this is okay for
-a few reasons.  First, smp_mb() is quite cheap compared to the cost
-of a TLB flush.  Second, this rearrangement makes a bigger
-optimization available: with some work on the SMP function call
-code, we could do the local and remote flushes in parallel.  Third,
-I'm planning a rework of the TLB flush algorithm that will require
-an atomic operation at the beginning of each flush, and that
-operation will replace the smp_mb().
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/tlbflush.h |    1 
- arch/x86/mm/tlb.c               |  111 +++++++++++++++++-----------------------
- 2 files changed, 48 insertions(+), 64 deletions(-)
-
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -216,7 +216,6 @@ static inline void __flush_tlb_one(unsig
-  * ..but the i386 has somewhat limited tlb flushing capabilities,
-  * and page-granular flushes are available only on i486 and up.
-  */
--
- struct flush_tlb_info {
-       struct mm_struct *mm;
-       unsigned long start;
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -216,22 +216,9 @@ void switch_mm_irqs_off(struct mm_struct
-  * write/read ordering problems.
-  */
--/*
-- * TLB flush funcation:
-- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
-- * 2) Leave the mm if we are in the lazy tlb mode.
-- */
--static void flush_tlb_func(void *info)
-+static void flush_tlb_func_common(const struct flush_tlb_info *f,
-+                                bool local, enum tlb_flush_reason reason)
- {
--      const struct flush_tlb_info *f = info;
--
--      inc_irq_stat(irq_tlb_count);
--
--      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
--              return;
--
--      count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
--
-       if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-               leave_mm(smp_processor_id());
-               return;
-@@ -239,7 +226,9 @@ static void flush_tlb_func(void *info)
-       if (f->end == TLB_FLUSH_ALL) {
-               local_flush_tlb();
--              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
-+              if (local)
-+                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-+              trace_tlb_flush(reason, TLB_FLUSH_ALL);
-       } else {
-               unsigned long addr;
-               unsigned long nr_pages =
-@@ -249,10 +238,32 @@ static void flush_tlb_func(void *info)
-                       __flush_tlb_single(addr);
-                       addr += PAGE_SIZE;
-               }
--              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
-+              if (local)
-+                      count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
-+              trace_tlb_flush(reason, nr_pages);
-       }
- }
-+static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
-+{
-+      const struct flush_tlb_info *f = info;
-+
-+      flush_tlb_func_common(f, true, reason);
-+}
-+
-+static void flush_tlb_func_remote(void *info)
-+{
-+      const struct flush_tlb_info *f = info;
-+
-+      inc_irq_stat(irq_tlb_count);
-+
-+      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
-+              return;
-+
-+      count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-+      flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
-+}
-+
- void native_flush_tlb_others(const struct cpumask *cpumask,
-                            const struct flush_tlb_info *info)
- {
-@@ -269,11 +280,11 @@ void native_flush_tlb_others(const struc
-               cpu = smp_processor_id();
-               cpumask = uv_flush_tlb_others(cpumask, info);
-               if (cpumask)
--                      smp_call_function_many(cpumask, flush_tlb_func,
-+                      smp_call_function_many(cpumask, flush_tlb_func_remote,
-                                              (void *)info, 1);
-               return;
-       }
--      smp_call_function_many(cpumask, flush_tlb_func,
-+      smp_call_function_many(cpumask, flush_tlb_func_remote,
-                              (void *)info, 1);
- }
-@@ -315,59 +326,33 @@ static unsigned long tlb_single_page_flu
- void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-                               unsigned long end, unsigned long vmflag)
- {
--      unsigned long addr;
--      struct flush_tlb_info info;
--      /* do a global flush by default */
--      unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
--
--      preempt_disable();
--      if (current->active_mm != mm) {
--              /* Synchronize with switch_mm. */
--              smp_mb();
-+      int cpu;
--              goto out;
--      }
--
--      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
--              leave_mm(smp_processor_id());
--
--              /* Synchronize with switch_mm. */
--              smp_mb();
-+      struct flush_tlb_info info = {
-+              .mm = mm,
-+      };
--              goto out;
--      }
-+      cpu = get_cpu();
--      if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
--              base_pages_to_flush = (end - start) >> PAGE_SHIFT;
-+      /* Synchronize with switch_mm. */
-+      smp_mb();
--      /*
--       * Both branches below are implicit full barriers (MOV to CR or
--       * INVLPG) that synchronize with switch_mm.
--       */
--      if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
--              base_pages_to_flush = TLB_FLUSH_ALL;
--              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--              local_flush_tlb();
-+      /* Should we flush just the requested range? */
-+      if ((end != TLB_FLUSH_ALL) &&
-+          !(vmflag & VM_HUGETLB) &&
-+          ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
-+              info.start = start;
-+              info.end = end;
-       } else {
--              /* flush range by one by one 'invlpg' */
--              for (addr = start; addr < end;  addr += PAGE_SIZE) {
--                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
--                      __flush_tlb_single(addr);
--              }
--      }
--      trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
--out:
--      info.mm = mm;
--      if (base_pages_to_flush == TLB_FLUSH_ALL) {
-               info.start = 0UL;
-               info.end = TLB_FLUSH_ALL;
--      } else {
--              info.start = start;
--              info.end = end;
-       }
--      if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-+
-+      if (mm == current->active_mm)
-+              flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
-+      if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
-               flush_tlb_others(mm_cpumask(mm), &info);
--      preempt_enable();
-+      put_cpu();
- }
diff --git a/queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
deleted file mode 100644 (file)
index afd15b7..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-From ca6c99c0794875c6d1db6e22f246699691ab7e6b Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 22 May 2017 15:30:01 -0700
-Subject: x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range()
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream.
-
-flush_tlb_page() was very similar to flush_tlb_mm_range() except that
-it had a couple of issues:
-
- - It was missing an smp_mb() in the case where
-   current->active_mm != mm.  (This is a longstanding bug reported by Nadav Amit)
-
- - It was missing tracepoints and vm counter updates.
-
-The only reason that I can see for keeping it as a separate
-function is that it could avoid a few branches that
-flush_tlb_mm_range() needs to decide to flush just one page.  This
-hardly seems worthwhile.  If we decide we want to get rid of those
-branches again, a better way would be to introduce an
-__flush_tlb_mm_range() helper and make both flush_tlb_page() and
-flush_tlb_mm_range() use it.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Acked-by: Kees Cook <keescook@chromium.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/tlbflush.h |    5 ++++-
- arch/x86/mm/tlb.c               |   27 ---------------------------
- 2 files changed, 4 insertions(+), 28 deletions(-)
-
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -304,12 +304,15 @@ static inline void flush_tlb_kernel_rang
- extern void flush_tlb_all(void);
- extern void flush_tlb_current_task(void);
--extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
- extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-                               unsigned long end, unsigned long vmflag);
- extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
- #define flush_tlb()   flush_tlb_current_task()
-+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
-+{
-+      flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
-+}
- void native_flush_tlb_others(const struct cpumask *cpumask,
-                               struct mm_struct *mm,
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -369,33 +369,6 @@ out:
-       preempt_enable();
- }
--void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
--{
--      struct mm_struct *mm = vma->vm_mm;
--
--      preempt_disable();
--
--      if (current->active_mm == mm) {
--              if (current->mm) {
--                      /*
--                       * Implicit full barrier (INVLPG) that synchronizes
--                       * with switch_mm.
--                       */
--                      __flush_tlb_one(start);
--              } else {
--                      leave_mm(smp_processor_id());
--
--                      /* Synchronize with switch_mm. */
--                      smp_mb();
--              }
--      }
--
--      if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
--              flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
--
--      preempt_enable();
--}
--
- static void do_flush_tlb_all(void *info)
- {
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
diff --git a/queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch b/queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
deleted file mode 100644 (file)
index 13badda..0000000
+++ /dev/null
@@ -1,320 +0,0 @@
-From ce4a4e565f5264909a18c733b864c3f74467f69e Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:14 -0700
-Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
-
-The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
-Aside from that, it's fallen quite a bit behind the SMP code:
-
- - flush_tlb_mm_range() didn't flush individual pages if the range
-   was small.
-
- - The lazy TLB code was much weaker.  This usually wouldn't matter,
-   but, if a kernel thread flushed its lazy "active_mm" more than
-   once (due to reclaim or similar), it wouldn't be unlazied and
-   would instead pointlessly flush repeatedly.
-
- - Tracepoints were missing.
-
-Aside from that, simply having the UP code around was a maintenance
-burden, since it means that any change to the TLB flush code had to
-make sure not to break it.
-
-Simplify everything by deleting the UP code.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/Kconfig                   |    2 
- arch/x86/include/asm/hardirq.h     |    2 
- arch/x86/include/asm/mmu.h         |    6 --
- arch/x86/include/asm/mmu_context.h |    2 
- arch/x86/include/asm/tlbbatch.h    |    2 
- arch/x86/include/asm/tlbflush.h    |   81 -------------------------------------
- arch/x86/mm/init.c                 |    2 
- arch/x86/mm/tlb.c                  |   17 -------
- 8 files changed, 5 insertions(+), 109 deletions(-)
-
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -45,7 +45,7 @@ config X86
-       select ARCH_USE_CMPXCHG_LOCKREF         if X86_64
-       select ARCH_USE_QUEUED_RWLOCKS
-       select ARCH_USE_QUEUED_SPINLOCKS
--      select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
-+      select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-       select ARCH_WANTS_DYNAMIC_TASK_STRUCT
-       select ARCH_WANT_FRAME_POINTERS
-       select ARCH_WANT_IPC_PARSE_VERSION      if X86_32
---- a/arch/x86/include/asm/hardirq.h
-+++ b/arch/x86/include/asm/hardirq.h
-@@ -22,8 +22,8 @@ typedef struct {
- #ifdef CONFIG_SMP
-       unsigned int irq_resched_count;
-       unsigned int irq_call_count;
--      unsigned int irq_tlb_count;
- #endif
-+      unsigned int irq_tlb_count;
- #ifdef CONFIG_X86_THERMAL_VECTOR
-       unsigned int irq_thermal_count;
- #endif
---- a/arch/x86/include/asm/mmu.h
-+++ b/arch/x86/include/asm/mmu.h
-@@ -33,12 +33,6 @@ typedef struct {
- #endif
- } mm_context_t;
--#ifdef CONFIG_SMP
- void leave_mm(int cpu);
--#else
--static inline void leave_mm(int cpu)
--{
--}
--#endif
- #endif /* _ASM_X86_MMU_H */
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm
- static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
- {
--#ifdef CONFIG_SMP
-       if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-               this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
--#endif
- }
- static inline int init_new_context(struct task_struct *tsk,
---- a/arch/x86/include/asm/tlbbatch.h
-+++ b/arch/x86/include/asm/tlbbatch.h
-@@ -3,7 +3,6 @@
- #include <linux/cpumask.h>
--#ifdef CONFIG_SMP
- struct arch_tlbflush_unmap_batch {
-       /*
-        * Each bit set is a CPU that potentially has a TLB entry for one of
-@@ -11,6 +10,5 @@ struct arch_tlbflush_unmap_batch {
-        */
-       struct cpumask cpumask;
- };
--#endif
- #endif /* _ARCH_X86_TLBBATCH_H */
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -7,6 +7,7 @@
- #include <asm/processor.h>
- #include <asm/cpufeature.h>
- #include <asm/special_insns.h>
-+#include <asm/smp.h>
- static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
-@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_non
- #endif
- struct tlb_state {
--#ifdef CONFIG_SMP
-       struct mm_struct *active_mm;
-       int state;
--#endif
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-@@ -222,82 +221,6 @@ struct flush_tlb_info {
-       unsigned long end;
- };
--#ifndef CONFIG_SMP
--
--/* "_up" is for UniProcessor.
-- *
-- * This is a helper for other header functions.  *Not* intended to be called
-- * directly.  All global TLB flushes need to either call this, or to bump the
-- * vm statistics themselves.
-- */
--static inline void __flush_tlb_up(void)
--{
--      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--      __flush_tlb();
--}
--
--static inline void flush_tlb_all(void)
--{
--      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--      __flush_tlb_all();
--}
--
--static inline void flush_tlb(void)
--{
--      __flush_tlb_up();
--}
--
--static inline void local_flush_tlb(void)
--{
--      __flush_tlb_up();
--}
--
--static inline void flush_tlb_mm(struct mm_struct *mm)
--{
--      if (mm == current->active_mm)
--              __flush_tlb_up();
--}
--
--static inline void flush_tlb_page(struct vm_area_struct *vma,
--                                unsigned long addr)
--{
--      if (vma->vm_mm == current->active_mm)
--              __flush_tlb_one(addr);
--}
--
--static inline void flush_tlb_range(struct vm_area_struct *vma,
--                                 unsigned long start, unsigned long end)
--{
--      if (vma->vm_mm == current->active_mm)
--              __flush_tlb_up();
--}
--
--static inline void flush_tlb_mm_range(struct mm_struct *mm,
--         unsigned long start, unsigned long end, unsigned long vmflag)
--{
--      if (mm == current->active_mm)
--              __flush_tlb_up();
--}
--
--static inline void native_flush_tlb_others(const struct cpumask *cpumask,
--                                         const struct flush_tlb_info *info)
--{
--}
--
--static inline void reset_lazy_tlbstate(void)
--{
--}
--
--static inline void flush_tlb_kernel_range(unsigned long start,
--                                        unsigned long end)
--{
--      flush_tlb_all();
--}
--
--#else  /* SMP */
--
--#include <asm/smp.h>
--
- #define local_flush_tlb() __flush_tlb()
- #define flush_tlb_mm(mm)      flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
-@@ -337,8 +260,6 @@ static inline void arch_tlbbatch_add_mm(
- extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
--#endif        /* SMP */
--
- #ifndef CONFIG_PARAVIRT
- #define flush_tlb_others(mask, info)  \
-       native_flush_tlb_others(mask, info)
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -764,10 +764,8 @@ void __init zone_sizes_init(void)
- }
- DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
--#ifdef CONFIG_SMP
-       .active_mm = &init_mm,
-       .state = 0,
--#endif
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
- };
- EXPORT_SYMBOL_GPL(cpu_tlbstate);
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -15,7 +15,7 @@
- #include <linux/debugfs.h>
- /*
-- *    Smarter SMP flushing macros.
-+ *    TLB flushing, formerly SMP-only
-  *            c/o Linus Torvalds.
-  *
-  *    These mean you can really definitely utterly forget about
-@@ -28,8 +28,6 @@
-  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
-  */
--#ifdef CONFIG_SMP
--
- /*
-  * We cannot call mmdrop() because we are in interrupt context,
-  * instead update mm->cpu_vm_mask.
-@@ -53,8 +51,6 @@ void leave_mm(int cpu)
- }
- EXPORT_SYMBOL_GPL(leave_mm);
--#endif /* CONFIG_SMP */
--
- void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-              struct task_struct *tsk)
- {
-@@ -85,10 +81,8 @@ void switch_mm_irqs_off(struct mm_struct
-                               set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
-               }
--#ifdef CONFIG_SMP
-               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-               this_cpu_write(cpu_tlbstate.active_mm, next);
--#endif
-               cpumask_set_cpu(cpu, mm_cpumask(next));
-@@ -146,9 +140,7 @@ void switch_mm_irqs_off(struct mm_struct
-               if (unlikely(prev->context.ldt != next->context.ldt))
-                       load_mm_ldt(next);
- #endif
--      }
--#ifdef CONFIG_SMP
--        else {
-+      } else {
-               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-               BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-@@ -175,11 +167,8 @@ void switch_mm_irqs_off(struct mm_struct
-                       load_mm_ldt(next);
-               }
-       }
--#endif
- }
--#ifdef CONFIG_SMP
--
- /*
-  * The flush IPI assumes that a thread switch happens in this order:
-  * [cpu0: the cpu that switches]
-@@ -459,5 +448,3 @@ static int __init create_tlb_single_page
-       return 0;
- }
- late_initcall(create_tlb_single_page_flush_ceiling);
--
--#endif /* CONFIG_SMP */
diff --git a/queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch b/queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch
deleted file mode 100644 (file)
index 7906046..0000000
+++ /dev/null
@@ -1,507 +0,0 @@
-From 3d28ebceaffab40f30afa87e33331560148d7b8b Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:15 -0700
-Subject: x86/mm: Rework lazy TLB to track the actual loaded mm
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 3d28ebceaffab40f30afa87e33331560148d7b8b upstream.
-
-Lazy TLB state is currently managed in a rather baroque manner.
-AFAICT, there are three possible states:
-
- - Non-lazy.  This means that we're running a user thread or a
-   kernel thread that has called use_mm().  current->mm ==
-   current->active_mm == cpu_tlbstate.active_mm and
-   cpu_tlbstate.state == TLBSTATE_OK.
-
- - Lazy with user mm.  We're running a kernel thread without an mm
-   and we're borrowing an mm_struct.  We have current->mm == NULL,
-   current->active_mm == cpu_tlbstate.active_mm, cpu_tlbstate.state
-   != TLBSTATE_OK (i.e. TLBSTATE_LAZY or 0).  The current cpu is set
-   in mm_cpumask(current->active_mm).  CR3 points to
-   current->active_mm->pgd.  The TLB is up to date.
-
- - Lazy with init_mm.  This happens when we call leave_mm().  We
-   have current->mm == NULL, current->active_mm ==
-   cpu_tlbstate.active_mm, but that mm is only relevant insofar as
-   the scheduler is tracking it for refcounting.  cpu_tlbstate.state
-   != TLBSTATE_OK.  The current cpu is clear in
-   mm_cpumask(current->active_mm).  CR3 points to swapper_pg_dir,
-   i.e. init_mm->pgd.
-
-This patch simplifies the situation.  Other than perf, x86 stops
-caring about current->active_mm at all.  We have
-cpu_tlbstate.loaded_mm pointing to the mm that CR3 references.  The
-TLB is always up to date for that mm.  leave_mm() just switches us
-to init_mm.  There are no longer any special cases for mm_cpumask,
-and switch_mm() switches mms without worrying about laziness.
-
-After this patch, cpu_tlbstate.state serves only to tell the TLB
-flush code whether it may switch to init_mm instead of doing a
-normal flush.
-
-This makes fairly extensive changes to xen_exit_mmap(), which used
-to look a bit like black magic.
-
-Perf is unchanged.  With or without this change, perf may behave a bit
-erratically if it tries to read user memory in kernel thread context.
-We should build on this patch to teach perf to never look at user
-memory when cpu_tlbstate.loaded_mm != current->mm.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/events/core.c          |    3 
- arch/x86/include/asm/tlbflush.h |   12 +-
- arch/x86/kernel/ldt.c           |    7 -
- arch/x86/mm/init.c              |    2 
- arch/x86/mm/tlb.c               |  216 ++++++++++++++++++++--------------------
- arch/x86/xen/mmu.c              |   51 ++++-----
- 6 files changed, 147 insertions(+), 144 deletions(-)
-
---- a/arch/x86/events/core.c
-+++ b/arch/x86/events/core.c
-@@ -2100,8 +2100,7 @@ static int x86_pmu_event_init(struct per
- static void refresh_pce(void *ignored)
- {
--      if (current->active_mm)
--              load_mm_cr4(current->active_mm);
-+      load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
- }
- static void x86_pmu_event_mapped(struct perf_event *event)
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -66,7 +66,13 @@ static inline void invpcid_flush_all_non
- #endif
- struct tlb_state {
--      struct mm_struct *active_mm;
-+      /*
-+       * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
-+       * are on.  This means that it may not match current->active_mm,
-+       * which will contain the previous user mm when we're in lazy TLB
-+       * mode even if we've already switched back to swapper_pg_dir.
-+       */
-+      struct mm_struct *loaded_mm;
-       int state;
-       /*
-@@ -249,7 +255,9 @@ void native_flush_tlb_others(const struc
- static inline void reset_lazy_tlbstate(void)
- {
-       this_cpu_write(cpu_tlbstate.state, 0);
--      this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
-+      this_cpu_write(cpu_tlbstate.loaded_mm, &init_mm);
-+
-+      WARN_ON(read_cr3() != __pa_symbol(swapper_pg_dir));
- }
- static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -23,14 +23,15 @@
- #include <asm/syscalls.h>
- /* context.lock is held for us, so we don't need any locking. */
--static void flush_ldt(void *current_mm)
-+static void flush_ldt(void *__mm)
- {
-+      struct mm_struct *mm = __mm;
-       mm_context_t *pc;
--      if (current->active_mm != current_mm)
-+      if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
-               return;
--      pc = &current->active_mm->context;
-+      pc = &mm->context;
-       set_ldt(pc->ldt->entries, pc->ldt->size);
- }
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -764,7 +764,7 @@ void __init zone_sizes_init(void)
- }
- DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
--      .active_mm = &init_mm,
-+      .loaded_mm = &init_mm,
-       .state = 0,
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
- };
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -28,26 +28,25 @@
-  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
-  */
--/*
-- * We cannot call mmdrop() because we are in interrupt context,
-- * instead update mm->cpu_vm_mask.
-- */
- void leave_mm(int cpu)
- {
--      struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
-+      struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-+
-+      /*
-+       * It's plausible that we're in lazy TLB mode while our mm is init_mm.
-+       * If so, our callers still expect us to flush the TLB, but there
-+       * aren't any user TLB entries in init_mm to worry about.
-+       *
-+       * This needs to happen before any other sanity checks due to
-+       * intel_idle's shenanigans.
-+       */
-+      if (loaded_mm == &init_mm)
-+              return;
-+
-       if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-               BUG();
--      if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
--              cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
--              load_cr3(swapper_pg_dir);
--              /*
--               * This gets called in the idle path where RCU
--               * functions differently.  Tracing normally
--               * uses RCU, so we have to call the tracepoint
--               * specially here.
--               */
--              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
--      }
-+
-+      switch_mm(NULL, &init_mm, NULL);
- }
- EXPORT_SYMBOL_GPL(leave_mm);
-@@ -65,108 +64,109 @@ void switch_mm_irqs_off(struct mm_struct
-                       struct task_struct *tsk)
- {
-       unsigned cpu = smp_processor_id();
-+      struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
--      if (likely(prev != next)) {
--              if (IS_ENABLED(CONFIG_VMAP_STACK)) {
--                      /*
--                       * If our current stack is in vmalloc space and isn't
--                       * mapped in the new pgd, we'll double-fault.  Forcibly
--                       * map it.
--                       */
--                      unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
--
--                      pgd_t *pgd = next->pgd + stack_pgd_index;
--
--                      if (unlikely(pgd_none(*pgd)))
--                              set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
--              }
-+      /*
-+       * NB: The scheduler will call us with prev == next when
-+       * switching from lazy TLB mode to normal mode if active_mm
-+       * isn't changing.  When this happens, there is no guarantee
-+       * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
-+       *
-+       * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
-+       */
--              this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
--              this_cpu_write(cpu_tlbstate.active_mm, next);
-+      this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
--              cpumask_set_cpu(cpu, mm_cpumask(next));
-+      if (real_prev == next) {
-+              /*
-+               * There's nothing to do: we always keep the per-mm control
-+               * regs in sync with cpu_tlbstate.loaded_mm.  Just
-+               * sanity-check mm_cpumask.
-+               */
-+              if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
-+                      cpumask_set_cpu(cpu, mm_cpumask(next));
-+              return;
-+      }
-+      if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-               /*
--               * Re-load page tables.
--               *
--               * This logic has an ordering constraint:
--               *
--               *  CPU 0: Write to a PTE for 'next'
--               *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
--               *  CPU 1: set bit 1 in next's mm_cpumask
--               *  CPU 1: load from the PTE that CPU 0 writes (implicit)
--               *
--               * We need to prevent an outcome in which CPU 1 observes
--               * the new PTE value and CPU 0 observes bit 1 clear in
--               * mm_cpumask.  (If that occurs, then the IPI will never
--               * be sent, and CPU 0's TLB will contain a stale entry.)
--               *
--               * The bad outcome can occur if either CPU's load is
--               * reordered before that CPU's store, so both CPUs must
--               * execute full barriers to prevent this from happening.
--               *
--               * Thus, switch_mm needs a full barrier between the
--               * store to mm_cpumask and any operation that could load
--               * from next->pgd.  TLB fills are special and can happen
--               * due to instruction fetches or for no reason at all,
--               * and neither LOCK nor MFENCE orders them.
--               * Fortunately, load_cr3() is serializing and gives the
--               * ordering guarantee we need.
--               *
-+               * If our current stack is in vmalloc space and isn't
-+               * mapped in the new pgd, we'll double-fault.  Forcibly
-+               * map it.
-                */
--              load_cr3(next->pgd);
-+              unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
--              trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-+              pgd_t *pgd = next->pgd + stack_pgd_index;
--              /* Stop flush ipis for the previous mm */
--              cpumask_clear_cpu(cpu, mm_cpumask(prev));
-+              if (unlikely(pgd_none(*pgd)))
-+                      set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
-+      }
--              /* Load per-mm CR4 state */
--              load_mm_cr4(next);
-+      this_cpu_write(cpu_tlbstate.loaded_mm, next);
--#ifdef CONFIG_MODIFY_LDT_SYSCALL
--              /*
--               * Load the LDT, if the LDT is different.
--               *
--               * It's possible that prev->context.ldt doesn't match
--               * the LDT register.  This can happen if leave_mm(prev)
--               * was called and then modify_ldt changed
--               * prev->context.ldt but suppressed an IPI to this CPU.
--               * In this case, prev->context.ldt != NULL, because we
--               * never set context.ldt to NULL while the mm still
--               * exists.  That means that next->context.ldt !=
--               * prev->context.ldt, because mms never share an LDT.
--               */
--              if (unlikely(prev->context.ldt != next->context.ldt))
--                      load_mm_ldt(next);
--#endif
--      } else {
--              this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
--              BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-+      WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
-+      cpumask_set_cpu(cpu, mm_cpumask(next));
--              if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
--                      /*
--                       * On established mms, the mm_cpumask is only changed
--                       * from irq context, from ptep_clear_flush() while in
--                       * lazy tlb mode, and here. Irqs are blocked during
--                       * schedule, protecting us from simultaneous changes.
--                       */
--                      cpumask_set_cpu(cpu, mm_cpumask(next));
-+      /*
-+       * Re-load page tables.
-+       *
-+       * This logic has an ordering constraint:
-+       *
-+       *  CPU 0: Write to a PTE for 'next'
-+       *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-+       *  CPU 1: set bit 1 in next's mm_cpumask
-+       *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-+       *
-+       * We need to prevent an outcome in which CPU 1 observes
-+       * the new PTE value and CPU 0 observes bit 1 clear in
-+       * mm_cpumask.  (If that occurs, then the IPI will never
-+       * be sent, and CPU 0's TLB will contain a stale entry.)
-+       *
-+       * The bad outcome can occur if either CPU's load is
-+       * reordered before that CPU's store, so both CPUs must
-+       * execute full barriers to prevent this from happening.
-+       *
-+       * Thus, switch_mm needs a full barrier between the
-+       * store to mm_cpumask and any operation that could load
-+       * from next->pgd.  TLB fills are special and can happen
-+       * due to instruction fetches or for no reason at all,
-+       * and neither LOCK nor MFENCE orders them.
-+       * Fortunately, load_cr3() is serializing and gives the
-+       * ordering guarantee we need.
-+       */
-+      load_cr3(next->pgd);
-+
-+      /*
-+       * This gets called via leave_mm() in the idle path where RCU
-+       * functions differently.  Tracing normally uses RCU, so we have to
-+       * call the tracepoint specially here.
-+       */
-+      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-+
-+      /* Stop flush ipis for the previous mm */
-+      WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
-+                   real_prev != &init_mm);
-+      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
--                      /*
--                       * We were in lazy tlb mode and leave_mm disabled
--                       * tlb flush IPI delivery. We must reload CR3
--                       * to make sure to use no freed page tables.
--                       *
--                       * As above, load_cr3() is serializing and orders TLB
--                       * fills with respect to the mm_cpumask write.
--                       */
--                      load_cr3(next->pgd);
--                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
--                      load_mm_cr4(next);
--                      load_mm_ldt(next);
--              }
--      }
-+      /* Load per-mm CR4 state */
-+      load_mm_cr4(next);
-+
-+#ifdef CONFIG_MODIFY_LDT_SYSCALL
-+      /*
-+       * Load the LDT, if the LDT is different.
-+       *
-+       * It's possible that prev->context.ldt doesn't match
-+       * the LDT register.  This can happen if leave_mm(prev)
-+       * was called and then modify_ldt changed
-+       * prev->context.ldt but suppressed an IPI to this CPU.
-+       * In this case, prev->context.ldt != NULL, because we
-+       * never set context.ldt to NULL while the mm still
-+       * exists.  That means that next->context.ldt !=
-+       * prev->context.ldt, because mms never share an LDT.
-+       */
-+      if (unlikely(real_prev->context.ldt != next->context.ldt))
-+              load_mm_ldt(next);
-+#endif
- }
- /*
-@@ -246,7 +246,7 @@ static void flush_tlb_func_remote(void *
-       inc_irq_stat(irq_tlb_count);
--      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
-+      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
-               return;
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-@@ -337,7 +337,7 @@ void flush_tlb_mm_range(struct mm_struct
-               info.end = TLB_FLUSH_ALL;
-       }
--      if (mm == current->active_mm)
-+      if (mm == this_cpu_read(cpu_tlbstate.loaded_mm))
-               flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
-       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
-               flush_tlb_others(mm_cpumask(mm), &info);
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -998,37 +998,32 @@ static void xen_dup_mmap(struct mm_struc
-       spin_unlock(&mm->page_table_lock);
- }
--
--#ifdef CONFIG_SMP
--/* Another cpu may still have their %cr3 pointing at the pagetable, so
--   we need to repoint it somewhere else before we can unpin it. */
--static void drop_other_mm_ref(void *info)
-+static void drop_mm_ref_this_cpu(void *info)
- {
-       struct mm_struct *mm = info;
--      struct mm_struct *active_mm;
--
--      active_mm = this_cpu_read(cpu_tlbstate.active_mm);
--      if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
-+      if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
-               leave_mm(smp_processor_id());
--      /* If this cpu still has a stale cr3 reference, then make sure
--         it has been flushed. */
-+      /*
-+       * If this cpu still has a stale cr3 reference, then make sure
-+       * it has been flushed.
-+       */
-       if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
--              load_cr3(swapper_pg_dir);
-+              xen_mc_flush();
- }
-+#ifdef CONFIG_SMP
-+/*
-+ * Another cpu may still have their %cr3 pointing at the pagetable, so
-+ * we need to repoint it somewhere else before we can unpin it.
-+ */
- static void xen_drop_mm_ref(struct mm_struct *mm)
- {
-       cpumask_var_t mask;
-       unsigned cpu;
--      if (current->active_mm == mm) {
--              if (current->mm == mm)
--                      load_cr3(swapper_pg_dir);
--              else
--                      leave_mm(smp_processor_id());
--      }
-+      drop_mm_ref_this_cpu(mm);
-       /* Get the "official" set of cpus referring to our pagetable. */
-       if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
-@@ -1036,31 +1031,31 @@ static void xen_drop_mm_ref(struct mm_st
-                       if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
-                           && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
-                               continue;
--                      smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
-+                      smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
-               }
-               return;
-       }
-       cpumask_copy(mask, mm_cpumask(mm));
--      /* It's possible that a vcpu may have a stale reference to our
--         cr3, because its in lazy mode, and it hasn't yet flushed
--         its set of pending hypercalls yet.  In this case, we can
--         look at its actual current cr3 value, and force it to flush
--         if needed. */
-+      /*
-+       * It's possible that a vcpu may have a stale reference to our
-+       * cr3, because its in lazy mode, and it hasn't yet flushed
-+       * its set of pending hypercalls yet.  In this case, we can
-+       * look at its actual current cr3 value, and force it to flush
-+       * if needed.
-+       */
-       for_each_online_cpu(cpu) {
-               if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-                       cpumask_set_cpu(cpu, mask);
-       }
--      if (!cpumask_empty(mask))
--              smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
-+      smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
-       free_cpumask_var(mask);
- }
- #else
- static void xen_drop_mm_ref(struct mm_struct *mm)
- {
--      if (current->active_mm == mm)
--              load_cr3(swapper_pg_dir);
-+      drop_mm_ref_this_cpu(mm);
- }
- #endif
diff --git a/queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch b/queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch
deleted file mode 100644 (file)
index 459c28b..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-From 3f79e4c7c9c2f5c30751ea5c8dd9fd1d56b81947 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 28 May 2017 10:00:13 -0700
-Subject: x86/mm: Use new merged flush logic in arch_tlbbatch_flush()
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 3f79e4c7c9c2f5c30751ea5c8dd9fd1d56b81947 upstream.
-
-Now there's only one copy of the local tlb flush logic for
-non-kernel pages on SMP kernels.
-
-The only functional change is that arch_tlbbatch_flush() will now
-leave_mm() on the local CPU if that CPU is in the batch and is in
-TLBSTATE_LAZY mode.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Nadav Amit <namit@vmware.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/mm/tlb.c |    8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -405,12 +405,8 @@ void arch_tlbbatch_flush(struct arch_tlb
-       int cpu = get_cpu();
--      if (cpumask_test_cpu(cpu, &batch->cpumask)) {
--              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--              local_flush_tlb();
--              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
--      }
--
-+      if (cpumask_test_cpu(cpu, &batch->cpumask))
-+              flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
-       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-               flush_tlb_others(&batch->cpumask, &info);
-       cpumask_clear(&batch->cpumask);
diff --git a/queue-4.9/x86-unify-tss_struct.patch b/queue-4.9/x86-unify-tss_struct.patch
deleted file mode 100644 (file)
index e86f87a..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-From ca241c75037b32e0216a68e39ad2801d04fa1f87 Mon Sep 17 00:00:00 2001
-From: Glauber de Oliveira Costa <gcosta@redhat.com>
-Date: Wed, 30 Jan 2008 13:31:31 +0100
-Subject: x86: unify tss_struct
-
-From: Glauber de Oliveira Costa <gcosta@redhat.com>
-
-commit ca241c75037b32e0216a68e39ad2801d04fa1f87 upstream.
-
-Although slightly different, the tss_struct is very similar in x86_64 and
-i386. The part that really differs, which matches the hardware's view of it,
-is now called x86_hw_tss, and each of the architectures provides its own.
-It's then used as a field in the outer tss_struct.
-
-Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
-Signed-off-by: Ingo Molnar <mingo@elte.hu>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Eduardo Valentin <eduval@amazon.com>
-Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/include/asm/processor.h |    2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -272,7 +272,7 @@ struct x86_hw_tss {
-       u16                     reserved5;
-       u16                     io_bitmap_base;
--} __attribute__((packed)) ____cacheline_aligned;
-+} __attribute__((packed));
- #endif
- /*