From 1cdca9155ecb91f05b7b793a793a699dbddaf2b4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Thu, 23 Oct 2014 14:47:14 +0800
Subject: [PATCH] 3.16-stable patches

added patches:
	kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch
	kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
	kvm-fix-potentially-corrupt-mmio-cache.patch
	kvm-s390-unintended-fallthrough-for-external-call.patch
	spi-dw-mid-check-that-dma-was-inited-before-exit.patch
	spi-dw-mid-respect-8-bit-mode.patch
	x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
	x86-kvm-vmx-preserve-cr4-across-vm-entry.patch
---
 ...umber-in-kvm_current_mmio_generation.patch |  55 +++++
 ...ex-for-obviously-invalid-vcpu-ioctls.patch |  46 +++++
 ...m-fix-potentially-corrupt-mmio-cache.patch | 191 ++++++++++++++++++
 ...tended-fallthrough-for-external-call.patch |  29 +++
 queue-3.16/series                             |   8 +
 ...heck-that-dma-was-inited-before-exit.patch |  32 +++
 .../spi-dw-mid-respect-8-bit-mode.patch       |  40 ++++
 ...r4.pge-so-tlb-flush-uses-cr3-instead.patch |  53 +++++
 ...kvm-vmx-preserve-cr4-across-vm-entry.patch |  82 ++++++++
 9 files changed, 536 insertions(+)
 create mode 100644 queue-3.16/kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch
 create mode 100644 queue-3.16/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
 create mode 100644 queue-3.16/kvm-fix-potentially-corrupt-mmio-cache.patch
 create mode 100644 queue-3.16/kvm-s390-unintended-fallthrough-for-external-call.patch
 create mode 100644 queue-3.16/spi-dw-mid-check-that-dma-was-inited-before-exit.patch
 create mode 100644 queue-3.16/spi-dw-mid-respect-8-bit-mode.patch
 create mode 100644 queue-3.16/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
 create mode 100644 queue-3.16/x86-kvm-vmx-preserve-cr4-across-vm-entry.patch

diff --git a/queue-3.16/kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch b/queue-3.16/kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch
new file mode 100644
index 00000000000..93da79e779f
--- /dev/null
+++ b/queue-3.16/kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch
@@ -0,0 +1,55 @@
+From 00f034a12fdd81210d58116326d92780aac5c238 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Wed, 20 Aug 2014 14:29:21 +0200
+Subject: KVM: do not bias the generation number in kvm_current_mmio_generation
+
+From: Paolo Bonzini
+
+commit 00f034a12fdd81210d58116326d92780aac5c238 upstream.
+
+The next patch will give a meaning (a la seqcount) to the low bit of the
+generation number. Ensure that it matches between kvm->memslots->generation
+and kvm_current_mmio_generation().
+
+Reviewed-by: David Matlack
+Reviewed-by: Xiao Guangrong
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/mmu.c  |    7 +------
+ virt/kvm/kvm_main.c |    7 +++++++
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -240,12 +240,7 @@ static unsigned int get_mmio_spte_genera
+ 
+ static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+ {
+-	/*
+-	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
+-	 * code of handling generation number wrap-around.
+-	 */
+-	return (kvm_memslots(kvm)->generation +
+-	       MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
++	return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+ }
+ 
+ static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -472,6 +472,13 @@ static struct kvm *kvm_create_vm(unsigne
+ 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ 	if (!kvm->memslots)
+ 		goto out_err_no_srcu;
++
++	/*
++	 * Init kvm generation close to the maximum to easily test the
++	 * code of handling generation number wrap-around.
++	 */
++	kvm->memslots->generation = -150;
++
+ 	kvm_init_memslots_id(kvm);
+ 	if (init_srcu_struct(&kvm->srcu))
+ 		goto out_err_no_srcu;
diff --git a/queue-3.16/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch b/queue-3.16/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
new file mode 100644
index 00000000000..3256ad48afd
--- /dev/null
+++ b/queue-3.16/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
@@ -0,0 +1,46 @@
+From 2ea75be3219571d0ec009ce20d9971e54af96e09 Mon Sep 17 00:00:00 2001
+From: David Matlack
+Date: Fri, 19 Sep 2014 16:03:25 -0700
+Subject: kvm: don't take vcpu mutex for obviously invalid vcpu ioctls
+
+From: David Matlack
+
+commit 2ea75be3219571d0ec009ce20d9971e54af96e09 upstream.
+
+vcpu ioctls can hang the calling thread if issued while a vcpu is running.
+However, invalid ioctls can happen when userspace tries to probe the kind
+of file descriptor it has (e.g. isatty() calls ioctl(TCGETS)); in that
+case, we know the ioctl is going to be rejected as invalid anyway and we
+can fail before trying to take the vcpu mutex.
+
+This patch does not change functionality, it just makes invalid ioctls
+fail faster.
+
+Signed-off-by: David Matlack
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ virt/kvm/kvm_main.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -52,6 +52,7 @@
+ 
+ #include <asm/processor.h>
+ #include <asm/io.h>
++#include <asm/ioctl.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+ 
+@@ -1989,6 +1990,9 @@ static long kvm_vcpu_ioctl(struct file *
+ 	if (vcpu->kvm->mm != current->mm)
+ 		return -EIO;
+ 
++	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
++		return -EINVAL;
++
+ #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
+ 	/*
+ 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
diff --git a/queue-3.16/kvm-fix-potentially-corrupt-mmio-cache.patch b/queue-3.16/kvm-fix-potentially-corrupt-mmio-cache.patch
new file mode 100644
index 00000000000..ba49f332c32
--- /dev/null
+++ b/queue-3.16/kvm-fix-potentially-corrupt-mmio-cache.patch
@@ -0,0 +1,191 @@
+From ee3d1570b58677885b4552bce8217fda7b226a68 Mon Sep 17 00:00:00 2001
+From: David Matlack
+Date: Mon, 18 Aug 2014 15:46:06 -0700
+Subject: kvm: fix potentially corrupt mmio cache
+
+From: David Matlack
+
+commit ee3d1570b58677885b4552bce8217fda7b226a68 upstream.
+
+vcpu exits and memslot mutations can run concurrently as long as the
+vcpu does not acquire the slots mutex. Thus it is theoretically possible
+for memslots to change underneath a vcpu that is handling an exit.
+
+If we increment the memslot generation number again after
+synchronize_srcu_expedited(), vcpus can safely cache memslot generation
+without maintaining a single rcu_dereference through an entire vm exit.
+And much of the x86/kvm code does not maintain a single rcu_dereference
+of the current memslots during each exit.
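+
+In miniature, the caching pattern at issue looks like this (a kernel-style
+sketch in plain C, not the actual KVM code; the struct and helpers here
+merely stand in for their kernel counterparts):
+
+	struct mmio_cache {
+		u64 generation;	/* memslot generation seen at fill time */
+		u64 gfn;	/* the cached translation, illustrative only */
+	};
+
+	static void cache_fill(struct mmio_cache *c, u64 gen, u64 gfn)
+	{
+		c->generation = gen;	/* step 7 in the diagram below */
+		c->gfn = gfn;
+	}
+
+	static bool cache_valid(struct mmio_cache *c, u64 current_gen)
+	{
+		/* A stale entry dies as soon as the generation moves on. */
+		return c->generation == current_gen;
+	}
+
+The race below arises because the generation stored at fill time can be
+the *new* one even though the cached data was computed from the *old*
+memslots.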
+
+We can prevent the following case:
+
+   vcpu (CPU 0)                             | thread (CPU 1)
+--------------------------------------------+--------------------------
+1  vm exit                                  |
+2  srcu_read_unlock(&kvm->srcu)             |
+3  decide to cache something based on       |
+     old memslots                           |
+4                                           | change memslots
+                                            | (increments generation)
+5                                           | synchronize_srcu(&kvm->srcu);
+6  retrieve generation # from new memslots  |
+7  tag cache with new memslot generation    |
+8  srcu_read_unlock(&kvm->srcu)             |
+...                                         |
+   <action based on cache occurs even       |
+    though the caching decision was based   |
+    on the old memslots>                    |
+...                                         |
+   <action *continues* to occur until next  |
+    memslot generation change, which may    |
+    be never>                               |
+                                            |
+By incrementing the generation after synchronizing with kvm->srcu readers,
+we ensure that the generation retrieved in (6) will become invalid soon
+after (8).
+
+Keeping the existing increment is not strictly necessary, but we
+do keep it and just move it for consistency from update_memslots to
+install_new_memslots. It invalidates old cached MMIOs immediately,
+instead of having to wait for the end of synchronize_srcu_expedited,
+which makes the code more clearly correct in case CPU 1 is preempted
+right after synchronize_srcu() returns.
+
+To avoid halving the generation space in SPTEs, always presume that the
+low bit of the generation is zero when reconstructing a generation number
+out of an SPTE. This effectively disables MMIO caching in SPTEs during
+the call to synchronize_srcu_expedited. Using the low bit this way is
+somewhat like a seqcount---where the protected thing is a cache, and
+instead of retrying we can simply punt if we observe the low bit to be 1.
+
+Signed-off-by: David Matlack
+Reviewed-by: Xiao Guangrong
+Reviewed-by: David Matlack
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ Documentation/virtual/kvm/mmu.txt |   14 ++++++++++++++
+ arch/x86/kvm/mmu.c                |   20 ++++++++++++--------
+ virt/kvm/kvm_main.c               |   23 ++++++++++++++++-------
+ 3 files changed, 42 insertions(+), 15 deletions(-)
+
+--- a/Documentation/virtual/kvm/mmu.txt
++++ b/Documentation/virtual/kvm/mmu.txt
+@@ -425,6 +425,20 @@ fault through the slow path.
+ Since only 19 bits are used to store generation-number on mmio spte, all
+ pages are zapped when there is an overflow.
+ 
++Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
++times, the last one happening when the generation number is retrieved and
++stored into the MMIO spte. Thus, the MMIO spte might be created based on
++out-of-date information, but with an up-to-date generation number.
++
++To avoid this, the generation number is incremented again after synchronize_srcu
++returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
++memslot update, while some SRCU readers might be using the old copy. We do not
++want to use an MMIO spte created with an odd generation number, and we can do
++this without losing a bit in the MMIO spte. The low bit of the generation is
++not stored in the MMIO spte, and is presumed zero when it is extracted out of
++the spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1,
++the next access to the spte will always be a cache miss.
++
+ 
+ Further reading
+ ===============
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+ 
+ /*
+- * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
+- * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
+- * number.
++ * the low bit of the generation number is always presumed to be zero.
++ * This disables mmio caching during memslot updates. The concept is
++ * similar to a seqcount but instead of retrying the access we just punt
++ * and ignore the cache.
++ *
++ * spte bits 3-11 are used as bits 1-9 of the generation number,
++ * the bits 52-61 are used as bits 10-19 of the generation number.
+  */
+-#define MMIO_SPTE_GEN_LOW_SHIFT	3
++#define MMIO_SPTE_GEN_LOW_SHIFT	2
+ #define MMIO_SPTE_GEN_HIGH_SHIFT	52
+ 
+-#define MMIO_GEN_SHIFT			19
+-#define MMIO_GEN_LOW_SHIFT		9
+-#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
++#define MMIO_GEN_SHIFT			20
++#define MMIO_GEN_LOW_SHIFT		10
++#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
+ #define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
+ #define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
+ 
+@@ -4433,7 +4437,7 @@ void kvm_mmu_invalidate_mmio_sptes(struc
+ 	 * The very rare case: if the generation-number is round,
+ 	 * zap all shadow pages.
+ 	 */
+-	if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
++	if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
+ 		printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+ 		kvm_mmu_invalidate_zap_all_pages(kvm);
+ 	}
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -95,8 +95,6 @@ static int hardware_enable_all(void);
+ static void hardware_disable_all(void);
+ 
+ static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+-static void update_memslots(struct kvm_memslots *slots,
+-			    struct kvm_memory_slot *new, u64 last_generation);
+ 
+ static void kvm_release_pfn_dirty(pfn_t pfn);
+ static void mark_page_dirty_in_slot(struct kvm *kvm,
+@@ -685,8 +683,7 @@ static void sort_memslots(struct kvm_mem
+ }
+ 
+ static void update_memslots(struct kvm_memslots *slots,
+-			    struct kvm_memory_slot *new,
+-			    u64 last_generation)
++			    struct kvm_memory_slot *new)
+ {
+ 	if (new) {
+ 		int id = new->id;
+@@ -697,8 +694,6 @@ static void update_memslots(struct kvm_m
+ 		if (new->npages != npages)
+ 			sort_memslots(slots);
+ 	}
+-
+-	slots->generation = last_generation + 1;
+ }
+ 
+ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+@@ -720,10 +715,24 @@ static struct kvm_memslots *install_new_
+ {
+ 	struct kvm_memslots *old_memslots = kvm->memslots;
+ 
+-	update_memslots(slots, new, kvm->memslots->generation);
++	/*
++	 * Set the low bit in the generation, which disables SPTE caching
++	 * until the end of synchronize_srcu_expedited.
++	 */
++	WARN_ON(old_memslots->generation & 1);
++	slots->generation = old_memslots->generation + 1;
++
++	update_memslots(slots, new);
+ 	rcu_assign_pointer(kvm->memslots, slots);
+ 	synchronize_srcu_expedited(&kvm->srcu);
+ 
++	/*
++	 * Increment the new memslot generation a second time. This prevents
++	 * vm exits that race with memslot updates from caching a memslot
++	 * generation that will (potentially) be valid forever.
++	 */
++	slots->generation++;
++
+ 	kvm_arch_memslots_updated(kvm);
+ 
+ 	return old_memslots;
diff --git a/queue-3.16/kvm-s390-unintended-fallthrough-for-external-call.patch b/queue-3.16/kvm-s390-unintended-fallthrough-for-external-call.patch
new file mode 100644
index 00000000000..d471e08d83b
--- /dev/null
+++ b/queue-3.16/kvm-s390-unintended-fallthrough-for-external-call.patch
@@ -0,0 +1,29 @@
+From f346026e55f1efd3949a67ddd1dcea7c1b9a615e Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger
+Date: Wed, 3 Sep 2014 16:21:32 +0200
+Subject: KVM: s390: unintended fallthrough for external call
+
+From: Christian Borntraeger
+
+commit f346026e55f1efd3949a67ddd1dcea7c1b9a615e upstream.
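+
+The bug class, reduced to a generic sketch (hypothetical names and cases,
+not the s390 interrupt code itself; the real case labels appear in the
+hunk below):
+
+	switch (type) {
+	case TYPE_A:
+		if (!precondition)
+			return 0;
+		if (deliverable)
+			return 1;
+		/* missing "return 0;" here: control unintentionally
+		 * falls through into the TYPE_B checks below */
+	case TYPE_B:
+		return type_b_deliverable();
+	}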
+
+We must not fall through if the conditions for external call are not met.
+
+Signed-off-by: Christian Borntraeger
+Reviewed-by: Thomas Huth
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/s390/kvm/interrupt.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -85,6 +85,7 @@ static int __interrupt_is_deliverable(st
+ 			return 0;
+ 		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
+ 			return 1;
++		return 0;
+ 	case KVM_S390_INT_EMERGENCY:
+ 		if (psw_extint_disabled(vcpu))
+ 			return 0;
diff --git a/queue-3.16/series b/queue-3.16/series
index 3eb1f9019f0..f05f1888372 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -11,3 +11,11 @@ fs-add-a-missing-permission-check-to-do_umount.patch
 usb-pch_udc-usb-gadget-device-support-for-intel-quark-x1000.patch
 pci_ids-add-support-for-intel-quark-ilb.patch
 kvm-x86-fix-stale-mmio-cache-bug.patch
+kvm-fix-potentially-corrupt-mmio-cache.patch
+kvm-do-not-bias-the-generation-number-in-kvm_current_mmio_generation.patch
+kvm-s390-unintended-fallthrough-for-external-call.patch
+kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
+x86-kvm-vmx-preserve-cr4-across-vm-entry.patch
+x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
+spi-dw-mid-respect-8-bit-mode.patch
+spi-dw-mid-check-that-dma-was-inited-before-exit.patch
diff --git a/queue-3.16/spi-dw-mid-check-that-dma-was-inited-before-exit.patch b/queue-3.16/spi-dw-mid-check-that-dma-was-inited-before-exit.patch
new file mode 100644
index 00000000000..0b6d76b1dcb
--- /dev/null
+++ b/queue-3.16/spi-dw-mid-check-that-dma-was-inited-before-exit.patch
@@ -0,0 +1,32 @@
+From fb57862ead652454ceeb659617404c5f13bc34b5 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko
+Date: Fri, 12 Sep 2014 15:11:58 +0300
+Subject: spi: dw-mid: check that DMA was inited before exit
+
+From: Andy Shevchenko
+
+commit fb57862ead652454ceeb659617404c5f13bc34b5 upstream.
+
+If the driver was compiled with DMA support, but DMA channels weren't
+acquired for some reason, mid_spi_dma_exit() will crash the kernel.
+
+Fixes: 7063c0d942a1 (spi/dw_spi: add DMA support)
+Signed-off-by: Andy Shevchenko
+Signed-off-by: Mark Brown
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/spi/spi-dw-mid.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/spi/spi-dw-mid.c
++++ b/drivers/spi/spi-dw-mid.c
+@@ -89,6 +89,8 @@ err_exit:
+ 
+ static void mid_spi_dma_exit(struct dw_spi *dws)
+ {
++	if (!dws->dma_inited)
++		return;
+ 	dma_release_channel(dws->txchan);
+ 	dma_release_channel(dws->rxchan);
+ }
diff --git a/queue-3.16/spi-dw-mid-respect-8-bit-mode.patch b/queue-3.16/spi-dw-mid-respect-8-bit-mode.patch
new file mode 100644
index 00000000000..a586a87d4f0
--- /dev/null
+++ b/queue-3.16/spi-dw-mid-respect-8-bit-mode.patch
@@ -0,0 +1,40 @@
+From b41583e7299046abdc578c33f25ed83ee95b9b31 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko
+Date: Thu, 18 Sep 2014 20:08:51 +0300
+Subject: spi: dw-mid: respect 8 bit mode
+
+From: Andy Shevchenko
+
+commit b41583e7299046abdc578c33f25ed83ee95b9b31 upstream.
+
+In case of 8 bit mode and DMA usage we end up with every second byte
+written as 0. We have to respect the bits_per_word setting, which is
+what this patch actually does.
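+
+For background, dws->dma_width holds the transfer word size in bytes, so
+the intended mapping is roughly the following (a sketch only; the driver
+derives dma_width from the device's bits_per_word elsewhere, and the enum
+values from <linux/dmaengine.h> encode the width in bytes directly):
+
+	static enum dma_slave_buswidth dw_spi_buswidth(u32 bits_per_word)
+	{
+		switch (bits_per_word) {
+		case 8:
+			return DMA_SLAVE_BUSWIDTH_1_BYTE;  /* no filler byte */
+		case 16:
+			return DMA_SLAVE_BUSWIDTH_2_BYTES;
+		default:
+			return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+		}
+	}
+
+With the width hardcoded to DMA_SLAVE_BUSWIDTH_2_BYTES, an 8-bit transfer
+moved two bytes per FIFO slot, which is where the zero in every second
+position came from.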
+
+Signed-off-by: Andy Shevchenko
+Signed-off-by: Mark Brown
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/spi/spi-dw-mid.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/spi/spi-dw-mid.c
++++ b/drivers/spi/spi-dw-mid.c
+@@ -136,7 +136,7 @@ static int mid_spi_dma_transfer(struct d
+ 	txconf.dst_addr = dws->dma_addr;
+ 	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+ 	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+-	txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
++	txconf.dst_addr_width = dws->dma_width;
+ 	txconf.device_fc = false;
+ 
+ 	txchan->device->device_control(txchan, DMA_SLAVE_CONFIG,
+@@ -159,7 +159,7 @@ static int mid_spi_dma_transfer(struct d
+ 	rxconf.src_addr = dws->dma_addr;
+ 	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+ 	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+-	rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
++	rxconf.src_addr_width = dws->dma_width;
+ 	rxconf.device_fc = false;
+ 
+ 	rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG,
diff --git a/queue-3.16/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch b/queue-3.16/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
new file mode 100644
index 00000000000..7f94b697859
--- /dev/null
+++ b/queue-3.16/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
@@ -0,0 +1,53 @@
+From ee1b5b165c0a2f04d2107e634e51f05d0eb107de Mon Sep 17 00:00:00 2001
+From: Bryan O'Donoghue
+Date: Wed, 24 Sep 2014 00:26:24 +0100
+Subject: x86/intel/quark: Switch off CR4.PGE so TLB flush uses CR3 instead
+
+From: Bryan O'Donoghue
+
+commit ee1b5b165c0a2f04d2107e634e51f05d0eb107de upstream.
+
+Quark X1000 advertises PGE via the standard CPUID method, and PGE bits
+exist in Quark X1000's PTEs. However, in order to flush an individual
+PTE it is necessary to reload CR3 irrespective of the PTE.PGE bit.
+
+See Quark Core_DevMan_001.pdf section 6.4.11
+
+This bug was fixed in Galileo kernels; unfixed vanilla kernels are
+expected to crash and burn on this platform.
+
+Signed-off-by: Bryan O'Donoghue
+Cc: Borislav Petkov
+Link: http://lkml.kernel.org/r/1411514784-14885-1-git-send-email-pure.logic@nexus-software.ie
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/cpu/intel.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -144,6 +144,21 @@ static void early_init_intel(struct cpui
+ 			setup_clear_cpu_cap(X86_FEATURE_ERMS);
+ 		}
+ 	}
++
++	/*
++	 * Intel Quark Core DevMan_001.pdf section 6.4.11
++	 * "The operating system also is required to invalidate (i.e., flush)
++	 *  the TLB when any changes are made to any of the page table entries.
++	 *  The operating system must reload CR3 to cause the TLB to be flushed"
++	 *
++	 * As a result, cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
++	 * be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
++	 * to be modified.
++	 */
++	if (c->x86 == 5 && c->x86_model == 9) {
++		pr_info("Disabling PGE capability bit\n");
++		setup_clear_cpu_cap(X86_FEATURE_PGE);
++	}
+ }
+ 
+ #ifdef CONFIG_X86_32
diff --git a/queue-3.16/x86-kvm-vmx-preserve-cr4-across-vm-entry.patch b/queue-3.16/x86-kvm-vmx-preserve-cr4-across-vm-entry.patch
new file mode 100644
index 00000000000..ab8cb06a305
--- /dev/null
+++ b/queue-3.16/x86-kvm-vmx-preserve-cr4-across-vm-entry.patch
@@ -0,0 +1,82 @@
+From d974baa398f34393db76be45f7d4d04fbdbb4a0a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski
+Date: Wed, 8 Oct 2014 09:02:13 -0700
+Subject: x86,kvm,vmx: Preserve CR4 across VM entry
+
+From: Andy Lutomirski
+
+commit d974baa398f34393db76be45f7d4d04fbdbb4a0a upstream.
+
+CR4 isn't constant; at least the TSD and PCE bits can vary.
+
+TBH, treating CR0 and CR3 as constant scares me a bit, too, but it looks
+like it's correct.
+
+This adds a branch and a read from cr4 to each vm entry. Because it is
+extremely likely that consecutive entries into the same vcpu will have
+the same host cr4 value, this fixes up the vmcs instead of restoring cr4
+after the fact. A subsequent patch will add a kernel-wide cr4 shadow,
+reducing the overhead in the common case to just two memory reads and a
+branch.
+
+Signed-off-by: Andy Lutomirski
+Acked-by: Paolo Bonzini
+Cc: Petr Matousek
+Cc: Gleb Natapov
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -450,6 +450,7 @@ struct vcpu_vmx {
+ 		int           gs_ldt_reload_needed;
+ 		int           fs_reload_needed;
+ 		u64           msr_host_bndcfgs;
++		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
+ 	} host_state;
+ 	struct {
+ 		int vm86_active;
+@@ -4218,11 +4219,16 @@ static void vmx_set_constant_host_state(
+ 	u32 low32, high32;
+ 	unsigned long tmpl;
+ 	struct desc_ptr dt;
++	unsigned long cr4;
+ 
+ 	vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
+-	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
+ 	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
+ 
++	/* Save the most likely value for this task's CR4 in the VMCS. */
++	cr4 = read_cr4();
++	vmcs_writel(HOST_CR4, cr4);  /* 22.2.3, 22.2.5 */
++	vmx->host_state.vmcs_host_cr4 = cr4;
++
+ 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
+ #ifdef CONFIG_X86_64
+ 	/*
+@@ -7336,7 +7342,7 @@ static void atomic_switch_perf_msrs(stru
+ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ {
+ 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+-	unsigned long debugctlmsr;
++	unsigned long debugctlmsr, cr4;
+ 
+ 	/* Record the guest's net vcpu time for enforced NMI injections. */
+ 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+@@ -7357,6 +7363,12 @@ static void __noclone vmx_vcpu_run(struc
+ 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+ 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ 
++	cr4 = read_cr4();
++	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
++		vmcs_writel(HOST_CR4, cr4);
++		vmx->host_state.vmcs_host_cr4 = cr4;
++	}
++
+ 	/* When single-stepping over STI and MOV SS, we must clear the
+ 	 * corresponding interruptibility bits in the guest state. Otherwise
Otherwise + * vmentry fails as it then expects bit 14 (BS) in pending debug -- 2.47.3