git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 23 Oct 2014 06:47:03 +0000 (14:47 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 23 Oct 2014 06:47:03 +0000 (14:47 +0800)
added patches:
kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
kvm-fix-potentially-corrupt-mmio-cache.patch
kvm-s390-unintended-fallthrough-for-external-call.patch
spi-dw-mid-check-that-dma-was-inited-before-exit.patch
spi-dw-mid-respect-8-bit-mode.patch
x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch

queue-3.14/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch [new file with mode: 0644]
queue-3.14/kvm-fix-potentially-corrupt-mmio-cache.patch [new file with mode: 0644]
queue-3.14/kvm-s390-unintended-fallthrough-for-external-call.patch [new file with mode: 0644]
queue-3.14/series
queue-3.14/spi-dw-mid-check-that-dma-was-inited-before-exit.patch [new file with mode: 0644]
queue-3.14/spi-dw-mid-respect-8-bit-mode.patch [new file with mode: 0644]
queue-3.14/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch [new file with mode: 0644]

diff --git a/queue-3.14/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch b/queue-3.14/kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
new file mode 100644 (file)
index 0000000..7c1257c
--- /dev/null
@@ -0,0 +1,46 @@
+From 2ea75be3219571d0ec009ce20d9971e54af96e09 Mon Sep 17 00:00:00 2001
+From: David Matlack <dmatlack@google.com>
+Date: Fri, 19 Sep 2014 16:03:25 -0700
+Subject: kvm: don't take vcpu mutex for obviously invalid vcpu ioctls
+
+From: David Matlack <dmatlack@google.com>
+
+commit 2ea75be3219571d0ec009ce20d9971e54af96e09 upstream.
+
+vcpu ioctls can hang the calling thread if issued while a vcpu is running.
+However, invalid ioctls can happen when userspace tries to probe the kind
+of file descriptor it holds (e.g. isatty() calls ioctl(TCGETS)); in that
+case, we know the ioctl is going to be rejected as invalid anyway and we
+can fail before trying to take the vcpu mutex.
+
+This patch does not change functionality; it just makes invalid ioctls
+fail faster.
+
+Signed-off-by: David Matlack <dmatlack@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/kvm_main.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -52,6 +52,7 @@
+ #include <asm/processor.h>
+ #include <asm/io.h>
++#include <asm/ioctl.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -1979,6 +1980,9 @@ static long kvm_vcpu_ioctl(struct file *
+       if (vcpu->kvm->mm != current->mm)
+               return -EIO;
++      if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
++              return -EINVAL;
++
+ #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
+       /*
+        * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
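A rough userspace sketch of the probe pattern this guards against (illustrative only, not part of the patch; vcpu_fd is assumed to come from KVM_CREATE_VCPU): tools that walk their open file descriptors often call isatty(), which issues ioctl(fd, TCGETS). With the check above, that ioctl now fails with -EINVAL before the vcpu mutex is taken, so it can no longer block behind a thread sitting in KVM_RUN.

#include <stdio.h>
#include <unistd.h>

/* Sketch: vcpu_fd is assumed to come from ioctl(vm_fd, KVM_CREATE_VCPU, 0). */
static void probe_vcpu_fd(int vcpu_fd)
{
        /*
         * isatty() boils down to ioctl(vcpu_fd, TCGETS, ...).  Since
         * _IOC_TYPE(TCGETS) != KVMIO, kvm_vcpu_ioctl() rejects it with
         * -EINVAL immediately instead of waiting for the vcpu mutex.
         */
        if (!isatty(vcpu_fd))
                printf("vcpu fd %d is not a tty (rejected without blocking)\n",
                       vcpu_fd);
}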
diff --git a/queue-3.14/kvm-fix-potentially-corrupt-mmio-cache.patch b/queue-3.14/kvm-fix-potentially-corrupt-mmio-cache.patch
new file mode 100644 (file)
index 0000000..b9ae7f8
--- /dev/null
@@ -0,0 +1,191 @@
+From ee3d1570b58677885b4552bce8217fda7b226a68 Mon Sep 17 00:00:00 2001
+From: David Matlack <dmatlack@google.com>
+Date: Mon, 18 Aug 2014 15:46:06 -0700
+Subject: kvm: fix potentially corrupt mmio cache
+
+From: David Matlack <dmatlack@google.com>
+
+commit ee3d1570b58677885b4552bce8217fda7b226a68 upstream.
+
+vcpu exits and memslot mutations can run concurrently as long as the
+vcpu does not acquire the slots mutex. Thus it is theoretically possible
+for memslots to change underneath a vcpu that is handling an exit.
+
+If we increment the memslot generation number again after
+synchronize_srcu_expedited(), vcpus can safely cache the memslot generation
+without maintaining a single rcu_dereference through an entire vm exit.
+Indeed, much of the x86/kvm code does not maintain a single
+rcu_dereference of the current memslots during each exit.
+
+We can prevent the following case:
+
+   vcpu (CPU 0)                             | thread (CPU 1)
+--------------------------------------------+--------------------------
+1  vm exit                                  |
+2  srcu_read_unlock(&kvm->srcu)             |
+3  decide to cache something based on       |
+     old memslots                           |
+4                                           | change memslots
+                                            | (increments generation)
+5                                           | synchronize_srcu(&kvm->srcu);
+6  retrieve generation # from new memslots  |
+7  tag cache with new memslot generation    |
+8  srcu_read_unlock(&kvm->srcu)             |
+...                                         |
+   <action based on cache occurs even       |
+    though the caching decision was based   |
+    on the old memslots>                    |
+...                                         |
+   <action *continues* to occur until next  |
+    memslot generation change, which may    |
+    be never>                               |
+                                            |
+
+By incrementing the generation after synchronizing with kvm->srcu readers,
+we ensure that the generation retrieved in (6) will become invalid soon
+after (8).
+
+Keeping the existing increment is not strictly necessary, but we
+do keep it and just move it for consistency from update_memslots to
+install_new_memslots.  It invalidates old cached MMIOs immediately,
+instead of having to wait for the end of synchronize_srcu_expedited,
+which makes the code more clearly correct in case CPU 1 is preempted
+right after synchronize_srcu() returns.
+
+To avoid halving the generation space in SPTEs, always presume that the
+low bit of the generation is zero when reconstructing a generation number
+out of an SPTE.  This effectively disables MMIO caching in SPTEs during
+the call to synchronize_srcu_expedited.  Using the low bit this way is
+somewhat like a seqcount---where the protected thing is a cache, and
+instead of retrying we can simply punt if we observe the low bit to be 1.
+
+Signed-off-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Reviewed-by: David Matlack <dmatlack@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/virtual/kvm/mmu.txt |   14 ++++++++++++++
+ arch/x86/kvm/mmu.c                |   20 ++++++++++++--------
+ virt/kvm/kvm_main.c               |   23 ++++++++++++++++-------
+ 3 files changed, 42 insertions(+), 15 deletions(-)
+
+--- a/Documentation/virtual/kvm/mmu.txt
++++ b/Documentation/virtual/kvm/mmu.txt
+@@ -425,6 +425,20 @@ fault through the slow path.
+ Since only 19 bits are used to store generation-number on mmio spte, all
+ pages are zapped when there is an overflow.
++Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
++times, the last one happening when the generation number is retrieved and
++stored into the MMIO spte.  Thus, the MMIO spte might be created based on
++out-of-date information, but with an up-to-date generation number.
++
++To avoid this, the generation number is incremented again after synchronize_srcu
++returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
++memslot update, while some SRCU readers might be using the old copy.  We do not
++want to use an MMIO spte created with an odd generation number, and we can do
++this without losing a bit in the MMIO spte.  The low bit of the generation
++is not stored in MMIO spte, and presumed zero when it is extracted out of the
++spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
++the next access to the spte will always be a cache miss.
++
+ Further reading
+ ===============
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -198,16 +198,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+ /*
+- * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
+- * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
+- * number.
++ * the low bit of the generation number is always presumed to be zero.
++ * This disables mmio caching during memslot updates.  The concept is
++ * similar to a seqcount but instead of retrying the access we just punt
++ * and ignore the cache.
++ *
++ * spte bits 3-11 are used as bits 1-9 of the generation number,
++ * the bits 52-61 are used as bits 10-19 of the generation number.
+  */
+-#define MMIO_SPTE_GEN_LOW_SHIFT               3
++#define MMIO_SPTE_GEN_LOW_SHIFT               2
+ #define MMIO_SPTE_GEN_HIGH_SHIFT      52
+-#define MMIO_GEN_SHIFT                        19
+-#define MMIO_GEN_LOW_SHIFT            9
+-#define MMIO_GEN_LOW_MASK             ((1 << MMIO_GEN_LOW_SHIFT) - 1)
++#define MMIO_GEN_SHIFT                        20
++#define MMIO_GEN_LOW_SHIFT            10
++#define MMIO_GEN_LOW_MASK             ((1 << MMIO_GEN_LOW_SHIFT) - 2)
+ #define MMIO_GEN_MASK                 ((1 << MMIO_GEN_SHIFT) - 1)
+ #define MMIO_MAX_GEN                  ((1 << MMIO_GEN_SHIFT) - 1)
+@@ -4379,7 +4383,7 @@ void kvm_mmu_invalidate_mmio_sptes(struc
+        * The very rare case: if the generation-number is round,
+        * zap all shadow pages.
+        */
+-      if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
++      if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
+               printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+               kvm_mmu_invalidate_zap_all_pages(kvm);
+       }
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -95,8 +95,6 @@ static int hardware_enable_all(void);
+ static void hardware_disable_all(void);
+ static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+-static void update_memslots(struct kvm_memslots *slots,
+-                          struct kvm_memory_slot *new, u64 last_generation);
+ static void kvm_release_pfn_dirty(pfn_t pfn);
+ static void mark_page_dirty_in_slot(struct kvm *kvm,
+@@ -682,8 +680,7 @@ static void sort_memslots(struct kvm_mem
+ }
+ static void update_memslots(struct kvm_memslots *slots,
+-                          struct kvm_memory_slot *new,
+-                          u64 last_generation)
++                          struct kvm_memory_slot *new)
+ {
+       if (new) {
+               int id = new->id;
+@@ -694,8 +691,6 @@ static void update_memslots(struct kvm_m
+               if (new->npages != npages)
+                       sort_memslots(slots);
+       }
+-
+-      slots->generation = last_generation + 1;
+ }
+ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+@@ -717,10 +712,24 @@ static struct kvm_memslots *install_new_
+ {
+       struct kvm_memslots *old_memslots = kvm->memslots;
+-      update_memslots(slots, new, kvm->memslots->generation);
++      /*
++       * Set the low bit in the generation, which disables SPTE caching
++       * until the end of synchronize_srcu_expedited.
++       */
++      WARN_ON(old_memslots->generation & 1);
++      slots->generation = old_memslots->generation + 1;
++
++      update_memslots(slots, new);
+       rcu_assign_pointer(kvm->memslots, slots);
+       synchronize_srcu_expedited(&kvm->srcu);
++      /*
++       * Increment the new memslot generation a second time. This prevents
++       * vm exits that race with memslot updates from caching a memslot
++       * generation that will (potentially) be valid forever.
++       */
++      slots->generation++;
++
+       kvm_arch_memslots_updated(kvm);
+       return old_memslots;
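For reference, a stand-alone sketch of the generation packing the new mask values imply (it loosely mirrors the generation_mmio_spte_mask()/get_mmio_spte_generation() helpers in mmu.c, but is an illustration, not the patched code): bit 0 of the generation is never stored in the spte and is presumed zero on extraction, so an spte created while kvm_memslots(kvm)->generation is odd can never match the live generation again.

#include <stdint.h>

#define MMIO_SPTE_GEN_LOW_SHIFT   2   /* gen bits 1-9  land in spte bits 3-11  */
#define MMIO_SPTE_GEN_HIGH_SHIFT  52  /* gen bits 10-19 land in spte bits 52-61 */
#define MMIO_GEN_LOW_SHIFT        10
#define MMIO_GEN_LOW_MASK         ((1u << MMIO_GEN_LOW_SHIFT) - 2)  /* bit 0 dropped */

static uint64_t pack_mmio_generation(unsigned int gen)
{
        uint64_t mask;

        mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
        mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
        return mask;
}

static unsigned int unpack_mmio_generation(uint64_t spte)
{
        unsigned int gen;

        /* the low bit is never stored, so it always reads back as zero */
        gen  = (unsigned int)(spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
        gen |= ((unsigned int)(spte >> MMIO_SPTE_GEN_HIGH_SHIFT) & 0x3ffu) << MMIO_GEN_LOW_SHIFT;
        return gen;
}

With an even generation the round trip is exact; with an odd one the unpacked value differs by that low bit, so every lookup misses, which is the seqcount-like punting described above.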
diff --git a/queue-3.14/kvm-s390-unintended-fallthrough-for-external-call.patch b/queue-3.14/kvm-s390-unintended-fallthrough-for-external-call.patch
new file mode 100644 (file)
index 0000000..aea4e97
--- /dev/null
@@ -0,0 +1,29 @@
+From f346026e55f1efd3949a67ddd1dcea7c1b9a615e Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Wed, 3 Sep 2014 16:21:32 +0200
+Subject: KVM: s390: unintended fallthrough for external call
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit f346026e55f1efd3949a67ddd1dcea7c1b9a615e upstream.
+
+We must not fall through if the conditions for an external call are not met.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/interrupt.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -71,6 +71,7 @@ static int __interrupt_is_deliverable(st
+                       return 0;
+               if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
+                       return 1;
++              return 0;
+       case KVM_S390_INT_EMERGENCY:
+               if (psw_extint_disabled(vcpu))
+                       return 0;
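A stripped-down sketch of the bug class (hypothetical names, not the s390 code): without the added return, an external call whose control-register subclass bit is clear falls through into the emergency-signal case and can be reported as deliverable under the wrong conditions.

#include <stdbool.h>

enum irq_type { IRQ_EXTERNAL_CALL, IRQ_EMERGENCY_SIGNAL };

static int irq_is_deliverable(enum irq_type type, bool extint_disabled,
                              unsigned long gcr0)
{
        switch (type) {
        case IRQ_EXTERNAL_CALL:
                if (extint_disabled)
                        return 0;
                if (gcr0 & 0x2000ul)    /* external-call subclass enabled */
                        return 1;
                return 0;               /* without this, control falls through */
        case IRQ_EMERGENCY_SIGNAL:
                if (extint_disabled)
                        return 0;
                if (gcr0 & 0x4000ul)    /* emergency-signal subclass enabled */
                        return 1;
                return 0;
        }
        return 0;
}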
diff --git a/queue-3.14/series b/queue-3.14/series
index 1702ee3bf5db9be36a3060398f76583badb58505..5e2f4ecbf8c8edac29e56bed7a023835b9848fc5 100644 (file)
@@ -10,3 +10,9 @@ usb-pch_udc-usb-gadget-device-support-for-intel-quark-x1000.patch
 pci_ids-add-support-for-intel-quark-ilb.patch
 btrfs-send-fix-data-corruption-due-to-incorrect-hole-detection.patch
 kvm-x86-fix-stale-mmio-cache-bug.patch
+kvm-fix-potentially-corrupt-mmio-cache.patch
+kvm-s390-unintended-fallthrough-for-external-call.patch
+kvm-don-t-take-vcpu-mutex-for-obviously-invalid-vcpu-ioctls.patch
+x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
+spi-dw-mid-respect-8-bit-mode.patch
+spi-dw-mid-check-that-dma-was-inited-before-exit.patch
diff --git a/queue-3.14/spi-dw-mid-check-that-dma-was-inited-before-exit.patch b/queue-3.14/spi-dw-mid-check-that-dma-was-inited-before-exit.patch
new file mode 100644 (file)
index 0000000..0b6d76b
--- /dev/null
@@ -0,0 +1,32 @@
+From fb57862ead652454ceeb659617404c5f13bc34b5 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Fri, 12 Sep 2014 15:11:58 +0300
+Subject: spi: dw-mid: check that DMA was inited before exit
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+commit fb57862ead652454ceeb659617404c5f13bc34b5 upstream.
+
+If the driver was compiled with DMA support, but the DMA channels weren't
+acquired for some reason, mid_spi_dma_exit() will crash the kernel.
+
+Fixes: 7063c0d942a1 (spi/dw_spi: add DMA support)
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-dw-mid.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/spi/spi-dw-mid.c
++++ b/drivers/spi/spi-dw-mid.c
+@@ -89,6 +89,8 @@ err_exit:
+ static void mid_spi_dma_exit(struct dw_spi *dws)
+ {
++      if (!dws->dma_inited)
++              return;
+       dma_release_channel(dws->txchan);
+       dma_release_channel(dws->rxchan);
+ }
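A minimal sketch of the pairing the fix relies on (names simplified, only loosely based on the driver): dma_inited is set only after both channels have been acquired, so the exit hook can bail out early instead of handing never-initialised channel pointers to dma_release_channel().

#include <linux/dmaengine.h>

struct dw_spi_dma_sketch {
        int dma_inited;                 /* set once both channels are acquired */
        struct dma_chan *txchan;
        struct dma_chan *rxchan;
};

static void dma_exit_sketch(struct dw_spi_dma_sketch *dws)
{
        if (!dws->dma_inited)           /* probe-time acquisition failed */
                return;
        dma_release_channel(dws->txchan);
        dma_release_channel(dws->rxchan);
}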
diff --git a/queue-3.14/spi-dw-mid-respect-8-bit-mode.patch b/queue-3.14/spi-dw-mid-respect-8-bit-mode.patch
new file mode 100644 (file)
index 0000000..a586a87
--- /dev/null
@@ -0,0 +1,40 @@
+From b41583e7299046abdc578c33f25ed83ee95b9b31 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Thu, 18 Sep 2014 20:08:51 +0300
+Subject: spi: dw-mid: respect 8 bit mode
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+commit b41583e7299046abdc578c33f25ed83ee95b9b31 upstream.
+
+In the case of 8-bit mode with DMA we end up with every second byte written as
+0. We have to respect the bits_per_word setting, which is what this patch does.
+
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-dw-mid.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/spi/spi-dw-mid.c
++++ b/drivers/spi/spi-dw-mid.c
+@@ -136,7 +136,7 @@ static int mid_spi_dma_transfer(struct d
+       txconf.dst_addr = dws->dma_addr;
+       txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+       txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+-      txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
++      txconf.dst_addr_width = dws->dma_width;
+       txconf.device_fc = false;
+       txchan->device->device_control(txchan, DMA_SLAVE_CONFIG,
+@@ -159,7 +159,7 @@ static int mid_spi_dma_transfer(struct d
+       rxconf.src_addr = dws->dma_addr;
+       rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+       rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+-      rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
++      rxconf.src_addr_width = dws->dma_width;
+       rxconf.device_fc = false;
+       rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG,
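A short sketch of the underlying idea (illustrative; the patched driver takes the width from dws->dma_width, which the dw_spi core derives from bits_per_word): the DMA slave bus width must track the transfer word size rather than being hard-coded to two bytes, otherwise an 8-bit transfer feeds the FIFO the wrong amount of data per access.

#include <linux/types.h>
#include <linux/dmaengine.h>

/* Sketch: choose the DMA access width from the SPI word size. */
static enum dma_slave_buswidth dw_spi_buswidth_sketch(u8 bits_per_word)
{
        return bits_per_word <= 8 ? DMA_SLAVE_BUSWIDTH_1_BYTE
                                  : DMA_SLAVE_BUSWIDTH_2_BYTES;
}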
diff --git a/queue-3.14/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch b/queue-3.14/x86-intel-quark-switch-off-cr4.pge-so-tlb-flush-uses-cr3-instead.patch
new file mode 100644 (file)
index 0000000..b94f421
--- /dev/null
@@ -0,0 +1,53 @@
+From ee1b5b165c0a2f04d2107e634e51f05d0eb107de Mon Sep 17 00:00:00 2001
+From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
+Date: Wed, 24 Sep 2014 00:26:24 +0100
+Subject: x86/intel/quark: Switch off CR4.PGE so TLB flush uses CR3 instead
+
+From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
+
+commit ee1b5b165c0a2f04d2107e634e51f05d0eb107de upstream.
+
+Quark X1000 advertises PGE via the standard CPUID method, and PGE bits
+exist in Quark X1000's PTEs. However, in order to flush an individual
+PTE it is necessary to reload CR3 irrespective of the PTE.PGE bit.
+
+See Quark Core_DevMan_001.pdf section 6.4.11
+
+This bug was fixed in Galileo kernels; unfixed vanilla kernels are expected to
+crash and burn on this platform.
+
+Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: http://lkml.kernel.org/r/1411514784-14885-1-git-send-email-pure.logic@nexus-software.ie
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/intel.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -153,6 +153,21 @@ static void early_init_intel(struct cpui
+                       setup_clear_cpu_cap(X86_FEATURE_ERMS);
+               }
+       }
++
++      /*
++       * Intel Quark Core DevMan_001.pdf section 6.4.11
++       * "The operating system also is required to invalidate (i.e., flush)
++       *  the TLB when any changes are made to any of the page table entries.
++       *  The operating system must reload CR3 to cause the TLB to be flushed"
++       *
++       * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
++       * be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
++       * to be modified
++       */
++      if (c->x86 == 5 && c->x86_model == 9) {
++              pr_info("Disabling PGE capability bit\n");
++              setup_clear_cpu_cap(X86_FEATURE_PGE);
++      }
+ }
+ #ifdef CONFIG_X86_32
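For context, a simplified sketch of the flush decision the cleared feature bit steers (close in spirit to the 3.14 __flush_tlb_all(), but an illustration rather than the exact kernel code): with X86_FEATURE_PGE cleared, cpu_has_pge evaluates false, so every full flush goes through a CR3 reload, which is the mechanism Quark actually honours.

#include <asm/tlbflush.h>

/* Sketch of the choice made in arch/x86/include/asm/tlbflush.h. */
static inline void flush_tlb_all_sketch(int has_pge)
{
        if (has_pge)
                __flush_tlb_global();   /* toggle CR4.PGE */
        else
                __flush_tlb();          /* reload CR3 */
}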