git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.3-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 10 May 2012 16:28:59 +0000 (09:28 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 10 May 2012 16:28:59 +0000 (09:28 -0700)
added patches:
hugepages-fix-use-after-free-bug-in-quota-handling.patch
kvm-ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
kvm-fix-write-protection-race-during-dirty-logging.patch
kvm-lock-slots_lock-around-device-assignment.patch
kvm-mmu_notifier-flush-tlbs-before-releasing-mmu_lock.patch
kvm-nvmx-fix-erroneous-exception-bitmap-check.patch
kvm-s390-do-store-status-after-handling-stop_on_stop-bit.patch
kvm-s390-sanitize-fpc-registers-for-kvm_set_fpu.patch
kvm-vmx-fix-delayed-load-of-shared-msrs.patch
kvm-vmx-fix-kvm_set_shared_msr-called-in-preemptible-context.patch
kvm-vmx-vmx_set_cr0-expects-kvm-srcu-locked.patch
kvm-x86-emulator-correctly-mask-pmc-index-bits-in-rdpmc-instruction-emulation.patch
sony-laptop-enable-keyboard-backlight-by-default.patch

14 files changed:
queue-3.3/hugepages-fix-use-after-free-bug-in-quota-handling.patch [new file with mode: 0644]
queue-3.3/kvm-ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch [new file with mode: 0644]
queue-3.3/kvm-fix-write-protection-race-during-dirty-logging.patch [new file with mode: 0644]
queue-3.3/kvm-lock-slots_lock-around-device-assignment.patch [new file with mode: 0644]
queue-3.3/kvm-mmu_notifier-flush-tlbs-before-releasing-mmu_lock.patch [new file with mode: 0644]
queue-3.3/kvm-nvmx-fix-erroneous-exception-bitmap-check.patch [new file with mode: 0644]
queue-3.3/kvm-s390-do-store-status-after-handling-stop_on_stop-bit.patch [new file with mode: 0644]
queue-3.3/kvm-s390-sanitize-fpc-registers-for-kvm_set_fpu.patch [new file with mode: 0644]
queue-3.3/kvm-vmx-fix-delayed-load-of-shared-msrs.patch [new file with mode: 0644]
queue-3.3/kvm-vmx-fix-kvm_set_shared_msr-called-in-preemptible-context.patch [new file with mode: 0644]
queue-3.3/kvm-vmx-vmx_set_cr0-expects-kvm-srcu-locked.patch [new file with mode: 0644]
queue-3.3/kvm-x86-emulator-correctly-mask-pmc-index-bits-in-rdpmc-instruction-emulation.patch [new file with mode: 0644]
queue-3.3/series
queue-3.3/sony-laptop-enable-keyboard-backlight-by-default.patch [new file with mode: 0644]

diff --git a/queue-3.3/hugepages-fix-use-after-free-bug-in-quota-handling.patch b/queue-3.3/hugepages-fix-use-after-free-bug-in-quota-handling.patch
new file mode 100644
index 0000000..5d122fb
--- /dev/null
@@ -0,0 +1,452 @@
+From 90481622d75715bfcb68501280a917dbfe516029 Mon Sep 17 00:00:00 2001
+From: David Gibson <david@gibson.dropbear.id.au>
+Date: Wed, 21 Mar 2012 16:34:12 -0700
+Subject: hugepages: fix use after free bug in "quota" handling
+
+From: David Gibson <david@gibson.dropbear.id.au>
+
+commit 90481622d75715bfcb68501280a917dbfe516029 upstream.
+
+hugetlbfs_{get,put}_quota() are badly named.  They don't interact with the
+general quota handling code, and they don't much resemble its behaviour.
+Rather than being about maintaining limits on on-disk block usage by
+particular users, they are instead about maintaining limits on in-memory
+page usage (including anonymous MAP_PRIVATE copied-on-write pages)
+associated with a particular hugetlbfs filesystem instance.
+
+Worse, they work by having callbacks to the hugetlbfs filesystem code from
+the low-level page handling code, in particular from free_huge_page().
+This is a layering violation of itself, but more importantly, if the
+kernel does a get_user_pages() on hugepages (which can happen from KVM
+amongst others), then the free_huge_page() can be delayed until after the
+associated inode has already been freed.  If an unmount occurs at the
+wrong time, even the hugetlbfs superblock where the "quota" limits are
+stored may have been freed.
+
+Andrew Barry proposed a patch to fix this by having hugepages, instead of
+storing a pointer to their address_space and reaching the superblock from
+there, store pointers directly to the superblock, bumping the reference
+count as appropriate to avoid it being freed.  Andrew Morton rejected that
+version, however, on the grounds that it made the existing layering
+violation worse.
+
+This is a reworked version of Andrew's patch, which removes the extra, and
+some of the existing, layering violation.  It works by introducing the
+concept of a hugepage "subpool" at the lower hugepage mm layer - that is a
+finite logical pool of hugepages to allocate from.  hugetlbfs now creates
+a subpool for each filesystem instance with a page limit set, and a
+pointer to the subpool gets added to each allocated hugepage, instead of
+the address_space pointer used now.  The subpool has its own lifetime and
+is only freed once all pages in it _and_ all other references to it (i.e.
+superblocks) are gone.
+
+subpools are optional - a NULL subpool pointer is taken by the code to
+mean that no subpool limits are in effect.
+
+Previous discussion of this bug found in:  "Fix refcounting in hugetlbfs
+quota handling.". See:  https://lkml.org/lkml/2011/8/11/28 or
+http://marc.info/?l=linux-mm&m=126928970510627&w=1
+
+v2: Fixed a bug spotted by Hillf Danton, and removed the extra parameter to
+alloc_huge_page() - since it already takes the vma, it is not necessary.
+
+Signed-off-by: Andrew Barry <abarry@cray.com>
+Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Minchan Kim <minchan.kim@gmail.com>
+Cc: Hillf Danton <dhillf@gmail.com>
+Cc: Paul Mackerras <paulus@samba.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/hugetlbfs/inode.c    |   54 +++++++------------
+ include/linux/hugetlb.h |   14 +++-
+ mm/hugetlb.c            |  135 ++++++++++++++++++++++++++++++++++++++----------
+ 3 files changed, 139 insertions(+), 64 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -600,9 +600,15 @@ static int hugetlbfs_statfs(struct dentr
+               spin_lock(&sbinfo->stat_lock);
+               /* If no limits set, just report 0 for max/free/used
+                * blocks, like simple_statfs() */
+-              if (sbinfo->max_blocks >= 0) {
+-                      buf->f_blocks = sbinfo->max_blocks;
+-                      buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
++              if (sbinfo->spool) {
++                      long free_pages;
++
++                      spin_lock(&sbinfo->spool->lock);
++                      buf->f_blocks = sbinfo->spool->max_hpages;
++                      free_pages = sbinfo->spool->max_hpages
++                              - sbinfo->spool->used_hpages;
++                      buf->f_bavail = buf->f_bfree = free_pages;
++                      spin_unlock(&sbinfo->spool->lock);
+                       buf->f_files = sbinfo->max_inodes;
+                       buf->f_ffree = sbinfo->free_inodes;
+               }
+@@ -618,6 +624,10 @@ static void hugetlbfs_put_super(struct s
+       if (sbi) {
+               sb->s_fs_info = NULL;
++
++              if (sbi->spool)
++                      hugepage_put_subpool(sbi->spool);
++
+               kfree(sbi);
+       }
+ }
+@@ -848,10 +858,14 @@ hugetlbfs_fill_super(struct super_block
+       sb->s_fs_info = sbinfo;
+       sbinfo->hstate = config.hstate;
+       spin_lock_init(&sbinfo->stat_lock);
+-      sbinfo->max_blocks = config.nr_blocks;
+-      sbinfo->free_blocks = config.nr_blocks;
+       sbinfo->max_inodes = config.nr_inodes;
+       sbinfo->free_inodes = config.nr_inodes;
++      sbinfo->spool = NULL;
++      if (config.nr_blocks != -1) {
++              sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
++              if (!sbinfo->spool)
++                      goto out_free;
++      }
+       sb->s_maxbytes = MAX_LFS_FILESIZE;
+       sb->s_blocksize = huge_page_size(config.hstate);
+       sb->s_blocksize_bits = huge_page_shift(config.hstate);
+@@ -870,38 +884,12 @@ hugetlbfs_fill_super(struct super_block
+       sb->s_root = root;
+       return 0;
+ out_free:
++      if (sbinfo->spool)
++              kfree(sbinfo->spool);
+       kfree(sbinfo);
+       return -ENOMEM;
+ }
+-int hugetlb_get_quota(struct address_space *mapping, long delta)
+-{
+-      int ret = 0;
+-      struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
+-
+-      if (sbinfo->free_blocks > -1) {
+-              spin_lock(&sbinfo->stat_lock);
+-              if (sbinfo->free_blocks - delta >= 0)
+-                      sbinfo->free_blocks -= delta;
+-              else
+-                      ret = -ENOMEM;
+-              spin_unlock(&sbinfo->stat_lock);
+-      }
+-
+-      return ret;
+-}
+-
+-void hugetlb_put_quota(struct address_space *mapping, long delta)
+-{
+-      struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
+-
+-      if (sbinfo->free_blocks > -1) {
+-              spin_lock(&sbinfo->stat_lock);
+-              sbinfo->free_blocks += delta;
+-              spin_unlock(&sbinfo->stat_lock);
+-      }
+-}
+-
+ static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data)
+ {
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -14,6 +14,15 @@ struct user_struct;
+ #include <linux/shm.h>
+ #include <asm/tlbflush.h>
++struct hugepage_subpool {
++      spinlock_t lock;
++      long count;
++      long max_hpages, used_hpages;
++};
++
++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
++void hugepage_put_subpool(struct hugepage_subpool *spool);
++
+ int PageHuge(struct page *page);
+ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+@@ -138,12 +147,11 @@ struct hugetlbfs_config {
+ };
+ struct hugetlbfs_sb_info {
+-      long    max_blocks;   /* blocks allowed */
+-      long    free_blocks;  /* blocks free */
+       long    max_inodes;   /* inodes allowed */
+       long    free_inodes;  /* inodes free */
+       spinlock_t      stat_lock;
+       struct hstate *hstate;
++      struct hugepage_subpool *spool;
+ };
+@@ -166,8 +174,6 @@ extern const struct file_operations huge
+ extern const struct vm_operations_struct hugetlb_vm_ops;
+ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
+                               struct user_struct **user, int creat_flags);
+-int hugetlb_get_quota(struct address_space *mapping, long delta);
+-void hugetlb_put_quota(struct address_space *mapping, long delta);
+ static inline int is_file_hugepages(struct file *file)
+ {
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -53,6 +53,84 @@ static unsigned long __initdata default_
+  */
+ static DEFINE_SPINLOCK(hugetlb_lock);
++static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
++{
++      bool free = (spool->count == 0) && (spool->used_hpages == 0);
++
++      spin_unlock(&spool->lock);
++
++      /* If no pages are used, and no other handles to the subpool
++       * remain, free the subpool */
++      if (free)
++              kfree(spool);
++}
++
++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
++{
++      struct hugepage_subpool *spool;
++
++      spool = kmalloc(sizeof(*spool), GFP_KERNEL);
++      if (!spool)
++              return NULL;
++
++      spin_lock_init(&spool->lock);
++      spool->count = 1;
++      spool->max_hpages = nr_blocks;
++      spool->used_hpages = 0;
++
++      return spool;
++}
++
++void hugepage_put_subpool(struct hugepage_subpool *spool)
++{
++      spin_lock(&spool->lock);
++      BUG_ON(!spool->count);
++      spool->count--;
++      unlock_or_release_subpool(spool);
++}
++
++static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
++                                    long delta)
++{
++      int ret = 0;
++
++      if (!spool)
++              return 0;
++
++      spin_lock(&spool->lock);
++      if ((spool->used_hpages + delta) <= spool->max_hpages) {
++              spool->used_hpages += delta;
++      } else {
++              ret = -ENOMEM;
++      }
++      spin_unlock(&spool->lock);
++
++      return ret;
++}
++
++static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
++                                     long delta)
++{
++      if (!spool)
++              return;
++
++      spin_lock(&spool->lock);
++      spool->used_hpages -= delta;
++      /* If hugetlbfs_put_super couldn't free spool due to
++      * an outstanding quota reference, free it now. */
++      unlock_or_release_subpool(spool);
++}
++
++static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
++{
++      return HUGETLBFS_SB(inode->i_sb)->spool;
++}
++
++static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
++{
++      return subpool_inode(vma->vm_file->f_dentry->d_inode);
++}
++
+ /*
+  * Region tracking -- allows tracking of reservations and instantiated pages
+  *                    across the pages in a mapping.
+@@ -533,9 +611,9 @@ static void free_huge_page(struct page *
+        */
+       struct hstate *h = page_hstate(page);
+       int nid = page_to_nid(page);
+-      struct address_space *mapping;
++      struct hugepage_subpool *spool =
++              (struct hugepage_subpool *)page_private(page);
+-      mapping = (struct address_space *) page_private(page);
+       set_page_private(page, 0);
+       page->mapping = NULL;
+       BUG_ON(page_count(page));
+@@ -551,8 +629,7 @@ static void free_huge_page(struct page *
+               enqueue_huge_page(h, page);
+       }
+       spin_unlock(&hugetlb_lock);
+-      if (mapping)
+-              hugetlb_put_quota(mapping, 1);
++      hugepage_subpool_put_pages(spool, 1);
+ }
+ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+@@ -966,11 +1043,12 @@ static void return_unused_surplus_pages(
+ /*
+  * Determine if the huge page at addr within the vma has an associated
+  * reservation.  Where it does not we will need to logically increase
+- * reservation and actually increase quota before an allocation can occur.
+- * Where any new reservation would be required the reservation change is
+- * prepared, but not committed.  Once the page has been quota'd allocated
+- * an instantiated the change should be committed via vma_commit_reservation.
+- * No action is required on failure.
++ * reservation and actually increase subpool usage before an allocation
++ * can occur.  Where any new reservation would be required the
++ * reservation change is prepared, but not committed.  Once the page
++ * has been allocated from the subpool and instantiated the change should
++ * be committed via vma_commit_reservation.  No action is required on
++ * failure.
+  */
+ static long vma_needs_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+@@ -1019,24 +1097,24 @@ static void vma_commit_reservation(struc
+ static struct page *alloc_huge_page(struct vm_area_struct *vma,
+                                   unsigned long addr, int avoid_reserve)
+ {
++      struct hugepage_subpool *spool = subpool_vma(vma);
+       struct hstate *h = hstate_vma(vma);
+       struct page *page;
+-      struct address_space *mapping = vma->vm_file->f_mapping;
+-      struct inode *inode = mapping->host;
+       long chg;
+       /*
+-       * Processes that did not create the mapping will have no reserves and
+-       * will not have accounted against quota. Check that the quota can be
+-       * made before satisfying the allocation
+-       * MAP_NORESERVE mappings may also need pages and quota allocated
+-       * if no reserve mapping overlaps.
++       * Processes that did not create the mapping will have no
++       * reserves and will not have accounted against subpool
++       * limit. Check that the subpool limit can be made before
++       * satisfying the allocation.  MAP_NORESERVE mappings may also
++       * need pages and subpool limit allocated if no reserve
++       * mapping overlaps.
+        */
+       chg = vma_needs_reservation(h, vma, addr);
+       if (chg < 0)
+               return ERR_PTR(-VM_FAULT_OOM);
+       if (chg)
+-              if (hugetlb_get_quota(inode->i_mapping, chg))
++              if (hugepage_subpool_get_pages(spool, chg))
+                       return ERR_PTR(-VM_FAULT_SIGBUS);
+       spin_lock(&hugetlb_lock);
+@@ -1046,12 +1124,12 @@ static struct page *alloc_huge_page(stru
+       if (!page) {
+               page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+               if (!page) {
+-                      hugetlb_put_quota(inode->i_mapping, chg);
++                      hugepage_subpool_put_pages(spool, chg);
+                       return ERR_PTR(-VM_FAULT_SIGBUS);
+               }
+       }
+-      set_page_private(page, (unsigned long) mapping);
++      set_page_private(page, (unsigned long)spool);
+       vma_commit_reservation(h, vma, addr);
+@@ -2072,6 +2150,7 @@ static void hugetlb_vm_op_close(struct v
+ {
+       struct hstate *h = hstate_vma(vma);
+       struct resv_map *reservations = vma_resv_map(vma);
++      struct hugepage_subpool *spool = subpool_vma(vma);
+       unsigned long reserve;
+       unsigned long start;
+       unsigned long end;
+@@ -2087,7 +2166,7 @@ static void hugetlb_vm_op_close(struct v
+               if (reserve) {
+                       hugetlb_acct_memory(h, -reserve);
+-                      hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
++                      hugepage_subpool_put_pages(spool, reserve);
+               }
+       }
+ }
+@@ -2316,7 +2395,7 @@ static int unmap_ref_private(struct mm_s
+        */
+       address = address & huge_page_mask(h);
+       pgoff = vma_hugecache_offset(h, vma, address);
+-      mapping = (struct address_space *)page_private(page);
++      mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+       /*
+        * Take the mapping lock for the duration of the table walk. As
+@@ -2871,11 +2950,12 @@ int hugetlb_reserve_pages(struct inode *
+ {
+       long ret, chg;
+       struct hstate *h = hstate_inode(inode);
++      struct hugepage_subpool *spool = subpool_inode(inode);
+       /*
+        * Only apply hugepage reservation if asked. At fault time, an
+        * attempt will be made for VM_NORESERVE to allocate a page
+-       * and filesystem quota without using reserves
++       * without using reserves
+        */
+       if (vm_flags & VM_NORESERVE)
+               return 0;
+@@ -2902,17 +2982,17 @@ int hugetlb_reserve_pages(struct inode *
+       if (chg < 0)
+               return chg;
+-      /* There must be enough filesystem quota for the mapping */
+-      if (hugetlb_get_quota(inode->i_mapping, chg))
++      /* There must be enough pages in the subpool for the mapping */
++      if (hugepage_subpool_get_pages(spool, chg))
+               return -ENOSPC;
+       /*
+        * Check enough hugepages are available for the reservation.
+-       * Hand back the quota if there are not
++       * Hand the pages back to the subpool if there are not
+        */
+       ret = hugetlb_acct_memory(h, chg);
+       if (ret < 0) {
+-              hugetlb_put_quota(inode->i_mapping, chg);
++              hugepage_subpool_put_pages(spool, chg);
+               return ret;
+       }
+@@ -2936,12 +3016,13 @@ void hugetlb_unreserve_pages(struct inod
+ {
+       struct hstate *h = hstate_inode(inode);
+       long chg = region_truncate(&inode->i_mapping->private_list, offset);
++      struct hugepage_subpool *spool = subpool_inode(inode);
+       spin_lock(&inode->i_lock);
+       inode->i_blocks -= (blocks_per_huge_page(h) * freed);
+       spin_unlock(&inode->i_lock);
+-      hugetlb_put_quota(inode->i_mapping, (chg - freed));
++      hugepage_subpool_put_pages(spool, (chg - freed));
+       hugetlb_acct_memory(h, -(chg - freed));
+ }
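Taken together, the filesystem side of the new interface reduces to creating one subpool per instance at mount time and dropping its reference at unmount; all per-page charging happens inside mm/hugetlb.c. A compressed sketch of that flow, mirroring hugetlbfs_fill_super()/hugetlbfs_put_super() from the patch above (error paths trimmed, names as in the patch):

	struct hugepage_subpool *spool = NULL;

	/* mount: a page limit of -1 means "no limit", i.e. no subpool at all */
	if (config.nr_blocks != -1) {
		spool = hugepage_new_subpool(config.nr_blocks); /* count = 1 */
		if (!spool)
			return -ENOMEM;
	}
	sbinfo->spool = spool;

	/* unmount: drop the superblock's reference; the subpool is only
	 * kfree()d once used_hpages has also reached zero, so a huge page
	 * whose free_huge_page() runs after the unmount can still uncharge
	 * itself safely instead of touching freed superblock memory */
	if (sbinfo->spool)
		hugepage_put_subpool(sbinfo->spool);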
diff --git a/queue-3.3/kvm-ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch b/queue-3.3/kvm-ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
new file mode 100644
index 0000000..57ff0e8
--- /dev/null
@@ -0,0 +1,102 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:29 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:42 +0300
+Subject: KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-7-git-send-email-avi@redhat.com>
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 3e515705a1f46beb1c942bb8043c16f8ac7b1e9e)
+
+If some vcpus are created before KVM_CREATE_IRQCHIP, then
+irqchip_in_kernel() and vcpu->arch.apic will be inconsistent, leading
+to potential NULL pointer dereferences.
+
+Fix by:
+- ensuring that no vcpus are installed when KVM_CREATE_IRQCHIP is called
+- ensuring that a vcpu has an apic if it is installed after KVM_CREATE_IRQCHIP
+
+This is somewhat long winded because vcpu->arch.apic is created without
+kvm->lock held.
+
+Based on earlier patch by Michael Ellerman.
+
+Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/ia64/kvm/kvm-ia64.c |    5 +++++
+ arch/x86/kvm/x86.c       |    8 ++++++++
+ include/linux/kvm_host.h |    7 +++++++
+ virt/kvm/kvm_main.c      |    4 ++++
+ 4 files changed, 24 insertions(+)
+
+--- a/arch/ia64/kvm/kvm-ia64.c
++++ b/arch/ia64/kvm/kvm-ia64.c
+@@ -1169,6 +1169,11 @@ out:
+ #define PALE_RESET_ENTRY    0x80000000ffffffb0UL
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
++{
++      return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
++}
++
+ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_vcpu *v;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3131,6 +3131,9 @@ long kvm_arch_vm_ioctl(struct file *filp
+               r = -EEXIST;
+               if (kvm->arch.vpic)
+                       goto create_irqchip_unlock;
++              r = -EINVAL;
++              if (atomic_read(&kvm->online_vcpus))
++                      goto create_irqchip_unlock;
+               r = -ENOMEM;
+               vpic = kvm_create_pic(kvm);
+               if (vpic) {
+@@ -5956,6 +5959,11 @@ void kvm_arch_check_processor_compat(voi
+       kvm_x86_ops->check_processor_compatibility(rtn);
+ }
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
++{
++      return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
++}
++
+ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+ {
+       struct page *page;
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -775,6 +775,13 @@ static inline bool kvm_vcpu_is_bsp(struc
+ {
+       return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
+ }
++
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
++
++#else
++
++static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
++
+ #endif
+ #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1720,6 +1720,10 @@ static int kvm_vm_ioctl_create_vcpu(stru
+               goto vcpu_destroy;
+       mutex_lock(&kvm->lock);
++      if (!kvm_vcpu_compatible(vcpu)) {
++              r = -EINVAL;
++              goto unlock_vcpu_destroy;
++      }
+       if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
+               r = -EINVAL;
+               goto unlock_vcpu_destroy;
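For context, the ordering this enforces looks as follows from user space. This is a minimal sketch only: error handling, vcpu setup and the usual mmap of the run structure are omitted, and the descriptor names are placeholders.

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int create_vm_with_irqchip(void)
	{
		int kvm_fd = open("/dev/kvm", O_RDWR);
		int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);

		/* Create the in-kernel irqchip before any vcpu ... */
		ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);

		/* ... and only then create vcpus.  With this patch,
		 * KVM_CREATE_IRQCHIP fails with -EINVAL once a vcpu already
		 * exists, and a vcpu whose apic state would not match the
		 * irqchip setting is rejected, instead of leaving
		 * irqchip_in_kernel() and vcpu->arch.apic inconsistent. */
		return ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	}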
diff --git a/queue-3.3/kvm-fix-write-protection-race-during-dirty-logging.patch b/queue-3.3/kvm-fix-write-protection-race-during-dirty-logging.patch
new file mode 100644
index 0000000..e4cbcf7
--- /dev/null
@@ -0,0 +1,82 @@
+From stable-owner@vger.kernel.org Wed May  9 06:11:17 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:39 +0300
+Subject: KVM: Fix write protection race during dirty logging
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-4-git-send-email-avi@redhat.com>
+
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(cherry picked from commit 6dbf79e7164e9a86c1e466062c48498142ae6128)
+
+This patch fixes a race introduced by:
+
+  commit 95d4c16ce78cb6b7549a09159c409d52ddd18dae
+  KVM: Optimize dirty logging by rmap_write_protect()
+
+During protecting pages for dirty logging, other threads may also try
+to protect a page in mmu_sync_children() or kvm_mmu_get_page().
+
+In such a case, because get_dirty_log releases mmu_lock before flushing
+TLB's, the following race condition can happen:
+
+  A (get_dirty_log)     B (another thread)
+
+  lock(mmu_lock)
+  clear pte.w
+  unlock(mmu_lock)
+                        lock(mmu_lock)
+                        pte.w is already cleared
+                        unlock(mmu_lock)
+                        skip TLB flush
+                        return
+  ...
+  TLB flush
+
+Though thread B assumes the page has already been protected when it
+returns, the remaining TLB entry will break that assumption.
+
+This patch fixes this problem by making get_dirty_log hold the mmu_lock
+until it flushes the TLB's.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2997,6 +2997,8 @@ static void write_protect_slot(struct kv
+                              unsigned long *dirty_bitmap,
+                              unsigned long nr_dirty_pages)
+ {
++      spin_lock(&kvm->mmu_lock);
++
+       /* Not many dirty pages compared to # of shadow pages. */
+       if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
+               unsigned long gfn_offset;
+@@ -3004,16 +3006,13 @@ static void write_protect_slot(struct kv
+               for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
+                       unsigned long gfn = memslot->base_gfn + gfn_offset;
+-                      spin_lock(&kvm->mmu_lock);
+                       kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
+-                      spin_unlock(&kvm->mmu_lock);
+               }
+               kvm_flush_remote_tlbs(kvm);
+-      } else {
+-              spin_lock(&kvm->mmu_lock);
++      } else
+               kvm_mmu_slot_remove_write_access(kvm, memslot->id);
+-              spin_unlock(&kvm->mmu_lock);
+-      }
++
++      spin_unlock(&kvm->mmu_lock);
+ }
+ /*
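The invariant behind this change (and the mmu_notifier patch later in this series) is the same: once write bits have been cleared under mmu_lock, another protect path may legitimately observe "already read-only" and skip its own TLB flush, so the lock must not be dropped until the flush has been done. Schematically (illustration only; write_protect_range() is a placeholder, not a real kernel function):

	spin_lock(&kvm->mmu_lock);
	write_protect_range(kvm, memslot);  /* clears pte.w */
	kvm_flush_remote_tlbs(kvm);         /* flush while still holding the
	                                     * lock other protect paths take */
	spin_unlock(&kvm->mmu_lock);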
diff --git a/queue-3.3/kvm-lock-slots_lock-around-device-assignment.patch b/queue-3.3/kvm-lock-slots_lock-around-device-assignment.patch
new file mode 100644
index 0000000..d669aca
--- /dev/null
@@ -0,0 +1,83 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:31 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:47 +0300
+Subject: KVM: lock slots_lock around device assignment
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-12-git-send-email-avi@redhat.com>
+
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+(cherry picked from commit 21a1416a1c945c5aeaeaf791b63c64926018eb77)
+
+As pointed out by Jason Baron, when assigning a device to a guest
+we first set the iommu domain pointer, which enables mapping
+and unmapping of memory slots to the iommu.  This leaves a window
+where this path is enabled, but we haven't synchronized the iommu
+mappings to the existing memory slots.  Thus a slot being removed
+at that point could send us down unexpected code paths removing
+non-existent pinnings and iommu mappings.  Take the slots_lock
+around creating the iommu domain and initial mappings as well as
+around iommu teardown to avoid this race.
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/iommu.c |   23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/virt/kvm/iommu.c
++++ b/virt/kvm/iommu.c
+@@ -240,9 +240,13 @@ int kvm_iommu_map_guest(struct kvm *kvm)
+               return -ENODEV;
+       }
++      mutex_lock(&kvm->slots_lock);
++
+       kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
+-      if (!kvm->arch.iommu_domain)
+-              return -ENOMEM;
++      if (!kvm->arch.iommu_domain) {
++              r = -ENOMEM;
++              goto out_unlock;
++      }
+       if (!allow_unsafe_assigned_interrupts &&
+           !iommu_domain_has_cap(kvm->arch.iommu_domain,
+@@ -253,17 +257,16 @@ int kvm_iommu_map_guest(struct kvm *kvm)
+                      " module option.\n", __func__);
+               iommu_domain_free(kvm->arch.iommu_domain);
+               kvm->arch.iommu_domain = NULL;
+-              return -EPERM;
++              r = -EPERM;
++              goto out_unlock;
+       }
+       r = kvm_iommu_map_memslots(kvm);
+       if (r)
+-              goto out_unmap;
+-
+-      return 0;
++              kvm_iommu_unmap_memslots(kvm);
+-out_unmap:
+-      kvm_iommu_unmap_memslots(kvm);
++out_unlock:
++      mutex_unlock(&kvm->slots_lock);
+       return r;
+ }
+@@ -340,7 +343,11 @@ int kvm_iommu_unmap_guest(struct kvm *kv
+       if (!domain)
+               return 0;
++      mutex_lock(&kvm->slots_lock);
+       kvm_iommu_unmap_memslots(kvm);
++      kvm->arch.iommu_domain = NULL;
++      mutex_unlock(&kvm->slots_lock);
++
+       iommu_domain_free(domain);
+       return 0;
+ }
diff --git a/queue-3.3/kvm-mmu_notifier-flush-tlbs-before-releasing-mmu_lock.patch b/queue-3.3/kvm-mmu_notifier-flush-tlbs-before-releasing-mmu_lock.patch
new file mode 100644
index 0000000..9e580b1
--- /dev/null
@@ -0,0 +1,79 @@
+From stable-owner@vger.kernel.org Wed May  9 06:11:18 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:40 +0300
+Subject: KVM: mmu_notifier: Flush TLBs before releasing mmu_lock
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-5-git-send-email-avi@redhat.com>
+
+
+From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+
+(cherry picked from commit 565f3be2174611f364405bbea2d86e153c2e7e78)
+
+Other threads may process the same page in that small window and skip
+TLB flush and then return before these functions do flush.
+
+Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |   19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_
+        */
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
++
+       kvm->mmu_notifier_seq++;
+       need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
+-      spin_unlock(&kvm->mmu_lock);
+-      srcu_read_unlock(&kvm->srcu, idx);
+-
+       /* we've to flush the tlb before the pages can be freed */
+       if (need_tlb_flush)
+               kvm_flush_remote_tlbs(kvm);
++      spin_unlock(&kvm->mmu_lock);
++      srcu_read_unlock(&kvm->srcu, idx);
+ }
+ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+@@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_
+       for (; start < end; start += PAGE_SIZE)
+               need_tlb_flush |= kvm_unmap_hva(kvm, start);
+       need_tlb_flush |= kvm->tlbs_dirty;
+-      spin_unlock(&kvm->mmu_lock);
+-      srcu_read_unlock(&kvm->srcu, idx);
+-
+       /* we've to flush the tlb before the pages can be freed */
+       if (need_tlb_flush)
+               kvm_flush_remote_tlbs(kvm);
++
++      spin_unlock(&kvm->mmu_lock);
++      srcu_read_unlock(&kvm->srcu, idx);
+ }
+ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+@@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+-      young = kvm_age_hva(kvm, address);
+-      spin_unlock(&kvm->mmu_lock);
+-      srcu_read_unlock(&kvm->srcu, idx);
++      young = kvm_age_hva(kvm, address);
+       if (young)
+               kvm_flush_remote_tlbs(kvm);
++      spin_unlock(&kvm->mmu_lock);
++      srcu_read_unlock(&kvm->srcu, idx);
++
+       return young;
+ }
diff --git a/queue-3.3/kvm-nvmx-fix-erroneous-exception-bitmap-check.patch b/queue-3.3/kvm-nvmx-fix-erroneous-exception-bitmap-check.patch
new file mode 100644
index 0000000..8ea76a0
--- /dev/null
@@ -0,0 +1,36 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:29 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:44 +0300
+Subject: KVM: nVMX: Fix erroneous exception bitmap check
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-9-git-send-email-avi@redhat.com>
+
+
+From: Nadav Har'El <nyh@math.technion.ac.il>
+
+(cherry picked from commit 9587190107d0c0cbaccbf7bf6b0245d29095a9ae)
+
+The code which checks whether to inject a pagefault to L1 or L2 (in
+nested VMX) was wrong, incorrect in how it checked the PF_VECTOR bit.
+Thanks to Dan Carpenter for spotting this.
+
+Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1678,7 +1678,7 @@ static int nested_pf_handled(struct kvm_
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+-      if (!(vmcs12->exception_bitmap & PF_VECTOR))
++      if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+               return 0;
+       nested_vmx_vmexit(vcpu);
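The exception bitmap holds one bit per exception vector, so the test must use a mask built from the vector number rather than the number itself. A stand-alone sketch of the difference, with eb standing in for vmcs12->exception_bitmap and PF_VECTOR being 14:

	u32 eb = vmcs12->exception_bitmap;

	bool wrong = eb & PF_VECTOR;          /* 14 is binary 1110: tests the
	                                       * bits for vectors 1, 2 and 3
	                                       * and never the page-fault bit */
	bool right = eb & (1u << PF_VECTOR);  /* 0x4000: tests bit 14, the
	                                       * page-fault bit, as intended  */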
diff --git a/queue-3.3/kvm-s390-do-store-status-after-handling-stop_on_stop-bit.patch b/queue-3.3/kvm-s390-do-store-status-after-handling-stop_on_stop-bit.patch
new file mode 100644
index 0000000..36797ad
--- /dev/null
@@ -0,0 +1,64 @@
+From stable-owner@vger.kernel.org Wed May  9 06:11:16 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:37 +0300
+Subject: KVM: s390: do store status after handling STOP_ON_STOP bit
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-2-git-send-email-avi@redhat.com>
+
+
+From: Jens Freimann <jfrei@linux.vnet.ibm.com>
+
+(cherry picked from commit 9e0d5473e2f0ba2d2fe9dab9408edef3060b710e)
+
+In handle_stop() handle the stop bit before doing the store status as
+described for "Stop and Store Status" in the Principles of Operation.
+We have to give up the local_int.lock before calling kvm store status
+since it calls gmap_fault() which might sleep. Since local_int.lock
+only protects local_int.* and not guest memory we can give up the lock.
+
+Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/intercept.c |   20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -133,13 +133,6 @@ static int handle_stop(struct kvm_vcpu *
+       vcpu->stat.exit_stop_request++;
+       spin_lock_bh(&vcpu->arch.local_int.lock);
+-      if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+-              vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+-              rc = kvm_s390_vcpu_store_status(vcpu,
+-                                                KVM_S390_STORE_STATUS_NOADDR);
+-              if (rc >= 0)
+-                      rc = -EOPNOTSUPP;
+-      }
+       if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
+               vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
+@@ -155,7 +148,18 @@ static int handle_stop(struct kvm_vcpu *
+               rc = -EOPNOTSUPP;
+       }
+-      spin_unlock_bh(&vcpu->arch.local_int.lock);
++      if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
++              vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
++              /* store status must be called unlocked. Since local_int.lock
++               * only protects local_int.* and not guest memory we can give
++               * up the lock here */
++              spin_unlock_bh(&vcpu->arch.local_int.lock);
++              rc = kvm_s390_vcpu_store_status(vcpu,
++                                              KVM_S390_STORE_STATUS_NOADDR);
++              if (rc >= 0)
++                      rc = -EOPNOTSUPP;
++      } else
++              spin_unlock_bh(&vcpu->arch.local_int.lock);
+       return rc;
+ }
diff --git a/queue-3.3/kvm-s390-sanitize-fpc-registers-for-kvm_set_fpu.patch b/queue-3.3/kvm-s390-sanitize-fpc-registers-for-kvm_set_fpu.patch
new file mode 100644
index 0000000..aec7654
--- /dev/null
@@ -0,0 +1,36 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:27 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:38 +0300
+Subject: KVM: s390: Sanitize fpc registers for KVM_SET_FPU
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-3-git-send-email-avi@redhat.com>
+
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+(cherry picked from commit 851755871c1f3184f4124c466e85881f17fa3226)
+
+commit 7eef87dc99e419b1cc051e4417c37e4744d7b661 (KVM: s390: fix
+register setting) added a load of the floating point control register
+to the KVM_SET_FPU path. Let's make sure that the fpc is valid.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/kvm-s390.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -418,7 +418,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct
+ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+ {
+       memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+-      vcpu->arch.guest_fpregs.fpc = fpu->fpc;
++      vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
+       restore_fp_regs(&vcpu->arch.guest_fpregs);
+       return 0;
+ }
diff --git a/queue-3.3/kvm-vmx-fix-delayed-load-of-shared-msrs.patch b/queue-3.3/kvm-vmx-fix-delayed-load-of-shared-msrs.patch
new file mode 100644
index 0000000..7867c51
--- /dev/null
@@ -0,0 +1,40 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:29 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:43 +0300
+Subject: KVM: VMX: Fix delayed load of shared MSRs
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-8-git-send-email-avi@redhat.com>
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 9ee73970c03edb68146ceb1ba2a7033c99a5e017)
+
+Shared MSRs (MSR_*STAR and related) are stored in both vmx->guest_msrs
+and in the CPU registers, but vmx_set_msr() only updated memory. Prior
+to 46199f33c2953, this didn't matter, since we called vmx_load_host_state(),
+which scheduled a vmx_save_host_state(), which re-synchronized the CPU
+state, but now we don't, so the CPU state will not be synchronized until
+the next exit to host userspace.  This mostly affects nested vmx workloads,
+which play with these MSRs a lot.
+
+Fix by loading the MSR eagerly.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2219,6 +2219,9 @@ static int vmx_set_msr(struct kvm_vcpu *
+               msr = find_msr_entry(vmx, msr_index);
+               if (msr) {
+                       msr->data = data;
++                      if (msr - vmx->guest_msrs < vmx->save_nmsrs)
++                              kvm_set_shared_msr(msr->index, msr->data,
++                                                 msr->mask);
+                       break;
+               }
+               ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/queue-3.3/kvm-vmx-fix-kvm_set_shared_msr-called-in-preemptible-context.patch b/queue-3.3/kvm-vmx-fix-kvm_set_shared_msr-called-in-preemptible-context.patch
new file mode 100644
index 0000000..32d83a5
--- /dev/null
@@ -0,0 +1,52 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:31 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:46 +0300
+Subject: KVM: VMX: Fix kvm_set_shared_msr() called in preemptible context
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-11-git-send-email-avi@redhat.com>
+
+From: Avi Kivity <avi@redhat.com>
+
+(cherry picked from commit 2225fd56049643c1a7d645c0ce9d499d43c7974e)
+
+kvm_set_shared_msr() may not be called in preemptible context,
+but vmx_set_msr() does so:
+
+  BUG: using smp_processor_id() in preemptible [00000000] code: qemu-kvm/22713
+  caller is kvm_set_shared_msr+0x32/0xa0 [kvm]
+  Pid: 22713, comm: qemu-kvm Not tainted 3.4.0-rc3+ #39
+  Call Trace:
+   [<ffffffff8131fa82>] debug_smp_processor_id+0xe2/0x100
+   [<ffffffffa0328ae2>] kvm_set_shared_msr+0x32/0xa0 [kvm]
+   [<ffffffffa03a103b>] vmx_set_msr+0x28b/0x2d0 [kvm_intel]
+   ...
+
+Making kvm_set_shared_msr() work in preemptible context is cleaner, but
+it's used in the fast path.  Making two variants is overkill, so
+this patch just disables preemption around the call.
+
+Reported-by: Dave Jones <davej@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2219,9 +2219,12 @@ static int vmx_set_msr(struct kvm_vcpu *
+               msr = find_msr_entry(vmx, msr_index);
+               if (msr) {
+                       msr->data = data;
+-                      if (msr - vmx->guest_msrs < vmx->save_nmsrs)
++                      if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
++                              preempt_disable();
+                               kvm_set_shared_msr(msr->index, msr->data,
+                                                  msr->mask);
++                              preempt_enable();
++                      }
+                       break;
+               }
+               ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/queue-3.3/kvm-vmx-vmx_set_cr0-expects-kvm-srcu-locked.patch b/queue-3.3/kvm-vmx-vmx_set_cr0-expects-kvm-srcu-locked.patch
new file mode 100644
index 0000000..bacd8eb
--- /dev/null
@@ -0,0 +1,35 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:30 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:45 +0300
+Subject: KVM: VMX: vmx_set_cr0 expects kvm->srcu locked
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-10-git-send-email-avi@redhat.com>
+
+
+From: Marcelo Tosatti <mtosatti@redhat.com>
+
+(cherry picked from commit 7a4f5ad051e02139a9f1c0f7f4b1acb88915852b)
+
+vmx_set_cr0 is called from vcpu run context, therefore it expects
+kvm->srcu to be held (for setting up the real-mode TSS).
+
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3918,7 +3918,9 @@ static int vmx_vcpu_reset(struct kvm_vcp
+               vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+       vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
++      vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
++      srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       vmx_set_cr4(&vmx->vcpu, 0);
+       vmx_set_efer(&vmx->vcpu, 0);
+       vmx_fpu_activate(&vmx->vcpu);
diff --git a/queue-3.3/kvm-x86-emulator-correctly-mask-pmc-index-bits-in-rdpmc-instruction-emulation.patch b/queue-3.3/kvm-x86-emulator-correctly-mask-pmc-index-bits-in-rdpmc-instruction-emulation.patch
new file mode 100644
index 0000000..f661cbd
--- /dev/null
@@ -0,0 +1,32 @@
+From stable-owner@vger.kernel.org Wed May  9 06:14:28 2012
+From: Avi Kivity <avi@redhat.com>
+Date: Wed,  9 May 2012 16:10:41 +0300
+Subject: KVM: x86 emulator: correctly mask pmc index bits in RDPMC instruction emulation
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
+Message-ID: <1336569047-23576-6-git-send-email-avi@redhat.com>
+
+
+From: Gleb Natapov <gleb@redhat.com>
+
+(cherry picked from commit 270c6c79f4e15e599f47174ecedad932463af7a2)
+
+
+Signed-off-by: Gleb Natapov <gleb@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/pmu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -413,7 +413,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vc
+       struct kvm_pmc *counters;
+       u64 ctr;
+-      pmc &= (3u << 30) - 1;
++      pmc &= ~(3u << 30);
+       if (!fixed && pmc >= pmu->nr_arch_gp_counters)
+               return 1;
+       if (fixed && pmc >= pmu->nr_arch_fixed_counters)
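In the RDPMC encoding the low bits of ECX are the counter index, while bits 30 and 31 carry flags rather than index bits (bit 30 selects the fixed-function counters, as the `fixed` checks above show), so the mask is meant to strip both flag bits and keep only the index. The arithmetic behind the one-line change, worked out for illustration:

	3u << 30        = 0xc0000000   /* bits 31 and 30                      */
	(3u << 30) - 1  = 0xbfffffff   /* old mask: clears bit 30 but leaves
	                                * bit 31 set, so a request with the
	                                * bit-31 flag set always fails the
	                                * range checks above                  */
	~(3u << 30)     = 0x3fffffff   /* new mask: clears both flag bits and
	                                * keeps only the counter index        */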
diff --git a/queue-3.3/series b/queue-3.3/series
index 5b679d64d7685b91be4e6c9f31128111b99bc8cd..0854345293bd29339f4724ddd24991819c44b7b6 100644
@@ -31,3 +31,16 @@ tcp-fix-infinite-cwnd-in-tcp_complete_cwr.patch
 tcp-change-tcp_adv_win_scale-and-tcp_rmem.patch
 net-add-memory-barriers-to-prevent-possible-race-in-byte-queue-limits.patch
 net-fix-issue-with-netdev_tx_reset_queue-not-resetting-queue-from-xoff-state.patch
+kvm-s390-do-store-status-after-handling-stop_on_stop-bit.patch
+kvm-s390-sanitize-fpc-registers-for-kvm_set_fpu.patch
+kvm-fix-write-protection-race-during-dirty-logging.patch
+kvm-mmu_notifier-flush-tlbs-before-releasing-mmu_lock.patch
+kvm-x86-emulator-correctly-mask-pmc-index-bits-in-rdpmc-instruction-emulation.patch
+kvm-ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
+kvm-vmx-fix-delayed-load-of-shared-msrs.patch
+kvm-nvmx-fix-erroneous-exception-bitmap-check.patch
+kvm-vmx-vmx_set_cr0-expects-kvm-srcu-locked.patch
+kvm-vmx-fix-kvm_set_shared_msr-called-in-preemptible-context.patch
+kvm-lock-slots_lock-around-device-assignment.patch
+sony-laptop-enable-keyboard-backlight-by-default.patch
+hugepages-fix-use-after-free-bug-in-quota-handling.patch
diff --git a/queue-3.3/sony-laptop-enable-keyboard-backlight-by-default.patch b/queue-3.3/sony-laptop-enable-keyboard-backlight-by-default.patch
new file mode 100644
index 0000000..c06a280
--- /dev/null
@@ -0,0 +1,47 @@
+From 6fe6ae56a7cebaebc2e6daa11c423e4692f9b592 Mon Sep 17 00:00:00 2001
+From: Josh Boyer <jwboyer@redhat.com>
+Date: Wed, 2 Nov 2011 14:32:00 -0400
+Subject: sony-laptop: Enable keyboard backlight by default
+
+From: Josh Boyer <jwboyer@redhat.com>
+
+commit 6fe6ae56a7cebaebc2e6daa11c423e4692f9b592 upstream.
+
+When the keyboard backlight support was originally added, the commit said
+to default it to on with a 10 second timeout.  That actually wasn't the
+case, as the default value is commented out for the kbd_backlight parameter.
+Because it is a static variable, it gets set to 0 by default without some
+other form of initialization.
+
+However, it seems the function to set the value wasn't actually called
+immediately, so whatever state the keyboard was in initially would remain.
+Then commit df410d522410e67660 was introduced during the 2.6.39 timeframe to
+immediately set whatever value was present (as well as attempt to
+restore/reset the state on module removal or resume).  That seems to have
+now forced the light off immediately when the module is loaded unless
+the option kbd_backlight=1 is specified.
+
+Let's enable it by default again (for the first time).  This should solve
+https://bugzilla.redhat.com/show_bug.cgi?id=728478
+
+Signed-off-by: Josh Boyer <jwboyer@redhat.com>
+Acked-by: Mattia Dongili <malattia@linux.it>
+Signed-off-by: Matthew Garrett <mjg@redhat.com>
+Cc: maximilian attems <max@stro.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/platform/x86/sony-laptop.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/platform/x86/sony-laptop.c
++++ b/drivers/platform/x86/sony-laptop.c
+@@ -127,7 +127,7 @@ MODULE_PARM_DESC(minor,
+                "default is -1 (automatic)");
+ #endif
+-static int kbd_backlight;     /* = 1 */
++static int kbd_backlight = 1;
+ module_param(kbd_backlight, int, 0444);
+ MODULE_PARM_DESC(kbd_backlight,
+                "set this to 0 to disable keyboard backlight, "
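The C rule the changelog leans on is that objects with static storage duration are zero-initialized when no initializer is given, which is why the commented-out "/* = 1 */" never had any effect. In isolation (placeholder names, not from the driver):

	static int off_by_default;        /* static storage: implicitly 0      */
	static int on_by_default = 1;     /* an explicit initializer is the
	                                   * only way to get another default   */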