From: Greg Kroah-Hartman Date: Wed, 29 Apr 2009 05:13:03 +0000 (-0700) Subject: more .28 patches X-Git-Tag: v2.6.27.22~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=20bc07223b2c9feb7ca32573ee125cbcce1ed479;p=thirdparty%2Fkernel%2Fstable-queue.git more .28 patches --- diff --git a/queue-2.6.28/anon_inodes-use-fops-owner-for-module-refcount.patch b/queue-2.6.28/anon_inodes-use-fops-owner-for-module-refcount.patch new file mode 100644 index 00000000000..cdeba45cd7c --- /dev/null +++ b/queue-2.6.28/anon_inodes-use-fops-owner-for-module-refcount.patch @@ -0,0 +1,63 @@ +From mtosatti@redhat.com Tue Apr 28 21:35:18 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:17 -0300 +Subject: anon_inodes: use fops->owner for module refcount +To: stable@kernel.org +Cc: Christian Borntraeger , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-1-git-send-email-mtosatti@redhat.com> + + +From: Christian Borntraeger + +There is an imbalance for anonymous inodes. If the fops->owner field is set, +the module reference count of owner is decreased on release. +("filp_close" --> "__fput" ---> "fops_put") + +On the other hand, anon_inode_getfd does not increase the module reference +count of owner. This causes two problems: + +- if owner is set, the module refcount goes negative +- if owner is not set, the module can be unloaded while code is running + +This patch changes anon_inode_getfd to be symmetric regarding fops->owner +handling. + +I have checked all existing users of anon_inode_getfd. No one sets fops->owner, +that's why nobody has seen the module refcount negative. The refcounting was +tested with a patched and unpatched KVM module (see patch 2/2). I also did an +epoll_open/close test. 
+ +Signed-off-by: Christian Borntraeger +Reviewed-by: Davide Libenzi +Signed-off-by: Avi Kivity +(cherry picked from commit e3a2a0d4e5ace731e60e2eff4fb7056ecb34adc1) +Signed-off-by: Greg Kroah-Hartman +--- + fs/anon_inodes.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/anon_inodes.c ++++ b/fs/anon_inodes.c +@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, c + if (IS_ERR(anon_inode_inode)) + return -ENODEV; + ++ if (fops->owner && !try_module_get(fops->owner)) ++ return -ENOENT; ++ + error = get_unused_fd_flags(flags); + if (error < 0) +- return error; ++ goto err_module; + fd = error; + + /* +@@ -128,6 +131,8 @@ err_dput: + dput(dentry); + err_put_unused_fd: + put_unused_fd(fd); ++err_module: ++ module_put(fops->owner); + return error; + } + EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/queue-2.6.28/ath9k-ar9280-pci-devices-must-serialize-io-as-well.patch b/queue-2.6.28/ath9k-ar9280-pci-devices-must-serialize-io-as-well.patch new file mode 100644 index 00000000000..30547c29e14 --- /dev/null +++ b/queue-2.6.28/ath9k-ar9280-pci-devices-must-serialize-io-as-well.patch @@ -0,0 +1,31 @@ +From lrodriguez@atheros.com Tue Apr 28 21:42:57 2009 +From: "Luis R. Rodriguez" +Date: Mon, 23 Mar 2009 19:03:27 -0400 +Subject: ath9k: AR9280 PCI devices must serialize IO as well +To: stable@kernel.org +Cc: "Luis R. Rodriguez" , ath9k-devel@venema.h4ckr.net, linux-wireless@vger.kernel.org +Message-ID: <1237849407-17273-2-git-send-email-lrodriguez@atheros.com> + +From: Luis R. Rodriguez + +This is a port of: +commit SHA1 5ec905a8df3fa877566ba98298433fbfb3d688cc +for 2.6.28 + +Signed-off-by: Luis R. 
Rodriguez +--- + drivers/net/wireless/ath9k/hw.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/wireless/ath9k/hw.c ++++ b/drivers/net/wireless/ath9k/hw.c +@@ -3311,7 +3311,8 @@ static struct ath_hal *ath9k_hw_do_attac + } + + if (ah->ah_config.serialize_regmode == SER_REG_MODE_AUTO) { +- if (ah->ah_macVersion == AR_SREV_VERSION_5416_PCI) { ++ if (ah->ah_macVersion == AR_SREV_VERSION_5416_PCI || ++ (AR_SREV_9280(ah) && !ah->ah_isPciExpress)) { + ah->ah_config.serialize_regmode = + SER_REG_MODE_ON; + } else { diff --git a/queue-2.6.28/ath9k-implement-io-serialization.patch b/queue-2.6.28/ath9k-implement-io-serialization.patch new file mode 100644 index 00000000000..401b5eee2f5 --- /dev/null +++ b/queue-2.6.28/ath9k-implement-io-serialization.patch @@ -0,0 +1,130 @@ +From lrodriguez@atheros.com Tue Apr 28 21:42:36 2009 +From: "Luis R. Rodriguez" +Date: Mon, 23 Mar 2009 19:03:26 -0400 +Subject: ath9k: implement IO serialization +To: stable@kernel.org +Cc: "Luis R. Rodriguez" , ath9k-devel@venema.h4ckr.net, linux-wireless@vger.kernel.org +Message-ID: <1237849407-17273-1-git-send-email-lrodriguez@atheros.com> + +From: Luis R. Rodriguez + +This is a port of: +commit SHA1 6158425be398936af1fd04451f78ffad01529cb0 +for 2.6.28. + +All 802.11n PCI devices (Cardbus, PCI, mini-PCI) require +serialization of IO when on non-uniprocessor systems. PCI +express devices do not require this. + +This should fix our only last standing open ath9k kernel.org +bugzilla bug report: + +http://bugzilla.kernel.org/show_bug.cgi?id=12110 + +Signed-off-by: Luis R. 
Rodriguez +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/ath9k/ath9k.h | 4 ++-- + drivers/net/wireless/ath9k/core.c | 1 + + drivers/net/wireless/ath9k/core.h | 33 +++++++++++++++++++++++++++++++++ + drivers/net/wireless/ath9k/hw.c | 19 +++++++++++++++++++ + 4 files changed, 55 insertions(+), 2 deletions(-) + +--- a/drivers/net/wireless/ath9k/ath9k.h ++++ b/drivers/net/wireless/ath9k/ath9k.h +@@ -590,8 +590,8 @@ struct ath9k_country_entry { + u8 iso[3]; + }; + +-#define REG_WRITE(_ah, _reg, _val) iowrite32(_val, _ah->ah_sh + _reg) +-#define REG_READ(_ah, _reg) ioread32(_ah->ah_sh + _reg) ++#define REG_WRITE(_ah, _reg, _val) ath9k_iowrite32((_ah), (_reg), (_val)) ++#define REG_READ(_ah, _reg) ath9k_ioread32((_ah), (_reg)) + + #define SM(_v, _f) (((_v) << _f##_S) & _f) + #define MS(_v, _f) (((_v) & _f) >> _f##_S) +--- a/drivers/net/wireless/ath9k/core.c ++++ b/drivers/net/wireless/ath9k/core.c +@@ -1089,6 +1089,7 @@ int ath_init(u16 devid, struct ath_softc + sc->sc_cachelsz = csz << 2; /* convert to bytes */ + + spin_lock_init(&sc->sc_resetlock); ++ spin_lock_init(&sc->sc_serial_rw); + + ah = ath9k_hw_attach(devid, sc, sc->mem, &status); + if (ah == NULL) { +--- a/drivers/net/wireless/ath9k/core.h ++++ b/drivers/net/wireless/ath9k/core.h +@@ -1040,6 +1040,7 @@ struct ath_softc { + spinlock_t sc_rxbuflock; + spinlock_t sc_txbuflock; + spinlock_t sc_resetlock; ++ spinlock_t sc_serial_rw; + spinlock_t node_lock; + + /* LEDs */ +@@ -1081,4 +1082,36 @@ void ath_get_currentCountry(struct ath_s + struct ath9k_country_entry *ctry); + u64 ath_extend_tsf(struct ath_softc *sc, u32 rstamp); + ++/* ++ * Read and write, they both share the same lock. We do this to serialize ++ * reads and writes on Atheros 802.11n PCI devices only. This is required ++ * as the FIFO on these devices can only accept sanely 2 requests. After ++ * that the device goes bananas. Serializing the reads/writes prevents this ++ * from happening. 
++ */ ++ ++static inline void ath9k_iowrite32(struct ath_hal *ah, u32 reg_offset, u32 val) ++{ ++ if (ah->ah_config.serialize_regmode == SER_REG_MODE_ON) { ++ unsigned long flags; ++ spin_lock_irqsave(&ah->ah_sc->sc_serial_rw, flags); ++ iowrite32(val, ah->ah_sc->mem + reg_offset); ++ spin_unlock_irqrestore(&ah->ah_sc->sc_serial_rw, flags); ++ } else ++ iowrite32(val, ah->ah_sc->mem + reg_offset); ++} ++ ++static inline unsigned int ath9k_ioread32(struct ath_hal *ah, u32 reg_offset) ++{ ++ u32 val; ++ if (ah->ah_config.serialize_regmode == SER_REG_MODE_ON) { ++ unsigned long flags; ++ spin_lock_irqsave(&ah->ah_sc->sc_serial_rw, flags); ++ val = ioread32(ah->ah_sc->mem + reg_offset); ++ spin_unlock_irqrestore(&ah->ah_sc->sc_serial_rw, flags); ++ } else ++ val = ioread32(ah->ah_sc->mem + reg_offset); ++ return val; ++} ++ + #endif /* CORE_H */ +--- a/drivers/net/wireless/ath9k/hw.c ++++ b/drivers/net/wireless/ath9k/hw.c +@@ -346,6 +346,25 @@ static void ath9k_hw_set_defaults(struct + } + + ah->ah_config.intr_mitigation = 0; ++ ++ /* ++ * We need this for PCI devices only (Cardbus, PCI, miniPCI) ++ * _and_ if on non-uniprocessor systems (Multiprocessor/HT). ++ * This means we use it for all AR5416 devices, and the few ++ * minor PCI AR9280 devices out there. ++ * ++ * Serialization is required because these devices do not handle ++ * well the case of two concurrent reads/writes due to the latency ++ * involved. During one read/write another read/write can be issued ++ * on another CPU while the previous read/write may still be working ++ * on our hardware, if we hit this case the hardware poops in a loop. ++ * We prevent this by serializing reads and writes. ++ * ++ * This issue is not present on PCI-Express devices or pre-AR5416 ++ * devices (legacy, 802.11abg). 
++ */ ++ if (num_possible_cpus() > 1) ++ ah->ah_config.serialize_regmode = SER_REG_MODE_AUTO; + } + + static void ath9k_hw_override_ini(struct ath_hal *ah, diff --git a/queue-2.6.28/kvm-advertise-the-bug-in-memory-region-destruction-as-fixed.patch b/queue-2.6.28/kvm-advertise-the-bug-in-memory-region-destruction-as-fixed.patch new file mode 100644 index 00000000000..c729bc94711 --- /dev/null +++ b/queue-2.6.28/kvm-advertise-the-bug-in-memory-region-destruction-as-fixed.patch @@ -0,0 +1,62 @@ +From mtosatti@redhat.com Tue Apr 28 21:40:13 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:31 -0300 +Subject: KVM: Advertise the bug in memory region destruction as fixed +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-15-git-send-email-mtosatti@redhat.com> + + +From: Avi Kivity + +(cherry picked from 1a811b6167089bcdb84284f2dc9fd0b4d0f1899d) + +Userspace might need to act differently. + +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/kvm.h | 2 ++ + virt/kvm/kvm_main.c | 13 ++++++++++++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +--- a/include/linux/kvm.h ++++ b/include/linux/kvm.h +@@ -387,6 +387,8 @@ struct kvm_trace_rec { + #define KVM_CAP_DEVICE_ASSIGNMENT 17 + #endif + #define KVM_CAP_IOMMU 18 ++/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ ++#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 + + /* + * ioctls for VM fds +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1715,6 +1715,17 @@ static int kvm_dev_ioctl_create_vm(void) + return fd; + } + ++static long kvm_dev_ioctl_check_extension_generic(long arg) ++{ ++ switch (arg) { ++ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: ++ return 1; ++ default: ++ break; ++ } ++ return kvm_dev_ioctl_check_extension(arg); ++} ++ + static long kvm_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) + { +@@ -1734,7 +1745,7 @@ static long kvm_dev_ioctl(struct file *f + r = kvm_dev_ioctl_create_vm(); + break; + 
case KVM_CHECK_EXTENSION: +- r = kvm_dev_ioctl_check_extension(arg); ++ r = kvm_dev_ioctl_check_extension_generic(arg); + break; + case KVM_GET_VCPU_MMAP_SIZE: + r = -EINVAL; diff --git a/queue-2.6.28/kvm-call-kvm_arch_vcpu_reset-instead-of-the-kvm_x86_ops-callback.patch b/queue-2.6.28/kvm-call-kvm_arch_vcpu_reset-instead-of-the-kvm_x86_ops-callback.patch new file mode 100644 index 00000000000..9537b646dfc --- /dev/null +++ b/queue-2.6.28/kvm-call-kvm_arch_vcpu_reset-instead-of-the-kvm_x86_ops-callback.patch @@ -0,0 +1,34 @@ +From mtosatti@redhat.com Tue Apr 28 21:35:51 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:19 -0300 +Subject: KVM: call kvm_arch_vcpu_reset() instead of the kvm_x86_ops callback +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Gleb Natapov +Message-ID: <1237841498-14100-3-git-send-email-mtosatti@redhat.com> + + +From: Gleb Natapov + +(cherry picked from 5f179287fa02723215eecf681d812b303c243973) + +Call kvm_arch_vcpu_reset() instead of directly using arch callback. +The function does additional things. 
+ +Signed-off-by: Gleb Natapov +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -2973,7 +2973,7 @@ static int __vcpu_run(struct kvm_vcpu *v + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, vcpu->arch.sipi_vector); + kvm_lapic_reset(vcpu); +- r = kvm_x86_ops->vcpu_reset(vcpu); ++ r = kvm_arch_vcpu_reset(vcpu); + if (r) + return r; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/queue-2.6.28/kvm-fix-cpuid-iteration-on-multiple-leaves-per-eac.patch b/queue-2.6.28/kvm-fix-cpuid-iteration-on-multiple-leaves-per-eac.patch new file mode 100644 index 00000000000..78e5965eba9 --- /dev/null +++ b/queue-2.6.28/kvm-fix-cpuid-iteration-on-multiple-leaves-per-eac.patch @@ -0,0 +1,52 @@ +From mtosatti@redhat.com Tue Apr 28 21:37:05 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:26 -0300 +Subject: KVM: Fix cpuid iteration on multiple leaves per eac +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Nitin A Kamble +Message-ID: <1237841498-14100-10-git-send-email-mtosatti@redhat.com> + + +From: Nitin A Kamble + +(cherry picked from 0fdf8e59faa5c60e9d77c8e14abe3a0f8bfcf586) + +The code to traverse the cpuid data array list for counting type of leaves is +currently broken. + +This patch fixes 2 things in it. + + 1. Set the 1st counting entry's flag KVM_CPUID_FLAG_STATE_READ_NEXT. Without + it the code will never find a valid entry. + + 2. Also the stop condition in the for loop while looking for the next unflagged + entry is broken. It needs to stop when it finds one matching entry; + and in the case of count of 1, it will be the same entry found in this + iteration. 
+ +Signed-Off-By: Nitin A Kamble +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1188,6 +1188,7 @@ static void do_cpuid_ent(struct kvm_cpui + int t, times = entry->eax & 0xff; + + entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; ++ entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; + for (t = 1; t < times && *nent < maxnent; ++t) { + do_cpuid_1_ent(&entry[t], function, 0); + entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; +@@ -2729,7 +2730,7 @@ static int move_to_next_stateful_cpuid_e + + e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; + /* when no next entry is found, the current entry[i] is reselected */ +- for (j = i + 1; j == i; j = (j + 1) % nent) { ++ for (j = i + 1; ; j = (j + 1) % nent) { + struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; + if (ej->function == e->function) { + ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; diff --git a/queue-2.6.28/kvm-fix-cpuid-leaf-0xb-loop-termination.patch b/queue-2.6.28/kvm-fix-cpuid-leaf-0xb-loop-termination.patch new file mode 100644 index 00000000000..d8ca1b9b784 --- /dev/null +++ b/queue-2.6.28/kvm-fix-cpuid-leaf-0xb-loop-termination.patch @@ -0,0 +1,35 @@ +From mtosatti@redhat.com Tue Apr 28 21:36:55 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:25 -0300 +Subject: KVM: Fix cpuid leaf 0xb loop termination +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Nitin A Kamble +Message-ID: <1237841498-14100-9-git-send-email-mtosatti@redhat.com> + + +From: Nitin A Kamble + +(cherry picked from 0853d2c1d849ef69884d2447d90d04007590b72b) + +For cpuid leaf 0xb the bits 8-15 in ECX register define the end of counting +leaf. The previous code was using bits 0-7 for this purpose, which is +a bug. 
+ +Signed-off-by: Nitin A Kamble +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1218,7 +1218,7 @@ static void do_cpuid_ent(struct kvm_cpui + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + /* read more entries until level_type is zero */ + for (i = 1; *nent < maxnent; ++i) { +- level_type = entry[i - 1].ecx & 0xff; ++ level_type = entry[i - 1].ecx & 0xff00; + if (!level_type) + break; + do_cpuid_1_ent(&entry[i], function, i); diff --git a/queue-2.6.28/kvm-mmu-check-for-present-pdptr-shadow-page-in-walk_shadow.patch b/queue-2.6.28/kvm-mmu-check-for-present-pdptr-shadow-page-in-walk_shadow.patch new file mode 100644 index 00000000000..bc588e9adac --- /dev/null +++ b/queue-2.6.28/kvm-mmu-check-for-present-pdptr-shadow-page-in-walk_shadow.patch @@ -0,0 +1,34 @@ +From mtosatti@redhat.com Tue Apr 28 21:40:23 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:32 -0300 +Subject: KVM: MMU: check for present pdptr shadow page in walk_shadow +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-16-git-send-email-mtosatti@redhat.com> + + +(cherry picked from eb64f1e8cd5c3cae912db30a77d062367f7a11a6) + +walk_shadow assumes the caller verified validity of the pdptr pointer in +question, which is not the case for the invlpg handler. + +Fixes oops during Solaris 10 install. 
+ +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1159,6 +1159,8 @@ static int walk_shadow(struct kvm_shadow + if (level == PT32E_ROOT_LEVEL) { + shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; + shadow_addr &= PT64_BASE_ADDR_MASK; ++ if (!shadow_addr) ++ return 1; + --level; + } + diff --git a/queue-2.6.28/kvm-mmu-extend-kvm_mmu_page-slot_bitmap-size.patch b/queue-2.6.28/kvm-mmu-extend-kvm_mmu_page-slot_bitmap-size.patch new file mode 100644 index 00000000000..21ab7b72dc9 --- /dev/null +++ b/queue-2.6.28/kvm-mmu-extend-kvm_mmu_page-slot_bitmap-size.patch @@ -0,0 +1,70 @@ +From mtosatti@redhat.com Tue Apr 28 21:36:00 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:20 -0300 +Subject: KVM: MMU: Extend kvm_mmu_page->slot_bitmap size +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Sheng Yang +Message-ID: <1237841498-14100-4-git-send-email-mtosatti@redhat.com> + + +From: Sheng Yang + +(cherry picked from 291f26bc0f89518ad7ee3207c09eb8a743ac8fcc) + +Otherwise set_bit() for private memory slot(above KVM_MEMORY_SLOTS) would +corrupted memory in 32bit host. + +Signed-off-by: Sheng Yang +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 8 +++++--- + arch/x86/kvm/mmu.c | 6 +++--- + 2 files changed, 8 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -190,9 +190,11 @@ struct kvm_mmu_page { + u64 *spt; + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; +- unsigned long slot_bitmap; /* One bit set per slot which has memory +- * in this shadow page. +- */ ++ /* ++ * One bit set per slot which has memory ++ * in this shadow page. 
++ */ ++ DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + int multimapped; /* More than one parent_pte? */ + int root_count; /* Currently serving as active root */ + bool unsync; +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -787,7 +787,7 @@ static struct kvm_mmu_page *kvm_mmu_allo + set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); + ASSERT(is_empty_shadow_page(sp->spt)); +- sp->slot_bitmap = 0; ++ bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + sp->multimapped = 0; + sp->parent_pte = parent_pte; + --vcpu->kvm->arch.n_free_mmu_pages; +@@ -1362,7 +1362,7 @@ static void page_header_update_slot(stru + int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); + struct kvm_mmu_page *sp = page_header(__pa(pte)); + +- __set_bit(slot, &sp->slot_bitmap); ++ __set_bit(slot, sp->slot_bitmap); + } + + static void mmu_convert_notrap(struct kvm_mmu_page *sp) +@@ -2451,7 +2451,7 @@ void kvm_mmu_slot_remove_write_access(st + int i; + u64 *pt; + +- if (!test_bit(slot, &sp->slot_bitmap)) ++ if (!test_bit(slot, sp->slot_bitmap)) + continue; + + pt = sp->spt; diff --git a/queue-2.6.28/kvm-mmu-fix-aliased-gfns-treated-as-unaliased.patch b/queue-2.6.28/kvm-mmu-fix-aliased-gfns-treated-as-unaliased.patch new file mode 100644 index 00000000000..294651fc871 --- /dev/null +++ b/queue-2.6.28/kvm-mmu-fix-aliased-gfns-treated-as-unaliased.patch @@ -0,0 +1,128 @@ +From mtosatti@redhat.com Tue Apr 28 21:36:45 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:24 -0300 +Subject: KVM: MMU: Fix aliased gfns treated as unaliased +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Izik Eidus +Message-ID: <1237841498-14100-8-git-send-email-mtosatti@redhat.com> + + +From: Izik Eidus + +(cherry picked from 2843099fee32a6020e1caa95c6026f28b5d43bff) + +Some areas of kvm x86 mmu are using gfn offset inside a slot without +unaliasing the gfn first. 
This patch makes sure that the gfn will be +unaliased and add gfn_to_memslot_unaliased() to save the calculating +of the gfn unaliasing in case we have it unaliased already. + +Signed-off-by: Izik Eidus +Acked-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 2 ++ + arch/x86/kvm/mmu.c | 14 ++++++++++---- + virt/kvm/kvm_main.c | 9 +++++---- + 3 files changed, 17 insertions(+), 8 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -609,6 +609,8 @@ void kvm_disable_tdp(void); + int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); + int complete_pio(struct kvm_vcpu *vcpu); + ++struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); ++ + static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) + { + struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -384,7 +384,9 @@ static void account_shadowed(struct kvm + { + int *write_count; + +- write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); ++ gfn = unalias_gfn(kvm, gfn); ++ write_count = slot_largepage_idx(gfn, ++ gfn_to_memslot_unaliased(kvm, gfn)); + *write_count += 1; + } + +@@ -392,16 +394,20 @@ static void unaccount_shadowed(struct kv + { + int *write_count; + +- write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); ++ gfn = unalias_gfn(kvm, gfn); ++ write_count = slot_largepage_idx(gfn, ++ gfn_to_memslot_unaliased(kvm, gfn)); + *write_count -= 1; + WARN_ON(*write_count < 0); + } + + static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) + { +- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); ++ struct kvm_memory_slot *slot; + int *largepage_idx; + ++ gfn = unalias_gfn(kvm, gfn); ++ slot = gfn_to_memslot_unaliased(kvm, gfn); + if (slot) { + largepage_idx = slot_largepage_idx(gfn, slot); + return *largepage_idx; +@@ -2860,8 +2866,8 @@ static void 
audit_write_protection(struc + if (sp->role.metaphysical) + continue; + +- slot = gfn_to_memslot(vcpu->kvm, sp->gfn); + gfn = unalias_gfn(vcpu->kvm, sp->gfn); ++ slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); + rmapp = &slot->rmap[gfn - slot->base_gfn]; + if (*rmapp) + printk(KERN_ERR "%s: (%s) shadow page has writable" +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -918,7 +918,7 @@ int kvm_is_error_hva(unsigned long addr) + } + EXPORT_SYMBOL_GPL(kvm_is_error_hva); + +-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) ++struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) + { + int i; + +@@ -931,11 +931,12 @@ static struct kvm_memory_slot *__gfn_to_ + } + return NULL; + } ++EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); + + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) + { + gfn = unalias_gfn(kvm, gfn); +- return __gfn_to_memslot(kvm, gfn); ++ return gfn_to_memslot_unaliased(kvm, gfn); + } + + int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +@@ -959,7 +960,7 @@ unsigned long gfn_to_hva(struct kvm *kvm + struct kvm_memory_slot *slot; + + gfn = unalias_gfn(kvm, gfn); +- slot = __gfn_to_memslot(kvm, gfn); ++ slot = gfn_to_memslot_unaliased(kvm, gfn); + if (!slot) + return bad_hva(); + return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); +@@ -1210,7 +1211,7 @@ void mark_page_dirty(struct kvm *kvm, gf + struct kvm_memory_slot *memslot; + + gfn = unalias_gfn(kvm, gfn); +- memslot = __gfn_to_memslot(kvm, gfn); ++ memslot = gfn_to_memslot_unaliased(kvm, gfn); + if (memslot && memslot->dirty_bitmap) { + unsigned long rel_gfn = gfn - memslot->base_gfn; + diff --git a/queue-2.6.28/kvm-mmu-handle-large-host-sptes-on-invlpg-resync.patch b/queue-2.6.28/kvm-mmu-handle-large-host-sptes-on-invlpg-resync.patch new file mode 100644 index 00000000000..9ebc9eb9587 --- /dev/null +++ b/queue-2.6.28/kvm-mmu-handle-large-host-sptes-on-invlpg-resync.patch @@ -0,0 +1,55 @@ +From 
mtosatti@redhat.com Tue Apr 28 21:40:33 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:33 -0300 +Subject: KVM: MMU: handle large host sptes on invlpg/resync +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-17-git-send-email-mtosatti@redhat.com> + + +(cherry picked from 87917239204d67a316cb89751750f86c9ed3640b) + +The invlpg and sync walkers lack knowledge of large host sptes, +descending to non-existant pagetable level. + +Stop at directory level in such case. + +Fixes SMP Windows XP with hugepages. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu.c | 2 +- + arch/x86/kvm/paging_tmpl.h | 8 ++++++-- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -981,7 +981,7 @@ static int mmu_unsync_walk(struct kvm_mm + for_each_unsync_children(sp->unsync_child_bitmap, i) { + u64 ent = sp->spt[i]; + +- if (is_shadow_present_pte(ent)) { ++ if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { + struct kvm_mmu_page *child; + child = page_header(ent & PT64_BASE_ADDR_MASK); + +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -467,9 +467,13 @@ static int FNAME(shadow_invlpg_entry)(st + u64 *sptep, int level) + { + +- if (level == PT_PAGE_TABLE_LEVEL) { +- if (is_shadow_present_pte(*sptep)) ++ if (level == PT_PAGE_TABLE_LEVEL || ++ ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { ++ if (is_shadow_present_pte(*sptep)) { + rmap_remove(vcpu->kvm, sptep); ++ if (is_large_pte(*sptep)) ++ --vcpu->kvm->stat.lpages; ++ } + set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + return 1; + } diff --git a/queue-2.6.28/kvm-mmu_notifiers-release-method.patch b/queue-2.6.28/kvm-mmu_notifiers-release-method.patch new file mode 100644 index 00000000000..5ab987f8ac5 --- /dev/null +++ b/queue-2.6.28/kvm-mmu_notifiers-release-method.patch @@ -0,0 +1,54 @@ +From mtosatti@redhat.com Tue Apr 
28 21:41:33 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:34 -0300 +Subject: KVM: mmu_notifiers release method +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-18-git-send-email-mtosatti@redhat.com> + + +(cherry picked from 85db06e514422ae429b5f85742d8111b70bd56f3) + +The destructor for huge pages uses the backing inode for adjusting +hugetlbfs accounting. + +Hugepage mappings are destroyed by exit_mmap, after +mmu_notifier_release, so there are no notifications through +unmap_hugepage_range at this point. + +The hugetlbfs inode can be freed with pages backed by it referenced +by the shadow. When the shadow releases its reference, the huge page +destructor will access a now freed inode. + +Implement the release operation for kvm mmu notifiers to release page +refs before the hugetlbfs inode is gone. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -553,11 +553,19 @@ static int kvm_mmu_notifier_clear_flush_ + return young; + } + ++static void kvm_mmu_notifier_release(struct mmu_notifier *mn, ++ struct mm_struct *mm) ++{ ++ struct kvm *kvm = mmu_notifier_to_kvm(mn); ++ kvm_arch_flush_shadow(kvm); ++} ++ + static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, + .clear_flush_young = kvm_mmu_notifier_clear_flush_young, ++ .release = kvm_mmu_notifier_release, + }; + #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ + diff --git a/queue-2.6.28/kvm-pit-fix-i8254-pending-count-read.patch b/queue-2.6.28/kvm-pit-fix-i8254-pending-count-read.patch new file mode 100644 index 00000000000..2c76436907e --- /dev/null +++ 
b/queue-2.6.28/kvm-pit-fix-i8254-pending-count-read.patch @@ -0,0 +1,32 @@ +From mtosatti@redhat.com Tue Apr 28 21:41:43 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:35 -0300 +Subject: KVM: PIT: fix i8254 pending count read +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-19-git-send-email-mtosatti@redhat.com> + + +(cherry picked from d2a8284e8fca9e2a938bee6cd074064d23864886) + +count_load_time assignment is bogus: it's supposed to contain what it +means, not the expiration time. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/i8254.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/i8254.c ++++ b/arch/x86/kvm/i8254.c +@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpi + hrtimer_add_expires_ns(&pt->timer, pt->period); + pt->scheduled = hrtimer_get_expires_ns(&pt->timer); + if (pt->period) +- ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); ++ ps->channels[0].count_load_time = ktime_get(); + + return (pt->period == 0 ? 0 : 1); + } diff --git a/queue-2.6.28/kvm-prevent-trace-call-into-unloaded-module-text.patch b/queue-2.6.28/kvm-prevent-trace-call-into-unloaded-module-text.patch new file mode 100644 index 00000000000..7a50e445b46 --- /dev/null +++ b/queue-2.6.28/kvm-prevent-trace-call-into-unloaded-module-text.patch @@ -0,0 +1,33 @@ +From mtosatti@redhat.com Tue Apr 28 21:37:16 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:27 -0300 +Subject: KVM: Prevent trace call into unloaded module text +To: stable@kernel.org +Cc: Wu Fengguang , mtosatti@redhat.com, Wu Fengguang , avi@redhat.com +Message-ID: <1237841498-14100-11-git-send-email-mtosatti@redhat.com> + + +From: Wu Fengguang + +(cherry picked from b82091824ee4970adf92d5cd6d57b12273171625) + +Add marker_synchronize_unregister() before module unloading. +This prevents possible trace calls into unloaded module text. 
+ +Signed-off-by: Wu Fengguang +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_trace.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/virt/kvm/kvm_trace.c ++++ b/virt/kvm/kvm_trace.c +@@ -252,6 +252,7 @@ void kvm_trace_cleanup(void) + struct kvm_trace_probe *p = &kvm_trace_probes[i]; + marker_probe_unregister(p->name, p->probe_func, p); + } ++ marker_synchronize_unregister(); + + relay_close(kt->rchan); + debugfs_remove(kt->lost_file); diff --git a/queue-2.6.28/kvm-really-remove-a-slot-when-a-user-ask-us-so.patch b/queue-2.6.28/kvm-really-remove-a-slot-when-a-user-ask-us-so.patch new file mode 100644 index 00000000000..820eb9470ff --- /dev/null +++ b/queue-2.6.28/kvm-really-remove-a-slot-when-a-user-ask-us-so.patch @@ -0,0 +1,46 @@ +From mtosatti@redhat.com Tue Apr 28 21:37:27 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:28 -0300 +Subject: KVM: Really remove a slot when a user ask us so +To: stable@kernel.org +Cc: Glauber Costa , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-12-git-send-email-mtosatti@redhat.com> + + +From: Glauber Costa + +(cherry picked from 6f89724829cfd4ad6771a92fd4b8d59c90c7220c) + +Right now, KVM does not remove a slot when we do a +register ioctl for size 0 (would be the expected behaviour). + +Instead, we only mark it as empty, but keep all bitmaps +and allocated data structures present. It completely +nullifies our chances of reusing that same slot again +for mapping a different piece of memory. + +In this patch, we destroy rmaps, and vfree() the +pointers that used to hold the dirty bitmap, rmap +and lpage_info structures. 
+ +Signed-off-by: Glauber Costa +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -821,7 +821,10 @@ int __kvm_set_memory_region(struct kvm * + goto out_free; + } + +- kvm_free_physmem_slot(&old, &new); ++ kvm_free_physmem_slot(&old, npages ? &new : NULL); ++ /* Slot deletion case: we have to update the current slot */ ++ if (!npages) ++ *memslot = old; + #ifdef CONFIG_DMAR + /* map the pages in iommu page table */ + r = kvm_iommu_map_pages(kvm, base_gfn, npages); diff --git a/queue-2.6.28/kvm-set-owner-of-cpu-and-vm-file-operations.patch b/queue-2.6.28/kvm-set-owner-of-cpu-and-vm-file-operations.patch new file mode 100644 index 00000000000..0b7a73a02b3 --- /dev/null +++ b/queue-2.6.28/kvm-set-owner-of-cpu-and-vm-file-operations.patch @@ -0,0 +1,86 @@ +From mtosatti@redhat.com Tue Apr 28 21:40:01 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:30 -0300 +Subject: KVM: set owner of cpu and vm file operations +To: stable@kernel.org +Cc: Christian Borntraeger , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-14-git-send-email-mtosatti@redhat.com> + + +From: Christian Borntraeger + +(cherry picked from 3d3aab1b973b01bd2a1aa46307e94a1380b1d802) + +There is a race between a "close of the file descriptors" and module +unload in the kvm module. + +You can easily trigger this problem by applying this debug patch: +>--- kvm.orig/virt/kvm/kvm_main.c +>+++ kvm/virt/kvm/kvm_main.c +>@@ -648,10 +648,14 @@ void kvm_free_physmem(struct kvm *kvm) +> kvm_free_physmem_slot(&kvm->memslots[i], NULL); +> } +> +>+#include +> static void kvm_destroy_vm(struct kvm *kvm) +> { +> struct mm_struct *mm = kvm->mm; +> +>+ printk("off1\n"); +>+ msleep(5000); +>+ printk("off2\n"); +> spin_lock(&kvm_lock); +> list_del(&kvm->vm_list); +> spin_unlock(&kvm_lock); + +and killing the userspace, followed by an rmmod. 
+ +The problem is that kvm_destroy_vm can run while the module count +is 0. That means, you can remove the module while kvm_destroy_vm +is running. But kvm_destroy_vm is part of the module text. This +causes a kerneloops. The race exists without the msleep but is much +harder to trigger. + +This patch requires the fix for anon_inodes (anon_inodes: use fops->owner +for module refcount). +With this patch, we can set the owner of all anonymous KVM inodes file +operations. The VFS will then control the KVM module refcount as long as there +is an open file. kvm_destroy_vm will be called by the release function of the +last closed file - before the VFS drops the module refcount. + +Signed-off-by: Christian Borntraeger +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1299,7 +1299,7 @@ static int kvm_vcpu_release(struct inode + return 0; + } + +-static const struct file_operations kvm_vcpu_fops = { ++static struct file_operations kvm_vcpu_fops = { + .release = kvm_vcpu_release, + .unlocked_ioctl = kvm_vcpu_ioctl, + .compat_ioctl = kvm_vcpu_ioctl, +@@ -1693,7 +1693,7 @@ static int kvm_vm_mmap(struct file *file + return 0; + } + +-static const struct file_operations kvm_vm_fops = { ++static struct file_operations kvm_vm_fops = { + .release = kvm_vm_release, + .unlocked_ioctl = kvm_vm_ioctl, + .compat_ioctl = kvm_vm_ioctl, +@@ -2057,6 +2057,8 @@ int kvm_init(void *opaque, unsigned int + } + + kvm_chardev_ops.owner = module; ++ kvm_vm_fops.owner = module; ++ kvm_vcpu_fops.owner = module; + + r = misc_register(&kvm_dev); + if (r) { diff --git a/queue-2.6.28/kvm-svm-set-the-busy-flag-of-the-tr-selector.patch b/queue-2.6.28/kvm-svm-set-the-busy-flag-of-the-tr-selector.patch new file mode 100644 index 00000000000..c98b1023257 --- /dev/null +++ b/queue-2.6.28/kvm-svm-set-the-busy-flag-of-the-tr-selector.patch @@ -0,0 +1,39 
@@ +From mtosatti@redhat.com Tue Apr 28 21:36:29 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:23 -0300 +Subject: KVM: SVM: Set the 'busy' flag of the TR selector +To: stable@kernel.org +Cc: Amit Shah , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-7-git-send-email-mtosatti@redhat.com> + + +From: Amit Shah + +(cherry picked from c0d09828c870f90c6bc72070ada281568f89c63b) + +The busy flag of the TR selector is not set by the hardware. This breaks +migration from amd hosts to intel hosts. + +Signed-off-by: Amit Shah +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -781,6 +781,13 @@ static void svm_get_segment(struct kvm_v + if (seg == VCPU_SREG_CS) + var->g = s->limit > 0xfffff; + ++ /* ++ * Work around a bug where the busy flag in the tr selector ++ * isn't exposed ++ */ ++ if (seg == VCPU_SREG_TR) ++ var->type |= 0x2; ++ + var->unusable = !var->present; + } + diff --git a/queue-2.6.28/kvm-svm-set-the-g-bit-of-the-cs-selector-for-cross-vendor-migration.patch b/queue-2.6.28/kvm-svm-set-the-g-bit-of-the-cs-selector-for-cross-vendor-migration.patch new file mode 100644 index 00000000000..2d21db4de96 --- /dev/null +++ b/queue-2.6.28/kvm-svm-set-the-g-bit-of-the-cs-selector-for-cross-vendor-migration.patch @@ -0,0 +1,42 @@ +From mtosatti@redhat.com Tue Apr 28 21:36:19 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:22 -0300 +Subject: KVM: SVM: Set the 'g' bit of the cs selector for cross-vendor migration +To: stable@kernel.org +Cc: Amit Shah , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-6-git-send-email-mtosatti@redhat.com> + + +From: Amit Shah + +(cherry picked from 25022acc3dd5f0b54071c7ba7c371860f2971b52) + +The hardware does not set the 'g' bit of the cs selector and this breaks +migration from amd hosts to intel hosts. 
Set this bit if the segment +limit is beyond 1 MB. + +Signed-off-by: Amit Shah +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -772,6 +772,15 @@ static void svm_get_segment(struct kvm_v + var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; + var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; + var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; ++ ++ /* ++ * SVM always stores 0 for the 'G' bit in the CS selector in ++ * the VMCB on a VMEXIT. This hurts cross-vendor migration: ++ * Intel's VMENTRY has a check on the 'G' bit. ++ */ ++ if (seg == VCPU_SREG_CS) ++ var->g = s->limit > 0xfffff; ++ + var->unusable = !var->present; + } + diff --git a/queue-2.6.28/kvm-vmx-flush-volatile-msrs-before-emulating-rdmsr.patch b/queue-2.6.28/kvm-vmx-flush-volatile-msrs-before-emulating-rdmsr.patch new file mode 100644 index 00000000000..a89cde977b2 --- /dev/null +++ b/queue-2.6.28/kvm-vmx-flush-volatile-msrs-before-emulating-rdmsr.patch @@ -0,0 +1,34 @@ +From mtosatti@redhat.com Tue Apr 28 21:42:10 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:38 -0300 +Subject: KVM: VMX: Flush volatile msrs before emulating rdmsr +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-22-git-send-email-mtosatti@redhat.com> + + +From: Avi Kivity + +(cherry picked from 516a1a7e9dc80358030fe01aabb3bedf882db9e2) + +Some msrs (notably MSR_KERNEL_GS_BASE) are held in the processor registers +and need to be flushed to the vcpu structure before they can be read. + +This fixes cygwin longjmp() failure on Windows x64. 
+ +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -897,6 +897,7 @@ static int vmx_get_msr(struct kvm_vcpu * + data = vmcs_readl(GUEST_SYSENTER_ESP); + break; + default: ++ vmx_load_host_state(to_vmx(vcpu)); + msr = find_msr_entry(to_vmx(vcpu), msr_index); + if (msr) { + data = msr->data; diff --git a/queue-2.6.28/kvm-vmx-move-private-memory-slot-position.patch b/queue-2.6.28/kvm-vmx-move-private-memory-slot-position.patch new file mode 100644 index 00000000000..04b84c9e835 --- /dev/null +++ b/queue-2.6.28/kvm-vmx-move-private-memory-slot-position.patch @@ -0,0 +1,52 @@ +From mtosatti@redhat.com Tue Apr 28 21:36:10 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:21 -0300 +Subject: KVM: VMX: Move private memory slot position +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Sheng Yang +Message-ID: <1237841498-14100-5-git-send-email-mtosatti@redhat.com> + + +From: Sheng Yang + +(cherry picked from 6fe639792c7b8e462baeaac39ecc33541fd5da6e) + +PCI device assignment would map guest MMIO spaces as separate slots, so it is +possible that the device has more than 2 MMIO spaces and overwrites the current +private memslot. + +The patch moves the private memory slots to the top of the userspace visible memory slots. 
+ +Signed-off-by: Sheng Yang +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 2 +- + arch/x86/kvm/vmx.h | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2407,7 +2407,7 @@ static int vmx_set_tss_addr(struct kvm * + { + int ret; + struct kvm_userspace_memory_region tss_mem = { +- .slot = 8, ++ .slot = TSS_PRIVATE_MEMSLOT, + .guest_phys_addr = addr, + .memory_size = PAGE_SIZE * 3, + .flags = 0, +--- a/arch/x86/kvm/vmx.h ++++ b/arch/x86/kvm/vmx.h +@@ -331,8 +331,9 @@ enum vmcs_field { + + #define AR_RESERVD_MASK 0xfffe0f00 + +-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 ++#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) ++#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) ++#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) + + #define VMX_NR_VPIDS (1 << 16) + #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 diff --git a/queue-2.6.28/kvm-x86-disable-kvmclock-on-non-constant-tsc-hosts.patch b/queue-2.6.28/kvm-x86-disable-kvmclock-on-non-constant-tsc-hosts.patch new file mode 100644 index 00000000000..fe61e0d04bd --- /dev/null +++ b/queue-2.6.28/kvm-x86-disable-kvmclock-on-non-constant-tsc-hosts.patch @@ -0,0 +1,45 @@ +From mtosatti@redhat.com Tue Apr 28 21:41:53 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:36 -0300 +Subject: KVM: x86: disable kvmclock on non constant TSC hosts +To: stable@kernel.org +Cc: Glauber Costa , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-20-git-send-email-mtosatti@redhat.com> + + +(cherry picked from abe6655dd699069b53bcccbc65b2717f60203b12) + +This is better. + +Currently, this code path is posing us big troubles, +and we won't have a decent patch in time. So, temporarily +disable it. 
+ +Signed-off-by: Glauber Costa +Signed-off-by: Marcelo Tosatti +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -906,7 +906,6 @@ int kvm_dev_ioctl_check_extension(long e + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SET_TSS_ADDR: + case KVM_CAP_EXT_CPUID: +- case KVM_CAP_CLOCKSOURCE: + case KVM_CAP_PIT: + case KVM_CAP_NOP_IO_DELAY: + case KVM_CAP_MP_STATE: +@@ -931,6 +930,9 @@ int kvm_dev_ioctl_check_extension(long e + case KVM_CAP_IOMMU: + r = intel_iommu_found(); + break; ++ case KVM_CAP_CLOCKSOURCE: ++ r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); ++ break; + default: + r = 0; + break; diff --git a/queue-2.6.28/kvm-x86-emulator-fix-handling-of-vmmcall-instruction.patch b/queue-2.6.28/kvm-x86-emulator-fix-handling-of-vmmcall-instruction.patch new file mode 100644 index 00000000000..c28b673bfd8 --- /dev/null +++ b/queue-2.6.28/kvm-x86-emulator-fix-handling-of-vmmcall-instruction.patch @@ -0,0 +1,37 @@ +From mtosatti@redhat.com Tue Apr 28 21:37:36 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:29 -0300 +Subject: KVM: x86 emulator: Fix handling of VMMCALL instruction +To: stable@kernel.org +Cc: Amit Shah , mtosatti@redhat.com, avi@redhat.com +Message-ID: <1237841498-14100-13-git-send-email-mtosatti@redhat.com> + + +From: Amit Shah + +(cherry picked from fbce554e940a983d005e29849636d0ef54b3eb18) + +The VMMCALL instruction doesn't get recognised and isn't processed +by the emulator. + +This is seen on an Intel host that tries to execute the VMMCALL +instruction after a guest live migrates from an AMD host. 
+ +Signed-off-by: Amit Shah +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86_emulate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86_emulate.c ++++ b/arch/x86/kvm/x86_emulate.c +@@ -299,7 +299,7 @@ static u16 group_table[] = { + + static u16 group2_table[] = { + [Group7*8] = +- SrcNone | ModRM, 0, 0, 0, ++ SrcNone | ModRM, 0, 0, SrcNone | ModRM, + SrcNone | ModRM | DstMem | Mov, 0, + SrcMem16 | ModRM | Mov, 0, + }; diff --git a/queue-2.6.28/kvm-x86-fix-lapic-pending-count-calculation.patch b/queue-2.6.28/kvm-x86-fix-lapic-pending-count-calculation.patch new file mode 100644 index 00000000000..e810d5dde5b --- /dev/null +++ b/queue-2.6.28/kvm-x86-fix-lapic-pending-count-calculation.patch @@ -0,0 +1,200 @@ +From mtosatti@redhat.com Tue Apr 28 21:42:01 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:37 -0300 +Subject: KVM: x86: fix LAPIC pending count calculation +To: stable@kernel.org +Cc: mtosatti@redhat.com, avi@redhat.com, Alexander Graf +Message-ID: <1237841498-14100-21-git-send-email-mtosatti@redhat.com> + + +(cherry picked from b682b814e3cc340f905c14dff87ce8bdba7c5eba) + +Simplify LAPIC TMCCT calculation by using hrtimer provided +function to query remaining time until expiration. + +Fixes host hang with nested ESX. 
+ +Signed-off-by: Marcelo Tosatti +Signed-off-by: Alexander Graf +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/irq.c | 7 ----- + arch/x86/kvm/irq.h | 1 + arch/x86/kvm/lapic.c | 66 +++++++++++---------------------------------------- + arch/x86/kvm/lapic.h | 2 - + arch/x86/kvm/svm.c | 1 + arch/x86/kvm/vmx.c | 4 --- + 6 files changed, 16 insertions(+), 65 deletions(-) + +--- a/arch/x86/kvm/irq.c ++++ b/arch/x86/kvm/irq.c +@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struc + } + EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); + +-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +-{ +- kvm_apic_timer_intr_post(vcpu, vec); +- /* TODO: PIT, RTC etc. */ +-} +-EXPORT_SYMBOL_GPL(kvm_timer_intr_post); +- + void __kvm_migrate_timers(struct kvm_vcpu *vcpu) + { + __kvm_migrate_apic_timer(vcpu); +--- a/arch/x86/kvm/irq.h ++++ b/arch/x86/kvm/irq.h +@@ -84,7 +84,6 @@ static inline int irqchip_in_kernel(stru + + void kvm_pic_reset(struct kvm_kpic_state *s); + +-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); + void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); + void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); + void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -35,6 +35,12 @@ + #include "kvm_cache_regs.h" + #include "irq.h" + ++#ifndef CONFIG_X86_64 ++#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) ++#else ++#define mod_64(x, y) ((x) % (y)) ++#endif ++ + #define PRId64 "d" + #define PRIx64 "llx" + #define PRIu64 "u" +@@ -497,52 +503,22 @@ static void apic_send_ipi(struct kvm_lap + + static u32 apic_get_tmcct(struct kvm_lapic *apic) + { +- u64 counter_passed; +- ktime_t passed, now; ++ ktime_t remaining; ++ s64 ns; + u32 tmcct; + + ASSERT(apic != NULL); + +- now = apic->timer.dev.base->get_time(); +- tmcct = apic_get_reg(apic, APIC_TMICT); +- + /* if initial count is 0, current count should also be 0 */ +- if (tmcct == 0) ++ if 
(apic_get_reg(apic, APIC_TMICT) == 0) + return 0; + +- if (unlikely(ktime_to_ns(now) <= +- ktime_to_ns(apic->timer.last_update))) { +- /* Wrap around */ +- passed = ktime_add(( { +- (ktime_t) { +- .tv64 = KTIME_MAX - +- (apic->timer.last_update).tv64}; } +- ), now); +- apic_debug("time elapsed\n"); +- } else +- passed = ktime_sub(now, apic->timer.last_update); +- +- counter_passed = div64_u64(ktime_to_ns(passed), +- (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); +- +- if (counter_passed > tmcct) { +- if (unlikely(!apic_lvtt_period(apic))) { +- /* one-shot timers stick at 0 until reset */ +- tmcct = 0; +- } else { +- /* +- * periodic timers reset to APIC_TMICT when they +- * hit 0. The while loop simulates this happening N +- * times. (counter_passed %= tmcct) would also work, +- * but might be slower or not work on 32-bit?? +- */ +- while (counter_passed > tmcct) +- counter_passed -= tmcct; +- tmcct -= counter_passed; +- } +- } else { +- tmcct -= counter_passed; +- } ++ remaining = hrtimer_expires_remaining(&apic->timer.dev); ++ if (ktime_to_ns(remaining) < 0) ++ remaining = ktime_set(0, 0); ++ ++ ns = mod_64(ktime_to_ns(remaining), apic->timer.period); ++ tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); + + return tmcct; + } +@@ -639,8 +615,6 @@ static void start_apic_timer(struct kvm_ + { + ktime_t now = apic->timer.dev.base->get_time(); + +- apic->timer.last_update = now; +- + apic->timer.period = apic_get_reg(apic, APIC_TMICT) * + APIC_BUS_CYCLE_NS * apic->timer.divide_count; + atomic_set(&apic->timer.pending, 0); +@@ -1068,16 +1042,6 @@ void kvm_inject_apic_timer_irqs(struct k + } + } + +-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +-{ +- struct kvm_lapic *apic = vcpu->arch.apic; +- +- if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) +- apic->timer.last_update = ktime_add_ns( +- apic->timer.last_update, +- apic->timer.period); +-} +- + int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) + { + int vector = 
kvm_apic_has_interrupt(vcpu); +--- a/arch/x86/kvm/lapic.h ++++ b/arch/x86/kvm/lapic.h +@@ -12,7 +12,6 @@ struct kvm_lapic { + atomic_t pending; + s64 period; /* unit: ns */ + u32 divide_count; +- ktime_t last_update; + struct hrtimer dev; + } timer; + struct kvm_vcpu *vcpu; +@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu * + void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); + int kvm_lapic_enabled(struct kvm_vcpu *vcpu); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); +-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); + + void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); + void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1612,7 +1612,6 @@ static void svm_intr_assist(struct kvm_v + /* Okay, we can deliver the interrupt: grab it and update PIC state. */ + intr_vector = kvm_cpu_get_interrupt(vcpu); + svm_inject_irq(svm, intr_vector); +- kvm_timer_intr_post(vcpu, intr_vector); + out: + update_cr8_intercept(vcpu); + } +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3171,10 +3171,8 @@ static void vmx_intr_assist(struct kvm_v + else + enable_irq_window(vcpu); + } +- if (vcpu->arch.interrupt.pending) { ++ if (vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); +- kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); +- } + } + + /* diff --git a/queue-2.6.28/kvm-x86-reset-pending-inject-nmi-state-on-cpu-reset.patch b/queue-2.6.28/kvm-x86-reset-pending-inject-nmi-state-on-cpu-reset.patch new file mode 100644 index 00000000000..b6bb039bcc8 --- /dev/null +++ b/queue-2.6.28/kvm-x86-reset-pending-inject-nmi-state-on-cpu-reset.patch @@ -0,0 +1,38 @@ +From mtosatti@redhat.com Tue Apr 28 21:35:39 2009 +From: Marcelo Tosatti +Date: Mon, 23 Mar 2009 17:51:18 -0300 +Subject: KVM: x86: Reset pending/inject NMI state on CPU reset +To: stable@kernel.org +Cc: Jan Kiszka , mtosatti@redhat.com, avi@redhat.com, Gleb Natapov 
+Message-ID: <1237841498-14100-2-git-send-email-mtosatti@redhat.com> + + +From: Jan Kiszka + +(cherry picked from 448fa4a9c5dbc6941dd19ed09692c588d815bb06) + +CPU reset invalidates pending or already injected NMIs, therefore reset +the related state variables. + +Based on original patch by Gleb Natapov. + +Signed-off-by: Gleb Natapov +Signed-off-by: Jan Kiszka +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3925,6 +3925,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vc + + int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) + { ++ vcpu->arch.nmi_pending = false; ++ vcpu->arch.nmi_injected = false; ++ + return kvm_x86_ops->vcpu_reset(vcpu); + } + diff --git a/queue-2.6.28/md-fix-deadlock-when-stopping-arrays.patch b/queue-2.6.28/md-fix-deadlock-when-stopping-arrays.patch new file mode 100644 index 00000000000..ca5000fe245 --- /dev/null +++ b/queue-2.6.28/md-fix-deadlock-when-stopping-arrays.patch @@ -0,0 +1,147 @@ +From dan.j.williams@intel.com Tue Apr 28 21:46:06 2009 +From: Dan Williams +Date: Fri, 27 Mar 2009 14:38:11 -0700 +Subject: md: fix deadlock when stopping arrays +To: Greg KH +Cc: NeilBrown , "stable@kernel.org" +Message-ID: <1238189891.29636.14.camel@dwillia2-linux.ch.intel.com> + +From: Dan Williams + +[backport of 5fd3a17ed456637a224cf4ca82b9ad9d005bc8d4] + +Resolve a deadlock when stopping redundant arrays, i.e. ones that +require a call to sysfs_remove_group when shutdown. 
The deadlock is +summarized below: + +Thread1 Thread2 +------- ------- +read sysfs attribute stop array + take mddev lock + sysfs_remove_group +sysfs_get_active +wait for mddev lock + wait for active + +Sysrq-w: + -------- +mdmon S 00000017 2212 4163 1 + f1982ea8 00000046 2dcf6b85 00000017 c0b23100 f2f83ed0 c0b23100 f2f8413c + c0b23100 c0b23100 c0b1fb98 f2f8413c 00000000 f2f8413c c0b23100 f2291ecc + 00000002 c0b23100 00000000 00000017 f2f83ed0 f1982eac 00000046 c044d9dd +Call Trace: + [] ? debug_mutex_add_waiter+0x1d/0x58 + [] __mutex_lock_common+0x1d9/0x338 + [] ? __mutex_lock_common+0x1d9/0x338 + [] mutex_lock_interruptible_nested+0x33/0x3a + [] ? mddev_lock+0x14/0x16 + [] mddev_lock+0x14/0x16 + [] md_attr_show+0x2a/0x49 + [] sysfs_read_file+0x93/0xf9 +mdadm D 00000017 2812 4177 1 + f0401d78 00000046 430456f8 00000017 f0401d58 f0401d20 c0b23100 f2da2c4c + c0b23100 c0b23100 c0b1fb98 f2da2c4c 0a10fc36 00000000 c0b23100 f0401d70 + 00000003 c0b23100 00000000 00000017 f2da29e0 00000001 00000002 00000000 +Call Trace: + [] schedule_timeout+0x1b/0x95 + [] ? schedule_timeout+0x1b/0x95 + [] ? wait_for_common+0x34/0xdc + [] ? trace_hardirqs_on_caller+0x18/0x145 + [] ? trace_hardirqs_on+0xb/0xd + [] wait_for_common+0xa0/0xdc + [] ? default_wake_function+0x0/0x12 + [] wait_for_completion+0x17/0x19 + [] sysfs_addrm_finish+0x19f/0x1d1 + [] sysfs_hash_and_remove+0x42/0x55 + [] sysfs_remove_group+0x57/0x86 + [] do_md_stop+0x13a/0x499 + +This has been there for a while, but is easier to trigger now that mdmon +is closely watching sysfs. 
+ +Cc: Neil Brown +Reported-by: Jacek Danecki +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + + drivers/md/md.c | 27 ++++++++++++++++++++++++--- + include/linux/raid/md_k.h | 2 ++ + 2 files changed, 26 insertions(+), 3 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -3694,6 +3694,10 @@ static int do_md_run(mddev_t * mddev) + return err; + } + if (mddev->pers->sync_request) { ++ /* wait for any previously scheduled redundancy groups ++ * to be removed ++ */ ++ flush_scheduled_work(); + if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) + printk(KERN_WARNING + "md: cannot register extra attributes for %s\n", +@@ -3824,6 +3828,14 @@ static void restore_bitmap_write_access( + spin_unlock(&inode->i_lock); + } + ++ ++static void sysfs_delayed_rm(struct work_struct *ws) ++{ ++ mddev_t *mddev = container_of(ws, mddev_t, del_work); ++ ++ sysfs_remove_group(&mddev->kobj, &md_redundancy_group); ++} ++ + /* mode: + * 0 - completely stop and dis-assemble array + * 1 - switch to readonly +@@ -3833,6 +3845,7 @@ static int do_md_stop(mddev_t * mddev, i + { + int err = 0; + struct gendisk *disk = mddev->gendisk; ++ int remove_group = 0; + + if (atomic_read(&mddev->openers) > is_open) { + printk("md: %s still in use.\n",mdname(mddev)); +@@ -3868,10 +3881,9 @@ static int do_md_stop(mddev_t * mddev, i + mddev->queue->merge_bvec_fn = NULL; + mddev->queue->unplug_fn = NULL; + mddev->queue->backing_dev_info.congested_fn = NULL; +- if (mddev->pers->sync_request) +- sysfs_remove_group(&mddev->kobj, &md_redundancy_group); +- + module_put(mddev->pers->owner); ++ if (mddev->pers->sync_request) ++ remove_group = 1; + mddev->pers = NULL; + /* tell userspace to handle 'inactive' */ + sysfs_notify_dirent(mddev->sysfs_state); +@@ -3919,6 +3931,15 @@ static int do_md_stop(mddev_t * mddev, i + /* make sure all md_delayed_delete calls have finished */ + flush_scheduled_work(); + ++ /* we can't wait for group removal under mddev_lock as ++ * threads 
holding the group 'active' need to acquire ++ * mddev_lock before going inactive ++ */ ++ if (remove_group) { ++ INIT_WORK(&mddev->del_work, sysfs_delayed_rm); ++ schedule_work(&mddev->del_work); ++ } ++ + export_array(mddev); + + mddev->array_sectors = 0; +--- a/include/linux/raid/md_k.h ++++ b/include/linux/raid/md_k.h +@@ -245,6 +245,8 @@ struct mddev_s + * file in sysfs. + */ + ++ struct work_struct del_work; /* used for delayed sysfs removal */ ++ + spinlock_t write_lock; + wait_queue_head_t sb_wait; /* for waiting on superblock updates */ + atomic_t pending_writes; /* number of active superblock writes */ diff --git a/queue-2.6.28/series b/queue-2.6.28/series index 9f85c5dfafa..b15f41052e4 100644 --- a/queue-2.6.28/series +++ b/queue-2.6.28/series @@ -51,3 +51,28 @@ kprobes-fix-locking-imbalance-in-kretprobes.patch 0093-agp-zero-pages-before-sending-to-userspace.patch 0096-hugetlbfs-return-negative-error-code-for-bad-mount.patch block-revert-part-of-18ce3751ccd488c78d3827e9f6bf54e6322676fb.patch +anon_inodes-use-fops-owner-for-module-refcount.patch +kvm-x86-reset-pending-inject-nmi-state-on-cpu-reset.patch +kvm-call-kvm_arch_vcpu_reset-instead-of-the-kvm_x86_ops-callback.patch +kvm-mmu-extend-kvm_mmu_page-slot_bitmap-size.patch +kvm-vmx-move-private-memory-slot-position.patch +kvm-svm-set-the-g-bit-of-the-cs-selector-for-cross-vendor-migration.patch +kvm-svm-set-the-busy-flag-of-the-tr-selector.patch +kvm-mmu-fix-aliased-gfns-treated-as-unaliased.patch +kvm-fix-cpuid-leaf-0xb-loop-termination.patch +kvm-fix-cpuid-iteration-on-multiple-leaves-per-eac.patch +kvm-prevent-trace-call-into-unloaded-module-text.patch +kvm-really-remove-a-slot-when-a-user-ask-us-so.patch +kvm-x86-emulator-fix-handling-of-vmmcall-instruction.patch +kvm-set-owner-of-cpu-and-vm-file-operations.patch +kvm-advertise-the-bug-in-memory-region-destruction-as-fixed.patch +kvm-mmu-check-for-present-pdptr-shadow-page-in-walk_shadow.patch +kvm-mmu-handle-large-host-sptes-on-invlpg-resync.patch 
+kvm-mmu_notifiers-release-method.patch +kvm-pit-fix-i8254-pending-count-read.patch +kvm-x86-disable-kvmclock-on-non-constant-tsc-hosts.patch +kvm-x86-fix-lapic-pending-count-calculation.patch +kvm-vmx-flush-volatile-msrs-before-emulating-rdmsr.patch +ath9k-implement-io-serialization.patch +ath9k-ar9280-pci-devices-must-serialize-io-as-well.patch +md-fix-deadlock-when-stopping-arrays.patch