From: Greg Kroah-Hartman
Date: Wed, 10 Oct 2012 01:06:30 +0000 (+0900)
Subject: 3.5-stable patches
X-Git-Tag: v3.0.46~16
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1094237511e47afd6b3e596eb19a899f9c365116;p=thirdparty%2Fkernel%2Fstable-queue.git

3.5-stable patches

added patches:
cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch
efi-build-efi-stub-with-efi-appropriate-options.patch
efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch
mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch
mempolicy-fix-a-race-in-shared_policy_replace.patch
mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch
mempolicy-remove-mempolicy-sharing.patch
revert-drm-i915-correctly-order-the-ring-init-sequence.patch
revert-kvm-vmx-fix-kvm_set_sregs-with-big-real-mode-segments.patch
revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch
---

diff --git a/queue-3.5/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch b/queue-3.5/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch
new file mode 100644
index 00000000000..0445b64821d
--- /dev/null
+++ b/queue-3.5/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch
@@ -0,0 +1,145 @@
+From d35be8bab9b0ce44bed4b9453f86ebf64062721e Mon Sep 17 00:00:00 2001
+From: "Srivatsa S. Bhat"
+Date: Thu, 24 May 2012 19:46:26 +0530
+Subject: CPU hotplug, cpusets, suspend: Don't modify cpusets during suspend/resume
+
+From: "Srivatsa S. Bhat"
+
+commit d35be8bab9b0ce44bed4b9453f86ebf64062721e upstream.
+
+In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed
+masks as and when necessary to ensure that the tasks belonging to the cpusets
+have some place (online CPUs) to run on. And regular CPU hotplug is
+destructive in the sense that the kernel doesn't remember the original cpuset
+configurations set by the user, across hotplug operations.
+
+However, suspend/resume (which uses CPU hotplug) is a special case in which
+the kernel has the responsibility to restore the system (during resume), to
+exactly the same state it was in before suspend.
+
+In order to achieve that, do the following:
+
+1. Don't modify cpusets during suspend/resume. At all.
+   In particular, don't move the tasks from one cpuset to another, and
+   don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets
+   during the CPU hotplug operations that are carried out in the
+   suspend/resume path.
+
+2. However, cpusets and sched domains are related. We just want to avoid
+   altering cpusets alone. So, to keep the sched domains updated, build
+   a single sched domain (containing all active cpus) during each of the
+   CPU hotplug operations carried out in s/r path, effectively ignoring
+   the cpusets' cpus_allowed masks.
+
+   (Since userspace is frozen while doing all this, it will go unnoticed.)
+
+3. During the last CPU online operation during resume, build the sched
+   domains by looking up the (unaltered) cpusets' cpus_allowed masks.
+   That will bring back the system to the same original state as it was in
+   before suspend.
+
+Ultimately, this will not only solve the cpuset problem related to suspend
+resume (ie., restores the cpusets to exactly what it was before suspend, by
+not touching it at all) but also speeds up suspend/resume because we avoid
+running cpuset update code for every CPU being offlined/onlined.
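+
+The begin/end bracketing described above can be shown with a small,
+self-contained sketch (hypothetical names and a userspace analogue of the
+notifier logic in the diff below, not part of the patch): every FROZEN
+offline increments a counter, every FROZEN online decrements it, and only
+the transition back to zero rebuilds from the cpuset configuration.
+
+	#include <stdio.h>
+
+	enum action { DOWN_PREPARE_FROZEN, ONLINE_FROZEN };
+
+	static int num_cpus_frozen;	/* CPUs currently down for suspend */
+
+	static void cpu_event(enum action a)
+	{
+		switch (a) {
+		case DOWN_PREPARE_FROZEN:
+			num_cpus_frozen++;
+			printf("suspend offline: build single sched domain\n");
+			break;
+		case ONLINE_FROZEN:
+			num_cpus_frozen--;
+			if (num_cpus_frozen)
+				printf("resume online: still single domain\n");
+			else
+				printf("last resume online: rebuild from cpusets\n");
+			break;
+		}
+	}
+
+	int main(void)
+	{
+		int i;
+
+		/* suspend takes three non-boot CPUs down, resume restores them */
+		for (i = 0; i < 3; i++)
+			cpu_event(DOWN_PREPARE_FROZEN);
+		for (i = 0; i < 3; i++)
+			cpu_event(ONLINE_FROZEN);
+		return 0;
+	}
+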
+Signed-off-by: Srivatsa S. Bhat
+Signed-off-by: Peter Zijlstra
+Cc: Linus Torvalds
+Cc: Andrew Morton
+Cc: Thomas Gleixner
+Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Preeti U Murthy
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/cpuset.c     |    3 +++
+ kernel/sched/core.c |   40 ++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 39 insertions(+), 4 deletions(-)
+
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2054,6 +2054,9 @@ static void scan_for_empty_cpusets(struc
+  * (of no affect) on systems that are actively using CPU hotplug
+  * but making no active use of cpusets.
+  *
++ * The only exception to this is suspend/resume, where we don't
++ * modify cpusets at all.
++ *
+  * This routine ensures that top_cpuset.cpus_allowed tracks
+  * cpu_active_mask on each CPU hotplug (cpuhp) event.
+  *
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7103,34 +7103,66 @@ match2:
+ 	mutex_unlock(&sched_domains_mutex);
+ }
+
++static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
++
+ /*
+  * Update cpusets according to cpu_active mask.  If cpusets are
+  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+  * around partition_sched_domains().
++ *
++ * If we come here as part of a suspend/resume, don't touch cpusets because we
++ * want to restore it back to its original state upon resume anyway.
+  */
+ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
+ 			     void *hcpu)
+ {
+-	switch (action & ~CPU_TASKS_FROZEN) {
++	switch (action) {
++	case CPU_ONLINE_FROZEN:
++	case CPU_DOWN_FAILED_FROZEN:
++
++		/*
++		 * num_cpus_frozen tracks how many CPUs are involved in suspend
++		 * resume sequence. As long as this is not the last online
++		 * operation in the resume sequence, just build a single sched
++		 * domain, ignoring cpusets.
++		 */
++		num_cpus_frozen--;
++		if (likely(num_cpus_frozen)) {
++			partition_sched_domains(1, NULL, NULL);
++			break;
++		}
++
++		/*
++		 * This is the last CPU online operation. So fall through and
++		 * restore the original sched domains by considering the
++		 * cpuset configurations.
++		 */
++
+ 	case CPU_ONLINE:
+ 	case CPU_DOWN_FAILED:
+ 		cpuset_update_active_cpus();
+-		return NOTIFY_OK;
++		break;
+ 	default:
+ 		return NOTIFY_DONE;
+ 	}
++	return NOTIFY_OK;
+ }
+
+ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
+ 			       void *hcpu)
+ {
+-	switch (action & ~CPU_TASKS_FROZEN) {
++	switch (action) {
+ 	case CPU_DOWN_PREPARE:
+ 		cpuset_update_active_cpus();
+-		return NOTIFY_OK;
++		break;
++	case CPU_DOWN_PREPARE_FROZEN:
++		num_cpus_frozen++;
++		partition_sched_domains(1, NULL, NULL);
++		break;
+ 	default:
+ 		return NOTIFY_DONE;
+ 	}
++	return NOTIFY_OK;
+ }
+
+ void __init sched_init_smp(void)
diff --git a/queue-3.5/efi-build-efi-stub-with-efi-appropriate-options.patch b/queue-3.5/efi-build-efi-stub-with-efi-appropriate-options.patch
new file mode 100644
index 00000000000..9b3321c0c7b
--- /dev/null
+++ b/queue-3.5/efi-build-efi-stub-with-efi-appropriate-options.patch
@@ -0,0 +1,35 @@
+From 9dead5bbb825d7c25c0400e61de83075046322d0 Mon Sep 17 00:00:00 2001
+From: Matthew Garrett
+Date: Thu, 26 Jul 2012 18:00:00 -0400
+Subject: efi: Build EFI stub with EFI-appropriate options
+
+From: Matthew Garrett
+
+commit 9dead5bbb825d7c25c0400e61de83075046322d0 upstream.
+
+We can't assume the presence of the red zone while we're still in a boot
+services environment, so we should build with -fno-red-zone to avoid
+problems. Change the size of wchar at the same time to make string handling
+simpler.
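+
+A minimal sketch of what can go wrong (illustrative only, not part of the
+patch): on x86-64, a leaf function may keep its locals in the 128-byte
+"red zone" below %rsp without adjusting the stack pointer. Firmware or
+interrupt code that pushes onto the same stack during boot services can
+silently overwrite those locals; building with -mno-red-zone makes the
+compiler reserve the space explicitly instead.
+
+	/* build with: gcc -O2 -mno-red-zone -c redzone.c */
+	int checksum(const unsigned char *p, int len)
+	{
+		int acc[4] = { 0, 0, 0, 0 };	/* without -mno-red-zone this
+						 * array may live below %rsp
+						 * in a leaf function */
+		int i;
+
+		for (i = 0; i < len; i++)
+			acc[i & 3] += p[i];
+		return acc[0] + acc[1] + acc[2] + acc[3];
+	}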
+
+Signed-off-by: Matthew Garrett
+Signed-off-by: Matt Fleming
+Acked-by: Josh Boyer
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/boot/compressed/Makefile |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/boot/compressed/Makefile
++++ b/arch/x86/boot/compressed/Makefile
+@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)
+ 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+ 	$(obj)/piggy.o
+
++$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
++$(obj)/efi_stub_$(BITS).o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
++
+ ifeq ($(CONFIG_EFI_STUB), y)
+ 	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+ endif
diff --git a/queue-3.5/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch b/queue-3.5/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch
new file mode 100644
index 00000000000..093d218e54f
--- /dev/null
+++ b/queue-3.5/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch
@@ -0,0 +1,45 @@
+From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001
+From: Seiji Aguchi
+Date: Tue, 24 Jul 2012 13:27:23 +0000
+Subject: efi: initialize efi.runtime_version to make query_variable_info/update_capsule workable
+
+From: Seiji Aguchi
+
+commit d6cf86d8f23253225fe2a763d627ecf7dfee9dae upstream.
+
+A value of efi.runtime_version is checked before calling
+update_capsule()/query_variable_info() as follows.
+But it isn't initialized anywhere.
+
+
+static efi_status_t virt_efi_query_variable_info(u32 attr,
+						 u64 *storage_space,
+						 u64 *remaining_space,
+						 u64 *max_variable_size)
+{
+	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+		return EFI_UNSUPPORTED;
+
+
+This patch initializes a value of efi.runtime_version at boot time.
+
+Signed-off-by: Seiji Aguchi
+Acked-by: Matthew Garrett
+Signed-off-by: Matt Fleming
+Signed-off-by: Ivan Hu
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/platform/efi/efi.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/platform/efi/efi.c
++++ b/arch/x86/platform/efi/efi.c
+@@ -890,6 +890,7 @@ void __init efi_enter_virtual_mode(void)
+ 	 *
+ 	 * Call EFI services through wrapper functions.
+ 	 */
++	efi.runtime_version = efi_systab.fw_revision;
+ 	efi.get_time = virt_efi_get_time;
+ 	efi.set_time = virt_efi_set_time;
+ 	efi.get_wakeup_time = virt_efi_get_wakeup_time;
diff --git a/queue-3.5/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch b/queue-3.5/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch
new file mode 100644
index 00000000000..8b60b300add
--- /dev/null
+++ b/queue-3.5/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch
@@ -0,0 +1,54 @@
+From 00442ad04a5eac08a98255697c510e708f6082e2 Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Mon, 8 Oct 2012 16:29:20 -0700
+Subject: mempolicy: fix a memory corruption by refcount imbalance in alloc_pages_vma()
+
+From: Mel Gorman
+
+commit 00442ad04a5eac08a98255697c510e708f6082e2 upstream.
+
+Commit cc9a6c877661 ("cpuset: mm: reduce large amounts of memory barrier
+related damage v3") introduced a potential memory corruption.
+shmem_alloc_page() uses a pseudo vma and it has one significant unique
+combination, vma->vm_ops=NULL and vma->policy->flags & MPOL_F_SHARED.
+
+get_vma_policy() does NOT increase a policy ref when vma->vm_ops=NULL
+and mpol_cond_put() DOES decrease a policy ref when a policy has
+MPOL_F_SHARED.  Therefore, when a cpuset update race occurs,
+alloc_pages_vma() falls in 'goto retry_cpuset' path, decrements the
+reference count and frees the policy prematurely.
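+
+The imbalance is easy to see in stripped-down form (hypothetical types and
+names, for illustration only): the "get" side skips the reference for a
+bare vma->vm_policy, while the "put" side drops one for any policy marked
+shared, so the shmem pseudo-vma underflows the count on the retry path.
+
+	#include <assert.h>
+
+	struct policy { int refcnt; int shared; };
+
+	/* like get_vma_policy() before the fix: only the ->get_policy
+	 * path (via_vm_ops) takes a reference */
+	static struct policy *get_policy(struct policy *p, int via_vm_ops)
+	{
+		if (via_vm_ops)
+			p->refcnt++;
+		return p;
+	}
+
+	/* like mpol_cond_put(): drops a reference for any shared policy */
+	static void cond_put(struct policy *p)
+	{
+		if (p->shared)
+			p->refcnt--;
+	}
+
+	int main(void)
+	{
+		struct policy shm = { 1, 1 };	/* shared, one true owner */
+
+		/* shmem pseudo-vma: vm_ops == NULL but MPOL_F_SHARED set */
+		cond_put(get_policy(&shm, 0));
+		assert(shm.refcnt == 0);	/* owner's ref gone: premature free */
+		return 0;
+	}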
+
+Signed-off-by: KOSAKI Motohiro
+Signed-off-by: Mel Gorman
+Reviewed-by: Christoph Lameter
+Cc: Josh Boyer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mempolicy.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1552,8 +1552,18 @@ struct mempolicy *get_vma_policy(struct
+ 									addr);
+ 		if (vpol)
+ 			pol = vpol;
+-	} else if (vma->vm_policy)
++	} else if (vma->vm_policy) {
+ 		pol = vma->vm_policy;
++
++		/*
++		 * shmem_alloc_page() passes MPOL_F_SHARED policy with
++		 * a pseudo vma whose vma->vm_ops=NULL. Take a reference
++		 * count on these policies which will be dropped by
++		 * mpol_cond_put() later
++		 */
++		if (mpol_needs_cond_ref(pol))
++			mpol_get(pol);
++	}
+ 	}
+ 	if (!pol)
+ 		pol = &default_policy;
diff --git a/queue-3.5/mempolicy-fix-a-race-in-shared_policy_replace.patch b/queue-3.5/mempolicy-fix-a-race-in-shared_policy_replace.patch
new file mode 100644
index 00000000000..05bdfc1948c
--- /dev/null
+++ b/queue-3.5/mempolicy-fix-a-race-in-shared_policy_replace.patch
@@ -0,0 +1,151 @@
+From b22d127a39ddd10d93deee3d96e643657ad53a49 Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Mon, 8 Oct 2012 16:29:17 -0700
+Subject: mempolicy: fix a race in shared_policy_replace()
+
+From: Mel Gorman
+
+commit b22d127a39ddd10d93deee3d96e643657ad53a49 upstream.
+
+shared_policy_replace() use of sp_alloc() is unsafe.  1) sp_node cannot
+be dereferenced if sp->lock is not held and 2) another thread can modify
+sp_node between spin_unlock for allocating a new sp node and next
+spin_lock.  The bug was introduced before 2.6.12-rc2.
+
+Kosaki's original patch for this problem was to allocate an sp node and
+policy within shared_policy_replace and initialise it when the lock is
+reacquired.  I was not keen on this approach because it partially
+duplicates sp_alloc().  As the paths where sp->lock is taken are not that
+performance critical this patch converts sp->lock to sp->mutex so it can
+sleep when calling sp_alloc().
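+
+As a userspace analogue of the locking change (an illustrative sketch
+with hypothetical names, not the kernel code): with a spinlock the
+allocation had to happen in an unlock/relock window that another thread
+could race with; holding a mutex across the possibly-sleeping allocation
+keeps the lookup and the insertion atomic with respect to other writers.
+
+	#include <pthread.h>
+	#include <stdlib.h>
+
+	struct node { long start, end; struct node *next; };
+
+	static pthread_mutex_t sp_mutex = PTHREAD_MUTEX_INITIALIZER;
+	static struct node *ranges;
+
+	static int replace_range(long start, long end)
+	{
+		struct node *n;
+		int ret = 0;
+
+		pthread_mutex_lock(&sp_mutex);
+		/* ... trim or drop overlapping nodes in `ranges` ... */
+		n = malloc(sizeof(*n));	/* may block: safe under a mutex */
+		if (!n) {
+			ret = -1;	/* no unlock/retry window needed */
+			goto out;
+		}
+		n->start = start;
+		n->end = end;
+		n->next = ranges;
+		ranges = n;
+	out:
+		pthread_mutex_unlock(&sp_mutex);
+		return ret;
+	}
+
+	int main(void)
+	{
+		return replace_range(0, 4096);
+	}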
+
+[kosaki.motohiro@jp.fujitsu.com: Original patch]
+Signed-off-by: Mel Gorman
+Acked-by: KOSAKI Motohiro
+Reviewed-by: Christoph Lameter
+Cc: Josh Boyer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/mempolicy.h |    2 +-
+ mm/mempolicy.c            |   37 ++++++++++++++++---------------------
+ 2 files changed, 17 insertions(+), 22 deletions(-)
+
+--- a/include/linux/mempolicy.h
++++ b/include/linux/mempolicy.h
+@@ -188,7 +188,7 @@ struct sp_node {
+
+ struct shared_policy {
+ 	struct rb_root root;
+-	spinlock_t lock;
++	struct mutex mutex;
+ };
+
+ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2077,7 +2077,7 @@ bool __mpol_equal(struct mempolicy *a, s
+  */
+
+ /* lookup first element intersecting start-end */
+-/* Caller holds sp->lock */
++/* Caller holds sp->mutex */
+ static struct sp_node *
+ sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
+ {
+@@ -2141,13 +2141,13 @@ mpol_shared_policy_lookup(struct shared_
+
+ 	if (!sp->root.rb_node)
+ 		return NULL;
+-	spin_lock(&sp->lock);
++	mutex_lock(&sp->mutex);
+ 	sn = sp_lookup(sp, idx, idx+1);
+ 	if (sn) {
+ 		mpol_get(sn->policy);
+ 		pol = sn->policy;
+ 	}
+-	spin_unlock(&sp->lock);
++	mutex_unlock(&sp->mutex);
+ 	return pol;
+ }
+
+@@ -2187,10 +2187,10 @@ static struct sp_node *sp_alloc(unsigned
+ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
+ 				 unsigned long end, struct sp_node *new)
+ {
+-	struct sp_node *n, *new2 = NULL;
++	struct sp_node *n;
++	int ret = 0;
+
+-restart:
+-	spin_lock(&sp->lock);
++	mutex_lock(&sp->mutex);
+ 	n = sp_lookup(sp, start, end);
+ 	/* Take care of old policies in the same range. */
+ 	while (n && n->start < end) {
+@@ -2203,16 +2203,14 @@ restart:
+ 		} else {
+ 			/* Old policy spanning whole new range. */
+ 			if (n->end > end) {
++				struct sp_node *new2;
++				new2 = sp_alloc(end, n->end, n->policy);
+ 				if (!new2) {
+-					spin_unlock(&sp->lock);
+-					new2 = sp_alloc(end, n->end, n->policy);
+-					if (!new2)
+-						return -ENOMEM;
+-					goto restart;
++					ret = -ENOMEM;
++					goto out;
+ 				}
+ 				n->end = start;
+ 				sp_insert(sp, new2);
+-				new2 = NULL;
+ 				break;
+ 			} else
+ 				n->end = start;
+@@ -2223,12 +2221,9 @@ restart:
+ 	}
+ 	if (new)
+ 		sp_insert(sp, new);
+-	spin_unlock(&sp->lock);
+-	if (new2) {
+-		mpol_put(new2->policy);
+-		kmem_cache_free(sn_cache, new2);
+-	}
+-	return 0;
++out:
++	mutex_unlock(&sp->mutex);
++	return ret;
+ }
+
+ /**
+@@ -2246,7 +2241,7 @@ void mpol_shared_policy_init(struct shar
+ 	int ret;
+
+ 	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
+-	spin_lock_init(&sp->lock);
++	mutex_init(&sp->mutex);
+
+ 	if (mpol) {
+ 		struct vm_area_struct pvma;
+@@ -2312,7 +2307,7 @@ void mpol_free_shared_policy(struct shar
+
+ 	if (!p->root.rb_node)
+ 		return;
+-	spin_lock(&p->lock);
++	mutex_lock(&p->mutex);
+ 	next = rb_first(&p->root);
+ 	while (next) {
+ 		n = rb_entry(next, struct sp_node, nd);
+@@ -2321,7 +2316,7 @@ void mpol_free_shared_policy(struct shar
+ 		mpol_put(n->policy);
+ 		kmem_cache_free(sn_cache, n);
+ 	}
+-	spin_unlock(&p->lock);
++	mutex_unlock(&p->mutex);
+ }
+
+ /* assumes fs == KERNEL_DS */
diff --git a/queue-3.5/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch b/queue-3.5/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch
new file mode 100644
index 00000000000..689df6e9f79
--- /dev/null
+++ b/queue-3.5/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch
@@ -0,0 +1,72 @@
+From 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Mon, 8 Oct 2012 16:29:19 -0700
+Subject: mempolicy: fix refcount leak in mpol_set_shared_policy()
+
+From: KOSAKI Motohiro
+
+commit 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 upstream.
+
+When shared_policy_replace() fails to allocate, new->policy is not freed
+correctly by mpol_set_shared_policy().  The problem is that shared
+mempolicy code directly calls kmem_cache_free() in multiple places where
+it is easy to make a mistake.
+
+This patch creates an sp_free wrapper function and uses it.  The bug was
+introduced pre-git age (IOW, before 2.6.12-rc2).
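+
+The wrapper idea in miniature (hypothetical types, illustration only): an
+sp_node owns a reference on its policy, so freeing the node with the
+allocator alone leaks that reference.  Funnelling every discard through
+one helper pairs the put with the free so no call site can forget it.
+
+	#include <stdlib.h>
+
+	struct policy { int refcnt; };
+	struct sp_node { struct policy *policy; };
+
+	static void policy_put(struct policy *p)
+	{
+		if (--p->refcnt == 0)
+			free(p);
+	}
+
+	/* every path that discards a node goes through here */
+	static void sp_free(struct sp_node *n)
+	{
+		policy_put(n->policy);	/* drop the node's reference... */
+		free(n);		/* ...then free the container */
+	}
+
+	int main(void)
+	{
+		struct policy *p = malloc(sizeof(*p));
+		struct sp_node *n = malloc(sizeof(*n));
+
+		p->refcnt = 1;
+		n->policy = p;
+		sp_free(n);	/* frees both: no leak */
+		return 0;
+	}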
+
+[mgorman@suse.de: Edited changelog]
+Signed-off-by: KOSAKI Motohiro
+Signed-off-by: Mel Gorman
+Reviewed-by: Christoph Lameter
+Cc: Josh Boyer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mempolicy.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2151,12 +2151,17 @@ mpol_shared_policy_lookup(struct shared_
+ 	return pol;
+ }
+
++static void sp_free(struct sp_node *n)
++{
++	mpol_put(n->policy);
++	kmem_cache_free(sn_cache, n);
++}
++
+ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
+ {
+ 	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
+ 	rb_erase(&n->nd, &sp->root);
+-	mpol_put(n->policy);
+-	kmem_cache_free(sn_cache, n);
++	sp_free(n);
+ }
+
+ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+@@ -2295,7 +2300,7 @@ int mpol_set_shared_policy(struct shared
+ 	}
+ 	err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
+ 	if (err && new)
+-		kmem_cache_free(sn_cache, new);
++		sp_free(new);
+ 	return err;
+ }
+
+@@ -2312,9 +2317,7 @@ void mpol_free_shared_policy(struct shar
+ 	while (next) {
+ 		n = rb_entry(next, struct sp_node, nd);
+ 		next = rb_next(&n->nd);
+-		rb_erase(&n->nd, &p->root);
+-		mpol_put(n->policy);
+-		kmem_cache_free(sn_cache, n);
++		sp_delete(p, n);
+ 	}
+ 	mutex_unlock(&p->mutex);
+ }
diff --git a/queue-3.5/mempolicy-remove-mempolicy-sharing.patch b/queue-3.5/mempolicy-remove-mempolicy-sharing.patch
new file mode 100644
index 00000000000..619d5d518f4
--- /dev/null
+++ b/queue-3.5/mempolicy-remove-mempolicy-sharing.patch
@@ -0,0 +1,187 @@
+From 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Mon, 8 Oct 2012 16:29:16 -0700
+Subject: mempolicy: remove mempolicy sharing
+
+From: KOSAKI Motohiro
+
+commit 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 upstream.
+
+Dave Jones' system call fuzz testing tool "trinity" triggered the
+following error with slab debugging enabled
+
+  =============================================================================
+  BUG numa_policy (Not tainted): Poison overwritten
+  -----------------------------------------------------------------------------
+
+  INFO: 0xffff880146498250-0xffff880146498250. First byte 0x6a instead of 0x6b
+  INFO: Allocated in mpol_new+0xa3/0x140 age=46310 cpu=6 pid=32154
+   __slab_alloc+0x3d3/0x445
+   kmem_cache_alloc+0x29d/0x2b0
+   mpol_new+0xa3/0x140
+   sys_mbind+0x142/0x620
+   system_call_fastpath+0x16/0x1b
+
+  INFO: Freed in __mpol_put+0x27/0x30 age=46268 cpu=6 pid=32154
+   __slab_free+0x2e/0x1de
+   kmem_cache_free+0x25a/0x260
+   __mpol_put+0x27/0x30
+   remove_vma+0x68/0x90
+   exit_mmap+0x118/0x140
+   mmput+0x73/0x110
+   exit_mm+0x108/0x130
+   do_exit+0x162/0xb90
+   do_group_exit+0x4f/0xc0
+   sys_exit_group+0x17/0x20
+   system_call_fastpath+0x16/0x1b
+
+  INFO: Slab 0xffffea0005192600 objects=27 used=27 fp=0x (null) flags=0x20000000004080
+  INFO: Object 0xffff880146498250 @offset=592 fp=0xffff88014649b9d0
+
+The problem is that the structure is being prematurely freed due to a
+reference count imbalance.  In the following case mbind(addr, len) should
+replace the memory policies of both vma1 and vma2 and thus they will
+come to share the same mempolicy and the new mempolicy will have the
+MPOL_F_SHARED flag.
+
+  +-------------------+-------------------+
+  | vma1              | vma2(shmem)       |
+  +-------------------+-------------------+
+  |                   |
+  addr                                addr+len
+
+alloc_pages_vma() uses get_vma_policy() and mpol_cond_put() pair for
+maintaining the mempolicy reference count.  The current rule is that
+get_vma_policy() only increments refcount for shmem VMA and
+mpol_cond_put() only decrements refcount if the policy has
+MPOL_F_SHARED.
+
+In the above case, vma1 is not a shmem vma and vma->policy has MPOL_F_SHARED!
+The reference count will be decreased even though it was not increased
+whenever alloc_page_vma() is called.  This has been broken since commit
+[52cd3b07: mempolicy: rework mempolicy Reference Counting] in 2008.
+
+There is another serious bug with the sharing of memory policies.
+Currently, mempolicy rebind logic (it is called from cpuset rebinding)
+ignores a refcount of mempolicy and overrides it forcibly.  Thus, any
+mempolicy sharing may cause mempolicy corruption.  The bug was
+introduced by commit [68860ec1: cpusets: automatic numa mempolicy
+rebinding].
+
+Ideally, the shared policy handling would be rewritten to either
+properly handle COW of the policy structures or at least reference count
+MPOL_F_SHARED based exclusively on information within the policy.
+However, this patch takes the easier approach of disabling any policy
+sharing between VMAs.  Each new range allocated with sp_alloc will
+allocate a new policy, set the reference count to 1 and drop the
+reference count of the old policy.  This increases the memory footprint
+but is not expected to be a major problem as mbind() is unlikely to be
+used for fine-grained ranges.  It is also inefficient because it means
+we allocate a new policy even in cases where mbind_range() could use the
+new_policy passed to it.  However, it is more straightforward and the
+change should be invisible to the user.
+
+[mgorman@suse.de: Edited changelog]
+Reported-by: Dave Jones ,
+Cc: Christoph Lameter ,
+Reviewed-by: Christoph Lameter
+Signed-off-by: KOSAKI Motohiro
+Signed-off-by: Mel Gorman
+Cc: Josh Boyer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mempolicy.c |   52 ++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 38 insertions(+), 14 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -607,24 +607,39 @@ check_range(struct mm_struct *mm, unsign
+ 	return first;
+ }
+
+-/* Apply policy to a single VMA */
+-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
++/*
++ * Apply policy to a single VMA
++ * This must be called with the mmap_sem held for writing.
++ */
++static int vma_replace_policy(struct vm_area_struct *vma,
++						struct mempolicy *pol)
+ {
+-	int err = 0;
+-	struct mempolicy *old = vma->vm_policy;
++	int err;
++	struct mempolicy *old;
++	struct mempolicy *new;
+
+ 	pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+ 		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
+ 		 vma->vm_ops, vma->vm_file,
+ 		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+
+-	if (vma->vm_ops && vma->vm_ops->set_policy)
++	new = mpol_dup(pol);
++	if (IS_ERR(new))
++		return PTR_ERR(new);
++
++	if (vma->vm_ops && vma->vm_ops->set_policy) {
+ 		err = vma->vm_ops->set_policy(vma, new);
+-	if (!err) {
+-		mpol_get(new);
+-		vma->vm_policy = new;
+-		mpol_put(old);
++		if (err)
++			goto err_out;
+ 	}
++
++	old = vma->vm_policy;
++	vma->vm_policy = new;	/* protected by mmap_sem */
++	mpol_put(old);
++
++	return 0;
++ err_out:
++	mpol_put(new);
+ 	return err;
+ }
+
+@@ -676,7 +691,7 @@ static int mbind_range(struct mm_struct
+ 			if (err)
+ 				goto out;
+ 		}
+-		err = policy_vma(vma, new_pol);
++		err = vma_replace_policy(vma, new_pol);
+ 		if (err)
+ 			goto out;
+ 	}
+@@ -2147,15 +2162,24 @@ static void sp_delete(struct shared_poli
+ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+ 				struct mempolicy *pol)
+ {
+-	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
++	struct sp_node *n;
++	struct mempolicy *newpol;
+
++	n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+ 	if (!n)
+ 		return NULL;
++
++	newpol = mpol_dup(pol);
++	if (IS_ERR(newpol)) {
++		kmem_cache_free(sn_cache, n);
++		return NULL;
++	}
++	newpol->flags |= MPOL_F_SHARED;
++
+ 	n->start = start;
+ 	n->end = end;
+-	mpol_get(pol);
+-	pol->flags |= MPOL_F_SHARED;	/* for unref */
+-	n->policy = pol;
++	n->policy = newpol;
++
+ 	return n;
+ }
+
diff --git a/queue-3.5/revert-drm-i915-correctly-order-the-ring-init-sequence.patch b/queue-3.5/revert-drm-i915-correctly-order-the-ring-init-sequence.patch
new file mode 100644
index 00000000000..f73a350bda1
--- /dev/null
+++ b/queue-3.5/revert-drm-i915-correctly-order-the-ring-init-sequence.patch
@@ -0,0 +1,48 @@
+From 622ecb9ace2ec006b58f6779732961945ea303ca Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman
+Date: Tue, 7 Aug 2012 09:54:14 +0200
+Subject: Revert "drm/i915: correctly order the ring init sequence"
+
+From: Greg Kroah-Hartman
+
+This reverts 622ecb9ace2ec006b58f6779732961945ea303ca which is
+commit 0d8957c8a90bbb5d34fab9a304459448a5131e06 upstream.
+
+We may only start to set up the new register values after having
+confirmed that the ring is truly off.  Otherwise the hw might lose the
+newly written register values.  This is caught later on in the init
+sequence, when we check whether the register writes have stuck.
+
+Reported-by: Andreas Sturmlechner
+Cc: Jani Nikula
+Cc: Yang Guang
+Cc: Daniel Vetter
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/gpu/drm/i915/intel_ringbuffer.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
+@@ -280,6 +280,8 @@ static int init_ring_common(struct intel
+ 	I915_WRITE_HEAD(ring, 0);
+ 	ring->write_tail(ring, 0);
+
++	/* Initialize the ring. */
++	I915_WRITE_START(ring, obj->gtt_offset);
+ 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
+
+ 	/* G45 ring initialization fails to reset head to zero */
+@@ -305,11 +307,6 @@ static int init_ring_common(struct intel
+ 		}
+ 	}
+
+-	/* Initialize the ring. This must happen _after_ we've cleared the ring
+-	 * registers with the above sequence (the readback of the HEAD registers
+-	 * also enforces ordering), otherwise the hw might lose the new ring
+-	 * register values. */
+-	I915_WRITE_START(ring, obj->gtt_offset);
+ 	I915_WRITE_CTL(ring,
+ 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
+ 			| RING_VALID);
diff --git a/queue-3.5/revert-kvm-vmx-fix-kvm_set_sregs-with-big-real-mode-segments.patch b/queue-3.5/revert-kvm-vmx-fix-kvm_set_sregs-with-big-real-mode-segments.patch
new file mode 100644
index 00000000000..819e75e6cf0
--- /dev/null
+++ b/queue-3.5/revert-kvm-vmx-fix-kvm_set_sregs-with-big-real-mode-segments.patch
@@ -0,0 +1,128 @@
+From avi@redhat.com Wed Oct 10 09:48:14 2012
+From: Avi Kivity
+Date: Tue, 9 Oct 2012 12:30:01 +0200
+Subject: Revert "KVM: VMX: Fix KVM_SET_SREGS with big real mode segments"
+To: stable@vger.kernel.org
+Cc: Marcelo Tosatti
+Message-ID: <1349778601-13662-1-git-send-email-avi@redhat.com>
+
+From: Avi Kivity
+
+This reverts commit b398aa3174cd.  It
+causes a regression with some versions of ipxe.  3.6 has other fixes
+that cannot be backported to 3.5, so it is better to live with the
+minor problem that b398aa3174cd fixes.
+
+https://bugzilla.redhat.com/show_bug.cgi?id=854983
+
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx.c |   74 ++++++++++-------------------------------------------
+ 1 file changed, 14 insertions(+), 60 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -615,10 +615,6 @@ static void kvm_cpu_vmxon(u64 addr);
+ static void kvm_cpu_vmxoff(void);
+ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+-static void vmx_set_segment(struct kvm_vcpu *vcpu,
+-			    struct kvm_segment *var, int seg);
+-static void vmx_get_segment(struct kvm_vcpu *vcpu,
+-			    struct kvm_segment *var, int seg);
+
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+@@ -2767,7 +2763,6 @@ static void enter_rmode(struct kvm_vcpu
+ {
+ 	unsigned long flags;
+ 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+-	struct kvm_segment var;
+
+ 	if (enable_unrestricted_guest)
+ 		return;
+@@ -2811,23 +2806,20 @@ static void enter_rmode(struct kvm_vcpu
+ 	if (emulate_invalid_guest_state)
+ 		goto continue_rmode;
+
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
+-
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
+-
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
+-
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
+-
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
+-
+-	vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
+-	vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
++	vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
++	vmcs_write32(GUEST_SS_LIMIT, 0xffff);
++	vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
++
++	vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
++	vmcs_write32(GUEST_CS_LIMIT, 0xffff);
++	if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
++		vmcs_writel(GUEST_CS_BASE, 0xf0000);
++	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
++
++	fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
++	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
++	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
++	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+
+ continue_rmode:
+ 	kvm_mmu_reset_context(vcpu);
+@@ -3230,44 +3222,6 @@ static void vmx_set_segment(struct kvm_v
+
+ 	vmcs_write32(sf->ar_bytes, ar);
+ 	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+-
+-	/*
+-	 * Fix segments for real mode guest in hosts that don't have
+-	 * "unrestricted_mode" or it was disabled.
+-	 * This is done to allow migration of the guests from hosts with
+-	 * unrestricted guest like Westmere to older host that don't have
+-	 * unrestricted guest like Nehelem.
+-	 */
+-	if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
+-		switch (seg) {
+-		case VCPU_SREG_CS:
+-			vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+-			vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+-			if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+-				vmcs_writel(GUEST_CS_BASE, 0xf0000);
+-			vmcs_write16(GUEST_CS_SELECTOR,
+-				     vmcs_readl(GUEST_CS_BASE) >> 4);
+-			break;
+-		case VCPU_SREG_ES:
+-			fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+-			break;
+-		case VCPU_SREG_DS:
+-			fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+-			break;
+-		case VCPU_SREG_GS:
+-			fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+-			break;
+-		case VCPU_SREG_FS:
+-			fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+-			break;
+-		case VCPU_SREG_SS:
+-			vmcs_write16(GUEST_SS_SELECTOR,
+-				     vmcs_readl(GUEST_SS_BASE) >> 4);
+-			vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+-			vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+-			break;
+-		}
+-	}
+ }
+
+ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
diff --git a/queue-3.5/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch b/queue-3.5/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch
new file mode 100644
index 00000000000..8e29019f702
--- /dev/null
+++ b/queue-3.5/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch
@@ -0,0 +1,85 @@
+From 8d34694c1abf29df1f3c7317936b7e3e2e308d9b Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Mon, 8 Oct 2012 16:29:14 -0700
+Subject: revert "mm: mempolicy: Let vma_merge and vma_split handle vma->vm_policy linkages"
+
+From: KOSAKI Motohiro
+
+commit 8d34694c1abf29df1f3c7317936b7e3e2e308d9b upstream.
+
+Commit 05f144a0d5c2 ("mm: mempolicy: Let vma_merge and vma_split handle
+vma->vm_policy linkages") removed the vma->vm_policy update code, but
+such updates are the purpose of mbind_range().  Now, mbind_range() is
+virtually a no-op and while it does not allow memory corruption it is
+not the right fix.  This patch is a revert.
+
+[mgorman@suse.de: Edited changelog]
+Signed-off-by: KOSAKI Motohiro
+Signed-off-by: Mel Gorman
+Cc: Christoph Lameter
+Cc: Josh Boyer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mempolicy.c |   41 ++++++++++++++++++++++++-----------------
+ 1 file changed, 24 insertions(+), 17 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -607,6 +607,27 @@ check_range(struct mm_struct *mm, unsign
+ 	return first;
+ }
+
++/* Apply policy to a single VMA */
++static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
++{
++	int err = 0;
++	struct mempolicy *old = vma->vm_policy;
++
++	pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
++		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
++		 vma->vm_ops, vma->vm_file,
++		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
++
++	if (vma->vm_ops && vma->vm_ops->set_policy)
++		err = vma->vm_ops->set_policy(vma, new);
++	if (!err) {
++		mpol_get(new);
++		vma->vm_policy = new;
++		mpol_put(old);
++	}
++	return err;
++}
++
+ /* Step 2: apply policy to a range and do splits. */
+ static int mbind_range(struct mm_struct *mm, unsigned long start,
+ 		       unsigned long end, struct mempolicy *new_pol)
+@@ -655,23 +676,9 @@ static int mbind_range(struct mm_struct
+ 			if (err)
+ 				goto out;
+ 		}
+-
+-		/*
+-		 * Apply policy to a single VMA. The reference counting of
+-		 * policy for vma_policy linkages has already been handled by
+-		 * vma_merge and split_vma as necessary. If this is a shared
+-		 * policy then ->set_policy will increment the reference count
+-		 * for an sp node.
+-		 */
+-		pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+-			vma->vm_start, vma->vm_end, vma->vm_pgoff,
+-			vma->vm_ops, vma->vm_file,
+-			vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+-		if (vma->vm_ops && vma->vm_ops->set_policy) {
+-			err = vma->vm_ops->set_policy(vma, new_pol);
+-			if (err)
+-				goto out;
+-		}
++		err = policy_vma(vma, new_pol);
++		if (err)
++			goto out;
+ 	}
+
+ out:
diff --git a/queue-3.5/series b/queue-3.5/series
index c745df10d5c..1ab6865d467 100644
--- a/queue-3.5/series
+++ b/queue-3.5/series
@@ -109,3 +109,13 @@ cris-add-missing-rcu-idle-apis-on-idle-loop.patch
 m32r-add-missing-rcu-idle-apis-on-idle-loop.patch
 score-add-missing-rcu-idle-apis-on-idle-loop.patch
 rcu-fix-day-one-dyntick-idle-stall-warning-bug.patch
+revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch
+mempolicy-remove-mempolicy-sharing.patch
+mempolicy-fix-a-race-in-shared_policy_replace.patch
+mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch
+mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch
+revert-kvm-vmx-fix-kvm_set_sregs-with-big-real-mode-segments.patch
+efi-build-efi-stub-with-efi-appropriate-options.patch
+efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch
+cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch
+revert-drm-i915-correctly-order-the-ring-init-sequence.patch