From: Greg Kroah-Hartman Date: Wed, 10 Oct 2012 01:06:12 +0000 (+0900) Subject: 3.0-stable patches X-Git-Tag: v3.0.46~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=70146c90b6cafb5fd2819f8d89c4decbca3d12d7;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0-stable patches added patches: cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch mempolicy-fix-a-race-in-shared_policy_replace.patch mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch mempolicy-remove-mempolicy-sharing.patch revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch --- diff --git a/queue-3.0/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch b/queue-3.0/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch new file mode 100644 index 00000000000..23b7aaac56f --- /dev/null +++ b/queue-3.0/cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch @@ -0,0 +1,146 @@ +From d35be8bab9b0ce44bed4b9453f86ebf64062721e Mon Sep 17 00:00:00 2001 +From: "Srivatsa S. Bhat" +Date: Thu, 24 May 2012 19:46:26 +0530 +Subject: CPU hotplug, cpusets, suspend: Don't modify cpusets during suspend/resume + +From: "Srivatsa S. Bhat" + +commit d35be8bab9b0ce44bed4b9453f86ebf64062721e upstream. + +In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed +masks as and when necessary to ensure that the tasks belonging to the cpusets +have some place (online CPUs) to run on. And regular CPU hotplug is +destructive in the sense that the kernel doesn't remember the original cpuset +configurations set by the user, across hotplug operations. 
+ +However, suspend/resume (which uses CPU hotplug) is a special case in which +the kernel has the responsibility to restore the system (during resume), to +exactly the same state it was in before suspend. + +In order to achieve that, do the following: + +1. Don't modify cpusets during suspend/resume. At all. + In particular, don't move the tasks from one cpuset to another, and + don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets + during the CPU hotplug operations that are carried out in the + suspend/resume path. + +2. However, cpusets and sched domains are related. We just want to avoid + altering cpusets alone. So, to keep the sched domains updated, build + a single sched domain (containing all active cpus) during each of the + CPU hotplug operations carried out in s/r path, effectively ignoring + the cpusets' cpus_allowed masks. + + (Since userspace is frozen while doing all this, it will go unnoticed.) + +3. During the last CPU online operation during resume, build the sched + domains by looking up the (unaltered) cpusets' cpus_allowed masks. + That will bring back the system to the same original state as it was in + before suspend. + +Ultimately, this will not only solve the cpuset problem related to suspend +resume (ie., restores the cpusets to exactly what it was before suspend, by +not touching it at all) but also speeds up suspend/resume because we avoid +running cpuset update code for every CPU being offlined/onlined. + +Signed-off-by: Srivatsa S. 
Bhat +Signed-off-by: Peter Zijlstra +Cc: Linus Torvalds +Cc: Andrew Morton +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Preeti U Murthy +Signed-off-by: Greg Kroah-Hartman + + +--- + kernel/cpuset.c | 3 +++ + kernel/sched.c | 40 ++++++++++++++++++++++++++++++++++++---- + 2 files changed, 39 insertions(+), 4 deletions(-) + +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -2080,6 +2080,9 @@ static void scan_for_empty_cpusets(struc + * (of no affect) on systems that are actively using CPU hotplug + * but making no active use of cpusets. + * ++ * The only exception to this is suspend/resume, where we don't ++ * modify cpusets at all. ++ * + * This routine ensures that top_cpuset.cpus_allowed tracks + * cpu_active_mask on each CPU hotplug (cpuhp) event. + * +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7777,34 +7777,66 @@ int __init sched_create_sysfs_power_savi + } + #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ + ++static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ ++ + /* + * Update cpusets according to cpu_active mask. If cpusets are + * disabled, cpuset_update_active_cpus() becomes a simple wrapper + * around partition_sched_domains(). ++ * ++ * If we come here as part of a suspend/resume, don't touch cpusets because we ++ * want to restore it back to its original state upon resume anyway. + */ + static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- switch (action & ~CPU_TASKS_FROZEN) { ++ switch (action) { ++ case CPU_ONLINE_FROZEN: ++ case CPU_DOWN_FAILED_FROZEN: ++ ++ /* ++ * num_cpus_frozen tracks how many CPUs are involved in suspend ++ * resume sequence. As long as this is not the last online ++ * operation in the resume sequence, just build a single sched ++ * domain, ignoring cpusets. 
++ */ ++ num_cpus_frozen--; ++ if (likely(num_cpus_frozen)) { ++ partition_sched_domains(1, NULL, NULL); ++ break; ++ } ++ ++ /* ++ * This is the last CPU online operation. So fall through and ++ * restore the original sched domains by considering the ++ * cpuset configurations. ++ */ ++ + case CPU_ONLINE: + case CPU_DOWN_FAILED: + cpuset_update_active_cpus(); +- return NOTIFY_OK; ++ break; + default: + return NOTIFY_DONE; + } ++ return NOTIFY_OK; + } + + static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- switch (action & ~CPU_TASKS_FROZEN) { ++ switch (action) { + case CPU_DOWN_PREPARE: + cpuset_update_active_cpus(); +- return NOTIFY_OK; ++ break; ++ case CPU_DOWN_PREPARE_FROZEN: ++ num_cpus_frozen++; ++ partition_sched_domains(1, NULL, NULL); ++ break; + default: + return NOTIFY_DONE; + } ++ return NOTIFY_OK; + } + + static int update_runtime(struct notifier_block *nfb, diff --git a/queue-3.0/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch b/queue-3.0/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch new file mode 100644 index 00000000000..52c561370df --- /dev/null +++ b/queue-3.0/efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch @@ -0,0 +1,45 @@ +From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001 +From: Seiji Aguchi +Date: Tue, 24 Jul 2012 13:27:23 +0000 +Subject: efi: initialize efi.runtime_version to make query_variable_info/update_capsule workable + +From: Seiji Aguchi + +commit d6cf86d8f23253225fe2a763d627ecf7dfee9dae upstream. + +A value of efi.runtime_version is checked before calling +update_capsule()/query_variable_info() as follows. +But it isn't initialized anywhere. 
+ + +static efi_status_t virt_efi_query_variable_info(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + +This patch initializes a value of efi.runtime_version at boot time. + +Signed-off-by: Seiji Aguchi +Acked-by: Matthew Garrett +Signed-off-by: Matt Fleming +Signed-off-by: Ivan Hu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/platform/efi/efi.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/platform/efi/efi.c ++++ b/arch/x86/platform/efi/efi.c +@@ -659,6 +659,7 @@ void __init efi_enter_virtual_mode(void) + * + * Call EFI services through wrapper functions. + */ ++ efi.runtime_version = efi_systab.fw_revision; + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; diff --git a/queue-3.0/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch b/queue-3.0/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch new file mode 100644 index 00000000000..930f3eb3a07 --- /dev/null +++ b/queue-3.0/mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch @@ -0,0 +1,54 @@ +From 00442ad04a5eac08a98255697c510e708f6082e2 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 8 Oct 2012 16:29:20 -0700 +Subject: mempolicy: fix a memory corruption by refcount imbalance in alloc_pages_vma() + +From: Mel Gorman + +commit 00442ad04a5eac08a98255697c510e708f6082e2 upstream. + +Commit cc9a6c877661 ("cpuset: mm: reduce large amounts of memory barrier +related damage v3") introduced a potential memory corruption. +shmem_alloc_page() uses a pseudo vma and it has one significant unique +combination, vma->vm_ops=NULL and vma->policy->flags & MPOL_F_SHARED. + +get_vma_policy() does NOT increase a policy ref when vma->vm_ops=NULL +and mpol_cond_put() DOES decrease a policy ref when a policy has +MPOL_F_SHARED. 
Therefore, when a cpuset update race occurs, +alloc_pages_vma() falls in 'goto retry_cpuset' path, decrements the +reference count and frees the policy prematurely. + +Signed-off-by: KOSAKI Motohiro +Signed-off-by: Mel Gorman +Reviewed-by: Christoph Lameter +Cc: Josh Boyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1511,8 +1511,18 @@ struct mempolicy *get_vma_policy(struct + addr); + if (vpol) + pol = vpol; +- } else if (vma->vm_policy) ++ } else if (vma->vm_policy) { + pol = vma->vm_policy; ++ ++ /* ++ * shmem_alloc_page() passes MPOL_F_SHARED policy with ++ * a pseudo vma whose vma->vm_ops=NULL. Take a reference ++ * count on these policies which will be dropped by ++ * mpol_cond_put() later ++ */ ++ if (mpol_needs_cond_ref(pol)) ++ mpol_get(pol); ++ } + } + if (!pol) + pol = &default_policy; diff --git a/queue-3.0/mempolicy-fix-a-race-in-shared_policy_replace.patch b/queue-3.0/mempolicy-fix-a-race-in-shared_policy_replace.patch new file mode 100644 index 00000000000..bf58bb8df75 --- /dev/null +++ b/queue-3.0/mempolicy-fix-a-race-in-shared_policy_replace.patch @@ -0,0 +1,151 @@ +From b22d127a39ddd10d93deee3d96e643657ad53a49 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 8 Oct 2012 16:29:17 -0700 +Subject: mempolicy: fix a race in shared_policy_replace() + +From: Mel Gorman + +commit b22d127a39ddd10d93deee3d96e643657ad53a49 upstream. + +shared_policy_replace() use of sp_alloc() is unsafe. 1) sp_node cannot +be dereferenced if sp->lock is not held and 2) another thread can modify +sp_node between spin_unlock for allocating a new sp node and next +spin_lock. The bug was introduced before 2.6.12-rc2. + +Kosaki's original patch for this problem was to allocate an sp node and +policy within shared_policy_replace and initialise it when the lock is +reacquired. 
I was not keen on this approach because it partially +duplicates sp_alloc(). As the paths where sp->lock is taken are not that +performance critical, this patch converts sp->lock to sp->mutex so it can +sleep when calling sp_alloc(). + +[kosaki.motohiro@jp.fujitsu.com: Original patch] +Signed-off-by: Mel Gorman +Acked-by: KOSAKI Motohiro +Reviewed-by: Christoph Lameter +Cc: Josh Boyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mempolicy.h | 2 +- + mm/mempolicy.c | 37 ++++++++++++++++--------------------- + 2 files changed, 17 insertions(+), 22 deletions(-) + +--- a/include/linux/mempolicy.h ++++ b/include/linux/mempolicy.h +@@ -188,7 +188,7 @@ struct sp_node { + + struct shared_policy { + struct rb_root root; +- spinlock_t lock; ++ struct mutex mutex; + }; + + void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2021,7 +2021,7 @@ int __mpol_equal(struct mempolicy *a, st + */ + + /* lookup first element intersecting start-end */ +-/* Caller holds sp->lock */ ++/* Caller holds sp->mutex */ + static struct sp_node * + sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) + { +@@ -2085,13 +2085,13 @@ mpol_shared_policy_lookup(struct shared_ + + if (!sp->root.rb_node) + return NULL; +- spin_lock(&sp->lock); ++ mutex_lock(&sp->mutex); + sn = sp_lookup(sp, idx, idx+1); + if (sn) { + mpol_get(sn->policy); + pol = sn->policy; + } +- spin_unlock(&sp->lock); ++ mutex_unlock(&sp->mutex); + return pol; + } + +@@ -2131,10 +2131,10 @@ static struct sp_node *sp_alloc(unsigned + static int shared_policy_replace(struct shared_policy *sp, unsigned long start, + unsigned long end, struct sp_node *new) + { +- struct sp_node *n, *new2 = NULL; ++ struct sp_node *n; ++ int ret = 0; + +-restart: +- spin_lock(&sp->lock); ++ mutex_lock(&sp->mutex); + n = sp_lookup(sp, start, end); + /* Take care of old policies in the same 
range. */ + while (n && n->start < end) { +@@ -2147,16 +2147,14 @@ restart: + } else { + /* Old policy spanning whole new range. */ + if (n->end > end) { ++ struct sp_node *new2; ++ new2 = sp_alloc(end, n->end, n->policy); + if (!new2) { +- spin_unlock(&sp->lock); +- new2 = sp_alloc(end, n->end, n->policy); +- if (!new2) +- return -ENOMEM; +- goto restart; ++ ret = -ENOMEM; ++ goto out; + } + n->end = start; + sp_insert(sp, new2); +- new2 = NULL; + break; + } else + n->end = start; +@@ -2167,12 +2165,9 @@ restart: + } + if (new) + sp_insert(sp, new); +- spin_unlock(&sp->lock); +- if (new2) { +- mpol_put(new2->policy); +- kmem_cache_free(sn_cache, new2); +- } +- return 0; ++out: ++ mutex_unlock(&sp->mutex); ++ return ret; + } + + /** +@@ -2190,7 +2185,7 @@ void mpol_shared_policy_init(struct shar + int ret; + + sp->root = RB_ROOT; /* empty tree == default mempolicy */ +- spin_lock_init(&sp->lock); ++ mutex_init(&sp->mutex); + + if (mpol) { + struct vm_area_struct pvma; +@@ -2256,7 +2251,7 @@ void mpol_free_shared_policy(struct shar + + if (!p->root.rb_node) + return; +- spin_lock(&p->lock); ++ mutex_lock(&p->mutex); + next = rb_first(&p->root); + while (next) { + n = rb_entry(next, struct sp_node, nd); +@@ -2265,7 +2260,7 @@ void mpol_free_shared_policy(struct shar + mpol_put(n->policy); + kmem_cache_free(sn_cache, n); + } +- spin_unlock(&p->lock); ++ mutex_unlock(&p->mutex); + } + + /* assumes fs == KERNEL_DS */ diff --git a/queue-3.0/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch b/queue-3.0/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch new file mode 100644 index 00000000000..8db79ba1559 --- /dev/null +++ b/queue-3.0/mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch @@ -0,0 +1,72 @@ +From 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Mon, 8 Oct 2012 16:29:19 -0700 +Subject: mempolicy: fix refcount leak in mpol_set_shared_policy() + +From: KOSAKI Motohiro + +commit 
63f74ca21f1fad36d075e063f06dcc6d39fe86b2 upstream. + +When shared_policy_replace() fails to allocate, new->policy is not freed +correctly by mpol_set_shared_policy(). The problem is that shared +mempolicy code directly calls kmem_cache_free() in multiple places where +it is easy to make a mistake. + +This patch creates an sp_free wrapper function and uses it. The bug was +introduced pre-git age (IOW, before 2.6.12-rc2). + +[mgorman@suse.de: Edited changelog] +Signed-off-by: KOSAKI Motohiro +Signed-off-by: Mel Gorman +Reviewed-by: Christoph Lameter +Cc: Josh Boyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2095,12 +2095,17 @@ mpol_shared_policy_lookup(struct shared_ + return pol; + } + ++static void sp_free(struct sp_node *n) ++{ ++ mpol_put(n->policy); ++ kmem_cache_free(sn_cache, n); ++} ++ + static void sp_delete(struct shared_policy *sp, struct sp_node *n) + { + pr_debug("deleting %lx-l%lx\n", n->start, n->end); + rb_erase(&n->nd, &sp->root); +- mpol_put(n->policy); +- kmem_cache_free(sn_cache, n); ++ sp_free(n); + } + + static struct sp_node *sp_alloc(unsigned long start, unsigned long end, +@@ -2239,7 +2244,7 @@ int mpol_set_shared_policy(struct shared + } + err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new); + if (err && new) +- kmem_cache_free(sn_cache, new); ++ sp_free(new); + return err; + } + +@@ -2256,9 +2261,7 @@ void mpol_free_shared_policy(struct shar + while (next) { + n = rb_entry(next, struct sp_node, nd); + next = rb_next(&n->nd); +- rb_erase(&n->nd, &p->root); +- mpol_put(n->policy); +- kmem_cache_free(sn_cache, n); ++ sp_delete(p, n); + } + mutex_unlock(&p->mutex); + } diff --git a/queue-3.0/mempolicy-remove-mempolicy-sharing.patch b/queue-3.0/mempolicy-remove-mempolicy-sharing.patch new file mode 100644 index 
00000000000..cb1b2b27bf4 --- /dev/null +++ b/queue-3.0/mempolicy-remove-mempolicy-sharing.patch @@ -0,0 +1,187 @@ +From 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Mon, 8 Oct 2012 16:29:16 -0700 +Subject: mempolicy: remove mempolicy sharing + +From: KOSAKI Motohiro + +commit 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 upstream. + +Dave Jones' system call fuzz testing tool "trinity" triggered the +following bug error with slab debugging enabled + + ============================================================================= + BUG numa_policy (Not tainted): Poison overwritten + ----------------------------------------------------------------------------- + + INFO: 0xffff880146498250-0xffff880146498250. First byte 0x6a instead of 0x6b + INFO: Allocated in mpol_new+0xa3/0x140 age=46310 cpu=6 pid=32154 + __slab_alloc+0x3d3/0x445 + kmem_cache_alloc+0x29d/0x2b0 + mpol_new+0xa3/0x140 + sys_mbind+0x142/0x620 + system_call_fastpath+0x16/0x1b + + INFO: Freed in __mpol_put+0x27/0x30 age=46268 cpu=6 pid=32154 + __slab_free+0x2e/0x1de + kmem_cache_free+0x25a/0x260 + __mpol_put+0x27/0x30 + remove_vma+0x68/0x90 + exit_mmap+0x118/0x140 + mmput+0x73/0x110 + exit_mm+0x108/0x130 + do_exit+0x162/0xb90 + do_group_exit+0x4f/0xc0 + sys_exit_group+0x17/0x20 + system_call_fastpath+0x16/0x1b + + INFO: Slab 0xffffea0005192600 objects=27 used=27 fp=0x (null) flags=0x20000000004080 + INFO: Object 0xffff880146498250 @offset=592 fp=0xffff88014649b9d0 + +The problem is that the structure is being prematurely freed due to a +reference count imbalance. In the following case mbind(addr, len) should +replace the memory policies of both vma1 and vma2 and thus they will +become to share the same mempolicy and the new mempolicy will have the +MPOL_F_SHARED flag. 
+ + +-------------------+-------------------+ + | vma1 | vma2(shmem) | + +-------------------+-------------------+ + | | + addr addr+len + +alloc_pages_vma() uses get_vma_policy() and mpol_cond_put() pair for +maintaining the mempolicy reference count. The current rule is that +get_vma_policy() only increments refcount for shmem VMA and +mpol_cond_put() only decrements refcount if the policy has +MPOL_F_SHARED. + +In above case, vma1 is not shmem vma and vma->policy has MPOL_F_SHARED! +The reference count will be decreased even though it was not increased +whenever alloc_page_vma() is called. This has been broken since commit +[52cd3b07: mempolicy: rework mempolicy Reference Counting] in 2008. + +There is another serious bug with the sharing of memory policies. +Currently, mempolicy rebind logic (it is called from cpuset rebinding) +ignores a refcount of mempolicy and overrides it forcibly. Thus, any +mempolicy sharing may cause mempolicy corruption. The bug was +introduced by commit [68860ec1: cpusets: automatic numa mempolicy +rebinding]. + +Ideally, the shared policy handling would be rewritten to either +properly handle COW of the policy structures or at least reference count +MPOL_F_SHARED based exclusively on information within the policy. +However, this patch takes the easier approach of disabling any policy +sharing between VMAs. Each new range allocated with sp_alloc will +allocate a new policy, set the reference count to 1 and drop the +reference count of the old policy. This increases the memory footprint +but is not expected to be a major problem as mbind() is unlikely to be +used for fine-grained ranges. It is also inefficient because it means +we allocate a new policy even in cases where mbind_range() could use the +new_policy passed to it. However, it is more straight-forward and the +change should be invisible to the user. 
+ +[mgorman@suse.de: Edited changelog] +Reported-by: Dave Jones , +Cc: Christoph Lameter , +Reviewed-by: Christoph Lameter +Signed-off-by: KOSAKI Motohiro +Signed-off-by: Mel Gorman +Cc: Josh Boyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 52 ++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 38 insertions(+), 14 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -606,24 +606,39 @@ check_range(struct mm_struct *mm, unsign + return first; + } + +-/* Apply policy to a single VMA */ +-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) ++/* ++ * Apply policy to a single VMA ++ * This must be called with the mmap_sem held for writing. ++ */ ++static int vma_replace_policy(struct vm_area_struct *vma, ++ struct mempolicy *pol) + { +- int err = 0; +- struct mempolicy *old = vma->vm_policy; ++ int err; ++ struct mempolicy *old; ++ struct mempolicy *new; + + pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ? 
vma->vm_ops->set_policy : NULL); + +- if (vma->vm_ops && vma->vm_ops->set_policy) ++ new = mpol_dup(pol); ++ if (IS_ERR(new)) ++ return PTR_ERR(new); ++ ++ if (vma->vm_ops && vma->vm_ops->set_policy) { + err = vma->vm_ops->set_policy(vma, new); +- if (!err) { +- mpol_get(new); +- vma->vm_policy = new; +- mpol_put(old); ++ if (err) ++ goto err_out; + } ++ ++ old = vma->vm_policy; ++ vma->vm_policy = new; /* protected by mmap_sem */ ++ mpol_put(old); ++ ++ return 0; ++ err_out: ++ mpol_put(new); + return err; + } + +@@ -666,7 +681,7 @@ static int mbind_range(struct mm_struct + if (err) + goto out; + } +- err = policy_vma(vma, new_pol); ++ err = vma_replace_policy(vma, new_pol); + if (err) + goto out; + } +@@ -2091,15 +2106,24 @@ static void sp_delete(struct shared_poli + static struct sp_node *sp_alloc(unsigned long start, unsigned long end, + struct mempolicy *pol) + { +- struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL); ++ struct sp_node *n; ++ struct mempolicy *newpol; + ++ n = kmem_cache_alloc(sn_cache, GFP_KERNEL); + if (!n) + return NULL; ++ ++ newpol = mpol_dup(pol); ++ if (IS_ERR(newpol)) { ++ kmem_cache_free(sn_cache, n); ++ return NULL; ++ } ++ newpol->flags |= MPOL_F_SHARED; ++ + n->start = start; + n->end = end; +- mpol_get(pol); +- pol->flags |= MPOL_F_SHARED; /* for unref */ +- n->policy = pol; ++ n->policy = newpol; ++ + return n; + } + diff --git a/queue-3.0/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch b/queue-3.0/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch new file mode 100644 index 00000000000..d369cc6d272 --- /dev/null +++ b/queue-3.0/revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch @@ -0,0 +1,85 @@ +From 8d34694c1abf29df1f3c7317936b7e3e2e308d9b Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Mon, 8 Oct 2012 16:29:14 -0700 +Subject: revert "mm: mempolicy: Let vma_merge and vma_split handle vma->vm_policy linkages" + 
+From: KOSAKI Motohiro + +commit 8d34694c1abf29df1f3c7317936b7e3e2e308d9b upstream. + +Commit 05f144a0d5c2 ("mm: mempolicy: Let vma_merge and vma_split handle +vma->vm_policy linkages") removed vma->vm_policy updates code but it is +the purpose of mbind_range(). Now, mbind_range() is virtually a no-op +and while it does not allow memory corruption it is not the right fix. +This patch is a revert. + +[mgorman@suse.de: Edited changelog] +Signed-off-by: KOSAKI Motohiro +Signed-off-by: Mel Gorman +Cc: Christoph Lameter +Cc: Josh Boyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -606,6 +606,27 @@ check_range(struct mm_struct *mm, unsign + return first; + } + ++/* Apply policy to a single VMA */ ++static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) ++{ ++ int err = 0; ++ struct mempolicy *old = vma->vm_policy; ++ ++ pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", ++ vma->vm_start, vma->vm_end, vma->vm_pgoff, ++ vma->vm_ops, vma->vm_file, ++ vma->vm_ops ? vma->vm_ops->set_policy : NULL); ++ ++ if (vma->vm_ops && vma->vm_ops->set_policy) ++ err = vma->vm_ops->set_policy(vma, new); ++ if (!err) { ++ mpol_get(new); ++ vma->vm_policy = new; ++ mpol_put(old); ++ } ++ return err; ++} ++ + /* Step 2: apply policy to a range and do splits. */ + static int mbind_range(struct mm_struct *mm, unsigned long start, + unsigned long end, struct mempolicy *new_pol) +@@ -645,23 +666,9 @@ static int mbind_range(struct mm_struct + if (err) + goto out; + } +- +- /* +- * Apply policy to a single VMA. The reference counting of +- * policy for vma_policy linkages has already been handled by +- * vma_merge and split_vma as necessary. If this is a shared +- * policy then ->set_policy will increment the reference count +- * for an sp node. 
+- */ +- pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", +- vma->vm_start, vma->vm_end, vma->vm_pgoff, +- vma->vm_ops, vma->vm_file, +- vma->vm_ops ? vma->vm_ops->set_policy : NULL); +- if (vma->vm_ops && vma->vm_ops->set_policy) { +- err = vma->vm_ops->set_policy(vma, new_pol); +- if (err) +- goto out; +- } ++ err = policy_vma(vma, new_pol); ++ if (err) ++ goto out; + } + + out: diff --git a/queue-3.0/series b/queue-3.0/series index a1ae5aa69be..c397a925f26 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -72,3 +72,10 @@ r8169-config1-is-read-only-on-8168c-and-later.patch r8169-8168c-and-later-require-bit-0x20-to-be-set-in-config2-for-pme-signaling.patch r8169-fix-unsigned-int-wraparound-with-tso.patch r8169-call-netif_napi_del-at-errpaths-and-at-driver-unload.patch +revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch +mempolicy-remove-mempolicy-sharing.patch +mempolicy-fix-a-race-in-shared_policy_replace.patch +mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch +mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch +efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch +cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch