--- /dev/null
+From d35be8bab9b0ce44bed4b9453f86ebf64062721e Mon Sep 17 00:00:00 2001
+From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
+Date: Thu, 24 May 2012 19:46:26 +0530
+Subject: CPU hotplug, cpusets, suspend: Don't modify cpusets during suspend/resume
+
+From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
+
+commit d35be8bab9b0ce44bed4b9453f86ebf64062721e upstream.
+
+In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed
+masks as and when necessary to ensure that the tasks belonging to the cpusets
+have somewhere (online CPUs) to run. Regular CPU hotplug is destructive
+in the sense that the kernel doesn't remember the original cpuset
+configurations set by the user across hotplug operations.
+
+However, suspend/resume (which uses CPU hotplug) is a special case in which
+the kernel has the responsibility to restore the system (during resume), to
+exactly the same state it was in before suspend.
+
+In order to achieve that, do the following:
+
+1. Don't modify cpusets during suspend/resume. At all.
+ In particular, don't move the tasks from one cpuset to another, and
+ don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets
+ during the CPU hotplug operations that are carried out in the
+ suspend/resume path.
+
+2. However, cpusets and sched domains are related. We just want to avoid
+ altering cpusets alone. So, to keep the sched domains updated, build
+ a single sched domain (containing all active cpus) during each of the
+ CPU hotplug operations carried out in s/r path, effectively ignoring
+ the cpusets' cpus_allowed masks.
+
+ (Since userspace is frozen while doing all this, it will go unnoticed.)
+
+3. During the last CPU online operation during resume, build the sched
+ domains by looking up the (unaltered) cpusets' cpus_allowed masks.
+ That will bring back the system to the same original state as it was in
+ before suspend.
+
+Ultimately, this will not only solve the cpuset problem related to
+suspend/resume (i.e., restore the cpusets to exactly what they were
+before suspend, by not touching them at all) but also speed up
+suspend/resume, because we avoid running the cpuset update code for
+every CPU being offlined/onlined.
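+
+For reference, the suspend/resume path is distinguishable in a hotplug
+notifier because the actions issued on that path are the *_FROZEN
+variants, i.e. the plain actions with CPU_TASKS_FROZEN ORed in. A
+minimal sketch, for illustration only (my_cpu_callback is a
+hypothetical notifier, not part of this patch):
+
+<snip>
+static int my_cpu_callback(struct notifier_block *nfb,
+			   unsigned long action, void *hcpu)
+{
+	if (action & CPU_TASKS_FROZEN) {
+		/* suspend/resume path: userspace is frozen, so leave
+		 * the cpusets untouched */
+	} else {
+		/* regular hotplug: update cpusets as usual */
+	}
+	return NOTIFY_OK;
+}
+<snip>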
+
+Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cpuset.c | 3 +++
+ kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 39 insertions(+), 4 deletions(-)
+
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2065,6 +2065,9 @@ static void scan_for_empty_cpusets(struc
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
++ * The only exception to this is suspend/resume, where we don't
++ * modify cpusets at all.
++ *
+ * This routine ensures that top_cpuset.cpus_allowed tracks
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
+ *
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6937,34 +6937,66 @@ int __init sched_create_sysfs_power_savi
+ }
+ #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+
++static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */
++
+ /*
+ * Update cpusets according to cpu_active mask. If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
++ *
++ * If we come here as part of a suspend/resume, don't touch cpusets because we
++ * want to restore them to their original state upon resume anyway.
+ */
+ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+ {
+- switch (action & ~CPU_TASKS_FROZEN) {
++ switch (action) {
++ case CPU_ONLINE_FROZEN:
++ case CPU_DOWN_FAILED_FROZEN:
++
++ /*
++ * num_cpus_frozen tracks how many CPUs are involved in suspend
++ * resume sequence. As long as this is not the last online
++ * operation in the resume sequence, just build a single sched
++ * domain, ignoring cpusets.
++ */
++ num_cpus_frozen--;
++ if (likely(num_cpus_frozen)) {
++ partition_sched_domains(1, NULL, NULL);
++ break;
++ }
++
++ /*
++ * This is the last CPU online operation. So fall through and
++ * restore the original sched domains by considering the
++ * cpuset configurations.
++ */
++
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+ cpuset_update_active_cpus();
+- return NOTIFY_OK;
++ break;
+ default:
+ return NOTIFY_DONE;
+ }
++ return NOTIFY_OK;
+ }
+
+ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+ {
+- switch (action & ~CPU_TASKS_FROZEN) {
++ switch (action) {
+ case CPU_DOWN_PREPARE:
+ cpuset_update_active_cpus();
+- return NOTIFY_OK;
++ break;
++ case CPU_DOWN_PREPARE_FROZEN:
++ num_cpus_frozen++;
++ partition_sched_domains(1, NULL, NULL);
++ break;
+ default:
+ return NOTIFY_DONE;
+ }
++ return NOTIFY_OK;
+ }
+
+ void __init sched_init_smp(void)
--- /dev/null
+From 9dead5bbb825d7c25c0400e61de83075046322d0 Mon Sep 17 00:00:00 2001
+From: Matthew Garrett <mjg@redhat.com>
+Date: Thu, 26 Jul 2012 18:00:00 -0400
+Subject: efi: Build EFI stub with EFI-appropriate options
+
+From: Matthew Garrett <mjg@redhat.com>
+
+commit 9dead5bbb825d7c25c0400e61de83075046322d0 upstream.
+
+We can't rely on the red zone while we're still in a boot services
+environment, so we should build with -fno-red-zone to avoid stack
+corruption. Change the size of wchar at the same time to make string
+handling simpler.
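+
+Two background notes, for illustration only (not part of this patch):
+the x86-64 red zone is the 128 bytes below %rsp that the SysV ABI lets
+leaf functions use without adjusting the stack pointer, and firmware
+interrupt handlers that run during boot services may clobber it. And
+UEFI strings are 16-bit UCS-2, hence -fshort-wchar. A sketch of the
+latter assumption:
+
+<snip>
+typedef unsigned short efi_char16_t;	/* UCS-2, as used by UEFI */
+
+/* With -fshort-wchar, wchar_t is 16 bits, so this wide literal has
+ * the in-memory layout (2 bytes per character) that EFI expects: */
+static efi_char16_t hello[] = L"hello";
+<snip>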
+
+Signed-off-by: Matthew Garrett <mjg@redhat.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Acked-by: Josh Boyer <jwboyer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/boot/compressed/Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/boot/compressed/Makefile
++++ b/arch/x86/boot/compressed/Makefile
+@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)
+ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+ $(obj)/piggy.o
+
++$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
++$(obj)/efi_stub_$(BITS).o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
++
+ ifeq ($(CONFIG_EFI_STUB), y)
+ VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+ endif
--- /dev/null
+From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001
+From: Seiji Aguchi <seiji.aguchi@hds.com>
+Date: Tue, 24 Jul 2012 13:27:23 +0000
+Subject: efi: initialize efi.runtime_version to make query_variable_info/update_capsule workable
+
+From: Seiji Aguchi <seiji.aguchi@hds.com>
+
+commit d6cf86d8f23253225fe2a763d627ecf7dfee9dae upstream.
+
+The value of efi.runtime_version is checked before calling
+update_capsule()/query_variable_info(), as follows, but it is never
+initialized anywhere.
+
+<snip>
+static efi_status_t virt_efi_query_variable_info(u32 attr,
+ u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
+{
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+<snip>
+
+This patch initializes efi.runtime_version at boot time.
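+
+For reference, EFI system table revisions pack the major and minor
+numbers into a single 32-bit value, so the constant used in the check
+above is defined (in include/linux/efi.h) as:
+
+<snip>
+#define EFI_2_00_SYSTEM_TABLE_REVISION  ((2 << 16) | (00))
+<snip>
+
+and any revision value of 2.00 or later passes the comparison.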
+
+Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
+Acked-by: Matthew Garrett <mjg@redhat.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Ivan Hu <ivan.hu@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/efi/efi.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/platform/efi/efi.c
++++ b/arch/x86/platform/efi/efi.c
+@@ -890,6 +890,7 @@ void __init efi_enter_virtual_mode(void)
+ *
+ * Call EFI services through wrapper functions.
+ */
++ efi.runtime_version = efi_systab.fw_revision;
+ efi.get_time = virt_efi_get_time;
+ efi.set_time = virt_efi_set_time;
+ efi.get_wakeup_time = virt_efi_get_wakeup_time;
--- /dev/null
+From 00442ad04a5eac08a98255697c510e708f6082e2 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Mon, 8 Oct 2012 16:29:20 -0700
+Subject: mempolicy: fix a memory corruption by refcount imbalance in alloc_pages_vma()
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 00442ad04a5eac08a98255697c510e708f6082e2 upstream.
+
+Commit cc9a6c877661 ("cpuset: mm: reduce large amounts of memory barrier
+related damage v3") introduced a potential memory corruption.
+shmem_alloc_page() uses a pseudo vma with one significant, unique
+combination of attributes: vma->vm_ops=NULL and vma->policy->flags &
+MPOL_F_SHARED.
+
+get_vma_policy() does NOT take a policy reference when vma->vm_ops=NULL,
+but mpol_cond_put() DOES drop one when a policy has MPOL_F_SHARED.
+Therefore, when a cpuset update race occurs, alloc_pages_vma() falls
+into the 'goto retry_cpuset' path, decrements the reference count and
+frees the policy prematurely.
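+
+A sketch, for illustration only, of the asymmetric pairing that makes
+this possible:
+
+<snip>
+	pol = get_vma_policy(current, vma, addr);
+	/* takes a reference only when vma->vm_ops->get_policy exists */
+	...
+	mpol_cond_put(pol);
+	/* drops a reference whenever pol->flags & MPOL_F_SHARED is set */
+<snip>
+
+With the shmem pseudo vma the put side fires without a matching get, so
+each pass around the retry loop frees one reference too many.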
+
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Josh Boyer <jwboyer@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1532,8 +1532,18 @@ struct mempolicy *get_vma_policy(struct
+ addr);
+ if (vpol)
+ pol = vpol;
+- } else if (vma->vm_policy)
++ } else if (vma->vm_policy) {
+ pol = vma->vm_policy;
++
++ /*
++ * shmem_alloc_page() passes MPOL_F_SHARED policy with
++ * a pseudo vma whose vma->vm_ops=NULL. Take a reference
++ * count on these policies which will be dropped by
++ * mpol_cond_put() later
++ */
++ if (mpol_needs_cond_ref(pol))
++ mpol_get(pol);
++ }
+ }
+ if (!pol)
+ pol = &default_policy;
--- /dev/null
+From b22d127a39ddd10d93deee3d96e643657ad53a49 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Mon, 8 Oct 2012 16:29:17 -0700
+Subject: mempolicy: fix a race in shared_policy_replace()
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit b22d127a39ddd10d93deee3d96e643657ad53a49 upstream.
+
+shared_policy_replace()'s use of sp_alloc() is unsafe: 1) an sp_node
+cannot be dereferenced if sp->lock is not held, and 2) another thread can
+modify the sp_node between the spin_unlock (taken so a new sp node can
+be allocated) and the next spin_lock. The bug predates 2.6.12-rc2.
+
+Kosaki's original patch for this problem was to allocate an sp node and
+policy within shared_policy_replace() and initialise it when the lock is
+reacquired. I was not keen on this approach because it partially
+duplicates sp_alloc(). As the paths where sp->lock is taken are not that
+performance critical, this patch converts sp->lock to sp->mutex so it
+can sleep when calling sp_alloc().
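+
+The unsafe pattern, sketched for illustration (simplified from the code
+this patch removes):
+
+<snip>
+	spin_lock(&sp->lock);
+	n = sp_lookup(sp, start, end);	/* n is only valid under the lock */
+	...
+	spin_unlock(&sp->lock);		/* lock dropped to allocate ...   */
+	new2 = sp_alloc(end, n->end, n->policy); /* ... yet n is used here */
+	spin_lock(&sp->lock);		/* n may have changed meanwhile   */
+<snip>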
+
+[kosaki.motohiro@jp.fujitsu.com: Original patch]
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Josh Boyer <jwboyer@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mempolicy.h | 2 +-
+ mm/mempolicy.c | 37 ++++++++++++++++---------------------
+ 2 files changed, 17 insertions(+), 22 deletions(-)
+
+--- a/include/linux/mempolicy.h
++++ b/include/linux/mempolicy.h
+@@ -188,7 +188,7 @@ struct sp_node {
+
+ struct shared_policy {
+ struct rb_root root;
+- spinlock_t lock;
++ struct mutex mutex;
+ };
+
+ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2057,7 +2057,7 @@ bool __mpol_equal(struct mempolicy *a, s
+ */
+
+ /* lookup first element intersecting start-end */
+-/* Caller holds sp->lock */
++/* Caller holds sp->mutex */
+ static struct sp_node *
+ sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
+ {
+@@ -2121,13 +2121,13 @@ mpol_shared_policy_lookup(struct shared_
+
+ if (!sp->root.rb_node)
+ return NULL;
+- spin_lock(&sp->lock);
++ mutex_lock(&sp->mutex);
+ sn = sp_lookup(sp, idx, idx+1);
+ if (sn) {
+ mpol_get(sn->policy);
+ pol = sn->policy;
+ }
+- spin_unlock(&sp->lock);
++ mutex_unlock(&sp->mutex);
+ return pol;
+ }
+
+@@ -2167,10 +2167,10 @@ static struct sp_node *sp_alloc(unsigned
+ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
+ unsigned long end, struct sp_node *new)
+ {
+- struct sp_node *n, *new2 = NULL;
++ struct sp_node *n;
++ int ret = 0;
+
+-restart:
+- spin_lock(&sp->lock);
++ mutex_lock(&sp->mutex);
+ n = sp_lookup(sp, start, end);
+ /* Take care of old policies in the same range. */
+ while (n && n->start < end) {
+@@ -2183,16 +2183,14 @@ restart:
+ } else {
+ /* Old policy spanning whole new range. */
+ if (n->end > end) {
++ struct sp_node *new2;
++ new2 = sp_alloc(end, n->end, n->policy);
+ if (!new2) {
+- spin_unlock(&sp->lock);
+- new2 = sp_alloc(end, n->end, n->policy);
+- if (!new2)
+- return -ENOMEM;
+- goto restart;
++ ret = -ENOMEM;
++ goto out;
+ }
+ n->end = start;
+ sp_insert(sp, new2);
+- new2 = NULL;
+ break;
+ } else
+ n->end = start;
+@@ -2203,12 +2201,9 @@ restart:
+ }
+ if (new)
+ sp_insert(sp, new);
+- spin_unlock(&sp->lock);
+- if (new2) {
+- mpol_put(new2->policy);
+- kmem_cache_free(sn_cache, new2);
+- }
+- return 0;
++out:
++ mutex_unlock(&sp->mutex);
++ return ret;
+ }
+
+ /**
+@@ -2226,7 +2221,7 @@ void mpol_shared_policy_init(struct shar
+ int ret;
+
+ sp->root = RB_ROOT; /* empty tree == default mempolicy */
+- spin_lock_init(&sp->lock);
++ mutex_init(&sp->mutex);
+
+ if (mpol) {
+ struct vm_area_struct pvma;
+@@ -2292,7 +2287,7 @@ void mpol_free_shared_policy(struct shar
+
+ if (!p->root.rb_node)
+ return;
+- spin_lock(&p->lock);
++ mutex_lock(&p->mutex);
+ next = rb_first(&p->root);
+ while (next) {
+ n = rb_entry(next, struct sp_node, nd);
+@@ -2301,7 +2296,7 @@ void mpol_free_shared_policy(struct shar
+ mpol_put(n->policy);
+ kmem_cache_free(sn_cache, n);
+ }
+- spin_unlock(&p->lock);
++ mutex_unlock(&p->mutex);
+ }
+
+ /* assumes fs == KERNEL_DS */
--- /dev/null
+From 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Date: Mon, 8 Oct 2012 16:29:19 -0700
+Subject: mempolicy: fix refcount leak in mpol_set_shared_policy()
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 63f74ca21f1fad36d075e063f06dcc6d39fe86b2 upstream.
+
+When shared_policy_replace() fails to allocate, new->policy is not freed
+correctly by mpol_set_shared_policy(). The problem is that the shared
+mempolicy code directly calls kmem_cache_free() in multiple places, where
+it is easy to make a mistake.
+
+This patch creates an sp_free() wrapper function and uses it. The bug
+predates git (IOW, it was introduced before 2.6.12-rc2).
+
+[mgorman@suse.de: Editted changelog]
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Josh Boyer <jwboyer@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2131,12 +2131,17 @@ mpol_shared_policy_lookup(struct shared_
+ return pol;
+ }
+
++static void sp_free(struct sp_node *n)
++{
++ mpol_put(n->policy);
++ kmem_cache_free(sn_cache, n);
++}
++
+ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
+ {
+ pr_debug("deleting %lx-l%lx\n", n->start, n->end);
+ rb_erase(&n->nd, &sp->root);
+- mpol_put(n->policy);
+- kmem_cache_free(sn_cache, n);
++ sp_free(n);
+ }
+
+ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+@@ -2275,7 +2280,7 @@ int mpol_set_shared_policy(struct shared
+ }
+ err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
+ if (err && new)
+- kmem_cache_free(sn_cache, new);
++ sp_free(new);
+ return err;
+ }
+
+@@ -2292,9 +2297,7 @@ void mpol_free_shared_policy(struct shar
+ while (next) {
+ n = rb_entry(next, struct sp_node, nd);
+ next = rb_next(&n->nd);
+- rb_erase(&n->nd, &p->root);
+- mpol_put(n->policy);
+- kmem_cache_free(sn_cache, n);
++ sp_delete(p, n);
+ }
+ mutex_unlock(&p->mutex);
+ }
--- /dev/null
+From 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Date: Mon, 8 Oct 2012 16:29:16 -0700
+Subject: mempolicy: remove mempolicy sharing
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 upstream.
+
+Dave Jones' system call fuzz testing tool "trinity" triggered the
+following bug report with slab debugging enabled:
+
+ =============================================================================
+ BUG numa_policy (Not tainted): Poison overwritten
+ -----------------------------------------------------------------------------
+
+ INFO: 0xffff880146498250-0xffff880146498250. First byte 0x6a instead of 0x6b
+ INFO: Allocated in mpol_new+0xa3/0x140 age=46310 cpu=6 pid=32154
+ __slab_alloc+0x3d3/0x445
+ kmem_cache_alloc+0x29d/0x2b0
+ mpol_new+0xa3/0x140
+ sys_mbind+0x142/0x620
+ system_call_fastpath+0x16/0x1b
+
+ INFO: Freed in __mpol_put+0x27/0x30 age=46268 cpu=6 pid=32154
+ __slab_free+0x2e/0x1de
+ kmem_cache_free+0x25a/0x260
+ __mpol_put+0x27/0x30
+ remove_vma+0x68/0x90
+ exit_mmap+0x118/0x140
+ mmput+0x73/0x110
+ exit_mm+0x108/0x130
+ do_exit+0x162/0xb90
+ do_group_exit+0x4f/0xc0
+ sys_exit_group+0x17/0x20
+ system_call_fastpath+0x16/0x1b
+
+ INFO: Slab 0xffffea0005192600 objects=27 used=27 fp=0x (null) flags=0x20000000004080
+ INFO: Object 0xffff880146498250 @offset=592 fp=0xffff88014649b9d0
+
+The problem is that the structure is being prematurely freed due to a
+reference count imbalance. In the following case mbind(addr, len) should
+replace the memory policies of both vma1 and vma2; they thus come to
+share the same mempolicy, and the new mempolicy has the MPOL_F_SHARED
+flag set.
+
+ +-------------------+-------------------+
+ | vma1 | vma2(shmem) |
+ +-------------------+-------------------+
+ | |
+ addr addr+len
+
+alloc_pages_vma() uses the get_vma_policy()/mpol_cond_put() pair to
+maintain the mempolicy reference count. The current rule is that
+get_vma_policy() only increments the refcount for a shmem VMA, and
+mpol_cond_put() only decrements the refcount if the policy has
+MPOL_F_SHARED.
+
+In the above case, vma1 is not a shmem vma, yet vma->policy has
+MPOL_F_SHARED! The reference count is therefore decreased even though it
+was never increased, every time alloc_pages_vma() is called. This has
+been broken since commit [52cd3b07: mempolicy: rework mempolicy
+Reference Counting] in 2008.
+
+There is another serious bug with the sharing of memory policies.
+Currently, the mempolicy rebind logic (called from cpuset rebinding)
+ignores the refcount of the mempolicy and overrides it forcibly. Thus,
+any mempolicy sharing may cause mempolicy corruption. The bug was
+introduced by commit [68860ec1: cpusets: automatic numa mempolicy
+rebinding].
+
+Ideally, the shared policy handling would be rewritten to either
+properly handle COW of the policy structures or at least reference count
+MPOL_F_SHARED based exclusively on information within the policy.
+However, this patch takes the easier approach of disabling any policy
+sharing between VMAs. Each new range allocated with sp_alloc will
+allocate a new policy, set the reference count to 1 and drop the
+reference count of the old policy. This increases the memory footprint
+but is not expected to be a major problem as mbind() is unlikely to be
+used for fine-grained ranges. It is also inefficient because it means
+we allocate a new policy even in cases where mbind_range() could use the
+new_policy passed to it. However, it is more straightforward and the
+change should be invisible to the user.
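+
+A sketch of the new ownership rule, for illustration only: each sp node
+now holds a private copy of the policy rather than a shared reference:
+
+<snip>
+	newpol = mpol_dup(pol);		/* private copy, refcount == 1  */
+	if (IS_ERR(newpol))
+		return NULL;		/* duplication failed           */
+	newpol->flags |= MPOL_F_SHARED;	/* dropped by mpol_cond_put()   */
+	n->policy = newpol;		/* the node owns its own policy */
+<snip>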
+
+[mgorman@suse.de: Edited changelog]
+Reported-by: Dave Jones <davej@redhat.com>
+Cc: Christoph Lameter <cl@linux.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Cc: Josh Boyer <jwboyer@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 52 ++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 38 insertions(+), 14 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -607,24 +607,39 @@ check_range(struct mm_struct *mm, unsign
+ return first;
+ }
+
+-/* Apply policy to a single VMA */
+-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
++/*
++ * Apply policy to a single VMA
++ * This must be called with the mmap_sem held for writing.
++ */
++static int vma_replace_policy(struct vm_area_struct *vma,
++ struct mempolicy *pol)
+ {
+- int err = 0;
+- struct mempolicy *old = vma->vm_policy;
++ int err;
++ struct mempolicy *old;
++ struct mempolicy *new;
+
+ pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+ vma->vm_start, vma->vm_end, vma->vm_pgoff,
+ vma->vm_ops, vma->vm_file,
+ vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+
+- if (vma->vm_ops && vma->vm_ops->set_policy)
++ new = mpol_dup(pol);
++ if (IS_ERR(new))
++ return PTR_ERR(new);
++
++ if (vma->vm_ops && vma->vm_ops->set_policy) {
+ err = vma->vm_ops->set_policy(vma, new);
+- if (!err) {
+- mpol_get(new);
+- vma->vm_policy = new;
+- mpol_put(old);
++ if (err)
++ goto err_out;
+ }
++
++ old = vma->vm_policy;
++ vma->vm_policy = new; /* protected by mmap_sem */
++ mpol_put(old);
++
++ return 0;
++ err_out:
++ mpol_put(new);
+ return err;
+ }
+
+@@ -676,7 +691,7 @@ static int mbind_range(struct mm_struct
+ if (err)
+ goto out;
+ }
+- err = policy_vma(vma, new_pol);
++ err = vma_replace_policy(vma, new_pol);
+ if (err)
+ goto out;
+ }
+@@ -2127,15 +2142,24 @@ static void sp_delete(struct shared_poli
+ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+ struct mempolicy *pol)
+ {
+- struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
++ struct sp_node *n;
++ struct mempolicy *newpol;
+
++ n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+ if (!n)
+ return NULL;
++
++ newpol = mpol_dup(pol);
++ if (IS_ERR(newpol)) {
++ kmem_cache_free(sn_cache, n);
++ return NULL;
++ }
++ newpol->flags |= MPOL_F_SHARED;
++
+ n->start = start;
+ n->end = end;
+- mpol_get(pol);
+- pol->flags |= MPOL_F_SHARED; /* for unref */
+- n->policy = pol;
++ n->policy = newpol;
++
+ return n;
+ }
+
--- /dev/null
+From 8d34694c1abf29df1f3c7317936b7e3e2e308d9b Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Date: Mon, 8 Oct 2012 16:29:14 -0700
+Subject: revert "mm: mempolicy: Let vma_merge and vma_split handle vma->vm_policy linkages"
+
+From: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+
+commit 8d34694c1abf29df1f3c7317936b7e3e2e308d9b upstream.
+
+Commit 05f144a0d5c2 ("mm: mempolicy: Let vma_merge and vma_split handle
+vma->vm_policy linkages") removed the vma->vm_policy update code, but that
+update is the very purpose of mbind_range(). Now mbind_range() is
+virtually a no-op, and while it does not allow memory corruption, it is
+not the right fix. This patch is a revert.
+
+[mgorman@suse.de: Edited changelog]
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Josh Boyer <jwboyer@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 41 ++++++++++++++++++++++++-----------------
+ 1 file changed, 24 insertions(+), 17 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -607,6 +607,27 @@ check_range(struct mm_struct *mm, unsign
+ return first;
+ }
+
++/* Apply policy to a single VMA */
++static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
++{
++ int err = 0;
++ struct mempolicy *old = vma->vm_policy;
++
++ pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
++ vma->vm_start, vma->vm_end, vma->vm_pgoff,
++ vma->vm_ops, vma->vm_file,
++ vma->vm_ops ? vma->vm_ops->set_policy : NULL);
++
++ if (vma->vm_ops && vma->vm_ops->set_policy)
++ err = vma->vm_ops->set_policy(vma, new);
++ if (!err) {
++ mpol_get(new);
++ vma->vm_policy = new;
++ mpol_put(old);
++ }
++ return err;
++}
++
+ /* Step 2: apply policy to a range and do splits. */
+ static int mbind_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct mempolicy *new_pol)
+@@ -655,23 +676,9 @@ static int mbind_range(struct mm_struct
+ if (err)
+ goto out;
+ }
+-
+- /*
+- * Apply policy to a single VMA. The reference counting of
+- * policy for vma_policy linkages has already been handled by
+- * vma_merge and split_vma as necessary. If this is a shared
+- * policy then ->set_policy will increment the reference count
+- * for an sp node.
+- */
+- pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+- vma->vm_start, vma->vm_end, vma->vm_pgoff,
+- vma->vm_ops, vma->vm_file,
+- vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+- if (vma->vm_ops && vma->vm_ops->set_policy) {
+- err = vma->vm_ops->set_policy(vma, new_pol);
+- if (err)
+- goto out;
+- }
++ err = policy_vma(vma, new_pol);
++ if (err)
++ goto out;
+ }
+
+ out:
rcu-fix-day-one-dyntick-idle-stall-warning-bug.patch
r8169-config1-is-read-only-on-8168c-and-later.patch
r8169-8168c-and-later-require-bit-0x20-to-be-set-in-config2-for-pme-signaling.patch
+revert-mm-mempolicy-let-vma_merge-and-vma_split-handle-vma-vm_policy-linkages.patch
+mempolicy-remove-mempolicy-sharing.patch
+mempolicy-fix-a-race-in-shared_policy_replace.patch
+mempolicy-fix-refcount-leak-in-mpol_set_shared_policy.patch
+mempolicy-fix-a-memory-corruption-by-refcount-imbalance-in-alloc_pages_vma.patch
+efi-build-efi-stub-with-efi-appropriate-options.patch
+efi-initialize-efi.runtime_version-to-make-query_variable_info-update_capsule-workable.patch
+cpu-hotplug-cpusets-suspend-don-t-modify-cpusets-during-suspend-resume.patch