From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 28 Sep 2012 00:29:44 +0000 (-0700)
Subject: 3.0-stable patches
X-Git-Tag: v3.0.44~18
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=04759f7960367881fdc889ec3a89dfe2ecf2553c;p=thirdparty%2Fkernel%2Fstable-queue.git

3.0-stable patches

added patches:
	mce-fix-vm86-handling-for-32bit-mce-handler.patch
	sched-fix-ancient-race-in-do_exit.patch
---

diff --git a/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch b/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch
deleted file mode 100644
index 37600d8ee31..00000000000
--- a/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From 6bf6104573482570f7103d3e5ddf9574db43a363 Mon Sep 17 00:00:00 2001
-From: Francesco Ruggeri <fruggeri@aristanetworks.com>
-Date: Thu, 13 Sep 2012 15:03:37 -0700
-Subject: fs/proc: fix potential unregister_sysctl_table hang
-
-From: Francesco Ruggeri <fruggeri@aristanetworks.com>
-
-commit 6bf6104573482570f7103d3e5ddf9574db43a363 upstream.
-
-The unregister_sysctl_table() function hangs if all references to its
-ctl_table_header structure are not dropped.
-
-This can happen sometimes because of a leak in proc_sys_lookup():
-proc_sys_lookup() gets a reference to the table via lookup_entry(), but
-it does not release it when a subsequent call to sysctl_follow_link()
-fails.
-
-This patch fixes this leak by making sure the reference is always
-dropped on return.
-
-See also commit 076c3eed2c31 ("sysctl: Rewrite proc_sys_lookup
-introducing find_entry and lookup_entry") which reorganized this code in
-3.4.
-
-Tested in Linux 3.4.4.
-
-Signed-off-by: Francesco Ruggeri <fruggeri@aristanetworks.com>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- fs/proc/proc_sysctl.c |    5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
---- a/fs/proc/proc_sysctl.c
-+++ b/fs/proc/proc_sysctl.c
-@@ -113,9 +113,6 @@ static struct dentry *proc_sys_lookup(st
- 
- 	err = ERR_PTR(-ENOMEM);
- 	inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
--	if (h)
--		sysctl_head_finish(h);
--
- 	if (!inode)
- 		goto out;
- 
-@@ -124,6 +121,8 @@ static struct dentry *proc_sys_lookup(st
- 	d_add(dentry, inode);
- 
- out:
-+	if (h)
-+		sysctl_head_finish(h);
- 	sysctl_head_finish(head);
- 	return err;
- }
diff --git a/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch b/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch
new file mode 100644
index 00000000000..e1d5ae1195d
--- /dev/null
+++ b/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch
@@ -0,0 +1,67 @@
+From a129a7c84582629741e5fa6f40026efcd7a65bd4 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <andi@firstfloor.org>
+Date: Fri, 19 Nov 2010 13:16:22 +0100
+Subject: MCE: Fix vm86 handling for 32bit mce handler
+
+From: Andi Kleen <andi@firstfloor.org>
+
+commit a129a7c84582629741e5fa6f40026efcd7a65bd4 upstream.
+
+When running on 32bit the mce handler could misinterpret
+vm86 mode as ring 0. This can affect whether it does recovery
+or not; it was possible to panic when recovery was actually
+possible.
+
+Fix this by always forcing vm86 to look like ring 3.
+
+[ Backport to 3.0 notes:
+Things changed there slightly:
+   - move mce_get_rip() up. It fills up m->cs and m->ip values which
+     are evaluated in mce_severity(). Therefore move it up right before
+     the mce_severity call. This seems to be another bug in 3.0?
+   - Place the backport (fix m->cs in V86 case) to where m->cs gets
+     filled which is mce_get_rip() in 3.0
+]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -451,6 +451,13 @@ static inline void mce_get_rip(struct mc
+ 	if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
+ 		m->ip = regs->ip;
+ 		m->cs = regs->cs;
++		/*
++		 * When in VM86 mode make the cs look like ring 3
++		 * always. This is a lie, but it's better than passing
++		 * the additional vm86 bit around everywhere.
++		 */
++		if (v8086_mode(regs))
++			m->cs |= 3;
+ 	} else {
+ 		m->ip = 0;
+ 		m->cs = 0;
+@@ -988,6 +995,7 @@ void do_machine_check(struct pt_regs *re
+ 		 */
+ 		add_taint(TAINT_MACHINE_CHECK);
+ 
++		mce_get_rip(&m, regs);
+ 		severity = mce_severity(&m, tolerant, NULL);
+ 
+ 		/*
+@@ -1026,7 +1034,6 @@ void do_machine_check(struct pt_regs *re
+ 		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
+ 			mce_ring_add(m.addr >> PAGE_SHIFT);
+ 
+-		mce_get_rip(&m, regs);
+ 		mce_log(&m);
+ 
+ 		if (severity > worst) {
diff --git a/queue-3.0/sched-fix-ancient-race-in-do_exit.patch b/queue-3.0/sched-fix-ancient-race-in-do_exit.patch
new file mode 100644
index 00000000000..0e154c8e07b
--- /dev/null
+++ b/queue-3.0/sched-fix-ancient-race-in-do_exit.patch
@@ -0,0 +1,124 @@
+From b5740f4b2cb3503b436925eb2242bc3d75cd3dfe Mon Sep 17 00:00:00 2001
+From: Yasunori Goto <y-goto@jp.fujitsu.com>
+Date: Tue, 17 Jan 2012 17:40:31 +0900
+Subject: sched: Fix ancient race in do_exit()
+
+From: Yasunori Goto <y-goto@jp.fujitsu.com>
+
+commit b5740f4b2cb3503b436925eb2242bc3d75cd3dfe upstream.
+
+try_to_wake_up() has a problem which may change status from TASK_DEAD to
+TASK_RUNNING in race condition with SMI or guest environment of virtual
+machine. As a result, exited task is scheduled() again and panic occurs.
+
+Here is the sequence how it occurs:
+
+ ----------------------------------+-----------------------------
+                                   |
+            CPU A                  |             CPU B
+ ----------------------------------+-----------------------------
+
+TASK A calls exit()....
+
+do_exit()
+
+  exit_mm()
+    down_read(mm->mmap_sem);
+
+    rwsem_down_failed_common()
+
+      set TASK_UNINTERRUPTIBLE
+      set waiter.task <= task A
+      list_add to sem->wait_list
+           :
+      raw_spin_unlock_irq()
+      (I/O interruption occurred)
+
+                                      __rwsem_do_wake(mmap_sem)
+
+                                        list_del(&waiter->list);
+                                        waiter->task = NULL
+                                        wake_up_process(task A)
+                                          try_to_wake_up()
+                                             (task is still
+                                               TASK_UNINTERRUPTIBLE)
+                                              p->on_rq is still 1.)
+
+                                              ttwu_do_wakeup()
+                                                 (*A)
+                                                   :
+     (I/O interruption handler finished)
+
+      if (!waiter.task)
+          schedule() is not called
+          due to waiter.task is NULL.
+
+      tsk->state = TASK_RUNNING
+
+          :
+                                              check_preempt_curr();
+                                                  :
+  task->state = TASK_DEAD
+                                              (*B)
+                                        <---    set TASK_RUNNING (*C)
+
+     schedule()
+     (exit task is running again)
+     BUG_ON() is called!
+ --------------------------------------------------------
+
+The execution time between (*A) and (*B) is usually very short,
+because the interruption is disabled, and setting TASK_RUNNING at (*C)
+must be executed before setting TASK_DEAD.
+
+HOWEVER, if an SMI interrupts between (*A) and (*B),
+(*C) is able to execute AFTER setting TASK_DEAD!
+Then, exited task is scheduled again, and BUG_ON() is called....
+
+If the system works on guest system of virtual machine, the time
+between (*A) and (*B) may be also long due to scheduling of hypervisor,
+and same phenomenon can occur.
+
+By this patch, do_exit() waits for releasing task->pi_lock which is used
+in try_to_wake_up(). It guarantees the task becomes TASK_DEAD after
+waking up.
+
+Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20120117174031.3118.E1E9C6FF@jp.fujitsu.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Cc: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/exit.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1049,6 +1049,22 @@ NORET_TYPE void do_exit(long code)
+ 
+ 	preempt_disable();
+ 	exit_rcu();
++
++	/*
++	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
++	 * when the following two conditions become true.
++	 *   - There is race condition of mmap_sem (It is acquired by
++	 *     exit_mm()), and
++	 *   - SMI occurs before setting TASK_RUNNING.
++	 *     (or hypervisor of virtual machine switches to other guest)
++	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
++	 *
++	 * To avoid it, we have to wait for releasing tsk->pi_lock which
++	 * is held by try_to_wake_up()
++	 */
++	smp_mb();
++	raw_spin_unlock_wait(&tsk->pi_lock);
++
+ 	/* causes final put_task_struct in finish_task_switch(). */
+ 	tsk->state = TASK_DEAD;
+ 	schedule();
diff --git a/queue-3.0/series b/queue-3.0/series
index 861f9f67653..179d866bf0a 100644
--- a/queue-3.0/series
+++ b/queue-3.0/series
@@ -34,7 +34,6 @@ staging-vt6656-failed-connection-incorrect-endian.patch
 staging-r8712u-fix-bug-in-r8712_recv_indicatepkt.patch
 staging-comedi-das08-correct-ao-output-for-das08jr-16-ao.patch
 usb-option-replace-zte-k5006-z-entry-with-vendor-class-rule.patch
-fs-proc-fix-potential-unregister_sysctl_table-hang.patch
 perf_event-switch-to-internal-refcount-fix-race-with-close.patch
 mmc-mxs-mmc-fix-deadlock-in-sdio-irq-case.patch
 mmc-sdhci-esdhc-break-out-early-if-clock-is-0.patch
@@ -123,3 +122,5 @@ e1000e-disable-aspm-l1-on-82574.patch
 ubi-fix-a-horrible-memory-deallocation-bug.patch
 spi-mpc83xx-fix-null-pdata-dereference-bug.patch
 spi-spi-fsl-spi-reference-correct-pdata-in-fsl_spi_cs_control.patch
+sched-fix-ancient-race-in-do_exit.patch
+mce-fix-vm86-handling-for-32bit-mce-handler.patch