From: Greg Kroah-Hartman Date: Fri, 28 Sep 2012 00:29:44 +0000 (-0700) Subject: 3.0-stable patches X-Git-Tag: v3.0.44~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=04759f7960367881fdc889ec3a89dfe2ecf2553c;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0-stable patches added patches: mce-fix-vm86-handling-for-32bit-mce-handler.patch sched-fix-ancient-race-in-do_exit.patch --- diff --git a/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch b/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch deleted file mode 100644 index 37600d8ee31..00000000000 --- a/queue-3.0/fs-proc-fix-potential-unregister_sysctl_table-hang.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 6bf6104573482570f7103d3e5ddf9574db43a363 Mon Sep 17 00:00:00 2001 -From: Francesco Ruggeri -Date: Thu, 13 Sep 2012 15:03:37 -0700 -Subject: fs/proc: fix potential unregister_sysctl_table hang - -From: Francesco Ruggeri - -commit 6bf6104573482570f7103d3e5ddf9574db43a363 upstream. - -The unregister_sysctl_table() function hangs if all references to its -ctl_table_header structure are not dropped. - -This can happen sometimes because of a leak in proc_sys_lookup(): -proc_sys_lookup() gets a reference to the table via lookup_entry(), but -it does not release it when a subsequent call to sysctl_follow_link() -fails. - -This patch fixes this leak by making sure the reference is always -dropped on return. - -See also commit 076c3eed2c31 ("sysctl: Rewrite proc_sys_lookup -introducing find_entry and lookup_entry") which reorganized this code in -3.4. - -Tested in Linux 3.4.4. - -Signed-off-by: Francesco Ruggeri -Signed-off-by: Linus Torvalds -Signed-off-by: Greg Kroah-Hartman - ---- - fs/proc/proc_sysctl.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - ---- a/fs/proc/proc_sysctl.c -+++ b/fs/proc/proc_sysctl.c -@@ -113,9 +113,6 @@ static struct dentry *proc_sys_lookup(st - - err = ERR_PTR(-ENOMEM); - inode = proc_sys_make_inode(dir->i_sb, h ? 
h : head, p); -- if (h) -- sysctl_head_finish(h); -- - if (!inode) - goto out; - -@@ -124,6 +121,8 @@ static struct dentry *proc_sys_lookup(st - d_add(dentry, inode); - - out: -+ if (h) -+ sysctl_head_finish(h); - sysctl_head_finish(head); - return err; - } diff --git a/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch b/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch new file mode 100644 index 00000000000..e1d5ae1195d --- /dev/null +++ b/queue-3.0/mce-fix-vm86-handling-for-32bit-mce-handler.patch @@ -0,0 +1,67 @@ +From a129a7c84582629741e5fa6f40026efcd7a65bd4 Mon Sep 17 00:00:00 2001 +From: Andi Kleen +Date: Fri, 19 Nov 2010 13:16:22 +0100 +Subject: MCE: Fix vm86 handling for 32bit mce handler + +From: Andi Kleen + +commit a129a7c84582629741e5fa6f40026efcd7a65bd4 upstream. + +When running on 32bit the mce handler could misinterpret +vm86 mode as ring 0. This can affect whether it does recovery +or not; it was possible to panic when recovery was actually +possible. + +Fix this by always forcing vm86 to look like ring 3. + +[ Backport to 3.0 notes: +Things changed there slightly: + - move mce_get_rip() up. It fills up m->cs and m->ip values which + are evaluated in mce_severity(). Therefore move it up right before + the mce_severity call. This seems to be another bug in 3.0? 
+ - Place the backport (fix m->cs in V86 case) to where m->cs gets + filled which is mce_get_rip() in 3.0 +] + +Signed-off-by: Andi Kleen +Signed-off-by: Tony Luck +Signed-off-by: Thomas Renninger +Reviewed-by: Tony Luck +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/mcheck/mce.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -451,6 +451,13 @@ static inline void mce_get_rip(struct mc + if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { + m->ip = regs->ip; + m->cs = regs->cs; ++ /* ++ * When in VM86 mode make the cs look like ring 3 ++ * always. This is a lie, but it's better than passing ++ * the additional vm86 bit around everywhere. ++ */ ++ if (v8086_mode(regs)) ++ m->cs |= 3; + } else { + m->ip = 0; + m->cs = 0; +@@ -988,6 +995,7 @@ void do_machine_check(struct pt_regs *re + */ + add_taint(TAINT_MACHINE_CHECK); + ++ mce_get_rip(&m, regs); + severity = mce_severity(&m, tolerant, NULL); + + /* +@@ -1026,7 +1034,6 @@ void do_machine_check(struct pt_regs *re + if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) + mce_ring_add(m.addr >> PAGE_SHIFT); + +- mce_get_rip(&m, regs); + mce_log(&m); + + if (severity > worst) { diff --git a/queue-3.0/sched-fix-ancient-race-in-do_exit.patch b/queue-3.0/sched-fix-ancient-race-in-do_exit.patch new file mode 100644 index 00000000000..0e154c8e07b --- /dev/null +++ b/queue-3.0/sched-fix-ancient-race-in-do_exit.patch @@ -0,0 +1,124 @@ +From b5740f4b2cb3503b436925eb2242bc3d75cd3dfe Mon Sep 17 00:00:00 2001 +From: Yasunori Goto +Date: Tue, 17 Jan 2012 17:40:31 +0900 +Subject: sched: Fix ancient race in do_exit() + +From: Yasunori Goto + +commit b5740f4b2cb3503b436925eb2242bc3d75cd3dfe upstream. + +try_to_wake_up() has a problem which may change status from TASK_DEAD to +TASK_RUNNING in race condition with SMI or guest environment of virtual +machine. 
As a result, exited task is scheduled() again and panic occurs. + +Here is the sequence how it occurs: + + ----------------------------------+----------------------------- + | + CPU A | CPU B + ----------------------------------+----------------------------- + +TASK A calls exit().... + +do_exit() + + exit_mm() + down_read(mm->mmap_sem); + + rwsem_down_failed_common() + + set TASK_UNINTERRUPTIBLE + set waiter.task <= task A + list_add to sem->wait_list + : + raw_spin_unlock_irq() + (I/O interruption occurred) + + __rwsem_do_wake(mmap_sem) + + list_del(&waiter->list); + waiter->task = NULL + wake_up_process(task A) + try_to_wake_up() + (task is still + TASK_UNINTERRUPTIBLE) + p->on_rq is still 1.) + + ttwu_do_wakeup() + (*A) + : + (I/O interruption handler finished) + + if (!waiter.task) + schedule() is not called + due to waiter.task is NULL. + + tsk->state = TASK_RUNNING + + : + check_preempt_curr(); + : + task->state = TASK_DEAD + (*B) + <--- set TASK_RUNNING (*C) + + schedule() + (exit task is running again) + BUG_ON() is called! + -------------------------------------------------------- + +The execution time between (*A) and (*B) is usually very short, +because the interruption is disabled, and setting TASK_RUNNING at (*C) +must be executed before setting TASK_DEAD. + +HOWEVER, if SMI is interrupted between (*A) and (*B), +(*C) is able to execute AFTER setting TASK_DEAD! +Then, exited task is scheduled again, and BUG_ON() is called.... + +If the system works on guest system of virtual machine, the time +between (*A) and (*B) may be also long due to scheduling of hypervisor, +and same phenomenon can occur. + +By this patch, do_exit() waits for releasing task->pi_lock which is used +in try_to_wake_up(). It guarantees the task becomes TASK_DEAD after +waking up. 
+ +Signed-off-by: Yasunori Goto +Acked-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +Cc: Linus Torvalds +Cc: Andrew Morton +Link: http://lkml.kernel.org/r/20120117174031.3118.E1E9C6FF@jp.fujitsu.com +Signed-off-by: Ingo Molnar +Cc: Michal Hocko +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/exit.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1049,6 +1049,22 @@ NORET_TYPE void do_exit(long code) + + preempt_disable(); + exit_rcu(); ++ ++ /* ++ * The setting of TASK_RUNNING by try_to_wake_up() may be delayed ++ * when the following two conditions become true. ++ * - There is race condition of mmap_sem (It is acquired by ++ * exit_mm()), and ++ * - SMI occurs before setting TASK_RUNNING. ++ * (or hypervisor of virtual machine switches to other guest) ++ * As a result, we may become TASK_RUNNING after becoming TASK_DEAD ++ * ++ * To avoid it, we have to wait for releasing tsk->pi_lock which ++ * is held by try_to_wake_up() ++ */ ++ smp_mb(); ++ raw_spin_unlock_wait(&tsk->pi_lock); ++ + /* causes final put_task_struct in finish_task_switch(). 
*/ + tsk->state = TASK_DEAD; + schedule(); diff --git a/queue-3.0/series b/queue-3.0/series index 861f9f67653..179d866bf0a 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -34,7 +34,6 @@ staging-vt6656-failed-connection-incorrect-endian.patch staging-r8712u-fix-bug-in-r8712_recv_indicatepkt.patch staging-comedi-das08-correct-ao-output-for-das08jr-16-ao.patch usb-option-replace-zte-k5006-z-entry-with-vendor-class-rule.patch -fs-proc-fix-potential-unregister_sysctl_table-hang.patch perf_event-switch-to-internal-refcount-fix-race-with-close.patch mmc-mxs-mmc-fix-deadlock-in-sdio-irq-case.patch mmc-sdhci-esdhc-break-out-early-if-clock-is-0.patch @@ -123,3 +122,5 @@ e1000e-disable-aspm-l1-on-82574.patch ubi-fix-a-horrible-memory-deallocation-bug.patch spi-mpc83xx-fix-null-pdata-dereference-bug.patch spi-spi-fsl-spi-reference-correct-pdata-in-fsl_spi_cs_control.patch +sched-fix-ancient-race-in-do_exit.patch +mce-fix-vm86-handling-for-32bit-mce-handler.patch