From 3e34a05ac6a5b9d79242d718acdd9b5fc45346b3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Mar 2013 11:10:07 -0800 Subject: [PATCH] 3.0-stable patches added patches: cgroup-fix-exit-vs-rmdir-race.patch cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch x86-do-not-leak-kernel-page-mapping-locations.patch --- queue-3.0/cgroup-fix-exit-vs-rmdir-race.patch | 55 +++++++++++ ...int_task_mems_allowed-vs-rename-race.patch | 47 ++++++++++ queue-3.0/series | 4 + ...-hp-proliant-dl980-g7-server-systems.patch | 94 +++++++++++++++++++ ...t-leak-kernel-page-mapping-locations.patch | 61 ++++++++++++ 5 files changed, 261 insertions(+) create mode 100644 queue-3.0/cgroup-fix-exit-vs-rmdir-race.patch create mode 100644 queue-3.0/cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch create mode 100644 queue-3.0/x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch create mode 100644 queue-3.0/x86-do-not-leak-kernel-page-mapping-locations.patch diff --git a/queue-3.0/cgroup-fix-exit-vs-rmdir-race.patch b/queue-3.0/cgroup-fix-exit-vs-rmdir-race.patch new file mode 100644 index 00000000000..1b93c372e31 --- /dev/null +++ b/queue-3.0/cgroup-fix-exit-vs-rmdir-race.patch @@ -0,0 +1,55 @@ +From 71b5707e119653039e6e95213f00479668c79b75 Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Thu, 24 Jan 2013 14:43:28 +0800 +Subject: cgroup: fix exit() vs rmdir() race + +From: Li Zefan + +commit 71b5707e119653039e6e95213f00479668c79b75 upstream. + +In cgroup_exit() put_css_set_taskexit() is called without any lock, +which might lead to accessing a freed cgroup: + +thread1 thread2 +--------------------------------------------- +exit() + cgroup_exit() + put_css_set_taskexit() + atomic_dec(cgrp->count); + rmdir(); + /* not safe !! */ + check_for_release(cgrp); + +rcu_read_lock() can be used to make sure the cgroup is alive. + +Signed-off-by: Li Zefan +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -359,12 +359,20 @@ static void __put_css_set(struct css_set + struct cgroup *cgrp = link->cgrp; + list_del(&link->cg_link_list); + list_del(&link->cgrp_link_list); ++ ++ /* ++ * We may not be holding cgroup_mutex, and if cgrp->count is ++ * dropped to 0 the cgroup can be destroyed at any time, hence ++ * rcu_read_lock is used to keep it alive. ++ */ ++ rcu_read_lock(); + if (atomic_dec_and_test(&cgrp->count) && + notify_on_release(cgrp)) { + if (taskexit) + set_bit(CGRP_RELEASABLE, &cgrp->flags); + check_for_release(cgrp); + } ++ rcu_read_unlock(); + + kfree(link); + } diff --git a/queue-3.0/cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch b/queue-3.0/cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch new file mode 100644 index 00000000000..50031b60be7 --- /dev/null +++ b/queue-3.0/cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch @@ -0,0 +1,47 @@ +From 63f43f55c9bbc14f76b582644019b8a07dc8219a Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Fri, 25 Jan 2013 16:08:01 +0800 +Subject: cpuset: fix cpuset_print_task_mems_allowed() vs rename() race + +From: Li Zefan + +commit 63f43f55c9bbc14f76b582644019b8a07dc8219a upstream. + +rename() will change dentry->d_name. The result of this race can +be worse than seeing partially rewritten name, but we might access +a stale pointer because rename() will re-allocate memory to hold +a longer name. + +It's safe in the protection of dentry->d_lock. + +v2: check NULL dentry before acquiring dentry lock. + +Signed-off-by: Li Zefan +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cpuset.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -2499,8 +2499,16 @@ void cpuset_print_task_mems_allowed(stru + + dentry = task_cs(tsk)->css.cgroup->dentry; + spin_lock(&cpuset_buffer_lock); +- snprintf(cpuset_name, CPUSET_NAME_LEN, +- dentry ? (const char *)dentry->d_name.name : "/"); ++ ++ if (!dentry) { ++ strcpy(cpuset_name, "/"); ++ } else { ++ spin_lock(&dentry->d_lock); ++ strlcpy(cpuset_name, (const char *)dentry->d_name.name, ++ CPUSET_NAME_LEN); ++ spin_unlock(&dentry->d_lock); ++ } ++ + nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, + tsk->mems_allowed); + printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", diff --git a/queue-3.0/series b/queue-3.0/series index 8fb729c42d0..10e86d57c69 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -19,3 +19,7 @@ ptrace-introduce-signal_wake_up_state-and-ptrace_signal_wake_up.patch ptrace-ensure-arch_ptrace-ptrace_request-can-never-race-with-sigkill.patch wake_up_process-should-be-never-used-to-wakeup-a-task_stopped-traced-task.patch unbreak-automounter-support-on-64-bit-kernel-with-32-bit-userspace-v2.patch +x86-do-not-leak-kernel-page-mapping-locations.patch +x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch +cpuset-fix-cpuset_print_task_mems_allowed-vs-rename-race.patch +cgroup-fix-exit-vs-rmdir-race.patch diff --git a/queue-3.0/x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch b/queue-3.0/x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch new file mode 100644 index 00000000000..2fe070f705a --- /dev/null +++ b/queue-3.0/x86-apic-work-around-boot-failure-on-hp-proliant-dl980-g7-server-systems.patch @@ -0,0 +1,94 @@ +From cb214ede7657db458fd0b2a25ea0b28dbf900ebc Mon Sep 17 00:00:00 2001 +From: Stoney Wang +Date: Thu, 7 Feb 2013 10:53:02 -0800 +Subject: x86/apic: Work around boot failure on HP ProLiant DL980 G7 Server systems + +From: Stoney Wang + +commit cb214ede7657db458fd0b2a25ea0b28dbf900ebc upstream. + +When a HP ProLiant DL980 G7 Server boots a regular kernel, +there will be intermittent lost interrupts which could +result in a hang or (in extreme cases) data loss. + +The reason is that this system only supports x2apic physical +mode, while the kernel boots with a logical-cluster default +setting. + +This bug can be worked around by specifying the "x2apic_phys" or +"nox2apic" boot option, but we want to handle this system +without requiring manual workarounds. + +The BIOS sets ACPI_FADT_APIC_PHYSICAL in FADT table. +As all apicids are smaller than 255, BIOS need to pass the +control to the OS with xapic mode, according to x2apic-spec, +chapter 2.9. + +Current code handle x2apic when BIOS pass with xapic mode +enabled: + +When user specifies x2apic_phys, or FADT indicates PHYSICAL: + +1. During madt oem check, apic driver is set with xapic logical + or xapic phys driver at first. + +2. enable_IR_x2apic() will enable x2apic_mode. + +3. if user specifies x2apic_phys on the boot line, x2apic_phys_probe() + will install the correct x2apic phys driver and use x2apic phys mode. + Otherwise it will skip the driver will let x2apic_cluster_probe to + take over to install x2apic cluster driver (wrong one) even though FADT + indicates PHYSICAL, because x2apic_phys_probe does not check + FADT PHYSICAL. + +Add checking x2apic_fadt_phys in x2apic_phys_probe() to fix the +problem. + +Signed-off-by: Stoney Wang +[ updated the changelog and simplified the code ] +Signed-off-by: Yinghai Lu +Signed-off-by: Zhang Lin-Bao +[ make a patch specially for 3.0.66] +Link: http://lkml.kernel.org/r/1360263182-16226-1-git-send-email-yinghai@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/x2apic_phys.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/apic/x2apic_phys.c ++++ b/arch/x86/kernel/apic/x2apic_phys.c +@@ -20,12 +20,19 @@ static int set_x2apic_phys_mode(char *ar + } + early_param("x2apic_phys", set_x2apic_phys_mode); + ++static bool x2apic_fadt_phys(void) ++{ ++ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && ++ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { ++ printk(KERN_DEBUG "System requires x2apic physical mode\n"); ++ return true; ++ } ++ return false; ++} ++ + static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) + { +- if (x2apic_phys) +- return x2apic_enabled(); +- else +- return 0; ++ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); + } + + static void +@@ -108,7 +115,7 @@ static void init_x2apic_ldr(void) + + static int x2apic_phys_probe(void) + { +- if (x2apic_mode && x2apic_phys) ++ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + return 1; + + return apic == &apic_x2apic_phys; diff --git a/queue-3.0/x86-do-not-leak-kernel-page-mapping-locations.patch b/queue-3.0/x86-do-not-leak-kernel-page-mapping-locations.patch new file mode 100644 index 00000000000..3704e0806fd --- /dev/null +++ b/queue-3.0/x86-do-not-leak-kernel-page-mapping-locations.patch @@ -0,0 +1,61 @@ +From e575a86fdc50d013bf3ad3aa81d9100e8e6cc60d Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 7 Feb 2013 09:44:13 -0800 +Subject: x86: Do not leak kernel page mapping locations + +From: Kees Cook + +commit e575a86fdc50d013bf3ad3aa81d9100e8e6cc60d upstream. + +Without this patch, it is trivial to determine kernel page +mappings by examining the error code reported to dmesg[1]. +Instead, declare the entire kernel memory space as a violation +of a present page. + +Additionally, since show_unhandled_signals is enabled by +default, switch branch hinting to the more realistic +expectation, and unobfuscate the setting of the PF_PROT bit to +improve readability. + +[1] http://vulnfactory.org/blog/2013/02/06/a-linux-memory-trick/ + +Reported-by: Dan Rosenberg +Suggested-by: Brad Spengler +Signed-off-by: Kees Cook +Acked-by: H. Peter Anvin +Cc: Paul E. McKenney +Cc: Frederic Weisbecker +Cc: Eric W. Biederman +Cc: Linus Torvalds +Cc: Andrew Morton +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/20130207174413.GA12485@www.outflux.net +Signed-off-by: Ingo Molnar +Signed-off-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/fault.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -720,12 +720,15 @@ __bad_area_nosemaphore(struct pt_regs *r + if (is_errata100(regs, address)) + return; + +- if (unlikely(show_unhandled_signals)) ++ /* Kernel addresses are always protection faults: */ ++ if (address >= TASK_SIZE) ++ error_code |= PF_PROT; ++ ++ if (likely(show_unhandled_signals)) + show_signal_msg(regs, error_code, address, tsk); + +- /* Kernel addresses are always protection faults: */ + tsk->thread.cr2 = address; +- tsk->thread.error_code = error_code | (address >= TASK_SIZE); ++ tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + + force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); -- 2.47.3