git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Apr 2013 00:52:25 +0000 (17:52 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Apr 2013 00:52:25 +0000 (17:52 -0700)
added patches:
x86-mm-paravirt-fix-vmalloc_fault-oops-during-lazy-mmu-updates.patch
x86-mm-patch-out-arch_flush_lazy_mmu_mode-when-running-on-bare-metal.patch

queue-3.0/series
queue-3.0/x86-mm-paravirt-fix-vmalloc_fault-oops-during-lazy-mmu-updates.patch [new file with mode: 0644]
queue-3.0/x86-mm-patch-out-arch_flush_lazy_mmu_mode-when-running-on-bare-metal.patch [new file with mode: 0644]

index 2d757a00a72de51019ca57d53d9d32f5566d388b..26bd4b4f5666b96294bbcf510c9e40df894c965f 100644 (file)
@@ -5,3 +5,5 @@ tracing-fix-double-free-when-function-profile-init-failed.patch
 pm-reboot-call-syscore_shutdown-after-disable_nonboot_cpus.patch
 target-fix-incorrect-fallthrough-of-alua-standby-offline-transition-cdbs.patch
 sched_clock-prevent-64bit-inatomicity-on-32bit-systems.patch
+x86-mm-paravirt-fix-vmalloc_fault-oops-during-lazy-mmu-updates.patch
+x86-mm-patch-out-arch_flush_lazy_mmu_mode-when-running-on-bare-metal.patch
diff --git a/queue-3.0/x86-mm-paravirt-fix-vmalloc_fault-oops-during-lazy-mmu-updates.patch b/queue-3.0/x86-mm-paravirt-fix-vmalloc_fault-oops-during-lazy-mmu-updates.patch
new file mode 100644 (file)
index 0000000..4ab9fef
--- /dev/null
@@ -0,0 +1,88 @@
+From 1160c2779b826c6f5c08e5cc542de58fd1f667d5 Mon Sep 17 00:00:00 2001
+From: Samu Kallio <samu.kallio@aberdeencloud.com>
+Date: Sat, 23 Mar 2013 09:36:35 -0400
+Subject: x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates
+
+From: Samu Kallio <samu.kallio@aberdeencloud.com>
+
+commit 1160c2779b826c6f5c08e5cc542de58fd1f667d5 upstream.
+
+In paravirtualized x86_64 kernels, vmalloc_fault may cause an oops
+when lazy MMU updates are enabled, because set_pgd effects are being
+deferred.
+
+One instance of this problem is during process mm cleanup with memory
+cgroups enabled. The chain of events is as follows:
+
+- zap_pte_range enables lazy MMU updates
+- zap_pte_range eventually calls mem_cgroup_charge_statistics,
+  which accesses the vmalloc'd mem_cgroup per-cpu stat area
+- vmalloc_fault is triggered which tries to sync the corresponding
+  PGD entry with set_pgd, but the update is deferred
+- vmalloc_fault oopses due to a mismatch in the PUD entries
+
+The oops usually looks like this:
+
+------------[ cut here ]------------
+kernel BUG at arch/x86/mm/fault.c:396!
+invalid opcode: 0000 [#1] SMP
+.. snip ..
+CPU 1
+Pid: 10866, comm: httpd Not tainted 3.6.10-4.fc18.x86_64 #1
+RIP: e030:[<ffffffff816271bf>]  [<ffffffff816271bf>] vmalloc_fault+0x11f/0x208
+.. snip ..
+Call Trace:
+ [<ffffffff81627759>] do_page_fault+0x399/0x4b0
+ [<ffffffff81004f4c>] ? xen_mc_extend_args+0xec/0x110
+ [<ffffffff81624065>] page_fault+0x25/0x30
+ [<ffffffff81184d03>] ? mem_cgroup_charge_statistics.isra.13+0x13/0x50
+ [<ffffffff81186f78>] __mem_cgroup_uncharge_common+0xd8/0x350
+ [<ffffffff8118aac7>] mem_cgroup_uncharge_page+0x57/0x60
+ [<ffffffff8115fbc0>] page_remove_rmap+0xe0/0x150
+ [<ffffffff8115311a>] ? vm_normal_page+0x1a/0x80
+ [<ffffffff81153e61>] unmap_single_vma+0x531/0x870
+ [<ffffffff81154962>] unmap_vmas+0x52/0xa0
+ [<ffffffff81007442>] ? pte_mfn_to_pfn+0x72/0x100
+ [<ffffffff8115c8f8>] exit_mmap+0x98/0x170
+ [<ffffffff810050d9>] ? __raw_callee_save_xen_pmd_val+0x11/0x1e
+ [<ffffffff81059ce3>] mmput+0x83/0xf0
+ [<ffffffff810624c4>] exit_mm+0x104/0x130
+ [<ffffffff8106264a>] do_exit+0x15a/0x8c0
+ [<ffffffff810630ff>] do_group_exit+0x3f/0xa0
+ [<ffffffff81063177>] sys_exit_group+0x17/0x20
+ [<ffffffff8162bae9>] system_call_fastpath+0x16/0x1b
+
+Calling arch_flush_lazy_mmu_mode immediately after set_pgd makes the
+changes visible to the consistency checks.
+
+RedHat-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=914737
+Tested-by: Josh Boyer <jwboyer@redhat.com>
+Reported-and-Tested-by: Krishna Raman <kraman@redhat.com>
+Signed-off-by: Samu Kallio <samu.kallio@aberdeencloud.com>
+Link: http://lkml.kernel.org/r/1364045796-10720-1-git-send-email-konrad.wilk@oracle.com
+Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -376,10 +376,12 @@ static noinline __kprobes int vmalloc_fa
+       if (pgd_none(*pgd_ref))
+               return -1;
+-      if (pgd_none(*pgd))
++      if (pgd_none(*pgd)) {
+               set_pgd(pgd, *pgd_ref);
+-      else
++              arch_flush_lazy_mmu_mode();
++      } else {
+               BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
++      }
+       /*
+        * Below here mismatches are bugs because these lower tables
diff --git a/queue-3.0/x86-mm-patch-out-arch_flush_lazy_mmu_mode-when-running-on-bare-metal.patch b/queue-3.0/x86-mm-patch-out-arch_flush_lazy_mmu_mode-when-running-on-bare-metal.patch
new file mode 100644 (file)
index 0000000..cdfdc89
--- /dev/null
@@ -0,0 +1,138 @@
+From 511ba86e1d386f671084b5d0e6f110bb30b8eeb2 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Sat, 23 Mar 2013 09:36:36 -0400
+Subject: x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal
+
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+
+commit 511ba86e1d386f671084b5d0e6f110bb30b8eeb2 upstream.
+
+Invoking arch_flush_lazy_mmu_mode() results in calls to
+preempt_enable()/disable() which may have a performance impact.
+
+Since lazy MMU is not used on bare metal we can patch away
+arch_flush_lazy_mmu_mode() so that it is never called in such an
+environment.
+
+[ hpa: the previous patch "Fix vmalloc_fault oops during lazy MMU
+  updates" may cause a minor performance regression on
+  bare metal.  This patch resolves that performance regression.  It is
+  somewhat unclear to me if this is a good -stable candidate. ]
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: http://lkml.kernel.org/r/1364045796-10720-2-git-send-email-konrad.wilk@oracle.com
+Tested-by: Josh Boyer <jwboyer@redhat.com>
+Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/paravirt.h       |    5 ++++-
+ arch/x86/include/asm/paravirt_types.h |    2 ++
+ arch/x86/kernel/paravirt.c            |   25 +++++++++++++------------
+ arch/x86/lguest/boot.c                |    1 +
+ arch/x86/xen/mmu.c                    |    1 +
+ 5 files changed, 21 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -731,7 +731,10 @@ static inline void arch_leave_lazy_mmu_m
+       PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
+ }
+-void arch_flush_lazy_mmu_mode(void);
++static inline void arch_flush_lazy_mmu_mode(void)
++{
++      PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
++}
+ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
+                               phys_addr_t phys, pgprot_t flags)
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -85,6 +85,7 @@ struct pv_lazy_ops {
+       /* Set deferred update mode, used for batching operations. */
+       void (*enter)(void);
+       void (*leave)(void);
++      void (*flush)(void);
+ };
+ struct pv_time_ops {
+@@ -673,6 +674,7 @@ void paravirt_end_context_switch(struct
+ void paravirt_enter_lazy_mmu(void);
+ void paravirt_leave_lazy_mmu(void);
++void paravirt_flush_lazy_mmu(void);
+ void _paravirt_nop(void);
+ u32 _paravirt_ident_32(u32);
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -253,6 +253,18 @@ void paravirt_leave_lazy_mmu(void)
+       leave_lazy(PARAVIRT_LAZY_MMU);
+ }
++void paravirt_flush_lazy_mmu(void)
++{
++      preempt_disable();
++
++      if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
++              arch_leave_lazy_mmu_mode();
++              arch_enter_lazy_mmu_mode();
++      }
++
++      preempt_enable();
++}
++
+ void paravirt_start_context_switch(struct task_struct *prev)
+ {
+       BUG_ON(preemptible());
+@@ -282,18 +294,6 @@ enum paravirt_lazy_mode paravirt_get_laz
+       return percpu_read(paravirt_lazy_mode);
+ }
+-void arch_flush_lazy_mmu_mode(void)
+-{
+-      preempt_disable();
+-
+-      if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+-              arch_leave_lazy_mmu_mode();
+-              arch_enter_lazy_mmu_mode();
+-      }
+-
+-      preempt_enable();
+-}
+-
+ struct pv_info pv_info = {
+       .name = "bare hardware",
+       .paravirt_enabled = 0,
+@@ -462,6 +462,7 @@ struct pv_mmu_ops pv_mmu_ops = {
+       .lazy_mode = {
+               .enter = paravirt_nop,
+               .leave = paravirt_nop,
++              .flush = paravirt_nop,
+       },
+       .set_fixmap = native_set_fixmap,
+--- a/arch/x86/lguest/boot.c
++++ b/arch/x86/lguest/boot.c
+@@ -1309,6 +1309,7 @@ __init void lguest_init(void)
+       pv_mmu_ops.read_cr3 = lguest_read_cr3;
+       pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
+       pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
++      pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
+       pv_mmu_ops.pte_update = lguest_pte_update;
+       pv_mmu_ops.pte_update_defer = lguest_pte_update;
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2011,6 +2011,7 @@ static const struct pv_mmu_ops xen_mmu_o
+       .lazy_mode = {
+               .enter = paravirt_enter_lazy_mmu,
+               .leave = xen_leave_lazy_mmu,
++              .flush = paravirt_flush_lazy_mmu,
+       },
+       .set_fixmap = xen_set_fixmap,