git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.7-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Aug 2016 14:29:41 +0000 (10:29 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Aug 2016 14:29:41 +0000 (10:29 -0400)
added patches:
tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
x86-mm-disable-preemption-during-cr3-read-write.patch
x86-platform-uv-skip-uv-runtime-services-mapping-in-the-efi_runtime_disabled-case.patch

queue-4.7/series
queue-4.7/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch [new file with mode: 0644]
queue-4.7/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch [new file with mode: 0644]
queue-4.7/x86-mm-disable-preemption-during-cr3-read-write.patch [new file with mode: 0644]
queue-4.7/x86-platform-uv-skip-uv-runtime-services-mapping-in-the-efi_runtime_disabled-case.patch [new file with mode: 0644]

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f5cb6fb00433a7c6c22e73a1d249b9ba38aacc01 100644 (file)
@@ -0,0 +1,4 @@
+x86-mm-disable-preemption-during-cr3-read-write.patch
+uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
+x86-platform-uv-skip-uv-runtime-services-mapping-in-the-efi_runtime_disabled-case.patch
+tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
diff --git a/queue-4.7/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch b/queue-4.7/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
new file mode 100644 (file)
index 0000000..eceaa75
--- /dev/null
@@ -0,0 +1,60 @@
+From d8d378fa1a0c98ecb50ca52c9bf3bc14e25aa2d2 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Wed, 10 Aug 2016 15:59:09 -0700
+Subject: tools/testing/nvdimm: fix SIGTERM vs hotplug crash
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit d8d378fa1a0c98ecb50ca52c9bf3bc14e25aa2d2 upstream.
+
+The unit tests crash when hotplug races the previous probe. This race
+requires that the loading of the nfit_test module be terminated with
+SIGTERM, and the module to be unloaded while the ars scan is still
+running.
+
+In contrast to the normal nfit driver, the unit test calls
+acpi_nfit_init() twice to simulate hotplug, whereas the nominal case
+goes through the acpi_nfit_notify() event handler.  The
+acpi_nfit_notify() path is careful to flush the previous region
+registration before servicing the hotplug event. The unit test was
+missing this guarantee.
+
+ BUG: unable to handle kernel NULL pointer dereference at           (null)
+ IP: [<ffffffff810cdce7>] pwq_activate_delayed_work+0x47/0x170
+ [..]
+ Call Trace:
+  [<ffffffff810ce186>] pwq_dec_nr_in_flight+0x66/0xa0
+  [<ffffffff810ce490>] process_one_work+0x2d0/0x680
+  [<ffffffff810ce331>] ? process_one_work+0x171/0x680
+  [<ffffffff810ce88e>] worker_thread+0x4e/0x480
+  [<ffffffff810ce840>] ? process_one_work+0x680/0x680
+  [<ffffffff810ce840>] ? process_one_work+0x680/0x680
+  [<ffffffff810d5343>] kthread+0xf3/0x110
+  [<ffffffff8199846f>] ret_from_fork+0x1f/0x40
+  [<ffffffff810d5250>] ? kthread_create_on_node+0x230/0x230
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/nvdimm/test/nfit.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/nvdimm/test/nfit.c
++++ b/tools/testing/nvdimm/test/nfit.c
+@@ -13,6 +13,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ #include <linux/platform_device.h>
+ #include <linux/dma-mapping.h>
++#include <linux/workqueue.h>
+ #include <linux/libnvdimm.h>
+ #include <linux/vmalloc.h>
+ #include <linux/device.h>
+@@ -1480,6 +1481,7 @@ static int nfit_test_probe(struct platfo
+       if (nfit_test->setup != nfit_test0_setup)
+               return 0;
++      flush_work(&acpi_desc->work);
+       nfit_test->setup_hotplug = 1;
+       nfit_test->setup(nfit_test);
diff --git a/queue-4.7/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch b/queue-4.7/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
new file mode 100644 (file)
index 0000000..62f3e28
--- /dev/null
@@ -0,0 +1,100 @@
+From 68187872c76a96ed4db7bfb064272591f02e208b Mon Sep 17 00:00:00 2001
+From: Denys Vlasenko <dvlasenk@redhat.com>
+Date: Thu, 11 Aug 2016 17:45:21 +0200
+Subject: uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions
+
+From: Denys Vlasenko <dvlasenk@redhat.com>
+
+commit 68187872c76a96ed4db7bfb064272591f02e208b upstream.
+
+Since instruction decoder now supports EVEX-encoded instructions, two fixes
+are needed to correctly handle them in uprobes.
+
+Extended bits for MODRM.rm field need to be sanitized just like we do it
+for VEX3, to avoid encoding wrong register for register-relative access.
+
+EVEX has _two_ extended bits: b and x. Theoretically, EVEX.x should be
+ignored by the CPU (since GPRs go only up to 15, not 31), but let's be
+paranoid here: proper encoding for register-relative access
+should have EVEX.x = 1.
+
+Secondly, we should fetch vex.vvvv for EVEX too.
+This is now super easy because instruction decoder populates
+vex_prefix.bytes[2] for all flavors of (e)vex encodings, even for VEX2.
+
+Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jim Keniston <jkenisto@us.ibm.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: linux-kernel@vger.kernel.org
+Fixes: 8a764a875fe3 ("x86/asm/decoder: Create artificial 3rd byte for 2-byte VEX")
+Link: http://lkml.kernel.org/r/20160811154521.20469-1-dvlasenk@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/uprobes.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_u
+               *cursor &= 0xfe;
+       }
+       /*
+-       * Similar treatment for VEX3 prefix.
+-       * TODO: add XOP/EVEX treatment when insn decoder supports them
++       * Similar treatment for VEX3/EVEX prefix.
++       * TODO: add XOP treatment when insn decoder supports them
+        */
+-      if (insn->vex_prefix.nbytes == 3) {
++      if (insn->vex_prefix.nbytes >= 3) {
+               /*
+                * vex2:     c5    rvvvvLpp   (has no b bit)
+                * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+                * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+-               *   (evex will need setting of both b and x since
+-               *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+-               * Setting VEX3.b (setting because it has inverted meaning):
++               * Setting VEX3.b (setting because it has inverted meaning).
++               * Setting EVEX.x since (in non-SIB encoding) EVEX.x
++               * is the 4th bit of MODRM.rm, and needs the same treatment.
++               * For VEX3-encoded insns, VEX3.x value has no effect in
++               * non-SIB encoding, the change is superfluous but harmless.
+                */
+               cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+-              *cursor |= 0x20;
++              *cursor |= 0x60;
+       }
+       /*
+@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_u
+       reg = MODRM_REG(insn);  /* Fetch modrm.reg */
+       reg2 = 0xff;            /* Fetch vex.vvvv */
+-      if (insn->vex_prefix.nbytes == 2)
+-              reg2 = insn->vex_prefix.bytes[1];
+-      else if (insn->vex_prefix.nbytes == 3)
++      if (insn->vex_prefix.nbytes)
+               reg2 = insn->vex_prefix.bytes[2];
+       /*
+-       * TODO: add XOP, EXEV vvvv reading.
++       * TODO: add XOP vvvv reading.
+        *
+        * vex.vvvv field is in bits 6-3, bits are inverted.
+        * But in 32-bit mode, high-order bit may be ignored.
diff --git a/queue-4.7/x86-mm-disable-preemption-during-cr3-read-write.patch b/queue-4.7/x86-mm-disable-preemption-during-cr3-read-write.patch
new file mode 100644 (file)
index 0000000..350d441
--- /dev/null
@@ -0,0 +1,109 @@
+From 5cf0791da5c162ebc14b01eb01631cfa7ed4fa6e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 5 Aug 2016 15:37:39 +0200
+Subject: x86/mm: Disable preemption during CR3 read+write
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 5cf0791da5c162ebc14b01eb01631cfa7ed4fa6e upstream.
+
+There's a subtle preemption race on UP kernels:
+
+Usually current->mm (and therefore mm->pgd) stays the same during the
+lifetime of a task so it does not matter if a task gets preempted during
+the read and write of the CR3.
+
+But then, there is this scenario on x86-UP:
+
+TaskA is in do_exit() and exit_mm() sets current->mm = NULL followed by:
+
+ -> mmput()
+ -> exit_mmap()
+ -> tlb_finish_mmu()
+ -> tlb_flush_mmu()
+ -> tlb_flush_mmu_tlbonly()
+ -> tlb_flush()
+ -> flush_tlb_mm_range()
+ -> __flush_tlb_up()
+ -> __flush_tlb()
+ ->  __native_flush_tlb()
+
+At this point current->mm is NULL but current->active_mm still points to
+the "old" mm.
+
+Let's preempt taskA _after_ native_read_cr3() by taskB. TaskB has its
+own mm so CR3 has changed.
+
+Now preempt back to taskA. TaskA has no ->mm set so it borrows taskB's
+mm and so CR3 remains unchanged. Once taskA gets active it continues
+where it was interrupted and that means it writes its old CR3 value
+back. Everything is fine because userland won't need its memory
+anymore.
+
+Now the fun part:
+
+Let's preempt taskA one more time and get back to taskB. This
+time switch_mm() won't do a thing because oldmm (->active_mm)
+is the same as mm (as per context_switch()). So we remain
+with a bad CR3 / PGD and return to userland.
+
+The next thing that happens is handle_mm_fault() with an address for
+the execution of its code in userland. handle_mm_fault() realizes that
+it has a PTE with proper rights so it returns doing nothing. But the
+CPU looks at the wrong PGD and insists that something is wrong and
+faults again. And again. And one more time…
+
+This pagefault circle continues until the scheduler gets tired of it and
+puts another task on the CPU. It gets a little difficult if the task is an
+RT task with a high priority. The system will either freeze or it gets
+fixed by the software watchdog thread which usually runs at RT-max prio.
+But waiting for the watchdog will increase the latency of the RT task
+which is no good.
+
+Fix this by disabling preemption across the critical code section.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/1470404259-26290-1-git-send-email-bigeasy@linutronix.de
+[ Prettified the changelog. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_upda
+ static inline void __native_flush_tlb(void)
+ {
++      /*
++       * If current->mm == NULL then we borrow a mm which may change during a
++       * task switch and therefore we must not be preempted while we write CR3
++       * back:
++       */
++      preempt_disable();
+       native_write_cr3(native_read_cr3());
++      preempt_enable();
+ }
+ static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/queue-4.7/x86-platform-uv-skip-uv-runtime-services-mapping-in-the-efi_runtime_disabled-case.patch b/queue-4.7/x86-platform-uv-skip-uv-runtime-services-mapping-in-the-efi_runtime_disabled-case.patch
new file mode 100644 (file)
index 0000000..ed92628
--- /dev/null
@@ -0,0 +1,66 @@
+From f72075c9eda8a43aeea2f9dbb8d187afd4a76f0b Mon Sep 17 00:00:00 2001
+From: Alex Thorlton <athorlton@sgi.com>
+Date: Thu, 11 Aug 2016 11:41:59 +0100
+Subject: x86/platform/uv: Skip UV runtime services mapping in the efi_runtime_disabled case
+
+From: Alex Thorlton <athorlton@sgi.com>
+
+commit f72075c9eda8a43aeea2f9dbb8d187afd4a76f0b upstream.
+
+This problem has actually been in the UV code for a while, but we didn't
+catch it until recently, because we had been relying on EFI_OLD_MEMMAP
+to allow our systems to boot for a period of time.  We noticed the issue
+when trying to kexec a recent community kernel, where we hit this NULL
+pointer dereference in efi_sync_low_kernel_mappings():
+
+ [    0.337515] BUG: unable to handle kernel NULL pointer dereference at 0000000000000880
+ [    0.346276] IP: [<ffffffff8105df8d>] efi_sync_low_kernel_mappings+0x5d/0x1b0
+
+The problem doesn't show up with EFI_OLD_MEMMAP because we skip the
+chunk of setup_efi_state() that sets the efi_loader_signature for the
+kexec'd kernel.  When the kexec'd kernel boots, it won't set EFI_BOOT in
+setup_arch, so we completely avoid the bug.
+
+We always kexec with noefi on the command line, so this shouldn't be an
+issue, but since we're not actually checking for efi_runtime_disabled in
+uv_bios_init(), we end up trying to do EFI runtime callbacks when we
+shouldn't be. This patch just adds a check for efi_runtime_disabled in
+uv_bios_init() so that we don't map in uv_systab when runtime_disabled ==
+true.
+
+Signed-off-by: Alex Thorlton <athorlton@sgi.com>
+Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Travis <travis@sgi.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Russ Anderson <rja@sgi.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-efi@vger.kernel.org
+Link: http://lkml.kernel.org/r/1470912120-22831-2-git-send-email-matt@codeblueprint.co.uk
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/uv/bios_uv.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/platform/uv/bios_uv.c
++++ b/arch/x86/platform/uv/bios_uv.c
+@@ -188,7 +188,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga
+ void uv_bios_init(void)
+ {
+       uv_systab = NULL;
+-      if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
++      if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
++          !efi.uv_systab || efi_runtime_disabled()) {
+               pr_crit("UV: UVsystab: missing\n");
+               return;
+       }