git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Aug 2016 14:29:29 +0000 (10:29 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Aug 2016 14:29:29 +0000 (10:29 -0400)
added patches:
tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
x86-mm-disable-preemption-during-cr3-read-write.patch

queue-4.4/series
queue-4.4/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch [new file with mode: 0644]
queue-4.4/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch [new file with mode: 0644]
queue-4.4/x86-mm-disable-preemption-during-cr3-read-write.patch [new file with mode: 0644]

index 6a2be4da5f52c0500d4da4d2b2d0a8992534a248..af3fbe063d862f0a232d2d54fbe667c3f1b2ab3b 100644 (file)
@@ -1 +1,4 @@
 hugetlb-fix-nr_pmds-accounting-with-shared-page-tables.patch
+x86-mm-disable-preemption-during-cr3-read-write.patch
+uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
+tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
diff --git a/queue-4.4/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch b/queue-4.4/tools-testing-nvdimm-fix-sigterm-vs-hotplug-crash.patch
new file mode 100644 (file)
index 0000000..dc57e69
--- /dev/null
@@ -0,0 +1,60 @@
+From d8d378fa1a0c98ecb50ca52c9bf3bc14e25aa2d2 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Wed, 10 Aug 2016 15:59:09 -0700
+Subject: tools/testing/nvdimm: fix SIGTERM vs hotplug crash
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit d8d378fa1a0c98ecb50ca52c9bf3bc14e25aa2d2 upstream.
+
+The unit tests crash when hotplug races the previous probe. This race
+requires that the loading of the nfit_test module be terminated with
+SIGTERM, and the module to be unloaded while the ars scan is still
+running.
+
+In contrast to the normal nfit driver, the unit test calls
+acpi_nfit_init() twice to simulate hotplug, whereas the nominal case
+goes through the acpi_nfit_notify() event handler.  The
+acpi_nfit_notify() path is careful to flush the previous region
+registration before servicing the hotplug event. The unit test was
+missing this guarantee.
+
+ BUG: unable to handle kernel NULL pointer dereference at           (null)
+ IP: [<ffffffff810cdce7>] pwq_activate_delayed_work+0x47/0x170
+ [..]
+ Call Trace:
+  [<ffffffff810ce186>] pwq_dec_nr_in_flight+0x66/0xa0
+  [<ffffffff810ce490>] process_one_work+0x2d0/0x680
+  [<ffffffff810ce331>] ? process_one_work+0x171/0x680
+  [<ffffffff810ce88e>] worker_thread+0x4e/0x480
+  [<ffffffff810ce840>] ? process_one_work+0x680/0x680
+  [<ffffffff810ce840>] ? process_one_work+0x680/0x680
+  [<ffffffff810d5343>] kthread+0xf3/0x110
+  [<ffffffff8199846f>] ret_from_fork+0x1f/0x40
+  [<ffffffff810d5250>] ? kthread_create_on_node+0x230/0x230
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/nvdimm/test/nfit.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/nvdimm/test/nfit.c
++++ b/tools/testing/nvdimm/test/nfit.c
+@@ -13,6 +13,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ #include <linux/platform_device.h>
+ #include <linux/dma-mapping.h>
++#include <linux/workqueue.h>
+ #include <linux/libnvdimm.h>
+ #include <linux/vmalloc.h>
+ #include <linux/device.h>
+@@ -1246,6 +1247,7 @@ static int nfit_test_probe(struct platfo
+       if (nfit_test->setup != nfit_test0_setup)
+               return 0;
++      flush_work(&acpi_desc->work);
+       nfit_test->setup_hotplug = 1;
+       nfit_test->setup(nfit_test);
diff --git a/queue-4.4/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch b/queue-4.4/uprobes-x86-fix-rip-relative-handling-of-evex-encoded-instructions.patch
new file mode 100644 (file)
index 0000000..62f3e28
--- /dev/null
@@ -0,0 +1,100 @@
+From 68187872c76a96ed4db7bfb064272591f02e208b Mon Sep 17 00:00:00 2001
+From: Denys Vlasenko <dvlasenk@redhat.com>
+Date: Thu, 11 Aug 2016 17:45:21 +0200
+Subject: uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions
+
+From: Denys Vlasenko <dvlasenk@redhat.com>
+
+commit 68187872c76a96ed4db7bfb064272591f02e208b upstream.
+
+Since instruction decoder now supports EVEX-encoded instructions, two fixes
+are needed to correctly handle them in uprobes.
+
+Extended bits for MODRM.rm field need to be sanitized just like we do it
+for VEX3, to avoid encoding wrong register for register-relative access.
+
+EVEX has _two_ extended bits: b and x. Theoretically, EVEX.x should be
+ignored by the CPU (since GPRs go only up to 15, not 31), but let's be
+paranoid here: proper encoding for register-relative access
+should have EVEX.x = 1.
+
+Secondly, we should fetch vex.vvvv for EVEX too.
+This is now super easy because instruction decoder populates
+vex_prefix.bytes[2] for all flavors of (e)vex encodings, even for VEX2.
+
+Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jim Keniston <jkenisto@us.ibm.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: linux-kernel@vger.kernel.org
+Fixes: 8a764a875fe3 ("x86/asm/decoder: Create artificial 3rd byte for 2-byte VEX")
+Link: http://lkml.kernel.org/r/20160811154521.20469-1-dvlasenk@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/uprobes.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_u
+               *cursor &= 0xfe;
+       }
+       /*
+-       * Similar treatment for VEX3 prefix.
+-       * TODO: add XOP/EVEX treatment when insn decoder supports them
++       * Similar treatment for VEX3/EVEX prefix.
++       * TODO: add XOP treatment when insn decoder supports them
+        */
+-      if (insn->vex_prefix.nbytes == 3) {
++      if (insn->vex_prefix.nbytes >= 3) {
+               /*
+                * vex2:     c5    rvvvvLpp   (has no b bit)
+                * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+                * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+-               *   (evex will need setting of both b and x since
+-               *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+-               * Setting VEX3.b (setting because it has inverted meaning):
++               * Setting VEX3.b (setting because it has inverted meaning).
++               * Setting EVEX.x since (in non-SIB encoding) EVEX.x
++               * is the 4th bit of MODRM.rm, and needs the same treatment.
++               * For VEX3-encoded insns, VEX3.x value has no effect in
++               * non-SIB encoding, the change is superfluous but harmless.
+                */
+               cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+-              *cursor |= 0x20;
++              *cursor |= 0x60;
+       }
+       /*
+@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_u
+       reg = MODRM_REG(insn);  /* Fetch modrm.reg */
+       reg2 = 0xff;            /* Fetch vex.vvvv */
+-      if (insn->vex_prefix.nbytes == 2)
+-              reg2 = insn->vex_prefix.bytes[1];
+-      else if (insn->vex_prefix.nbytes == 3)
++      if (insn->vex_prefix.nbytes)
+               reg2 = insn->vex_prefix.bytes[2];
+       /*
+-       * TODO: add XOP, EXEV vvvv reading.
++       * TODO: add XOP vvvv reading.
+        *
+        * vex.vvvv field is in bits 6-3, bits are inverted.
+        * But in 32-bit mode, high-order bit may be ignored.
diff --git a/queue-4.4/x86-mm-disable-preemption-during-cr3-read-write.patch b/queue-4.4/x86-mm-disable-preemption-during-cr3-read-write.patch
new file mode 100644 (file)
index 0000000..a597ef4
--- /dev/null
@@ -0,0 +1,109 @@
+From 5cf0791da5c162ebc14b01eb01631cfa7ed4fa6e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 5 Aug 2016 15:37:39 +0200
+Subject: x86/mm: Disable preemption during CR3 read+write
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 5cf0791da5c162ebc14b01eb01631cfa7ed4fa6e upstream.
+
+There's a subtle preemption race on UP kernels:
+
+Usually current->mm (and therefore mm->pgd) stays the same during the
+lifetime of a task so it does not matter if a task gets preempted during
+the read and write of the CR3.
+
+But then, there is this scenario on x86-UP:
+
+TaskA is in do_exit() and exit_mm() sets current->mm = NULL followed by:
+
+ -> mmput()
+ -> exit_mmap()
+ -> tlb_finish_mmu()
+ -> tlb_flush_mmu()
+ -> tlb_flush_mmu_tlbonly()
+ -> tlb_flush()
+ -> flush_tlb_mm_range()
+ -> __flush_tlb_up()
+ -> __flush_tlb()
+ ->  __native_flush_tlb()
+
+At this point current->mm is NULL but current->active_mm still points to
+the "old" mm.
+
+Let's preempt taskA _after_ native_read_cr3() by taskB. TaskB has its
+own mm so CR3 has changed.
+
+Now preempt back to taskA. TaskA has no ->mm set so it borrows taskB's
+mm and so CR3 remains unchanged. Once taskA gets active it continues
+where it was interrupted and that means it writes its old CR3 value
+back. Everything is fine because userland won't need its memory
+anymore.
+
+Now the fun part:
+
+Let's preempt taskA one more time and get back to taskB. This
+time switch_mm() won't do a thing because oldmm (->active_mm)
+is the same as mm (as per context_switch()). So we remain
+with a bad CR3 / PGD and return to userland.
+
+The next thing that happens is handle_mm_fault() with an address for
+the execution of its code in userland. handle_mm_fault() realizes that
+it has a PTE with proper rights so it returns doing nothing. But the
+CPU looks at the wrong PGD and insists that something is wrong and
+faults again. And again. And one more time…
+
+This pagefault circle continues until the scheduler gets tired of it and
+puts another task on the CPU. It gets a little difficult if the task is a
+RT task with a high priority. The system will either freeze or it gets
+fixed by the software watchdog thread which usually runs at RT-max prio.
+But waiting for the watchdog will increase the latency of the RT task
+which is no good.
+
+Fix this by disabling preemption across the critical code section.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/1470404259-26290-1-git-send-email-bigeasy@linutronix.de
+[ Prettified the changelog. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -86,7 +86,14 @@ static inline void cr4_set_bits_and_upda
+ static inline void __native_flush_tlb(void)
+ {
++      /*
++       * If current->mm == NULL then we borrow a mm which may change during a
++       * task switch and therefore we must not be preempted while we write CR3
++       * back:
++       */
++      preempt_disable();
+       native_write_cr3(native_read_cr3());
++      preempt_enable();
+ }
+ static inline void __native_flush_tlb_global_irq_disabled(void)