4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Sep 2017 11:33:30 +0000 (13:33 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Sep 2017 11:33:30 +0000 (13:33 +0200)
added patches:
arc-re-enable-mmu-upon-machine-check-exception.patch
ftrace-fix-memleak-when-unregistering-dynamic-ops-when-tracing-disabled.patch
ftrace-fix-selftest-goto-location-on-error.patch
net-netfilter-nf_conntrack_core-fix-net_conntrack_lock.patch
pci-pciehp-report-power-fault-only-once-until-we-clear-it.patch
pci-shpchp-enable-bridge-bus-mastering-if-msi-is-enabled.patch
s390-mm-fix-local-tlb-flushing-vs.-detach-of-an-mm-address-space.patch
s390-mm-fix-race-on-mm-context.flush_mm.patch
tracing-add-barrier-to-trace_printk-buffer-nesting-modification.patch
tracing-apply-trace_clock-changes-to-instance-max-buffer.patch

queue-4.9/arc-re-enable-mmu-upon-machine-check-exception.patch [new file with mode: 0644]
queue-4.9/ftrace-fix-memleak-when-unregistering-dynamic-ops-when-tracing-disabled.patch [new file with mode: 0644]
queue-4.9/ftrace-fix-selftest-goto-location-on-error.patch [new file with mode: 0644]
queue-4.9/net-netfilter-nf_conntrack_core-fix-net_conntrack_lock.patch [new file with mode: 0644]
queue-4.9/pci-pciehp-report-power-fault-only-once-until-we-clear-it.patch [new file with mode: 0644]
queue-4.9/pci-shpchp-enable-bridge-bus-mastering-if-msi-is-enabled.patch [new file with mode: 0644]
queue-4.9/s390-mm-fix-local-tlb-flushing-vs.-detach-of-an-mm-address-space.patch [new file with mode: 0644]
queue-4.9/s390-mm-fix-race-on-mm-context.flush_mm.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/tracing-add-barrier-to-trace_printk-buffer-nesting-modification.patch [new file with mode: 0644]
queue-4.9/tracing-apply-trace_clock-changes-to-instance-max-buffer.patch [new file with mode: 0644]

diff --git a/queue-4.9/arc-re-enable-mmu-upon-machine-check-exception.patch b/queue-4.9/arc-re-enable-mmu-upon-machine-check-exception.patch
new file mode 100644 (file)
index 0000000..3fdb05b
--- /dev/null
@@ -0,0 +1,58 @@
+From 1ee55a8f7f6b7ca4c0c59e0b4b4e3584a085c2d3 Mon Sep 17 00:00:00 2001
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+Date: Fri, 1 Sep 2017 17:00:23 +0100
+Subject: ARC: Re-enable MMU upon Machine Check exception
+
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+
+commit 1ee55a8f7f6b7ca4c0c59e0b4b4e3584a085c2d3 upstream.
+
+I recently came upon a scenario where I would get a double fault
+machine check exception triggered by a kernel module.
+However the ensuing crash stacktrace (ksym lookup) was not working
+correctly.
+
+Turns out that machine check auto-disables MMU while modules are allocated
+in kernel vaddr space.
+
+This patch re-enables the MMU before starting to print the stacktrace,
+making stacktracing of modules work upon a fatal exception.
+
+Signed-off-by: Jose Abreu <joabreu@synopsys.com>
+Reviewed-by: Alexey Brodkin <abrodkin@synopsys.com>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+[vgupta: moved code into low level handler to avoid doing it in 2 places]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/kernel/entry.S |    6 ++++++
+ arch/arc/mm/tlb.c       |    3 ---
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/arc/kernel/entry.S
++++ b/arch/arc/kernel/entry.S
+@@ -92,6 +92,12 @@ ENTRY(EV_MachineCheck)
+       lr  r0, [efa]
+       mov r1, sp
++      ; hardware auto-disables MMU, re-enable it to allow kernel vaddr
++      ; access for say stack unwinding of modules for crash dumps
++      lr      r3, [ARC_REG_PID]
++      or      r3, r3, MMU_ENABLE
++      sr      r3, [ARC_REG_PID]
++
+       lsr     r3, r2, 8
+       bmsk    r3, r3, 7
+       brne    r3, ECR_C_MCHK_DUP_TLB, 1f
+--- a/arch/arc/mm/tlb.c
++++ b/arch/arc/mm/tlb.c
+@@ -896,9 +896,6 @@ void do_tlb_overlap_fault(unsigned long
+       local_irq_save(flags);
+-      /* re-enable the MMU */
+-      write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
+-
+       /* loop thru all sets of TLB */
+       for (set = 0; set < mmu->sets; set++) {
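
For reference, the three instructions added to EV_MachineCheck perform the
same read-modify-write of the MMU PID aux register that the removed C code in
do_tlb_overlap_fault() used to do. A rough C-level sketch of the equivalent
(reusing the kernel's existing ARC aux-register accessors) would be:

    /* Sketch only: C equivalent of the lr/or/sr sequence above.  ARC_REG_PID
     * also carries the MMU enable bit, so OR-ing MMU_ENABLE back in restores
     * kernel vaddr translation before the unwinder dereferences module
     * addresses for the crash stacktrace. */
    write_aux_reg(ARC_REG_PID, read_aux_reg(ARC_REG_PID) | MMU_ENABLE);

Doing this in the low level handler means the MMU is re-enabled on every
Machine Check path, not only the duplicate-TLB case handled by
do_tlb_overlap_fault().
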
diff --git a/queue-4.9/ftrace-fix-memleak-when-unregistering-dynamic-ops-when-tracing-disabled.patch b/queue-4.9/ftrace-fix-memleak-when-unregistering-dynamic-ops-when-tracing-disabled.patch
new file mode 100644 (file)
index 0000000..e0591ab
--- /dev/null
@@ -0,0 +1,75 @@
+From edb096e00724f02db5f6ec7900f3bbd465c6c76f Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 1 Sep 2017 12:18:28 -0400
+Subject: ftrace: Fix memleak when unregistering dynamic ops when tracing disabled
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit edb096e00724f02db5f6ec7900f3bbd465c6c76f upstream.
+
+If function tracing is disabled by the user via the function-trace option or
+the proc sysctl file, and a ftrace_ops that was allocated on the heap is
+unregistered, then the shutdown code exits out without doing the proper
+clean up. This was found via kmemleak and running the ftrace selftests, as
+one of the tests unregisters with function tracing disabled.
+
+ # cat kmemleak
+unreferenced object 0xffffffffa0020000 (size 4096):
+  comm "swapper/0", pid 1, jiffies 4294668889 (age 569.209s)
+  hex dump (first 32 bytes):
+    55 ff 74 24 10 55 48 89 e5 ff 74 24 18 55 48 89  U.t$.UH...t$.UH.
+    e5 48 81 ec a8 00 00 00 48 89 44 24 50 48 89 4c  .H......H.D$PH.L
+  backtrace:
+    [<ffffffff81d64665>] kmemleak_vmalloc+0x85/0xf0
+    [<ffffffff81355631>] __vmalloc_node_range+0x281/0x3e0
+    [<ffffffff8109697f>] module_alloc+0x4f/0x90
+    [<ffffffff81091170>] arch_ftrace_update_trampoline+0x160/0x420
+    [<ffffffff81249947>] ftrace_startup+0xe7/0x300
+    [<ffffffff81249bd2>] register_ftrace_function+0x72/0x90
+    [<ffffffff81263786>] trace_selftest_ops+0x204/0x397
+    [<ffffffff82bb8971>] trace_selftest_startup_function+0x394/0x624
+    [<ffffffff81263a75>] run_tracer_selftest+0x15c/0x1d7
+    [<ffffffff82bb83f1>] init_trace_selftests+0x75/0x192
+    [<ffffffff81002230>] do_one_initcall+0x90/0x1e2
+    [<ffffffff82b7d620>] kernel_init_freeable+0x350/0x3fe
+    [<ffffffff81d61ec3>] kernel_init+0x13/0x122
+    [<ffffffff81d72c6a>] ret_from_fork+0x2a/0x40
+    [<ffffffffffffffff>] 0xffffffffffffffff
+
+Fixes: 12cce594fa ("ftrace/x86: Allow !CONFIG_PREEMPT dynamic ops to use allocated trampolines")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ftrace.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -2747,13 +2747,14 @@ static int ftrace_shutdown(struct ftrace
+       if (!command || !ftrace_enabled) {
+               /*
+-               * If these are per_cpu ops, they still need their
+-               * per_cpu field freed. Since, function tracing is
++               * If these are dynamic or per_cpu ops, they still
++               * need their data freed. Since, function tracing is
+                * not currently active, we can just free them
+                * without synchronizing all CPUs.
+                */
+-              if (ops->flags & FTRACE_OPS_FL_PER_CPU)
+-                      per_cpu_ops_free(ops);
++              if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU))
++                      goto free_ops;
++
+               return 0;
+       }
+@@ -2808,6 +2809,7 @@ static int ftrace_shutdown(struct ftrace
+       if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) {
+               schedule_on_each_cpu(ftrace_sync);
++ free_ops:
+               arch_ftrace_trampoline_free(ops);
+               if (ops->flags & FTRACE_OPS_FL_PER_CPU)
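
Condensed, the shutdown path behaves roughly as in the following sketch after
this change (error handling and unrelated work omitted):

    /* Sketch of the fixed ftrace_shutdown() flow, not the full function */
    if (!command || !ftrace_enabled) {
            /* Tracing is off, so no CPU synchronization is needed, but
             * DYNAMIC (heap-allocated) and PER_CPU ops still own a
             * trampoline / per-cpu data that must be released. */
            if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU))
                    goto free_ops;
            return 0;
    }

    /* ... normal shutdown work when tracing is enabled ... */

    if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) {
            schedule_on_each_cpu(ftrace_sync);
     free_ops:
            arch_ftrace_trampoline_free(ops);
            if (ops->flags & FTRACE_OPS_FL_PER_CPU)
                    per_cpu_ops_free(ops);
    }

Jumping to free_ops from the disabled case reuses the existing freeing code
instead of duplicating it, which is what plugs the trampoline leak that
kmemleak reported.
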
diff --git a/queue-4.9/ftrace-fix-selftest-goto-location-on-error.patch b/queue-4.9/ftrace-fix-selftest-goto-location-on-error.patch
new file mode 100644 (file)
index 0000000..2947a0d
--- /dev/null
@@ -0,0 +1,32 @@
+From 46320a6acc4fb58f04bcf78c4c942cc43b20f986 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 1 Sep 2017 12:04:09 -0400
+Subject: ftrace: Fix selftest goto location on error
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 46320a6acc4fb58f04bcf78c4c942cc43b20f986 upstream.
+
+In the second iteration of trace_selftest_ops(), the error goto label is
+wrong in the case where trace_selftest_test_global_cnt is off. In the
+case of error, it leaks the dynamic ops that was allocated.
+
+Fixes: 95950c2e ("ftrace: Add self-tests for multiple function trace users")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_selftest.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_selftest.c
++++ b/kernel/trace/trace_selftest.c
+@@ -272,7 +272,7 @@ static int trace_selftest_ops(struct tra
+               goto out_free;
+       if (cnt > 1) {
+               if (trace_selftest_test_global_cnt == 0)
+-                      goto out;
++                      goto out_free;
+       }
+       if (trace_selftest_test_dyn_cnt == 0)
+               goto out_free;
diff --git a/queue-4.9/net-netfilter-nf_conntrack_core-fix-net_conntrack_lock.patch b/queue-4.9/net-netfilter-nf_conntrack_core-fix-net_conntrack_lock.patch
new file mode 100644 (file)
index 0000000..7efc49f
--- /dev/null
@@ -0,0 +1,116 @@
+From 3ef0c7a730de0bae03d86c19570af764fa3c4445 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Thu, 6 Jul 2017 20:45:59 +0200
+Subject: net/netfilter/nf_conntrack_core: Fix net_conntrack_lock()
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 3ef0c7a730de0bae03d86c19570af764fa3c4445 upstream.
+
+As we want to remove spin_unlock_wait() and replace it with explicit
+spin_lock()/spin_unlock() calls, we can use this to simplify the
+locking.
+
+In addition:
+- Reading nf_conntrack_locks_all needs ACQUIRE memory ordering.
+- The new code avoids the backwards loop.
+
+Only slightly tested, I did not manage to trigger calls to
+nf_conntrack_all_lock().
+
+V2: With improved comments, to clearly show how the barriers
+    pair.
+
+Fixes: b16c29191dc8 ("netfilter: nf_conntrack: use safer way to lock all buckets")
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Alan Stern <stern@rowland.harvard.edu>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Pablo Neira Ayuso <pablo@netfilter.org>
+Cc: netfilter-devel@vger.kernel.org
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conntrack_core.c |   52 +++++++++++++++++++++-----------------
+ 1 file changed, 29 insertions(+), 23 deletions(-)
+
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -95,19 +95,26 @@ static struct conntrack_gc_work conntrac
+ void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+ {
++      /* 1) Acquire the lock */
+       spin_lock(lock);
+-      while (unlikely(nf_conntrack_locks_all)) {
+-              spin_unlock(lock);
+-              /*
+-               * Order the 'nf_conntrack_locks_all' load vs. the
+-               * spin_unlock_wait() loads below, to ensure
+-               * that 'nf_conntrack_locks_all_lock' is indeed held:
+-               */
+-              smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+-              spin_unlock_wait(&nf_conntrack_locks_all_lock);
+-              spin_lock(lock);
+-      }
++      /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
++       * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
++       */
++      if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
++              return;
++
++      /* fast path failed, unlock */
++      spin_unlock(lock);
++
++      /* Slow path 1) get global lock */
++      spin_lock(&nf_conntrack_locks_all_lock);
++
++      /* Slow path 2) get the lock we want */
++      spin_lock(lock);
++
++      /* Slow path 3) release the global lock */
++      spin_unlock(&nf_conntrack_locks_all_lock);
+ }
+ EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+@@ -148,28 +155,27 @@ static void nf_conntrack_all_lock(void)
+       int i;
+       spin_lock(&nf_conntrack_locks_all_lock);
+-      nf_conntrack_locks_all = true;
+-      /*
+-       * Order the above store of 'nf_conntrack_locks_all' against
+-       * the spin_unlock_wait() loads below, such that if
+-       * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
+-       * we must observe nf_conntrack_locks[] held:
+-       */
+-      smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
++      nf_conntrack_locks_all = true;
+       for (i = 0; i < CONNTRACK_LOCKS; i++) {
+-              spin_unlock_wait(&nf_conntrack_locks[i]);
++              spin_lock(&nf_conntrack_locks[i]);
++
++              /* This spin_unlock provides the "release" to ensure that
++               * nf_conntrack_locks_all==true is visible to everyone that
++               * acquired spin_lock(&nf_conntrack_locks[]).
++               */
++              spin_unlock(&nf_conntrack_locks[i]);
+       }
+ }
+ static void nf_conntrack_all_unlock(void)
+ {
+-      /*
+-       * All prior stores must be complete before we clear
++      /* All prior stores must be complete before we clear
+        * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
+        * might observe the false value but not the entire
+-       * critical section:
++       * critical section.
++       * It pairs with the smp_load_acquire() in nf_conntrack_lock()
+        */
+       smp_store_release(&nf_conntrack_locks_all, false);
+       spin_unlock(&nf_conntrack_locks_all_lock);
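
The barrier pairing that replaces spin_unlock_wait() is easier to see with
both sides next to each other; a simplified sketch of the resulting code:

    /* Reader side: nf_conntrack_lock(), sketch */
    spin_lock(lock);
    /* ACQUIRE load; pairs with smp_store_release() in nf_conntrack_all_unlock() */
    if (likely(!smp_load_acquire(&nf_conntrack_locks_all)))
            return;                              /* fast path: no writer active */
    spin_unlock(lock);
    spin_lock(&nf_conntrack_locks_all_lock);     /* wait for the writer to finish */
    spin_lock(lock);                             /* retake our bucket lock */
    spin_unlock(&nf_conntrack_locks_all_lock);

    /* Writer side: nf_conntrack_all_lock(), sketch */
    spin_lock(&nf_conntrack_locks_all_lock);
    nf_conntrack_locks_all = true;
    for (i = 0; i < CONNTRACK_LOCKS; i++) {
            spin_lock(&nf_conntrack_locks[i]);
            /* this unlock is the release that makes locks_all == true visible
             * to whoever acquires nf_conntrack_locks[i] afterwards */
            spin_unlock(&nf_conntrack_locks[i]);
    }

The lock/unlock loop also guarantees that any nf_conntrack_lock() caller that
had already passed the fast-path check has dropped its bucket lock before
nf_conntrack_all_lock() returns.
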
diff --git a/queue-4.9/pci-pciehp-report-power-fault-only-once-until-we-clear-it.patch b/queue-4.9/pci-pciehp-report-power-fault-only-once-until-we-clear-it.patch
new file mode 100644 (file)
index 0000000..452d794
--- /dev/null
@@ -0,0 +1,49 @@
+From 7612b3b28c0b900dcbcdf5e9b9747cc20a1e2455 Mon Sep 17 00:00:00 2001
+From: Keith Busch <keith.busch@intel.com>
+Date: Tue, 1 Aug 2017 03:11:52 -0400
+Subject: PCI: pciehp: Report power fault only once until we clear it
+
+From: Keith Busch <keith.busch@intel.com>
+
+commit 7612b3b28c0b900dcbcdf5e9b9747cc20a1e2455 upstream.
+
+When a power fault occurs, the power controller sets Power Fault Detected
+in the Slot Status register, and pciehp_isr() queues an INT_POWER_FAULT
+event to handle it.
+
+It also clears Power Fault Detected, but since nothing has yet changed to
+correct the power fault, the power controller will likely set it again
+immediately, which may cause an infinite loop when pcie_isr() rechecks
+Slot Status.
+
+Fix that by masking off Power Fault Detected from new events if the driver
+hasn't seen the power fault clear from the previous handling attempt.
+
+Fixes: fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking for new ones")
+Signed-off-by: Keith Busch <keith.busch@intel.com>
+[bhelgaas: changelog, pull test out and add comment]
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pciehp_hpc.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/pci/hotplug/pciehp_hpc.c
++++ b/drivers/pci/hotplug/pciehp_hpc.c
+@@ -586,6 +586,14 @@ static irqreturn_t pciehp_isr(int irq, v
+       events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
+                          PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
+                          PCI_EXP_SLTSTA_DLLSC);
++
++      /*
++       * If we've already reported a power fault, don't report it again
++       * until we've done something to handle it.
++       */
++      if (ctrl->power_fault_detected)
++              events &= ~PCI_EXP_SLTSTA_PFD;
++
+       if (!events)
+               return IRQ_NONE;
diff --git a/queue-4.9/pci-shpchp-enable-bridge-bus-mastering-if-msi-is-enabled.patch b/queue-4.9/pci-shpchp-enable-bridge-bus-mastering-if-msi-is-enabled.patch
new file mode 100644 (file)
index 0000000..2c0ad98
--- /dev/null
@@ -0,0 +1,38 @@
+From 48b79a14505349a29b3e20f03619ada9b33c4b17 Mon Sep 17 00:00:00 2001
+From: Aleksandr Bezzubikov <zuban32s@gmail.com>
+Date: Tue, 18 Jul 2017 17:12:25 +0300
+Subject: PCI: shpchp: Enable bridge bus mastering if MSI is enabled
+
+From: Aleksandr Bezzubikov <zuban32s@gmail.com>
+
+commit 48b79a14505349a29b3e20f03619ada9b33c4b17 upstream.
+
+An SHPC may generate MSIs to notify software about slot or controller
+events (SHPC spec r1.0, sec 4.7).  A PCI device can only generate an MSI if
+it has bus mastering enabled.
+
+Enable bus mastering if the bridge contains an SHPC that uses MSI for event
+notifications.
+
+Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
+[bhelgaas: changelog]
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/shpchp_hpc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/pci/hotplug/shpchp_hpc.c
++++ b/drivers/pci/hotplug/shpchp_hpc.c
+@@ -1062,6 +1062,8 @@ int shpc_init(struct controller *ctrl, s
+               if (rc) {
+                       ctrl_info(ctrl, "Can't get msi for the hotplug controller\n");
+                       ctrl_info(ctrl, "Use INTx for the hotplug controller\n");
++              } else {
++                      pci_set_master(pdev);
+               }
+               rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED,
diff --git a/queue-4.9/s390-mm-fix-local-tlb-flushing-vs.-detach-of-an-mm-address-space.patch b/queue-4.9/s390-mm-fix-local-tlb-flushing-vs.-detach-of-an-mm-address-space.patch
new file mode 100644 (file)
index 0000000..7abf568
--- /dev/null
@@ -0,0 +1,112 @@
+From b3e5dc45fd1ec2aa1de6b80008f9295eb17e0659 Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Wed, 16 Aug 2017 14:10:01 +0200
+Subject: s390/mm: fix local TLB flushing vs. detach of an mm address space
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit b3e5dc45fd1ec2aa1de6b80008f9295eb17e0659 upstream.
+
+The local TLB flushing code keeps an additional mask in the mm.context,
+the cpu_attach_mask. At the time a global flush of an address space is
+done the cpu_attach_mask is copied to the mm_cpumask in order to avoid
+future global flushes in case the mm is used by a single CPU only after
+the flush.
+
+Trouble is that the reset of the mm_cpumask is racy against the detach
+of an mm address space by switch_mm. The current order is first the
+global TLB flush and then the copy of the cpu_attach_mask to the
+mm_cpumask. The order needs to be the other way around.
+
+Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/mmu_context.h |    4 ++--
+ arch/s390/include/asm/tlbflush.h    |   26 +++++---------------------
+ 2 files changed, 7 insertions(+), 23 deletions(-)
+
+--- a/arch/s390/include/asm/mmu_context.h
++++ b/arch/s390/include/asm/mmu_context.h
+@@ -93,7 +93,6 @@ static inline void switch_mm(struct mm_s
+       if (prev == next)
+               return;
+       cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+-      cpumask_set_cpu(cpu, mm_cpumask(next));
+       /* Clear old ASCE by loading the kernel ASCE. */
+       __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+       __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+@@ -111,7 +110,7 @@ static inline void finish_arch_post_lock
+               preempt_disable();
+               while (atomic_read(&mm->context.flush_count))
+                       cpu_relax();
+-
++              cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+               if (mm->context.flush_mm)
+                       __tlb_flush_mm(mm);
+               preempt_enable();
+@@ -126,6 +125,7 @@ static inline void activate_mm(struct mm
+                                struct mm_struct *next)
+ {
+       switch_mm(prev, next, current);
++      cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+       set_user_asce(next);
+ }
+--- a/arch/s390/include/asm/tlbflush.h
++++ b/arch/s390/include/asm/tlbflush.h
+@@ -43,23 +43,6 @@ static inline void __tlb_flush_global(vo
+  * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+  * this implicates multiple ASCEs!).
+  */
+-static inline void __tlb_flush_full(struct mm_struct *mm)
+-{
+-      preempt_disable();
+-      atomic_inc(&mm->context.flush_count);
+-      if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+-              /* Local TLB flush */
+-              __tlb_flush_local();
+-      } else {
+-              /* Global TLB flush */
+-              __tlb_flush_global();
+-              /* Reset TLB flush mask */
+-              cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
+-      }
+-      atomic_dec(&mm->context.flush_count);
+-      preempt_enable();
+-}
+-
+ static inline void __tlb_flush_mm(struct mm_struct *mm)
+ {
+       unsigned long gmap_asce;
+@@ -71,16 +54,18 @@ static inline void __tlb_flush_mm(struct
+        */
+       preempt_disable();
+       atomic_inc(&mm->context.flush_count);
++      /* Reset TLB flush mask */
++      cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
++      barrier();
+       gmap_asce = READ_ONCE(mm->context.gmap_asce);
+       if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+               if (gmap_asce)
+                       __tlb_flush_idte(gmap_asce);
+               __tlb_flush_idte(mm->context.asce);
+       } else {
+-              __tlb_flush_full(mm);
++              /* Global TLB flush */
++              __tlb_flush_global();
+       }
+-      /* Reset TLB flush mask */
+-      cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
+       atomic_dec(&mm->context.flush_count);
+       preempt_enable();
+ }
+@@ -94,7 +79,6 @@ static inline void __tlb_flush_kernel(vo
+ }
+ #else
+ #define __tlb_flush_global()  __tlb_flush_local()
+-#define __tlb_flush_full(mm)  __tlb_flush_local()
+ /*
+  * Flush TLB entries for a specific ASCE on all CPUs.
diff --git a/queue-4.9/s390-mm-fix-race-on-mm-context.flush_mm.patch b/queue-4.9/s390-mm-fix-race-on-mm-context.flush_mm.patch
new file mode 100644 (file)
index 0000000..90b41e2
--- /dev/null
@@ -0,0 +1,86 @@
+From 60f07c8ec5fae06c23e9fd7bab67dabce92b3414 Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Thu, 17 Aug 2017 08:15:16 +0200
+Subject: s390/mm: fix race on mm->context.flush_mm
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit 60f07c8ec5fae06c23e9fd7bab67dabce92b3414 upstream.
+
+The order in __tlb_flush_mm_lazy is to flush TLB first and then clear
+the mm->context.flush_mm bit. This can lead to missed flushes as the
+bit can be set at any time; the order needs to be the other way around.
+
+But this leads to a different race, __tlb_flush_mm_lazy may be called
+on two CPUs concurrently. If mm->context.flush_mm is cleared first then
+another CPU can bypass __tlb_flush_mm_lazy although the first CPU has
+not done the flush yet. In a virtualized environment the time until the
+flush is finally completed can be arbitrarily long.
+
+Add a spinlock to serialize __tlb_flush_mm_lazy and use the function
+in finish_arch_post_lock_switch as well.
+
+Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/mmu.h         |    2 ++
+ arch/s390/include/asm/mmu_context.h |    4 ++--
+ arch/s390/include/asm/tlbflush.h    |    4 +++-
+ 3 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/include/asm/mmu.h
++++ b/arch/s390/include/asm/mmu.h
+@@ -5,6 +5,7 @@
+ #include <linux/errno.h>
+ typedef struct {
++      spinlock_t lock;
+       cpumask_t cpu_attach_mask;
+       atomic_t flush_count;
+       unsigned int flush_mm;
+@@ -25,6 +26,7 @@ typedef struct {
+ } mm_context_t;
+ #define INIT_MM_CONTEXT(name)                                            \
++      .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock),           \
+       .context.pgtable_lock =                                            \
+                       __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
+       .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+--- a/arch/s390/include/asm/mmu_context.h
++++ b/arch/s390/include/asm/mmu_context.h
+@@ -15,6 +15,7 @@
+ static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+ {
++      spin_lock_init(&mm->context.lock);
+       spin_lock_init(&mm->context.pgtable_lock);
+       INIT_LIST_HEAD(&mm->context.pgtable_list);
+       spin_lock_init(&mm->context.gmap_lock);
+@@ -111,8 +112,7 @@ static inline void finish_arch_post_lock
+               while (atomic_read(&mm->context.flush_count))
+                       cpu_relax();
+               cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+-              if (mm->context.flush_mm)
+-                      __tlb_flush_mm(mm);
++              __tlb_flush_mm_lazy(mm);
+               preempt_enable();
+       }
+       set_fs(current->thread.mm_segment);
+--- a/arch/s390/include/asm/tlbflush.h
++++ b/arch/s390/include/asm/tlbflush.h
+@@ -96,10 +96,12 @@ static inline void __tlb_flush_kernel(vo
+ static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
+ {
++      spin_lock(&mm->context.lock);
+       if (mm->context.flush_mm) {
+-              __tlb_flush_mm(mm);
+               mm->context.flush_mm = 0;
++              __tlb_flush_mm(mm);
+       }
++      spin_unlock(&mm->context.lock);
+ }
+ /*
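
Taken together, the hunks turn the lazy flush into a "clear the flag, then
flush, all under a lock" sequence; the resulting function reads roughly like
this sketch:

    static inline void __tlb_flush_mm_lazy(struct mm_struct *mm)
    {
            spin_lock(&mm->context.lock);   /* serialize concurrent lazy flushers */
            if (mm->context.flush_mm) {
                    /* Clear before flushing: a flush_mm bit set while the
                     * flush runs stays set and is handled by the next caller. */
                    mm->context.flush_mm = 0;
                    __tlb_flush_mm(mm);
            }
            spin_unlock(&mm->context.lock);
    }

Because a second CPU now spins on mm->context.lock until the first CPU's
flush has completed, it can no longer return from __tlb_flush_mm_lazy() while
the flush it depends on is still pending.
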
diff --git a/queue-4.9/series b/queue-4.9/series
index 8a09b971e529eed25f00a839084b4fda005d8863..b24c6a6c31c048d246ef6d9222c93ee549ca3de7 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -51,3 +51,13 @@ scsi-sg-factor-out-sg_fill_request_table.patch
 scsi-sg-fixup-infoleak-when-using-sg_get_request_table.patch
 scsi-qla2xxx-correction-to-vha-vref_count-timeout.patch
 scsi-qla2xxx-fix-an-integer-overflow-in-sysfs-code.patch
+ftrace-fix-selftest-goto-location-on-error.patch
+ftrace-fix-memleak-when-unregistering-dynamic-ops-when-tracing-disabled.patch
+tracing-add-barrier-to-trace_printk-buffer-nesting-modification.patch
+tracing-apply-trace_clock-changes-to-instance-max-buffer.patch
+arc-re-enable-mmu-upon-machine-check-exception.patch
+pci-shpchp-enable-bridge-bus-mastering-if-msi-is-enabled.patch
+pci-pciehp-report-power-fault-only-once-until-we-clear-it.patch
+net-netfilter-nf_conntrack_core-fix-net_conntrack_lock.patch
+s390-mm-fix-local-tlb-flushing-vs.-detach-of-an-mm-address-space.patch
+s390-mm-fix-race-on-mm-context.flush_mm.patch
diff --git a/queue-4.9/tracing-add-barrier-to-trace_printk-buffer-nesting-modification.patch b/queue-4.9/tracing-add-barrier-to-trace_printk-buffer-nesting-modification.patch
new file mode 100644 (file)
index 0000000..d61d957
--- /dev/null
@@ -0,0 +1,56 @@
+From 3d9622c12c8873911f4cc0ccdabd0362c2fca06b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 5 Sep 2017 11:32:01 -0400
+Subject: tracing: Add barrier to trace_printk() buffer nesting modification
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 3d9622c12c8873911f4cc0ccdabd0362c2fca06b upstream.
+
+trace_printk() uses 4 buffers, one for each context (normal, softirq, irq
+and NMI), such that it does not need to worry about one context preempting
+the other. There's a nesting counter that gets incremented to figure out
+which buffer to use. If the context gets preempted by another context which
+calls trace_printk() it will increment the counter and use the next buffer,
+and restore the counter when it is finished.
+
+The problem is that gcc may optimize the modification of the buffer nesting
+counter and it may not be incremented in memory before the buffer is used.
+If this happens, and the context gets interrupted by another context, it
+could pick the same buffer and corrupt the one that is being used.
+
+Compiler barriers need to be added after the nesting variable is incremented
+and before it is decremented to prevent usage of the context buffers by more
+than one context at the same time.
+
+Cc: Andy Lutomirski <luto@kernel.org>
+Fixes: e2ace00117 ("tracing: Choose static tp_printk buffer by explicit nesting count")
+Hat-tip-to: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2369,11 +2369,17 @@ static char *get_trace_buf(void)
+       if (!buffer || buffer->nesting >= 4)
+               return NULL;
+-      return &buffer->buffer[buffer->nesting++][0];
++      buffer->nesting++;
++
++      /* Interrupts must see nesting incremented before we use the buffer */
++      barrier();
++      return &buffer->buffer[buffer->nesting][0];
+ }
+ static void put_trace_buf(void)
+ {
++      /* Don't let the decrement of nesting leak before this */
++      barrier();
+       this_cpu_dec(trace_percpu_buffer->nesting);
+ }
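
The race here is with interrupts on the same CPU rather than with other CPUs,
which is why a plain compiler barrier() is enough and no smp_mb() is needed.
A minimal, self-contained illustration of the pattern (hypothetical names, not
the kernel code):

    #define barrier() asm volatile("" ::: "memory")   /* compiler barrier */

    static int nesting;                /* per-CPU in the real implementation */
    static char bufs[4][256];

    static char *grab_buf(void)
    {
            if (nesting >= 4)
                    return NULL;
            nesting++;
            /* Without the barrier the compiler may delay the store to
             * 'nesting'; an interrupt taken before the store lands would
             * compute the same slot and corrupt the buffer in use. */
            barrier();
            return bufs[nesting - 1];
    }

    static void release_buf(void)
    {
            /* keep the decrement from being reordered before the caller's
             * last use of the buffer */
            barrier();
            nesting--;
    }
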
diff --git a/queue-4.9/tracing-apply-trace_clock-changes-to-instance-max-buffer.patch b/queue-4.9/tracing-apply-trace_clock-changes-to-instance-max-buffer.patch
new file mode 100644 (file)
index 0000000..e941ac2
--- /dev/null
@@ -0,0 +1,38 @@
+From 170b3b1050e28d1ba0700e262f0899ffa4fccc52 Mon Sep 17 00:00:00 2001
+From: Baohong Liu <baohong.liu@intel.com>
+Date: Tue, 5 Sep 2017 16:57:19 -0500
+Subject: tracing: Apply trace_clock changes to instance max buffer
+
+From: Baohong Liu <baohong.liu@intel.com>
+
+commit 170b3b1050e28d1ba0700e262f0899ffa4fccc52 upstream.
+
+Currently trace_clock timestamps are applied to both regular and max
+buffers only for global trace. For instance trace, trace_clock
+timestamps are applied only to regular buffer. But, regular and max
+buffers can be swapped, for example, following a snapshot. So, for
+instance trace, bad timestamps can be seen following a snapshot.
+Let's apply trace_clock timestamps to instance max buffer as well.
+
+Link: http://lkml.kernel.org/r/ebdb168d0be042dcdf51f81e696b17fabe3609c1.1504642143.git.tom.zanussi@linux.intel.com
+
+Fixes: 277ba0446 ("tracing: Add interface to allow multiple trace buffers")
+Signed-off-by: Baohong Liu <baohong.liu@intel.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5664,7 +5664,7 @@ static int tracing_set_clock(struct trac
+       tracing_reset_online_cpus(&tr->trace_buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+-      if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
++      if (tr->max_buffer.buffer)
+               ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
+       tracing_reset_online_cpus(&tr->max_buffer);
+ #endif