4.8-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)
diff --git a/queue-4.8/device-dax-fix-private-mapping-restriction-permit-read-only.patch b/queue-4.8/device-dax-fix-private-mapping-restriction-permit-read-only.patch

new file mode 100644 (file)

index 0000000..c5697b9
--- /dev/null
+++ b/queue-4.8/device-dax-fix-private-mapping-restriction-permit-read-only.patch
@@ -0,0 +1,41 @@
+From 325896ffdf90f7cbd59fb873b7ba20d60d1ddf3c Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 6 Dec 2016 17:03:35 -0800
+Subject: device-dax: fix private mapping restriction, permit read-only
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 325896ffdf90f7cbd59fb873b7ba20d60d1ddf3c upstream.
+
+Hugh notes in response to commit 4cb19355ea19 "device-dax: fail all
+private mapping attempts":
+
+  "I think that is more restrictive than you intended: haven't tried, but I
+  believe it rejects a PROT_READ, MAP_SHARED, O_RDONLY fd mmap, leaving no
+  way to mmap /dev/dax without write permission to it."
+
+Indeed it does restrict read-only mappings, switch to checking
+VM_MAYSHARE, not VM_SHARED.
+
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Pawel Lebioda <pawel.lebioda@intel.com>
+Fixes: 4cb19355ea19 ("device-dax: fail all private mapping attempts")
+Reported-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dax/dax.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/dax/dax.c
++++ b/drivers/dax/dax.c
+@@ -324,7 +324,7 @@ static int check_vma(struct dax_dev *dax
+               return -ENXIO;
+ 
+       /* prevent private mappings from being established */
+-      if ((vma->vm_flags & VM_SHARED) != VM_SHARED) {
++      if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+               dev_info(dev, "%s: %s: fail, attempted private mapping\n",
+                               current->comm, func);
+               return -EINVAL;
diff --git a/queue-4.8/locking-rtmutex-prevent-dequeue-vs.-unlock-race.patch b/queue-4.8/locking-rtmutex-prevent-dequeue-vs.-unlock-race.patch

new file mode 100644 (file)

index 0000000..49be2fb
--- /dev/null
+++ b/queue-4.8/locking-rtmutex-prevent-dequeue-vs.-unlock-race.patch
@@ -0,0 +1,181 @@
+From dbb26055defd03d59f678cb5f2c992abe05b064a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 30 Nov 2016 21:04:41 +0000
+Subject: locking/rtmutex: Prevent dequeue vs. unlock race
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit dbb26055defd03d59f678cb5f2c992abe05b064a upstream.
+
+David reported a futex/rtmutex state corruption. It's caused by the
+following problem:
+
+CPU0           CPU1            CPU2
+
+l->owner=T1
+               rt_mutex_lock(l)
+               lock(l->wait_lock)
+               l->owner = T1 | HAS_WAITERS;
+               enqueue(T2)
+               boost()
+                 unlock(l->wait_lock)
+               schedule()
+
+                               rt_mutex_lock(l)
+                               lock(l->wait_lock)
+                               l->owner = T1 | HAS_WAITERS;
+                               enqueue(T3)
+                               boost()
+                                 unlock(l->wait_lock)
+                               schedule()
+               signal(->T2)    signal(->T3)
+               lock(l->wait_lock)
+               dequeue(T2)
+               deboost()
+                 unlock(l->wait_lock)
+                               lock(l->wait_lock)
+                               dequeue(T3)
+                                 ===> wait list is now empty
+                               deboost()
+                                unlock(l->wait_lock)
+               lock(l->wait_lock)
+               fixup_rt_mutex_waiters()
+                 if (wait_list_empty(l)) {
+                   owner = l->owner & ~HAS_WAITERS;
+                   l->owner = owner
+                    ==> l->owner = T1
+                 }
+
+                               lock(l->wait_lock)
+rt_mutex_unlock(l)             fixup_rt_mutex_waiters()
+                                 if (wait_list_empty(l)) {
+                                   owner = l->owner & ~HAS_WAITERS;
+cmpxchg(l->owner, T1, NULL)
+ ===> Success (l->owner = NULL)
+                                   l->owner = owner
+                                    ==> l->owner = T1
+                                 }
+
+That means the problem is caused by fixup_rt_mutex_waiters() which does the
+RMW to clear the waiters bit unconditionally when there are no waiters in
+the rtmutex's rbtree.
+
+This can be fatal: A concurrent unlock can release the rtmutex in the
+fastpath because the waiters bit is not set. If the cmpxchg() gets in the
+middle of the RMW operation then the previous owner, which just unlocked
+the rtmutex, is set as the owner again when the write takes place after the
+successful cmpxchg().
+
+The solution is rather trivial: verify that the owner member of the rtmutex
+has the waiters bit set before clearing it. This does not require a
+cmpxchg() or other atomic operations because the waiters bit can only be
+set and cleared with the rtmutex wait_lock held. It's also safe against the
+fast path unlock attempt. The unlock attempt via cmpxchg() will either see
+the bit set and take the slowpath or see the bit cleared and release it
+atomically in the fastpath.
+
+It's remarkable that the test program provided by David triggers on ARM64
+and MIPS64 really quickly, but it refuses to reproduce on x86-64, while the
+problem exists there as well. That refusal might explain why this was not
+discovered earlier despite the bug existing from day one of the rtmutex
+implementation more than 10 years ago.
+
+Thanks to David for meticulously instrumenting the code and providing the
+information which allowed us to decode this subtle problem.
+
+Reported-by: David Daney <ddaney@caviumnetworks.com>
+Tested-by: David Daney <david.daney@cavium.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sebastian Siewior <bigeasy@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core")
+Link: http://lkml.kernel.org/r/20161130210030.351136722@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/locking/rtmutex.c |   68 +++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 66 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiter
+ 
+ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
+ {
+-      if (!rt_mutex_has_waiters(lock))
+-              clear_rt_mutex_waiters(lock);
++      unsigned long owner, *p = (unsigned long *) &lock->owner;
++
++      if (rt_mutex_has_waiters(lock))
++              return;
++
++      /*
++       * The rbtree has no waiters enqueued, now make sure that the
++       * lock->owner still has the waiters bit set, otherwise the
++       * following can happen:
++       *
++       * CPU 0        CPU 1           CPU2
++       * l->owner=T1
++       *              rt_mutex_lock(l)
++       *              lock(l->lock)
++       *              l->owner = T1 | HAS_WAITERS;
++       *              enqueue(T2)
++       *              boost()
++       *                unlock(l->lock)
++       *              block()
++       *
++       *                              rt_mutex_lock(l)
++       *                              lock(l->lock)
++       *                              l->owner = T1 | HAS_WAITERS;
++       *                              enqueue(T3)
++       *                              boost()
++       *                                unlock(l->lock)
++       *                              block()
++       *              signal(->T2)    signal(->T3)
++       *              lock(l->lock)
++       *              dequeue(T2)
++       *              deboost()
++       *                unlock(l->lock)
++       *                              lock(l->lock)
++       *                              dequeue(T3)
++       *                               ==> wait list is empty
++       *                              deboost()
++       *                               unlock(l->lock)
++       *              lock(l->lock)
++       *              fixup_rt_mutex_waiters()
++       *                if (wait_list_empty(l) {
++       *                  l->owner = owner
++       *                  owner = l->owner & ~HAS_WAITERS;
++       *                    ==> l->owner = T1
++       *                }
++       *                              lock(l->lock)
++       * rt_mutex_unlock(l)           fixup_rt_mutex_waiters()
++       *                                if (wait_list_empty(l) {
++       *                                  owner = l->owner & ~HAS_WAITERS;
++       * cmpxchg(l->owner, T1, NULL)
++       *  ===> Success (l->owner = NULL)
++       *
++       *                                  l->owner = owner
++       *                                    ==> l->owner = T1
++       *                                }
++       *
++       * With the check for the waiter bit in place T3 on CPU2 will not
++       * overwrite. All tasks fiddling with the waiters bit are
++       * serialized by l->lock, so nothing else can modify the waiters
++       * bit. If the bit is set then nothing can change l->owner either
++       * so the simple RMW is safe. The cmpxchg() will simply fail if it
++       * happens in the middle of the RMW because the waiters bit is
++       * still set.
++       */
++      owner = READ_ONCE(*p);
++      if (owner & RT_MUTEX_HAS_WAITERS)
++              WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ }
+ 
+ /*
diff --git a/queue-4.8/locking-rtmutex-use-read_once-in-rt_mutex_owner.patch b/queue-4.8/locking-rtmutex-use-read_once-in-rt_mutex_owner.patch

new file mode 100644 (file)

index 0000000..46ae188
--- /dev/null
+++ b/queue-4.8/locking-rtmutex-use-read_once-in-rt_mutex_owner.patch
@@ -0,0 +1,49 @@
+From 1be5d4fa0af34fb7bafa205aeb59f5c7cc7a089d Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 30 Nov 2016 21:04:42 +0000
+Subject: locking/rtmutex: Use READ_ONCE() in rt_mutex_owner()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 1be5d4fa0af34fb7bafa205aeb59f5c7cc7a089d upstream.
+
+While debugging the rtmutex unlock vs. dequeue race Will suggested using
+READ_ONCE() in rt_mutex_owner() as it might race against the
+cmpxchg_release() in unlock_rt_mutex_safe().
+
+Will: "It's a minor thing which will most likely not matter in practice"
+
+Careful search did not unearth an actual problem in today's code, but it's
+better to be safe than surprised.
+
+Suggested-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: David Daney <ddaney@caviumnetworks.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sebastian Siewior <bigeasy@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/20161130210030.431379999@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/locking/rtmutex_common.h |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p
+ 
+ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
+ {
+-      return (struct task_struct *)
+-              ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
++      unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
++
++      return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
+ }
+ 
+ /*
diff --git a/queue-4.8/parisc-fix-tlb-related-boot-crash-on-smp-machines.patch b/queue-4.8/parisc-fix-tlb-related-boot-crash-on-smp-machines.patch

new file mode 100644 (file)

index 0000000..6eacba9
--- /dev/null
+++ b/queue-4.8/parisc-fix-tlb-related-boot-crash-on-smp-machines.patch
@@ -0,0 +1,66 @@
+From 24d0492b7d5d321a9c5846c8c974eba9823ffaa0 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Thu, 8 Dec 2016 21:00:46 +0100
+Subject: parisc: Fix TLB related boot crash on SMP machines
+
+From: Helge Deller <deller@gmx.de>
+
+commit 24d0492b7d5d321a9c5846c8c974eba9823ffaa0 upstream.
+
+At bootup we run measurements to calculate the best threshold for when we
+should be using full TLB flushes instead of just flushing a specific amount of
+TLB entries.  This performance test is run over the kernel text segment.
+
+But running this TLB performance test on the kernel text segment turned out to
+crash some SMP machines when the kernel text pages were mapped as huge pages.
+
+To avoid those crashes this patch simply skips this test on some SMP machines
+and calculates an optimal threshold based on the maximum number of available
+TLB entries and number of online CPUs.
+
+On a technical side, this seems to happen:
+The TLB measurement code uses flush_tlb_kernel_range() to flush specific TLB
+entries with a page size of 4k (pdtlb 0(sr1,addr)). On UP systems this purge
+instruction seems to work without problems even if the pages were mapped as
+huge pages.  But on SMP systems the TLB purge instruction is broadcast to
+other CPUs. Those CPUs then crash the machine because the page size is not as
+expected.  C8000 machines with PA8800/PA8900 CPUs were not affected by this
+problem, because the required cache coherency prohibits the use of huge pages
+at all.  Sadly I didn't find any documentation about this behaviour, so this
+finding is purely based on testing with physical SMP machines (A500-44 and
+J5000, both were 2-way boxes).
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/cache.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/parisc/kernel/cache.c
++++ b/arch/parisc/kernel/cache.c
+@@ -393,6 +393,15 @@ void __init parisc_setup_cache_timing(vo
+ 
+       /* calculate TLB flush threshold */
+ 
++      /* On SMP machines, skip the TLB measure of kernel text which
++       * has been mapped as huge pages. */
++      if (num_online_cpus() > 1 && !parisc_requires_coherency()) {
++              threshold = max(cache_info.it_size, cache_info.dt_size);
++              threshold *= PAGE_SIZE;
++              threshold /= num_online_cpus();
++              goto set_tlb_threshold;
++      }
++
+       alltime = mfctl(16);
+       flush_tlb_all();
+       alltime = mfctl(16) - alltime;
+@@ -411,6 +420,8 @@ void __init parisc_setup_cache_timing(vo
+               alltime, size, rangetime);
+ 
+       threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime);
++
++set_tlb_threshold:
+       if (threshold)
+               parisc_tlb_flush_threshold = threshold;
+       printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
diff --git a/queue-4.8/parisc-purge-tlb-before-setting-pte.patch b/queue-4.8/parisc-purge-tlb-before-setting-pte.patch

new file mode 100644 (file)

index 0000000..b1794f2
--- /dev/null
+++ b/queue-4.8/parisc-purge-tlb-before-setting-pte.patch
@@ -0,0 +1,74 @@
+From c78e710c1c9fbeff43dddc0aa3d0ff458e70b0cc Mon Sep 17 00:00:00 2001
+From: John David Anglin <dave.anglin@bell.net>
+Date: Tue, 6 Dec 2016 21:47:04 -0500
+Subject: parisc: Purge TLB before setting PTE
+
+From: John David Anglin <dave.anglin@bell.net>
+
+commit c78e710c1c9fbeff43dddc0aa3d0ff458e70b0cc upstream.
+
+The attached change interchanges the order of purging the TLB and
+setting the corresponding page table entry.  TLB purges are strongly
+ordered.  It occurred to me one night that setting the PTE first might
+have subtle ordering issues on SMP machines and cause random memory
+corruption.
+
+A TLB lock guards the insertion of user TLB entries.  So after the TLB
+is purged, a new entry can't be inserted until the lock is released.
+This ensures that the new PTE value is used when the lock is released.
+
+Since making this change, no random segmentation faults have been
+observed on the Debian hppa buildd servers.
+
+Signed-off-by: John David Anglin  <dave.anglin@bell.net>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/include/asm/pgtable.h |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/parisc/include/asm/pgtable.h
++++ b/arch/parisc/include/asm/pgtable.h
+@@ -65,9 +65,9 @@ static inline void purge_tlb_entries(str
+               unsigned long flags;                            \
+               spin_lock_irqsave(&pa_tlb_lock, flags);         \
+               old_pte = *ptep;                                \
+-              set_pte(ptep, pteval);                          \
+               if (pte_inserted(old_pte))                      \
+                       purge_tlb_entries(mm, addr);            \
++              set_pte(ptep, pteval);                          \
+               spin_unlock_irqrestore(&pa_tlb_lock, flags);    \
+       } while (0)
+ 
+@@ -478,8 +478,8 @@ static inline int ptep_test_and_clear_yo
+               spin_unlock_irqrestore(&pa_tlb_lock, flags);
+               return 0;
+       }
+-      set_pte(ptep, pte_mkold(pte));
+       purge_tlb_entries(vma->vm_mm, addr);
++      set_pte(ptep, pte_mkold(pte));
+       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+       return 1;
+ }
+@@ -492,9 +492,9 @@ static inline pte_t ptep_get_and_clear(s
+ 
+       spin_lock_irqsave(&pa_tlb_lock, flags);
+       old_pte = *ptep;
+-      set_pte(ptep, __pte(0));
+       if (pte_inserted(old_pte))
+               purge_tlb_entries(mm, addr);
++      set_pte(ptep, __pte(0));
+       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ 
+       return old_pte;
+@@ -504,8 +504,8 @@ static inline void ptep_set_wrprotect(st
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&pa_tlb_lock, flags);
+-      set_pte(ptep, pte_wrprotect(*ptep));
+       purge_tlb_entries(mm, addr);
++      set_pte(ptep, pte_wrprotect(*ptep));
+       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ }
+ 
diff --git a/queue-4.8/parisc-remove-unnecessary-tlb-purges-from-flush_dcache_page_asm-and-flush_icache_page_asm.patch b/queue-4.8/parisc-remove-unnecessary-tlb-purges-from-flush_dcache_page_asm-and-flush_icache_page_asm.patch

new file mode 100644 (file)

index 0000000..63b3efc
--- /dev/null
+++ b/queue-4.8/parisc-remove-unnecessary-tlb-purges-from-flush_dcache_page_asm-and-flush_icache_page_asm.patch
@@ -0,0 +1,71 @@
+From febe42964fe182281859b3d43d844bb25ca49367 Mon Sep 17 00:00:00 2001
+From: John David Anglin <dave.anglin@bell.net>
+Date: Tue, 6 Dec 2016 22:02:01 -0500
+Subject: parisc: Remove unnecessary TLB purges from flush_dcache_page_asm and flush_icache_page_asm
+
+From: John David Anglin <dave.anglin@bell.net>
+
+commit febe42964fe182281859b3d43d844bb25ca49367 upstream.
+
+We have four routines in pacache.S that use temporary alias pages:
+copy_user_page_asm(), clear_user_page_asm(), flush_dcache_page_asm() and
+flush_icache_page_asm().  copy_user_page_asm() and clear_user_page_asm()
+don't purge the TLB entry used for the operation.
+flush_dcache_page_asm() and flush_icache_page_asm() do purge the entry.
+
+Presumably, this was thought to optimize TLB use.  However, the
+operation is quite heavy weight on PA 1.X processors as we need to take
+the TLB lock and a TLB broadcast is sent to all processors.
+
+This patch removes the purges from flush_dcache_page_asm() and
+flush_icache_page_asm().
+
+Signed-off-by: John David Anglin  <dave.anglin@bell.net>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/pacache.S |   22 +---------------------
+ 1 file changed, 1 insertion(+), 21 deletions(-)
+
+--- a/arch/parisc/kernel/pacache.S
++++ b/arch/parisc/kernel/pacache.S
+@@ -886,19 +886,10 @@ ENTRY(flush_dcache_page_asm)
+       fdc,m           r31(%r28)
+       fdc,m           r31(%r28)
+       fdc,m           r31(%r28)
+-      cmpb,COND(<<)           %r28, %r25,1b
++      cmpb,COND(<<)   %r28, %r25,1b
+       fdc,m           r31(%r28)
+ 
+       sync
+-
+-#ifdef CONFIG_PA20
+-      pdtlb,l         %r0(%r25)
+-#else
+-      tlb_lock        %r20,%r21,%r22
+-      pdtlb           %r0(%r25)
+-      tlb_unlock      %r20,%r21,%r22
+-#endif
+-
+       bv              %r0(%r2)
+       nop
+       .exit
+@@ -973,17 +964,6 @@ ENTRY(flush_icache_page_asm)
+       fic,m           %r31(%sr4,%r28)
+ 
+       sync
+-
+-#ifdef CONFIG_PA20
+-      pdtlb,l         %r0(%r28)
+-      pitlb,l         %r0(%sr4,%r25)
+-#else
+-      tlb_lock        %r20,%r21,%r22
+-      pdtlb           %r0(%r28)
+-      pitlb           %r0(%sr4,%r25)
+-      tlb_unlock      %r20,%r21,%r22
+-#endif
+-
+       bv              %r0(%r2)
+       nop
+       .exit
diff --git a/queue-4.8/perf-x86-fix-full-width-counter-counter-overflow.patch b/queue-4.8/perf-x86-fix-full-width-counter-counter-overflow.patch

new file mode 100644 (file)

index 0000000..62abd6b
--- /dev/null
+++ b/queue-4.8/perf-x86-fix-full-width-counter-counter-overflow.patch
@@ -0,0 +1,72 @@
+From 7f612a7f0bc13a2361a152862435b7941156b6af Mon Sep 17 00:00:00 2001
+From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Date: Tue, 29 Nov 2016 20:33:28 +0000
+Subject: perf/x86: Fix full width counter, counter overflow
+
+From: Peter Zijlstra (Intel) <peterz@infradead.org>
+
+commit 7f612a7f0bc13a2361a152862435b7941156b6af upstream.
+
+Lukasz reported that perf stat counters overflow handling is broken on KNL/SLM.
+
+Both these parts have full_width_write set, and that does indeed have
+a problem. In order to deal with counter wrap, we must sample the
+counter at at least half the counter period (see also the sampling
+theorem) such that we can unambiguously reconstruct the count.
+
+However commit:
+
+  069e0c3c4058 ("perf/x86/intel: Support full width counting")
+
+sets the sampling interval to the full period, not half.
+
+Fixing that exposes another issue, in that we must not sign extend the
+delta value when we shift it right; the counter cannot have
+decremented after all.
+
+With both these issues fixed, counter overflow functions correctly
+again.
+
+Reported-by: Lukasz Odzioba <lukasz.odzioba@intel.com>
+Tested-by: Liang, Kan <kan.liang@intel.com>
+Tested-by: Odzioba, Lukasz <lukasz.odzioba@intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Fixes: 069e0c3c4058 ("perf/x86/intel: Support full width counting")
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/events/core.c       |    2 +-
+ arch/x86/events/intel/core.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -68,7 +68,7 @@ u64 x86_perf_event_update(struct perf_ev
+       int shift = 64 - x86_pmu.cntval_bits;
+       u64 prev_raw_count, new_raw_count;
+       int idx = hwc->idx;
+-      s64 delta;
++      u64 delta;
+ 
+       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+               return 0;
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -4024,7 +4024,7 @@ __init int intel_pmu_init(void)
+ 
+       /* Support full width counters using alternative MSR range */
+       if (x86_pmu.intel_cap.full_width_write) {
+-              x86_pmu.max_period = x86_pmu.cntval_mask;
++              x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
+               x86_pmu.perfctr = MSR_IA32_PMC0;
+               pr_cont("full-width counters, ");
+       }
diff --git a/queue-4.8/sched-autogroup-fix-64-bit-kernel-nice-level-adjustment.patch b/queue-4.8/sched-autogroup-fix-64-bit-kernel-nice-level-adjustment.patch

new file mode 100644 (file)

index 0000000..b331774
--- /dev/null
+++ b/queue-4.8/sched-autogroup-fix-64-bit-kernel-nice-level-adjustment.patch
@@ -0,0 +1,77 @@
+From 83929cce95251cc77e5659bf493bd424ae0e7a67 Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <efault@gmx.de>
+Date: Wed, 23 Nov 2016 11:33:37 +0100
+Subject: sched/autogroup: Fix 64-bit kernel nice level adjustment
+
+From: Mike Galbraith <efault@gmx.de>
+
+commit 83929cce95251cc77e5659bf493bd424ae0e7a67 upstream.
+
+Michael Kerrisk reported:
+
+> Regarding the previous paragraph...  My tests indicate
+> that writing *any* value to the autogroup [nice priority level]
+> file causes the task group to get a lower priority.
+
+Because autogroup didn't call the then meaningless scale_load()...
+
+Autogroup nice level adjustment has been broken ever since load
+resolution was increased for 64-bit kernels.  Use scale_load() to
+scale group weight.
+
+Michael Kerrisk tested this patch to fix the problem:
+
+> Applied and tested against 4.9-rc6 on an Intel u7 (4 cores).
+> Test setup:
+>
+> Terminal window 1: running 40 CPU burner jobs
+> Terminal window 2: running 40 CPU burner jobs
+> Terminal window 3: running  1 CPU burner job
+>
+> Demonstrated that:
+> * Writing "0" to the autogroup file for TW1 now causes no change
+>   to the rate at which the process on the terminal consume CPU.
+> * Writing -20 to the autogroup file for TW1 caused those processes
+>   to get the lion's share of CPU while TW2 TW3 get a tiny amount.
+> * Writing -20 to the autogroup files for TW1 and TW3 allowed the
+>   process on TW3 to get as much CPU as it was getting as when
+>   the autogroup nice values for both terminals were 0.
+
+Reported-by: Michael Kerrisk <mtk.manpages@gmail.com>
+Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-man <linux-man@vger.kernel.org>
+Link: http://lkml.kernel.org/r/1479897217.4306.6.camel@gmx.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/auto_group.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/auto_group.c
++++ b/kernel/sched/auto_group.c
+@@ -192,6 +192,7 @@ int proc_sched_autogroup_set_nice(struct
+ {
+       static unsigned long next = INITIAL_JIFFIES;
+       struct autogroup *ag;
++      unsigned long shares;
+       int err;
+ 
+       if (nice < MIN_NICE || nice > MAX_NICE)
+@@ -210,9 +211,10 @@ int proc_sched_autogroup_set_nice(struct
+ 
+       next = HZ / 10 + jiffies;
+       ag = autogroup_task_get(p);
++      shares = scale_load(sched_prio_to_weight[nice + 20]);
+ 
+       down_write(&ag->lock);
+-      err = sched_group_set_shares(ag->tg, sched_prio_to_weight[nice + 20]);
++      err = sched_group_set_shares(ag->tg, shares);
+       if (!err)
+               ag->nice = nice;
+       up_write(&ag->lock);
diff --git a/queue-4.8/scsi-lpfc-fix-oops-bug-in-lpfc_sli_ringtxcmpl_put.patch b/queue-4.8/scsi-lpfc-fix-oops-bug-in-lpfc_sli_ringtxcmpl_put.patch

new file mode 100644 (file)

index 0000000..7c1ae58
--- /dev/null
+++ b/queue-4.8/scsi-lpfc-fix-oops-bug-in-lpfc_sli_ringtxcmpl_put.patch
@@ -0,0 +1,112 @@
+From 2319f847a8910cff1d46c9b66aa1dd7cc3e836a9 Mon Sep 17 00:00:00 2001
+From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
+Date: Wed, 23 Nov 2016 10:33:19 -0200
+Subject: scsi: lpfc: fix oops/BUG in lpfc_sli_ringtxcmpl_put()
+
+From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
+
+commit 2319f847a8910cff1d46c9b66aa1dd7cc3e836a9 upstream.
+
+The BUG_ON() recently introduced in lpfc_sli_ringtxcmpl_put() is hit in
+the lpfc_els_abort() > lpfc_sli_issue_abort_iotag() >
+lpfc_sli_abort_iotag_issue() function path [similar names], due to
+'piocb->vport == NULL':
+
+       BUG_ON(!piocb || !piocb->vport);
+
+This happens because lpfc_sli_abort_iotag_issue() doesn't set the
+'abtsiocbp->vport' pointer -- but this is not the problem.
+
+Previously, lpfc_sli_ringtxcmpl_put() accessed 'piocb->vport' only if
+'piocb->iocb.ulpCommand' is neither CMD_ABORT_XRI_CN nor
+CMD_CLOSE_XRI_CN, which are the only possible values for
+lpfc_sli_abort_iotag_issue():
+
+    lpfc_sli_ringtxcmpl_put():
+
+        if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
+           (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
+           (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) &&
+            (!(piocb->vport->load_flag & FC_UNLOADING)))
+
+    lpfc_sli_abort_iotag_issue():
+
+        if (phba->link_state >= LPFC_LINK_UP)
+                iabt->ulpCommand = CMD_ABORT_XRI_CN;
+        else
+                iabt->ulpCommand = CMD_CLOSE_XRI_CN;
+
+So, this function path would not have hit this possible NULL pointer
+dereference before.
+
+In order to fix this regression, move the second part of the BUG_ON()
+check prior to the pointer dereference that it does check for.
+
+For reference, this is the stack trace observed. The problem happened
+because an unsolicited event was received - a PLOGI was received after
+our PLOGI was issued but not yet complete, so the discovery state
+machine goes on to sw-abort our PLOGI.
+
+    kernel BUG at drivers/scsi/lpfc/lpfc_sli.c:1326!
+    Oops: Exception in kernel mode, sig: 5 [#1]
+    <...>
+    NIP [...] lpfc_sli_ringtxcmpl_put+0x1c/0xf0 [lpfc]
+    LR  [...] __lpfc_sli_issue_iocb_s4+0x188/0x200 [lpfc]
+    Call Trace:
+    [...] [...] __lpfc_sli_issue_iocb_s4+0xb0/0x200 [lpfc] (unreliable)
+    [...] [...] lpfc_sli_issue_abort_iotag+0x2b4/0x350 [lpfc]
+    [...] [...] lpfc_els_abort+0x1a8/0x4a0 [lpfc]
+    [...] [...] lpfc_rcv_plogi+0x6d4/0x700 [lpfc]
+    [...] [...] lpfc_rcv_plogi_plogi_issue+0xd8/0x1d0 [lpfc]
+    [...] [...] lpfc_disc_state_machine+0xc0/0x2b0 [lpfc]
+    [...] [...] lpfc_els_unsol_buffer+0xcc0/0x26c0 [lpfc]
+    [...] [...] lpfc_els_unsol_event+0xa8/0x220 [lpfc]
+    [...] [...] lpfc_complete_unsol_iocb+0xb8/0x138 [lpfc]
+    [...] [...] lpfc_sli4_handle_received_buffer+0x6a0/0xec0 [lpfc]
+    [...] [...] lpfc_sli_handle_slow_ring_event_s4+0x1c4/0x240 [lpfc]
+    [...] [...] lpfc_sli_handle_slow_ring_event+0x24/0x40 [lpfc]
+    [...] [...] lpfc_do_work+0xd88/0x1970 [lpfc]
+    [...] [...] kthread+0x108/0x130
+    [...] [...] ret_from_kernel_thread+0x5c/0xbc
+    <...>
+
+Fixes: 22466da5b4b7 ("lpfc: Fix possible NULL pointer dereference")
+Reported-by: Harsha Thyagaraja <hathyaga@in.ibm.com>
+Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/lpfc/lpfc_sli.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/scsi/lpfc/lpfc_sli.c
++++ b/drivers/scsi/lpfc/lpfc_sli.c
+@@ -1323,18 +1323,20 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba
+ {
+       lockdep_assert_held(&phba->hbalock);
+ 
+-      BUG_ON(!piocb || !piocb->vport);
++      BUG_ON(!piocb);
+ 
+       list_add_tail(&piocb->list, &pring->txcmplq);
+       piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ;
+ 
+       if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
+          (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
+-         (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) &&
+-          (!(piocb->vport->load_flag & FC_UNLOADING)))
+-              mod_timer(&piocb->vport->els_tmofunc,
+-                        jiffies +
+-                        msecs_to_jiffies(1000 * (phba->fc_ratov << 1)));
++         (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) {
++              BUG_ON(!piocb->vport);
++              if (!(piocb->vport->load_flag & FC_UNLOADING))
++                      mod_timer(&piocb->vport->els_tmofunc,
++                                jiffies +
++                                msecs_to_jiffies(1000 * (phba->fc_ratov << 1)));
++      }
+ 
+       return 0;
+ }
diff --git a/queue-4.8/series b/queue-4.8/series

index 24550cc4edf230537caa411976995d8cffbf012b..19d1189885b73372bde185874fbbacb280090858 100644 (file)
--- a/queue-4.8/series
+++ b/queue-4.8/series
@@ -2,3 +2,14 @@ powerpc-eeh-fix-deadlock-when-pe-frozen-state-can-t-be-cleared.patch
  powerpc-mm-fix-lazy-icache-flush-on-pre-power5.patch
  powerpc-boot-fix-build-failure-in-32-bit-boot-wrapper.patch
  fuse-fix-clearing-suid-sgid-for-chown.patch
+parisc-purge-tlb-before-setting-pte.patch
+parisc-remove-unnecessary-tlb-purges-from-flush_dcache_page_asm-and-flush_icache_page_asm.patch
+parisc-fix-tlb-related-boot-crash-on-smp-machines.patch
+zram-restrict-add-remove-attributes-to-root-only.patch
+locking-rtmutex-prevent-dequeue-vs.-unlock-race.patch
+locking-rtmutex-use-read_once-in-rt_mutex_owner.patch
+device-dax-fix-private-mapping-restriction-permit-read-only.patch
+scsi-lpfc-fix-oops-bug-in-lpfc_sli_ringtxcmpl_put.patch
+sched-autogroup-fix-64-bit-kernel-nice-level-adjustment.patch
+vhost-vsock-fix-orphan-connection-reset.patch
+perf-x86-fix-full-width-counter-counter-overflow.patch
diff --git a/queue-4.8/vhost-vsock-fix-orphan-connection-reset.patch b/queue-4.8/vhost-vsock-fix-orphan-connection-reset.patch

new file mode 100644 (file)

index 0000000..169ec83
--- /dev/null
+++ b/queue-4.8/vhost-vsock-fix-orphan-connection-reset.patch
@@ -0,0 +1,33 @@
+From c4587631c7bad47c045e081d1553cd73a23be59a Mon Sep 17 00:00:00 2001
+From: Peng Tao <bergwolf@gmail.com>
+Date: Fri, 9 Dec 2016 01:10:46 +0800
+Subject: vhost-vsock: fix orphan connection reset
+
+From: Peng Tao <bergwolf@gmail.com>
+
+commit c4587631c7bad47c045e081d1553cd73a23be59a upstream.
+
+local_addr.svm_cid is host cid. We should check guest cid instead,
+which is remote_addr.svm_cid. Otherwise we end up resetting all
+connections to all guests.
+
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Peng Tao <bergwolf@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vhost/vsock.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -506,7 +506,7 @@ static void vhost_vsock_reset_orphans(st
+        * executing.
+        */
+ 
+-      if (!vhost_vsock_get(vsk->local_addr.svm_cid)) {
++      if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
+               sock_set_flag(sk, SOCK_DONE);
+               vsk->peer_shutdown = SHUTDOWN_MASK;
+               sk->sk_state = SS_UNCONNECTED;
diff --git a/queue-4.8/zram-restrict-add-remove-attributes-to-root-only.patch b/queue-4.8/zram-restrict-add-remove-attributes-to-root-only.patch

new file mode 100644 (file)

index 0000000..4a5ca04
--- /dev/null
+++ b/queue-4.8/zram-restrict-add-remove-attributes-to-root-only.patch
@@ -0,0 +1,48 @@
+From 5c7e9ccd91b90d87029261f8856294ee51934cab Mon Sep 17 00:00:00 2001
+From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Date: Wed, 7 Dec 2016 14:44:31 -0800
+Subject: zram: restrict add/remove attributes to root only
+
+From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+
+commit 5c7e9ccd91b90d87029261f8856294ee51934cab upstream.
+
+zram hot_add sysfs attribute is a very 'special' attribute - reading
+from it creates a new uninitialized zram device.  This file, by a
+mistake, can be read by a 'normal' user at the moment, while only root
+must be able to create a new zram device, therefore hot_add attribute
+must have S_IRUSR mode, not S_IRUGO.
+
+[akpm@linux-foundation.org: s/sence/sense/, reflow comment to use 80 cols]
+Fixes: 6566d1a32bf72 ("zram: add dynamic device add/remove functionality")
+Link: http://lkml.kernel.org/r/20161205155845.20129-1-sergey.senozhatsky@gmail.com
+Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Reported-by: Steven Allen <steven@stebalien.com>
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -1413,8 +1413,14 @@ static ssize_t hot_remove_store(struct c
+       return ret ? ret : count;
+ }
+ 
++/*
++ * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
++ * sense that reading from this file does alter the state of your system -- it
++ * creates a new un-initialized zram device and returns back this device's
++ * device_id (or an error code if it fails to create a new device).
++ */
+ static struct class_attribute zram_control_class_attrs[] = {
+-      __ATTR_RO(hot_add),
++      __ATTR(hot_add, 0400, hot_add_show, NULL),
+       __ATTR_WO(hot_remove),
+       __ATTR_NULL,
+ };
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 12 Dec 2016 22:47:18 +0000 (14:47 -0800)
queue-4.8/device-dax-fix-private-mapping-restriction-permit-read-only.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/locking-rtmutex-prevent-dequeue-vs.-unlock-race.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/locking-rtmutex-use-read_once-in-rt_mutex_owner.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/parisc-fix-tlb-related-boot-crash-on-smp-machines.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/parisc-purge-tlb-before-setting-pte.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/parisc-remove-unnecessary-tlb-purges-from-flush_dcache_page_asm-and-flush_icache_page_asm.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/perf-x86-fix-full-width-counter-counter-overflow.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/sched-autogroup-fix-64-bit-kernel-nice-level-adjustment.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/scsi-lpfc-fix-oops-bug-in-lpfc_sli_ringtxcmpl_put.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/series		patch \| blob \| blame \| history
queue-4.8/vhost-vsock-fix-orphan-connection-reset.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/zram-restrict-add-remove-attributes-to-root-only.patch	[new file with mode: 0644]	patch \| blob