--- /dev/null
+From d57f727264f1425a94689bafc7e99e502cb135b5 Mon Sep 17 00:00:00 2001
+From: Vineet Gupta <vgupta@synopsys.com>
+Date: Thu, 13 Nov 2014 15:54:01 +0530
+Subject: ARC: add compiler barrier to LLSC based cmpxchg
+
+From: Vineet Gupta <vgupta@synopsys.com>
+
+commit d57f727264f1425a94689bafc7e99e502cb135b5 upstream.
+
+When auditing cmpxchg call sites, Chuck noted that gcc was optimizing
+away some of the desired LDs.
+
+| do {
+| new = old = *ipi_data_ptr;
+| new |= 1U << msg;
+| } while (cmpxchg(ipi_data_ptr, old, new) != old);
+
+was generating the code below
+
+| 8015cef8: ld r2,[r4,0] <-- First LD
+| 8015cefc: bset r1,r2,r1
+|
+| 8015cf00: llock r3,[r4] <-- atomic op
+| 8015cf04: brne r3,r2,8015cf10
+| 8015cf08: scond r1,[r4]
+| 8015cf0c: bnz 8015cf00
+|
+| 8015cf10: brne r3,r2,8015cf00 <-- Branch doesn't go to orig LD
+
+Although this was fixed by adding an ACCESS_ONCE() at this call site, it
+seems safer (for now at least) to add a compiler barrier to the LLSC based
+cmpxchg.
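+
+For background (not part of the upstream change itself), the effect of
+the new "memory" clobber can be seen with a generic sketch; it is the
+clobber, not the asm statement itself, that acts as a compiler barrier
+and forces gcc to re-read memory:
+
+| /* illustrative sketch only, not the patched kernel code */
+| unsigned int without_clobber(unsigned int *p)
+| {
+| 	unsigned int a = *p;
+|
+| 	asm volatile("" : :);		/* no "memory" clobber: gcc may
+| 					 * assume *p is unchanged and
+| 					 * reuse 'a' for the next read */
+| 	return a + *p;
+| }
+|
+| unsigned int with_clobber(unsigned int *p)
+| {
+| 	unsigned int a = *p;
+|
+| 	asm volatile("" ::: "memory");	/* compiler barrier */
+| 	return a + *p;			/* *p must be reloaded here */
+| }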
+
+Reported-by: Chuck Jordan <cjordan@synopsys.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/include/asm/cmpxchg.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/arc/include/asm/cmpxchg.h
++++ b/arch/arc/include/asm/cmpxchg.h
+@@ -33,10 +33,11 @@ __cmpxchg(volatile void *ptr, unsigned l
+ " scond %3, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+- : "=&r"(prev)
+- : "r"(ptr), "ir"(expected),
+- "r"(new) /* can't be "ir". scond can't take limm for "b" */
+- : "cc");
++ : "=&r"(prev) /* Early clobber, to prevent reg reuse */
++ : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
++ "ir"(expected),
++ "r"(new) /* can't be "ir". scond can't take LIMM for "b" */
++ : "cc", "memory"); /* so that gcc knows memory is being written here */
+
+ smp_mb();
+
--- /dev/null
+From 2576c28e3f623ed401db7e6197241865328620ef Mon Sep 17 00:00:00 2001
+From: Vineet Gupta <vgupta@synopsys.com>
+Date: Thu, 20 Nov 2014 15:42:09 +0530
+Subject: ARC: add smp barriers around atomics per Documentation/atomic_ops.txt
+
+From: Vineet Gupta <vgupta@synopsys.com>
+
+commit 2576c28e3f623ed401db7e6197241865328620ef upstream.
+
+ - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers.
+   Since ARCv2 only provides load/load, store/store and all/all, we need
+   the full barrier.
+
+ - LLOCK/SCOND based atomics, bitops and cmpxchg, which return modified
+   values, were lacking the explicit smp barriers (see the sketch below).
+
+ - Non LLOCK/SCOND variants don't need the explicit barriers since that
+   is implicitly provided by the spin locks used to implement the
+   critical section (the spin lock barriers in turn are also fixed in
+   this commit as explained above).
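+
+The rule being implemented (per Documentation/atomic_ops.txt) can be
+sketched as follows; this is illustrative code, not part of the patch.
+An atomic operation that returns the modified value must behave as a
+full memory barrier on both sides, so the message-passing pattern below
+is ordered once the explicit smp_mb() calls are in place:
+
+| static atomic_t seq = ATOMIC_INIT(0);
+| static int data;
+|
+| static void writer(void)		/* runs on CPU 0 */
+| {
+| 	WRITE_ONCE(data, 1);
+| 	atomic_inc_return(&seq);	/* must imply smp_mb() before/after */
+| }
+|
+| static void reader(void)		/* runs on CPU 1 */
+| {
+| 	if (atomic_read(&seq) != 0) {
+| 		smp_rmb();
+| 		BUG_ON(READ_ONCE(data) != 1);	/* must see writer's store */
+| 	}
+| }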
+
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++
+ arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++
+ arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++
+ arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++
+ 4 files changed, 89 insertions(+)
+
+--- a/arch/arc/include/asm/atomic.h
++++ b/arch/arc/include/asm/atomic.h
+@@ -43,6 +43,12 @@ static inline int atomic_##op##_return(i
+ { \
+ unsigned int temp; \
+ \
++ /* \
++ * Explicit full memory barrier needed before/after as \
++ * LLOCK/SCOND thmeselves don't provide any such semantics \
++ */ \
++ smp_mb(); \
++ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%1] \n" \
+ " " #asm_op " %0, %0, %2 \n" \
+@@ -52,6 +58,8 @@ static inline int atomic_##op##_return(i
+ : "r"(&v->counter), "ir"(i) \
+ : "cc"); \
+ \
++ smp_mb(); \
++ \
+ return temp; \
+ }
+
+@@ -105,6 +113,9 @@ static inline int atomic_##op##_return(i
+ unsigned long flags; \
+ unsigned long temp; \
+ \
++ /* \
++ * spin lock/unlock provides the needed smp_mb() before/after \
++ */ \
+ atomic_ops_lock(flags); \
+ temp = v->counter; \
+ temp c_op i; \
+@@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and)
+ #define __atomic_add_unless(v, a, u) \
+ ({ \
+ int c, old; \
++ \
++ /* \
++ * Explicit full memory barrier needed before/after as \
++ * LLOCK/SCOND thmeselves don't provide any such semantics \
++ */ \
++ smp_mb(); \
++ \
+ c = atomic_read(v); \
+ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
+ c = old; \
++ \
++ smp_mb(); \
++ \
+ c; \
+ })
+
+--- a/arch/arc/include/asm/bitops.h
++++ b/arch/arc/include/asm/bitops.h
+@@ -103,6 +103,12 @@ static inline int test_and_set_bit(unsig
+ if (__builtin_constant_p(nr))
+ nr &= 0x1f;
+
++ /*
++ * Explicit full memory barrier needed before/after as
++ * LLOCK/SCOND themselves don't provide any such semantics
++ */
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: llock %0, [%2] \n"
+ " bset %1, %0, %3 \n"
+@@ -112,6 +118,8 @@ static inline int test_and_set_bit(unsig
+ : "r"(m), "ir"(nr)
+ : "cc");
+
++ smp_mb();
++
+ return (old & (1 << nr)) != 0;
+ }
+
+@@ -125,6 +133,8 @@ test_and_clear_bit(unsigned long nr, vol
+ if (__builtin_constant_p(nr))
+ nr &= 0x1f;
+
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: llock %0, [%2] \n"
+ " bclr %1, %0, %3 \n"
+@@ -134,6 +144,8 @@ test_and_clear_bit(unsigned long nr, vol
+ : "r"(m), "ir"(nr)
+ : "cc");
+
++ smp_mb();
++
+ return (old & (1 << nr)) != 0;
+ }
+
+@@ -147,6 +159,8 @@ test_and_change_bit(unsigned long nr, vo
+ if (__builtin_constant_p(nr))
+ nr &= 0x1f;
+
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: llock %0, [%2] \n"
+ " bxor %1, %0, %3 \n"
+@@ -156,6 +170,8 @@ test_and_change_bit(unsigned long nr, vo
+ : "r"(m), "ir"(nr)
+ : "cc");
+
++ smp_mb();
++
+ return (old & (1 << nr)) != 0;
+ }
+
+@@ -235,6 +251,9 @@ static inline int test_and_set_bit(unsig
+ if (__builtin_constant_p(nr))
+ nr &= 0x1f;
+
++ /*
++ * spin lock/unlock provide the needed smp_mb() before/after
++ */
+ bitops_lock(flags);
+
+ old = *m;
+--- a/arch/arc/include/asm/cmpxchg.h
++++ b/arch/arc/include/asm/cmpxchg.h
+@@ -10,6 +10,8 @@
+ #define __ASM_ARC_CMPXCHG_H
+
+ #include <linux/types.h>
++
++#include <asm/barrier.h>
+ #include <asm/smp.h>
+
+ #ifdef CONFIG_ARC_HAS_LLSC
+@@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned l
+ {
+ unsigned long prev;
+
++ /*
++ * Explicit full memory barrier needed before/after as
++ * LLOCK/SCOND thmeselves don't provide any such semantics
++ */
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: llock %0, [%1] \n"
+ " brne %0, %2, 2f \n"
+@@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned l
+ "r"(new) /* can't be "ir". scond can't take limm for "b" */
+ : "cc");
+
++ smp_mb();
++
+ return prev;
+ }
+
+@@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned l
+ int prev;
+ volatile unsigned long *p = ptr;
+
++ /*
++ * spin lock/unlock provide the needed smp_mb() before/after
++ */
+ atomic_ops_lock(flags);
+ prev = *p;
+ if (prev == expected)
+@@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsig
+
+ switch (size) {
+ case 4:
++ smp_mb();
++
+ __asm__ __volatile__(
+ " ex %0, [%1] \n"
+ : "+r"(val)
+ : "r"(ptr)
+ : "memory");
+
++ smp_mb();
++
+ return val;
+ }
+ return __xchg_bad_pointer();
+--- a/arch/arc/include/asm/spinlock.h
++++ b/arch/arc/include/asm/spinlock.h
+@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_s
+ {
+ unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+
++ /*
++ * This smp_mb() is technically superfluous, we only need the one
++ * after the lock for providing the ACQUIRE semantics.
++ * However doing the "right" thing was regressing hackbench
++ * so keeping this, pending further investigation
++ */
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: ex %0, [%1] \n"
+ " breq %0, %2, 1b \n"
+ : "+&r" (tmp)
+ : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
+ : "memory");
++
++ /*
++ * ACQUIRE barrier to ensure load/store after taking the lock
++ * don't "bleed-up" out of the critical section (leak-in is allowed)
++ * http://www.spinics.net/lists/kernel/msg2010409.html
++ *
++ * ARCv2 only has load-load, store-store and all-all barrier
++ * thus need the full all-all barrier
++ */
++ smp_mb();
+ }
+
+ static inline int arch_spin_trylock(arch_spinlock_t *lock)
+ {
+ unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+
++ smp_mb();
++
+ __asm__ __volatile__(
+ "1: ex %0, [%1] \n"
+ : "+r" (tmp)
+ : "r"(&(lock->slock))
+ : "memory");
+
++ smp_mb();
++
+ return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
+ }
+
+@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch
+ {
+ unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+
++ /*
++ * RELEASE barrier: given the instructions avail on ARCv2, full barrier
++ * is the only option
++ */
++ smp_mb();
++
+ __asm__ __volatile__(
+ " ex %0, [%1] \n"
+ : "+r" (tmp)
+ : "r"(&(lock->slock))
+ : "memory");
+
++ /*
++ * superfluous, but keeping for now - see pairing version in
++ * arch_spin_lock above
++ */
+ smp_mb();
+ }
+
--- /dev/null
+From 565630d503ef24e44c252bed55571b3a0d68455f Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Fri, 12 Jun 2015 11:24:41 +0100
+Subject: arm64: Do not attempt to use init_mm in reset_context()
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit 565630d503ef24e44c252bed55571b3a0d68455f upstream.
+
+After secondary CPU boot or hotplug, the active_mm of the idle thread is
+&init_mm. The init_mm.pgd (swapper_pg_dir) is only meant for TTBR1_EL1
+and must not be set in TTBR0_EL1. Since TTBR0_EL1 is already set to the
+reserved value when active_mm == &init_mm, there is no need to perform
+any context reset.
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/context.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/arm64/mm/context.c
++++ b/arch/arm64/mm/context.c
+@@ -92,6 +92,14 @@ static void reset_context(void *info)
+ unsigned int cpu = smp_processor_id();
+ struct mm_struct *mm = current->active_mm;
+
++ /*
++ * current->active_mm could be init_mm for the idle thread immediately
++ * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
++ * the reserved value, so no need to reset any context.
++ */
++ if (mm == &init_mm)
++ return;
++
+ smp_rmb();
+ asid = cpu_last_asid + cpu;
+
--- /dev/null
+From 46b0567c851cf85d6ba6f23eef385ec9111d09bc Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Mon, 15 Jun 2015 16:40:27 +0100
+Subject: arm64: entry: fix context tracking for el0_sp_pc
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit 46b0567c851cf85d6ba6f23eef385ec9111d09bc upstream.
+
+Commit 6c81fe7925cc4c42 ("arm64: enable context tracking") did not
+update el0_sp_pc to use ct_user_exit, but this appears to have been
+unintentional. In commit 6ab6463aeb5fbc75 ("arm64: adjust el0_sync so
+that a function can be called") we made x0 available, and in the return
+to userspace we call ct_user_enter in the kernel_exit macro.
+
+Due to this, we currently don't correctly inform RCU of the user->kernel
+transition, and may erroneously account for time spent in the kernel as
+if we were in an extended quiescent state when CONFIG_CONTEXT_TRACKING
+is enabled.
+
+As we do record the kernel->user transition, a userspace application
+making accesses from an unaligned stack pointer can demonstrate the
+imbalance, provoking the following warning:
+
+------------[ cut here ]------------
+WARNING: CPU: 2 PID: 3660 at kernel/context_tracking.c:75 context_tracking_enter+0xd8/0xe4()
+Modules linked in:
+CPU: 2 PID: 3660 Comm: a.out Not tainted 4.1.0-rc7+ #8
+Hardware name: ARM Juno development board (r0) (DT)
+Call trace:
+[<ffffffc000089914>] dump_backtrace+0x0/0x124
+[<ffffffc000089a48>] show_stack+0x10/0x1c
+[<ffffffc0005b3cbc>] dump_stack+0x84/0xc8
+[<ffffffc0000b3214>] warn_slowpath_common+0x98/0xd0
+[<ffffffc0000b330c>] warn_slowpath_null+0x14/0x20
+[<ffffffc00013ada4>] context_tracking_enter+0xd4/0xe4
+[<ffffffc0005b534c>] preempt_schedule_irq+0xd4/0x114
+[<ffffffc00008561c>] el1_preempt+0x4/0x28
+[<ffffffc0001b8040>] exit_files+0x38/0x4c
+[<ffffffc0000b5b94>] do_exit+0x430/0x978
+[<ffffffc0000b614c>] do_group_exit+0x40/0xd4
+[<ffffffc0000c0208>] get_signal+0x23c/0x4f4
+[<ffffffc0000890b4>] do_signal+0x1ac/0x518
+[<ffffffc000089650>] do_notify_resume+0x5c/0x68
+---[ end trace 963c192600337066 ]---
+
+This patch adds the missing ct_user_exit to the el0_sp_pc entry path,
+correcting the context tracking for this case.
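+
+A reproducer sketch for the trigger mentioned above (hypothetical and
+untested): knock the stack pointer off its required 16-byte alignment
+and then use it, so the resulting fault is taken via el0_sp_pc:
+
+| int main(void)
+| {
+| 	asm volatile(
+| 	"	mov	x0, sp		\n"
+| 	"	orr	x0, x0, #8	\n"	/* misalign SP */
+| 	"	mov	sp, x0		\n"
+| 	"	ldr	x1, [sp]	\n"	/* SP-based access -> fault */
+| 	::: "x0", "x1");
+| 	return 0;
+| }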
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Fixes: 6c81fe7925cc ("arm64: enable context tracking")
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/entry.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kernel/entry.S
++++ b/arch/arm64/kernel/entry.S
+@@ -517,6 +517,7 @@ el0_sp_pc:
+ mrs x26, far_el1
+ // enable interrupts before calling the main handler
+ enable_dbg_and_irq
++ ct_user_exit
+ mov x0, x26
+ mov x1, x25
+ mov x2, sp
--- /dev/null
+From b9bcc919931611498e856eae9bf66337330d04cc Mon Sep 17 00:00:00 2001
+From: Dave P Martin <Dave.Martin@arm.com>
+Date: Tue, 16 Jun 2015 17:38:47 +0100
+Subject: arm64: mm: Fix freeing of the wrong memmap entries with !SPARSEMEM_VMEMMAP
+
+From: Dave P Martin <Dave.Martin@arm.com>
+
+commit b9bcc919931611498e856eae9bf66337330d04cc upstream.
+
+The memmap freeing code in free_unused_memmap() computes the end of
+each memblock by adding the memblock size onto the base. However,
+if SPARSEMEM is enabled then the value (start) used for the base
+may already have been rounded downwards to work out which memmap
+entries to free after the previous memblock.
+
+This may cause memmap entries that are in use to get freed.
+
+In general, you're not likely to hit this problem unless there
+are at least 2 memblocks and one of them is not aligned to a
+sparsemem section boundary. Note that carve-outs can increase
+the number of memblocks by splitting the regions listed in the
+device tree.
+
+This problem doesn't occur with SPARSEMEM_VMEMMAP, because the
+vmemmap code deals with freeing the unused regions of the memmap
+instead of requiring the arch code to do it.
+
+This patch gets the memblock base out of the memblock directly when
+computing the block end address to ensure the correct value is used.
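+
+A worked example of the failure (hypothetical numbers: 4K pages and
+128MB sparsemem sections, i.e. a section spans 0x8000 pfns):
+
+|  memblock:  base = 0x88800000, size = 0x10000000 (256MB)
+|  start      = __phys_to_pfn(base)                  = 0x88800
+|  (the SPARSEMEM rounding may have lowered start to   0x88000)
+|
+|  old: prev_end = ALIGN(start + __phys_to_pfn(size), MAX_ORDER_NR_PAGES)
+|                = 0x98000   <-- 0x800 pfns short of the real block end
+|  new: prev_end = ALIGN(__phys_to_pfn(base + size), MAX_ORDER_NR_PAGES)
+|                = 0x98800
+|
+|  With the old computation, the memmap for pfns 0x98000-0x98800 (pages
+|  that are in use) could later be freed along with the gap before the
+|  next memblock.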
+
+Signed-off-by: Dave Martin <Dave.Martin@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/init.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -260,7 +260,7 @@ static void __init free_unused_memmap(vo
+ * memmap entries are valid from the bank end aligned to
+ * MAX_ORDER_NR_PAGES.
+ */
+- prev_end = ALIGN(start + __phys_to_pfn(reg->size),
++ prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
+ MAX_ORDER_NR_PAGES);
+ }
+
--- /dev/null
+From 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 19 Jun 2015 13:56:33 +0100
+Subject: arm64: vdso: work-around broken ELF toolchains in Makefile
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb upstream.
+
+When building the kernel with a bare-metal (ELF) toolchain, the -shared
+option may not be passed down to collect2, resulting in silent corruption
+of the vDSO image (in particular, the DYNAMIC section is omitted).
+
+The effect of this corruption is that the dynamic linker fails to find
+the vDSO symbols and libc is instead used for the syscalls that we
+intended to optimise (e.g. gettimeofday). Functionally, there is no
+issue as the sigreturn trampoline is still intact and located by the
+kernel.
+
+This patch fixes the problem by explicitly passing -shared to the linker
+when building the vDSO.
+
+Reported-by: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
+Reported-by: James Greenhalgh <james.greenhalgh@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/vdso/Makefile | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/arm64/kernel/vdso/Makefile
++++ b/arch/arm64/kernel/vdso/Makefile
+@@ -15,6 +15,10 @@ ccflags-y := -shared -fno-common -fno-bu
+ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
++# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
++# down to collect2, resulting in silent corruption of the vDSO image.
++ccflags-y += -Wl,-shared
++
+ obj-y += vdso.o
+ extra-y += vdso.lds vdso-offsets.h
+ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
--- /dev/null
+From 3dc196eae1db548f05e53e5875ff87b8ff79f249 Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Sat, 13 Jun 2015 08:51:17 +0300
+Subject: mei: me: wait for power gating exit confirmation
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit 3dc196eae1db548f05e53e5875ff87b8ff79f249 upstream.
+
+Fix the hbm power gating state machine so that it waits until it receives
+the confirmation interrupt for the PG_ISOLATION_EXIT message.
+
+During the suspend flow the device first has to exit from the power gating
+state (runtime pm resume). If we do not handle the confirmation interrupt
+after sending the PG_ISOLATION_EXIT message, we may receive it only after
+the suspend flow has already changed the device state; the interrupt will
+then be interpreted as a spurious event, and consequently a link reset will
+be invoked, which will prevent the device from completing the suspend flow:
+
+kernel: [6603] mei_reset:136: mei_me 0000:00:16.0: powering down: end of reset
+kernel: [476] mei_me_irq_thread_handler:643: mei_me 0000:00:16.0: function called after ISR to handle the interrupt processing.
+kernel: mei_me 0000:00:16.0: FW not ready: resetting
+
+Cc: Gabriele Mazzotta <gabriele.mzt@gmail.com>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=86241
+Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=770397
+Tested-by: Gabriele Mazzotta <gabriele.mzt@gmail.com>
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/client.c | 2 -
+ drivers/misc/mei/hw-me.c | 59 +++++++++++++++++++++++++++++++++++++++++----
+ drivers/misc/mei/hw-txe.c | 13 +++++++++
+ drivers/misc/mei/mei_dev.h | 11 ++++++++
+ 4 files changed, 80 insertions(+), 5 deletions(-)
+
+--- a/drivers/misc/mei/client.c
++++ b/drivers/misc/mei/client.c
+@@ -573,7 +573,7 @@ void mei_host_client_init(struct work_st
+ bool mei_hbuf_acquire(struct mei_device *dev)
+ {
+ if (mei_pg_state(dev) == MEI_PG_ON ||
+- dev->pg_event == MEI_PG_EVENT_WAIT) {
++ mei_pg_in_transition(dev)) {
+ dev_dbg(dev->dev, "device is in pg\n");
+ return false;
+ }
+--- a/drivers/misc/mei/hw-me.c
++++ b/drivers/misc/mei/hw-me.c
+@@ -629,11 +629,27 @@ int mei_me_pg_unset_sync(struct mei_devi
+ mutex_lock(&dev->device_lock);
+
+ reply:
+- if (dev->pg_event == MEI_PG_EVENT_RECEIVED)
+- ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD);
++ if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
++ ret = -ETIME;
++ goto out;
++ }
++
++ dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
++ ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD);
++ if (ret)
++ return ret;
++
++ mutex_unlock(&dev->device_lock);
++ wait_event_timeout(dev->wait_pg,
++ dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
++ mutex_lock(&dev->device_lock);
++
++ if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED)
++ ret = 0;
+ else
+ ret = -ETIME;
+
++out:
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ hw->pg_state = MEI_PG_OFF;
+
+@@ -641,6 +657,19 @@ reply:
+ }
+
+ /**
++ * mei_me_pg_in_transition - is device now in pg transition
++ *
++ * @dev: the device structure
++ *
++ * Return: true if in pg transition, false otherwise
++ */
++static bool mei_me_pg_in_transition(struct mei_device *dev)
++{
++ return dev->pg_event >= MEI_PG_EVENT_WAIT &&
++ dev->pg_event <= MEI_PG_EVENT_INTR_WAIT;
++}
++
++/**
+ * mei_me_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+@@ -672,6 +701,24 @@ notsupported:
+ }
+
+ /**
++ * mei_me_pg_intr - perform pg processing in interrupt thread handler
++ *
++ * @dev: the device structure
++ */
++static void mei_me_pg_intr(struct mei_device *dev)
++{
++ struct mei_me_hw *hw = to_me_hw(dev);
++
++ if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT)
++ return;
++
++ dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
++ hw->pg_state = MEI_PG_OFF;
++ if (waitqueue_active(&dev->wait_pg))
++ wake_up(&dev->wait_pg);
++}
++
++/**
+ * mei_me_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+@@ -729,6 +776,8 @@ irqreturn_t mei_me_irq_thread_handler(in
+ goto end;
+ }
+
++ mei_me_pg_intr(dev);
++
+ /* check if we need to start the dev */
+ if (!mei_host_is_ready(dev)) {
+ if (mei_hw_is_ready(dev)) {
+@@ -765,9 +814,10 @@ irqreturn_t mei_me_irq_thread_handler(in
+ /*
+ * During PG handshake only allowed write is the replay to the
+ * PG exit message, so block calling write function
+- * if the pg state is not idle
++ * if the pg event is in PG handshake
+ */
+- if (dev->pg_event == MEI_PG_EVENT_IDLE) {
++ if (dev->pg_event != MEI_PG_EVENT_WAIT &&
++ dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+ rets = mei_irq_write_handler(dev, &complete_list);
+ dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+ }
+@@ -792,6 +842,7 @@ static const struct mei_hw_ops mei_me_hw
+ .hw_config = mei_me_hw_config,
+ .hw_start = mei_me_hw_start,
+
++ .pg_in_transition = mei_me_pg_in_transition,
+ .pg_is_enabled = mei_me_pg_is_enabled,
+
+ .intr_clear = mei_me_intr_clear,
+--- a/drivers/misc/mei/hw-txe.c
++++ b/drivers/misc/mei/hw-txe.c
+@@ -302,6 +302,18 @@ int mei_txe_aliveness_set_sync(struct me
+ }
+
+ /**
++ * mei_txe_pg_in_transition - is device now in pg transition
++ *
++ * @dev: the device structure
++ *
++ * Return: true if in pg transition, false otherwise
++ */
++static bool mei_txe_pg_in_transition(struct mei_device *dev)
++{
++ return dev->pg_event == MEI_PG_EVENT_WAIT;
++}
++
++/**
+ * mei_txe_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+@@ -1138,6 +1150,7 @@ static const struct mei_hw_ops mei_txe_h
+ .hw_config = mei_txe_hw_config,
+ .hw_start = mei_txe_hw_start,
+
++ .pg_in_transition = mei_txe_pg_in_transition,
+ .pg_is_enabled = mei_txe_pg_is_enabled,
+
+ .intr_clear = mei_txe_intr_clear,
+--- a/drivers/misc/mei/mei_dev.h
++++ b/drivers/misc/mei/mei_dev.h
+@@ -269,6 +269,7 @@ struct mei_cl {
+
+ * @fw_status : get fw status registers
+ * @pg_state : power gating state of the device
++ * @pg_in_transition : is device now in pg transition
+ * @pg_is_enabled : is power gating enabled
+
+ * @intr_clear : clear pending interrupts
+@@ -298,6 +299,7 @@ struct mei_hw_ops {
+
+ int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts);
+ enum mei_pg_state (*pg_state)(struct mei_device *dev);
++ bool (*pg_in_transition)(struct mei_device *dev);
+ bool (*pg_is_enabled)(struct mei_device *dev);
+
+ void (*intr_clear)(struct mei_device *dev);
+@@ -396,11 +398,15 @@ struct mei_cl_device {
+ * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition
+ * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete
+ * @MEI_PG_EVENT_RECEIVED: the driver received pg event
++ * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt
++ * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt
+ */
+ enum mei_pg_event {
+ MEI_PG_EVENT_IDLE,
+ MEI_PG_EVENT_WAIT,
+ MEI_PG_EVENT_RECEIVED,
++ MEI_PG_EVENT_INTR_WAIT,
++ MEI_PG_EVENT_INTR_RECEIVED,
+ };
+
+ /**
+@@ -727,6 +733,11 @@ static inline enum mei_pg_state mei_pg_s
+ return dev->ops->pg_state(dev);
+ }
+
++static inline bool mei_pg_in_transition(struct mei_device *dev)
++{
++ return dev->ops->pg_in_transition(dev);
++}
++
+ static inline bool mei_pg_is_enabled(struct mei_device *dev)
+ {
+ return dev->ops->pg_is_enabled(dev);
--- /dev/null
+From fe292283c23329218e384bffc6cb4bfa3fd92277 Mon Sep 17 00:00:00 2001
+From: Tomas Winkler <tomas.winkler@intel.com>
+Date: Tue, 14 Apr 2015 10:27:26 +0300
+Subject: mei: txe: reduce suspend/resume time
+
+From: Tomas Winkler <tomas.winkler@intel.com>
+
+commit fe292283c23329218e384bffc6cb4bfa3fd92277 upstream.
+
+HW has to be in a known state before the initialisation sequence is
+started. The polling step for settling aliveness was set to 200ms, while
+in practice this can be done in up to 30msecs.
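+
+The resulting polling loop follows the usual ktime-bounded pattern,
+sketched here in isolation (condition_settled() and TIMEOUT_MS are
+made-up placeholders, not driver symbols):
+
+| static int wait_for_settled(void)
+| {
+| 	ktime_t stop = ktime_add(ktime_get(), ms_to_ktime(TIMEOUT_MS));
+|
+| 	do {
+| 		if (condition_settled())	/* hypothetical helper */
+| 			return 0;		/* usually well under 30ms */
+| 		usleep_range(20, 50);		/* instead of msleep(200) */
+| 	} while (ktime_before(ktime_get(), stop));
+|
+| 	return -ETIME;
+| }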
+
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Barak Yoresh <barak.yoresh@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/hw-txe.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/drivers/misc/mei/hw-txe.c
++++ b/drivers/misc/mei/hw-txe.c
+@@ -16,6 +16,7 @@
+
+ #include <linux/pci.h>
+ #include <linux/jiffies.h>
++#include <linux/ktime.h>
+ #include <linux/delay.h>
+ #include <linux/kthread.h>
+ #include <linux/irqreturn.h>
+@@ -218,26 +219,25 @@ static u32 mei_txe_aliveness_get(struct
+ *
+ * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set
+ *
+- * Return: > 0 if the expected value was received, -ETIME otherwise
++ * Return: 0 if the expected value was received, -ETIME otherwise
+ */
+ static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected)
+ {
+ struct mei_txe_hw *hw = to_txe_hw(dev);
+- int t = 0;
++ ktime_t stop, start;
+
++ start = ktime_get();
++ stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT));
+ do {
+ hw->aliveness = mei_txe_aliveness_get(dev);
+ if (hw->aliveness == expected) {
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+- dev_dbg(dev->dev,
+- "aliveness settled after %d msecs\n", t);
+- return t;
++ dev_dbg(dev->dev, "aliveness settled after %lld usecs\n",
++ ktime_to_us(ktime_sub(ktime_get(), start)));
++ return 0;
+ }
+- mutex_unlock(&dev->device_lock);
+- msleep(MSEC_PER_SEC / 5);
+- mutex_lock(&dev->device_lock);
+- t += MSEC_PER_SEC / 5;
+- } while (t < SEC_ALIVENESS_WAIT_TIMEOUT);
++ usleep_range(20, 50);
++ } while (ktime_before(ktime_get(), stop));
+
+ dev->pg_event = MEI_PG_EVENT_IDLE;
+ dev_err(dev->dev, "aliveness timed out\n");
--- /dev/null
+From c5f3b1a51a591c18c8b33983908e7fdda6ae417e Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Wed, 24 Jun 2015 16:58:26 -0700
+Subject: mm: kmemleak: allow safe memory scanning during kmemleak disabling
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit c5f3b1a51a591c18c8b33983908e7fdda6ae417e upstream.
+
+The kmemleak scanning thread can run for minutes. Callbacks like
+kmemleak_free() are allowed during this time, the race being taken care
+of by the object->lock spinlock. Such lock also prevents a memory block
+from being freed or unmapped while it is being scanned by blocking the
+kmemleak_free() -> ... -> __delete_object() function until the lock is
+released in scan_object().
+
+When a kmemleak error occurs (e.g. it fails to allocate its metadata),
+kmemleak_enabled is cleared and __delete_object() is no longer called on
+freed objects. If kmemleak_scan is running at the same time,
+kmemleak_free() no longer waits for the object scanning to complete,
+allowing the corresponding memory block to be freed or unmapped (in the
+case of vfree()). This leads to kmemleak_scan potentially triggering a
+page fault.
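+
+Concretely, the race looks roughly like this (illustrative interleaving,
+not a verbatim trace):
+
+|   kmemleak_scan thread                other task
+|   --------------------                ----------
+|   scan_object(object)                 kmemleak_disable()  (error path)
+|     takes object->lock                vfree(ptr)   /* tracked by object */
+|     scan_block(ptr, ...)                kmemleak_free(ptr) returns early,
+|       ...                               without waiting on object->lock
+|       ...                               the vmalloc area is unmapped
+|     scan_block() keeps reading ptr  ->  page fault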
+
+This patch separates the kmemleak_free() enabling/disabling from the
+overall kmemleak_enabled knob so that we can defer the disabling of the
+object freeing tracking until the scanning thread has completed. The
+kmemleak_free_part() callback is deliberately ignored by this patch since
+it is only called during boot, before the scanning thread has started.
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Reported-by: Vignesh Radhakrishnan <vigneshr@codeaurora.org>
+Tested-by: Vignesh Radhakrishnan <vigneshr@codeaurora.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/kmemleak.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/mm/kmemleak.c
++++ b/mm/kmemleak.c
+@@ -195,6 +195,8 @@ static struct kmem_cache *scan_area_cach
+
+ /* set if tracing memory operations is enabled */
+ static int kmemleak_enabled;
++/* same as above but only for the kmemleak_free() callback */
++static int kmemleak_free_enabled;
+ /* set in the late_initcall if there were no errors */
+ static int kmemleak_initialized;
+ /* enables or disables early logging of the memory operations */
+@@ -942,7 +944,7 @@ void __ref kmemleak_free(const void *ptr
+ {
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+- if (kmemleak_enabled && ptr && !IS_ERR(ptr))
++ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
+ delete_object_full((unsigned long)ptr);
+ else if (kmemleak_early_log)
+ log_early(KMEMLEAK_FREE, ptr, 0, 0);
+@@ -982,7 +984,7 @@ void __ref kmemleak_free_percpu(const vo
+
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+- if (kmemleak_enabled && ptr && !IS_ERR(ptr))
++ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
+ for_each_possible_cpu(cpu)
+ delete_object_full((unsigned long)per_cpu_ptr(ptr,
+ cpu));
+@@ -1750,6 +1752,13 @@ static void kmemleak_do_cleanup(struct w
+ mutex_lock(&scan_mutex);
+ stop_scan_thread();
+
++ /*
++ * Once the scan thread has stopped, it is safe to no longer track
++ * object freeing. Ordering of the scan thread stopping and the memory
++ * accesses below is guaranteed by the kthread_stop() function.
++ */
++ kmemleak_free_enabled = 0;
++
+ if (!kmemleak_found_leaks)
+ __kmemleak_do_cleanup();
+ else
+@@ -1776,6 +1785,8 @@ static void kmemleak_disable(void)
+ /* check whether it is too early for a kernel thread */
+ if (kmemleak_initialized)
+ schedule_work(&cleanup_work);
++ else
++ kmemleak_free_enabled = 0;
+
+ pr_info("Kernel memory leak detector disabled\n");
+ }
+@@ -1840,8 +1851,10 @@ void __init kmemleak_init(void)
+ if (kmemleak_error) {
+ local_irq_restore(flags);
+ return;
+- } else
++ } else {
+ kmemleak_enabled = 1;
++ kmemleak_free_enabled = 1;
++ }
+ local_irq_restore(flags);
+
+ /*
--- /dev/null
+From 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 Mon Sep 17 00:00:00 2001
+From: Larry Finger <Larry.Finger@lwfinger.net>
+Date: Wed, 24 Jun 2015 16:58:51 -0700
+Subject: mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc()
+
+From: Larry Finger <Larry.Finger@lwfinger.net>
+
+commit 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 upstream.
+
+Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the
+following INFO splat is logged:
+
+ ===============================
+ [ INFO: suspicious RCU usage. ]
+ 4.1.0-rc7-next-20150612 #1 Not tainted
+ -------------------------------
+ kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section!
+ other info that might help us debug this:
+ rcu_scheduler_active = 1, debug_locks = 0
+ 3 locks held by systemd/1:
+ #0: (rtnl_mutex){+.+.+.}, at: [<ffffffff815f0c8f>] rtnetlink_rcv+0x1f/0x40
+ #1: (rcu_read_lock_bh){......}, at: [<ffffffff816a34e2>] ipv6_add_addr+0x62/0x540
+ #2: (addrconf_hash_lock){+...+.}, at: [<ffffffff816a3604>] ipv6_add_addr+0x184/0x540
+ stack backtrace:
+ CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1
+ Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20 04/17/2014
+ Call Trace:
+ dump_stack+0x4c/0x6e
+ lockdep_rcu_suspicious+0xe7/0x120
+ ___might_sleep+0x1d5/0x1f0
+ __might_sleep+0x4d/0x90
+ kmem_cache_alloc+0x47/0x250
+ create_object+0x39/0x2e0
+ kmemleak_alloc_percpu+0x61/0xe0
+ pcpu_alloc+0x370/0x630
+
+Additional backtrace lines are truncated. In addition, the above splat
+is followed by several "BUG: sleeping function called from invalid
+context at mm/slub.c:1268" outputs. As suggested by Martin KaFai Lau,
+these are the clue to the fix. Routine kmemleak_alloc_percpu() always
+uses GFP_KERNEL for its allocations, whereas it should follow the gfp
+from its callers.
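+
+A sketch of the kind of caller that trips the splat (hypothetical code,
+modelled loosely on the ipv6 percpu rt6_info path, not quoted from it):
+
+| static int __percpu *percpu_alloc_in_atomic(void)
+| {
+| 	int __percpu *p;
+|
+| 	rcu_read_lock_bh();	/* atomic context, as in ipv6_add_addr() */
+| 	p = alloc_percpu_gfp(int, GFP_ATOMIC);
+| 	/*
+| 	 * Before this patch, kmemleak_alloc_percpu() ignored the GFP_ATOMIC
+| 	 * above and allocated its tracking metadata with GFP_KERNEL, which
+| 	 * may sleep -> the "Illegal context switch" splat.
+| 	 */
+| 	rcu_read_unlock_bh();
+| 	return p;
+| }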
+
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
+Cc: Martin KaFai Lau <kafai@fb.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Christoph Lameter <cl@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/kmemleak.h | 6 ++++--
+ mm/kmemleak.c | 9 +++++----
+ mm/percpu.c | 2 +-
+ 3 files changed, 10 insertions(+), 7 deletions(-)
+
+--- a/include/linux/kmemleak.h
++++ b/include/linux/kmemleak.h
+@@ -28,7 +28,8 @@
+ extern void kmemleak_init(void) __ref;
+ extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+ gfp_t gfp) __ref;
+-extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
++extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
++ gfp_t gfp) __ref;
+ extern void kmemleak_free(const void *ptr) __ref;
+ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+ extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+@@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recurs
+ gfp_t gfp)
+ {
+ }
+-static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
++static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
++ gfp_t gfp)
+ {
+ }
+ static inline void kmemleak_free(const void *ptr)
+--- a/mm/kmemleak.c
++++ b/mm/kmemleak.c
+@@ -909,12 +909,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc);
+ * kmemleak_alloc_percpu - register a newly allocated __percpu object
+ * @ptr: __percpu pointer to beginning of the object
+ * @size: size of the object
++ * @gfp: flags used for kmemleak internal memory allocations
+ *
+ * This function is called from the kernel percpu allocator when a new object
+- * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL
+- * allocation.
++ * (memory block) is allocated (alloc_percpu).
+ */
+-void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
++void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
++ gfp_t gfp)
+ {
+ unsigned int cpu;
+
+@@ -927,7 +928,7 @@ void __ref kmemleak_alloc_percpu(const v
+ if (kmemleak_enabled && ptr && !IS_ERR(ptr))
+ for_each_possible_cpu(cpu)
+ create_object((unsigned long)per_cpu_ptr(ptr, cpu),
+- size, 0, GFP_KERNEL);
++ size, 0, gfp);
+ else if (kmemleak_early_log)
+ log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
+ }
+--- a/mm/percpu.c
++++ b/mm/percpu.c
+@@ -1030,7 +1030,7 @@ area_found:
+ memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
+
+ ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
+- kmemleak_alloc_percpu(ptr, size);
++ kmemleak_alloc_percpu(ptr, size, gfp);
+ return ptr;
+
+ fail_unlock:
--- /dev/null
+From 0867a57c4f80a566dda1bac975b42fcd857cb489 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Wed, 24 Jun 2015 16:58:48 -0700
+Subject: mm, thp: respect MPOL_PREFERRED policy with non-local node
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 0867a57c4f80a566dda1bac975b42fcd857cb489 upstream.
+
+Since commit 077fcf116c8c ("mm/thp: allocate transparent hugepages on
+local node"), we handle THP allocations on page fault in a special way -
+for non-interleave memory policies, the allocation is only attempted on
+the node local to the current CPU, if the policy's nodemask allows the
+node.
+
+This is motivated by the assumption that THP benefits cannot offset the
+cost of remote accesses, so it's better to fallback to base pages on the
+local node (which might still be available, while huge pages are not due
+to fragmentation) than to allocate huge pages on a remote node.
+
+The nodemask check prevents us from violating e.g. MPOL_BIND policies
+where the local node is not among the allowed nodes. However, the
+current implementation can still give surprising results for the
+MPOL_PREFERRED policy when the preferred node is different from the
+current CPU's local node.
+
+In such a case we should honor the preferred node and not use the local
+node, which is what this patch does. If hugepage allocation on the
+preferred node fails, we fall back to base pages and don't try other
+nodes, with the same motivation as is done for the local node hugepage
+allocations. The patch also moves the MPOL_INTERLEAVE check around to
+simplify the hugepage specific test.
+
+The difference can be demonstrated using the in-tree transhuge-stress test
+on the following 2-node machine, where half the memory on one node was
+occupied to show the difference.
+
+> numactl --hardware
+available: 2 nodes (0-1)
+node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 24 25 26 27 28 29 30 31 32 33 34 35
+node 0 size: 7878 MB
+node 0 free: 3623 MB
+node 1 cpus: 12 13 14 15 16 17 18 19 20 21 22 23 36 37 38 39 40 41 42 43 44 45 46 47
+node 1 size: 8045 MB
+node 1 free: 7818 MB
+node distances:
+node 0 1
+ 0: 10 21
+ 1: 21 10
+
+Before the patch:
+> numactl -p0 -C0 ./transhuge-stress
+transhuge-stress: 2.197 s/loop, 0.276 ms/page, 7249.168 MiB/s 7962 succeed, 0 failed, 1786 different pages
+
+> numactl -p0 -C12 ./transhuge-stress
+transhuge-stress: 2.962 s/loop, 0.372 ms/page, 5376.172 MiB/s 7962 succeed, 0 failed, 3873 different pages
+
+The number of successful THP allocations corresponds to the free memory on
+node 0 in the first case and node 1 in the second case, i.e. the -p
+parameter is ignored and the cpu binding "wins".
+
+After the patch:
+> numactl -p0 -C0 ./transhuge-stress
+transhuge-stress: 2.183 s/loop, 0.274 ms/page, 7295.516 MiB/s 7962 succeed, 0 failed, 1760 different pages
+
+> numactl -p0 -C12 ./transhuge-stress
+transhuge-stress: 2.878 s/loop, 0.361 ms/page, 5533.638 MiB/s 7962 succeed, 0 failed, 1750 different pages
+
+> numactl -p1 -C0 ./transhuge-stress
+transhuge-stress: 4.628 s/loop, 0.581 ms/page, 3440.893 MiB/s 7962 succeed, 0 failed, 3918 different pages
+
+The -p parameter is respected regardless of cpu binding.
+
+> numactl -C0 ./transhuge-stress
+transhuge-stress: 2.202 s/loop, 0.277 ms/page, 7230.003 MiB/s 7962 succeed, 0 failed, 1750 different pages
+
+> numactl -C12 ./transhuge-stress
+transhuge-stress: 3.020 s/loop, 0.379 ms/page, 5273.324 MiB/s 7962 succeed, 0 failed, 3916 different pages
+
+Without the -p parameter, the hugepage restriction to the CPU-local node works as before.
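+
+For reference, "numactl -p0 -C12" corresponds roughly to the following
+setup in the test process (illustrative sketch, error handling omitted;
+not part of transhuge-stress itself):
+
+| #define _GNU_SOURCE
+| #include <sched.h>
+| #include <numaif.h>
+|
+| static void prefer_node0_run_on_cpu12(void)
+| {
+| 	unsigned long nodemask = 1UL << 0;	/* prefer node 0 */
+| 	cpu_set_t cpus;
+|
+| 	set_mempolicy(MPOL_PREFERRED, &nodemask, sizeof(nodemask) * 8);
+|
+| 	CPU_ZERO(&cpus);
+| 	CPU_SET(12, &cpus);			/* a node 1 CPU on this box */
+| 	sched_setaffinity(0, sizeof(cpus), &cpus);
+| }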
+
+Fixes: 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 38 ++++++++++++++++++++++----------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1971,35 +1971,41 @@ retry_cpuset:
+ pol = get_vma_policy(vma, addr);
+ cpuset_mems_cookie = read_mems_allowed_begin();
+
+- if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
+- pol->mode != MPOL_INTERLEAVE)) {
++ if (pol->mode == MPOL_INTERLEAVE) {
++ unsigned nid;
++
++ nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
++ mpol_cond_put(pol);
++ page = alloc_page_interleave(gfp, order, nid);
++ goto out;
++ }
++
++ if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
++ int hpage_node = node;
++
+ /*
+ * For hugepage allocation and non-interleave policy which
+- * allows the current node, we only try to allocate from the
+- * current node and don't fall back to other nodes, as the
+- * cost of remote accesses would likely offset THP benefits.
++ * allows the current node (or other explicitly preferred
++ * node) we only try to allocate from the current/preferred
++ * node and don't fall back to other nodes, as the cost of
++ * remote accesses would likely offset THP benefits.
+ *
+ * If the policy is interleave, or does not allow the current
+ * node in its nodemask, we allocate the standard way.
+ */
++ if (pol->mode == MPOL_PREFERRED &&
++ !(pol->flags & MPOL_F_LOCAL))
++ hpage_node = pol->v.preferred_node;
++
+ nmask = policy_nodemask(gfp, pol);
+- if (!nmask || node_isset(node, *nmask)) {
++ if (!nmask || node_isset(hpage_node, *nmask)) {
+ mpol_cond_put(pol);
+- page = alloc_pages_exact_node(node,
++ page = alloc_pages_exact_node(hpage_node,
+ gfp | __GFP_THISNODE, order);
+ goto out;
+ }
+ }
+
+- if (pol->mode == MPOL_INTERLEAVE) {
+- unsigned nid;
+-
+- nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
+- mpol_cond_put(pol);
+- page = alloc_page_interleave(gfp, order, nid);
+- goto out;
+- }
+-
+ nmask = policy_nodemask(gfp, pol);
+ zl = policy_zonelist(gfp, pol, node);
+ mpol_cond_put(pol);
acpi-pm-add-missing-pm_generic_complete-invocation.patch
iio-accel-kxcjk-1013-add-the-kxcj9000-acpi-id.patch
tools-selftests-fix-clean-target-with-make-3.81.patch
+arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch
+arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch
+mei-me-wait-for-power-gating-exit-confirmation.patch
+mei-txe-reduce-suspend-resume-time.patch
+arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch
+arm64-entry-fix-context-tracking-for-el0_sp_pc.patch
+arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch
+arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch
+mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch
+mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch
+mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch