From: Greg Kroah-Hartman Date: Fri, 17 Jul 2015 00:58:49 +0000 (-0700) Subject: 4.0-stable patches X-Git-Tag: v4.0.9~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=add1b9b215f9af24dff70d17f6b2e12d55840db8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.0-stable patches added patches: arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch arm64-entry-fix-context-tracking-for-el0_sp_pc.patch arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch mei-me-wait-for-power-gating-exit-confirmation.patch mei-txe-reduce-suspend-resume-time.patch mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch --- diff --git a/queue-4.0/arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch b/queue-4.0/arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch new file mode 100644 index 00000000000..1ddc114f218 --- /dev/null +++ b/queue-4.0/arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch @@ -0,0 +1,60 @@ +From d57f727264f1425a94689bafc7e99e502cb135b5 Mon Sep 17 00:00:00 2001 +From: Vineet Gupta +Date: Thu, 13 Nov 2014 15:54:01 +0530 +Subject: ARC: add compiler barrier to LLSC based cmpxchg + +From: Vineet Gupta + +commit d57f727264f1425a94689bafc7e99e502cb135b5 upstream. + +When auditing cmpxchg call sites, Chuck noted that gcc was optimizing +away some of the desired LDs. + +| do { +| new = old = *ipi_data_ptr; +| new |= 1U << msg; +| } while (cmpxchg(ipi_data_ptr, old, new) != old); + +was generating to below + +| 8015cef8: ld r2,[r4,0] <-- First LD +| 8015cefc: bset r1,r2,r1 +| +| 8015cf00: llock r3,[r4] <-- atomic op +| 8015cf04: brne r3,r2,8015cf10 +| 8015cf08: scond r1,[r4] +| 8015cf0c: bnz 8015cf00 +| +| 8015cf10: brne r3,r2,8015cf00 <-- Branch doesn't go to orig LD + +Although this was fixed by adding a ACCESS_ONCE in this call site, it +seems safer (for now at least) to add compiler barrier to LLSC based +cmpxchg + +Reported-by: Chuck Jordan +Acked-by: Peter Zijlstra (Intel) +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/include/asm/cmpxchg.h | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/arc/include/asm/cmpxchg.h ++++ b/arch/arc/include/asm/cmpxchg.h +@@ -33,10 +33,11 @@ __cmpxchg(volatile void *ptr, unsigned l + " scond %3, [%1] \n" + " bnz 1b \n" + "2: \n" +- : "=&r"(prev) +- : "r"(ptr), "ir"(expected), +- "r"(new) /* can't be "ir". scond can't take limm for "b" */ +- : "cc"); ++ : "=&r"(prev) /* Early clobber, to prevent reg reuse */ ++ : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ ++ "ir"(expected), ++ "r"(new) /* can't be "ir". 
scond can't take LIMM for "b" */ ++ : "cc", "memory"); /* so that gcc knows memory is being written here */ + + smp_mb(); + diff --git a/queue-4.0/arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch b/queue-4.0/arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch new file mode 100644 index 00000000000..9a6bfb84316 --- /dev/null +++ b/queue-4.0/arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch @@ -0,0 +1,289 @@ +From 2576c28e3f623ed401db7e6197241865328620ef Mon Sep 17 00:00:00 2001 +From: Vineet Gupta +Date: Thu, 20 Nov 2014 15:42:09 +0530 +Subject: ARC: add smp barriers around atomics per Documentation/atomic_ops.txt + +From: Vineet Gupta + +commit 2576c28e3f623ed401db7e6197241865328620ef upstream. + + - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers + Since ARCv2 only provides load/load, store/store and all/all, we need + the full barrier + + - LLOCK/SCOND based atomics, bitops, cmpxchg, which return modified + values were lacking the explicit smp barriers. + + - Non LLOCK/SCOND varaints don't need the explicit barriers since that + is implicity provided by the spin locks used to implement the + critical section (the spin lock barriers in turn are also fixed in + this commit as explained above + +Cc: Paul E. McKenney +Acked-by: Peter Zijlstra (Intel) +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++ + arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++ + arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++ + arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++ + 4 files changed, 89 insertions(+) + +--- a/arch/arc/include/asm/atomic.h ++++ b/arch/arc/include/asm/atomic.h +@@ -43,6 +43,12 @@ static inline int atomic_##op##_return(i + { \ + unsigned int temp; \ + \ ++ /* \ ++ * Explicit full memory barrier needed before/after as \ ++ * LLOCK/SCOND thmeselves don't provide any such semantics \ ++ */ \ ++ smp_mb(); \ ++ \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ +@@ -52,6 +58,8 @@ static inline int atomic_##op##_return(i + : "r"(&v->counter), "ir"(i) \ + : "cc"); \ + \ ++ smp_mb(); \ ++ \ + return temp; \ + } + +@@ -105,6 +113,9 @@ static inline int atomic_##op##_return(i + unsigned long flags; \ + unsigned long temp; \ + \ ++ /* \ ++ * spin lock/unlock provides the needed smp_mb() before/after \ ++ */ \ + atomic_ops_lock(flags); \ + temp = v->counter; \ + temp c_op i; \ +@@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) + #define __atomic_add_unless(v, a, u) \ + ({ \ + int c, old; \ ++ \ ++ /* \ ++ * Explicit full memory barrier needed before/after as \ ++ * LLOCK/SCOND thmeselves don't provide any such semantics \ ++ */ \ ++ smp_mb(); \ ++ \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ + c = old; \ ++ \ ++ smp_mb(); \ ++ \ + c; \ + }) + +--- a/arch/arc/include/asm/bitops.h ++++ b/arch/arc/include/asm/bitops.h +@@ -103,6 +103,12 @@ static inline int test_and_set_bit(unsig + if (__builtin_constant_p(nr)) + nr &= 0x1f; + ++ /* ++ * Explicit full memory barrier needed before/after as ++ * LLOCK/SCOND themselves don't provide any such semantics ++ */ ++ smp_mb(); ++ + __asm__ __volatile__( + "1: llock %0, [%2] \n" + " bset %1, %0, %3 \n" +@@ -112,6 +118,8 @@ static inline int test_and_set_bit(unsig + : "r"(m), "ir"(nr) + : "cc"); + ++ smp_mb(); ++ + return (old & (1 << nr)) != 0; + } + +@@ -125,6 +133,8 @@ 
test_and_clear_bit(unsigned long nr, vol + if (__builtin_constant_p(nr)) + nr &= 0x1f; + ++ smp_mb(); ++ + __asm__ __volatile__( + "1: llock %0, [%2] \n" + " bclr %1, %0, %3 \n" +@@ -134,6 +144,8 @@ test_and_clear_bit(unsigned long nr, vol + : "r"(m), "ir"(nr) + : "cc"); + ++ smp_mb(); ++ + return (old & (1 << nr)) != 0; + } + +@@ -147,6 +159,8 @@ test_and_change_bit(unsigned long nr, vo + if (__builtin_constant_p(nr)) + nr &= 0x1f; + ++ smp_mb(); ++ + __asm__ __volatile__( + "1: llock %0, [%2] \n" + " bxor %1, %0, %3 \n" +@@ -156,6 +170,8 @@ test_and_change_bit(unsigned long nr, vo + : "r"(m), "ir"(nr) + : "cc"); + ++ smp_mb(); ++ + return (old & (1 << nr)) != 0; + } + +@@ -235,6 +251,9 @@ static inline int test_and_set_bit(unsig + if (__builtin_constant_p(nr)) + nr &= 0x1f; + ++ /* ++ * spin lock/unlock provide the needed smp_mb() before/after ++ */ + bitops_lock(flags); + + old = *m; +--- a/arch/arc/include/asm/cmpxchg.h ++++ b/arch/arc/include/asm/cmpxchg.h +@@ -10,6 +10,8 @@ + #define __ASM_ARC_CMPXCHG_H + + #include ++ ++#include + #include + + #ifdef CONFIG_ARC_HAS_LLSC +@@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned l + { + unsigned long prev; + ++ /* ++ * Explicit full memory barrier needed before/after as ++ * LLOCK/SCOND thmeselves don't provide any such semantics ++ */ ++ smp_mb(); ++ + __asm__ __volatile__( + "1: llock %0, [%1] \n" + " brne %0, %2, 2f \n" +@@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned l + "r"(new) /* can't be "ir". scond can't take limm for "b" */ + : "cc"); + ++ smp_mb(); ++ + return prev; + } + +@@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned l + int prev; + volatile unsigned long *p = ptr; + ++ /* ++ * spin lock/unlock provide the needed smp_mb() before/after ++ */ + atomic_ops_lock(flags); + prev = *p; + if (prev == expected) +@@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsig + + switch (size) { + case 4: ++ smp_mb(); ++ + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r"(val) + : "r"(ptr) + : "memory"); + ++ smp_mb(); ++ + return val; + } + return __xchg_bad_pointer(); +--- a/arch/arc/include/asm/spinlock.h ++++ b/arch/arc/include/asm/spinlock.h +@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_s + { + unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + ++ /* ++ * This smp_mb() is technically superfluous, we only need the one ++ * after the lock for providing the ACQUIRE semantics. 
++ * However doing the "right" thing was regressing hackbench ++ * so keeping this, pending further investigation ++ */ ++ smp_mb(); ++ + __asm__ __volatile__( + "1: ex %0, [%1] \n" + " breq %0, %2, 1b \n" + : "+&r" (tmp) + : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) + : "memory"); ++ ++ /* ++ * ACQUIRE barrier to ensure load/store after taking the lock ++ * don't "bleed-up" out of the critical section (leak-in is allowed) ++ * http://www.spinics.net/lists/kernel/msg2010409.html ++ * ++ * ARCv2 only has load-load, store-store and all-all barrier ++ * thus need the full all-all barrier ++ */ ++ smp_mb(); + } + + static inline int arch_spin_trylock(arch_spinlock_t *lock) + { + unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + ++ smp_mb(); ++ + __asm__ __volatile__( + "1: ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + ++ smp_mb(); ++ + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); + } + +@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch + { + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + ++ /* ++ * RELEASE barrier: given the instructions avail on ARCv2, full barrier ++ * is the only option ++ */ ++ smp_mb(); ++ + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + ++ /* ++ * superfluous, but keeping for now - see pairing version in ++ * arch_spin_lock above ++ */ + smp_mb(); + } + diff --git a/queue-4.0/arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch b/queue-4.0/arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch new file mode 100644 index 00000000000..38323766cb2 --- /dev/null +++ b/queue-4.0/arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch @@ -0,0 +1,39 @@ +From 565630d503ef24e44c252bed55571b3a0d68455f Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Fri, 12 Jun 2015 11:24:41 +0100 +Subject: arm64: Do not attempt to use init_mm in reset_context() + +From: Catalin Marinas + +commit 565630d503ef24e44c252bed55571b3a0d68455f upstream. + +After secondary CPU boot or hotplug, the active_mm of the idle thread is +&init_mm. The init_mm.pgd (swapper_pg_dir) is only meant for TTBR1_EL1 +and must not be set in TTBR0_EL1. Since when active_mm == &init_mm the +TTBR0_EL1 is already set to the reserved value, there is no need to +perform any context reset. + +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/context.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/arm64/mm/context.c ++++ b/arch/arm64/mm/context.c +@@ -92,6 +92,14 @@ static void reset_context(void *info) + unsigned int cpu = smp_processor_id(); + struct mm_struct *mm = current->active_mm; + ++ /* ++ * current->active_mm could be init_mm for the idle thread immediately ++ * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to ++ * the reserved value, so no need to reset any context. ++ */ ++ if (mm == &init_mm) ++ return; ++ + smp_rmb(); + asid = cpu_last_asid + cpu; + diff --git a/queue-4.0/arm64-entry-fix-context-tracking-for-el0_sp_pc.patch b/queue-4.0/arm64-entry-fix-context-tracking-for-el0_sp_pc.patch new file mode 100644 index 00000000000..51844d3b1c8 --- /dev/null +++ b/queue-4.0/arm64-entry-fix-context-tracking-for-el0_sp_pc.patch @@ -0,0 +1,69 @@ +From 46b0567c851cf85d6ba6f23eef385ec9111d09bc Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Mon, 15 Jun 2015 16:40:27 +0100 +Subject: arm64: entry: fix context tracking for el0_sp_pc + +From: Mark Rutland + +commit 46b0567c851cf85d6ba6f23eef385ec9111d09bc upstream. 
+ +Commit 6c81fe7925cc4c42 ("arm64: enable context tracking") did not +update el0_sp_pc to use ct_user_exit, but this appears to have been +unintentional. In commit 6ab6463aeb5fbc75 ("arm64: adjust el0_sync so +that a function can be called") we made x0 available, and in the return +to userspace we call ct_user_enter in the kernel_exit macro. + +Due to this, we currently don't correctly inform RCU of the user->kernel +transition, and may erroneously account for time spent in the kernel as +if we were in an extended quiescent state when CONFIG_CONTEXT_TRACKING +is enabled. + +As we do record the kernel->user transition, a userspace application +making accesses from an unaligned stack pointer can demonstrate the +imbalance, provoking the following warning: + +------------[ cut here ]------------ +WARNING: CPU: 2 PID: 3660 at kernel/context_tracking.c:75 context_tracking_enter+0xd8/0xe4() +Modules linked in: +CPU: 2 PID: 3660 Comm: a.out Not tainted 4.1.0-rc7+ #8 +Hardware name: ARM Juno development board (r0) (DT) +Call trace: +[] dump_backtrace+0x0/0x124 +[] show_stack+0x10/0x1c +[] dump_stack+0x84/0xc8 +[] warn_slowpath_common+0x98/0xd0 +[] warn_slowpath_null+0x14/0x20 +[] context_tracking_enter+0xd4/0xe4 +[] preempt_schedule_irq+0xd4/0x114 +[] el1_preempt+0x4/0x28 +[] exit_files+0x38/0x4c +[] do_exit+0x430/0x978 +[] do_group_exit+0x40/0xd4 +[] get_signal+0x23c/0x4f4 +[] do_signal+0x1ac/0x518 +[] do_notify_resume+0x5c/0x68 +---[ end trace 963c192600337066 ]--- + +This patch adds the missing ct_user_exit to the el0_sp_pc entry path, +correcting the context tracking for this case. + +Signed-off-by: Mark Rutland +Acked-by: Will Deacon +Fixes: 6c81fe7925cc ("arm64: enable context tracking") +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/entry.S | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -517,6 +517,7 @@ el0_sp_pc: + mrs x26, far_el1 + // enable interrupts before calling the main handler + enable_dbg_and_irq ++ ct_user_exit + mov x0, x26 + mov x1, x25 + mov x2, sp diff --git a/queue-4.0/arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch b/queue-4.0/arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch new file mode 100644 index 00000000000..0585b060726 --- /dev/null +++ b/queue-4.0/arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch @@ -0,0 +1,49 @@ +From b9bcc919931611498e856eae9bf66337330d04cc Mon Sep 17 00:00:00 2001 +From: Dave P Martin +Date: Tue, 16 Jun 2015 17:38:47 +0100 +Subject: arm64: mm: Fix freeing of the wrong memmap entries with !SPARSEMEM_VMEMMAP + +From: Dave P Martin + +commit b9bcc919931611498e856eae9bf66337330d04cc upstream. + +The memmap freeing code in free_unused_memmap() computes the end of +each memblock by adding the memblock size onto the base. However, +if SPARSEMEM is enabled then the value (start) used for the base +may already have been rounded downwards to work out which memmap +entries to free after the previous memblock. + +This may cause memmap entries that are in use to get freed. + +In general, you're not likely to hit this problem unless there +are at least 2 memblocks and one of them is not aligned to a +sparsemem section boundary. Note that carve-outs can increase +the number of memblocks by splitting the regions listed in the +device tree. 
+ +This problem doesn't occur with SPARSEMEM_VMEMMAP, because the +vmemmap code deals with freeing the unused regions of the memmap +instead of requiring the arch code to do it. + +This patch gets the memblock base out of the memblock directly when +computing the block end address to ensure the correct value is used. + +Signed-off-by: Dave Martin +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/init.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/mm/init.c ++++ b/arch/arm64/mm/init.c +@@ -260,7 +260,7 @@ static void __init free_unused_memmap(vo + * memmap entries are valid from the bank end aligned to + * MAX_ORDER_NR_PAGES. + */ +- prev_end = ALIGN(start + __phys_to_pfn(reg->size), ++ prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), + MAX_ORDER_NR_PAGES); + } + diff --git a/queue-4.0/arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch b/queue-4.0/arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch new file mode 100644 index 00000000000..0257ff05662 --- /dev/null +++ b/queue-4.0/arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch @@ -0,0 +1,45 @@ +From 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Fri, 19 Jun 2015 13:56:33 +0100 +Subject: arm64: vdso: work-around broken ELF toolchains in Makefile + +From: Will Deacon + +commit 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb upstream. + +When building the kernel with a bare-metal (ELF) toolchain, the -shared +option may not be passed down to collect2, resulting in silent corruption +of the vDSO image (in particular, the DYNAMIC section is omitted). + +The effect of this corruption is that the dynamic linker fails to find +the vDSO symbols and libc is instead used for the syscalls that we +intended to optimise (e.g. gettimeofday). Functionally, there is no +issue as the sigreturn trampoline is still intact and located by the +kernel. + +This patch fixes the problem by explicitly passing -shared to the linker +when building the vDSO. + +Reported-by: Szabolcs Nagy +Reported-by: James Greenlaigh +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/vdso/Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/arm64/kernel/vdso/Makefile ++++ b/arch/arm64/kernel/vdso/Makefile +@@ -15,6 +15,10 @@ ccflags-y := -shared -fno-common -fno-bu + ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + ++# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared ++# down to collect2, resulting in silent corruption of the vDSO image. ++ccflags-y += -Wl,-shared ++ + obj-y += vdso.o + extra-y += vdso.lds vdso-offsets.h + CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/queue-4.0/mei-me-wait-for-power-gating-exit-confirmation.patch b/queue-4.0/mei-me-wait-for-power-gating-exit-confirmation.patch new file mode 100644 index 00000000000..1c9a5e75486 --- /dev/null +++ b/queue-4.0/mei-me-wait-for-power-gating-exit-confirmation.patch @@ -0,0 +1,232 @@ +From 3dc196eae1db548f05e53e5875ff87b8ff79f249 Mon Sep 17 00:00:00 2001 +From: Alexander Usyskin +Date: Sat, 13 Jun 2015 08:51:17 +0300 +Subject: mei: me: wait for power gating exit confirmation + +From: Alexander Usyskin + +commit 3dc196eae1db548f05e53e5875ff87b8ff79f249 upstream. + +Fix the hbm power gating state machine so it will wait till it receives +confirmation interrupt for the PG_ISOLATION_EXIT message. 
+ +In process of the suspend flow the devices first have to exit from the +power gating state (runtime pm resume). +If we do not handle the confirmation interrupt after sending +PG_ISOLATION_EXIT message, we may receive it already after the suspend +flow has changed the device state and interrupt will be interpreted as a +spurious event, consequently link reset will be invoked which will +prevent the device from completing the suspend flow + +kernel: [6603] mei_reset:136: mei_me 0000:00:16.0: powering down: end of reset +kernel: [476] mei_me_irq_thread_handler:643: mei_me 0000:00:16.0: function called after ISR to handle the interrupt processing. +kernel: mei_me 0000:00:16.0: FW not ready: resetting + +Cc: Gabriele Mazzotta +Link: https://bugzilla.kernel.org/show_bug.cgi?id=86241 +Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=770397 +Tested-by: Gabriele Mazzotta +Signed-off-by: Alexander Usyskin +Signed-off-by: Tomas Winkler +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/misc/mei/client.c | 2 - + drivers/misc/mei/hw-me.c | 59 +++++++++++++++++++++++++++++++++++++++++---- + drivers/misc/mei/hw-txe.c | 13 +++++++++ + drivers/misc/mei/mei_dev.h | 11 ++++++++ + 4 files changed, 80 insertions(+), 5 deletions(-) + +--- a/drivers/misc/mei/client.c ++++ b/drivers/misc/mei/client.c +@@ -573,7 +573,7 @@ void mei_host_client_init(struct work_st + bool mei_hbuf_acquire(struct mei_device *dev) + { + if (mei_pg_state(dev) == MEI_PG_ON || +- dev->pg_event == MEI_PG_EVENT_WAIT) { ++ mei_pg_in_transition(dev)) { + dev_dbg(dev->dev, "device is in pg\n"); + return false; + } +--- a/drivers/misc/mei/hw-me.c ++++ b/drivers/misc/mei/hw-me.c +@@ -629,11 +629,27 @@ int mei_me_pg_unset_sync(struct mei_devi + mutex_lock(&dev->device_lock); + + reply: +- if (dev->pg_event == MEI_PG_EVENT_RECEIVED) +- ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); ++ if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { ++ ret = -ETIME; ++ goto out; ++ } ++ ++ dev->pg_event = MEI_PG_EVENT_INTR_WAIT; ++ ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); ++ if (ret) ++ return ret; ++ ++ mutex_unlock(&dev->device_lock); ++ wait_event_timeout(dev->wait_pg, ++ dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); ++ mutex_lock(&dev->device_lock); ++ ++ if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) ++ ret = 0; + else + ret = -ETIME; + ++out: + dev->pg_event = MEI_PG_EVENT_IDLE; + hw->pg_state = MEI_PG_OFF; + +@@ -641,6 +657,19 @@ reply: + } + + /** ++ * mei_me_pg_in_transition - is device now in pg transition ++ * ++ * @dev: the device structure ++ * ++ * Return: true if in pg transition, false otherwise ++ */ ++static bool mei_me_pg_in_transition(struct mei_device *dev) ++{ ++ return dev->pg_event >= MEI_PG_EVENT_WAIT && ++ dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; ++} ++ ++/** + * mei_me_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure +@@ -672,6 +701,24 @@ notsupported: + } + + /** ++ * mei_me_pg_intr - perform pg processing in interrupt thread handler ++ * ++ * @dev: the device structure ++ */ ++static void mei_me_pg_intr(struct mei_device *dev) ++{ ++ struct mei_me_hw *hw = to_me_hw(dev); ++ ++ if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) ++ return; ++ ++ dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; ++ hw->pg_state = MEI_PG_OFF; ++ if (waitqueue_active(&dev->wait_pg)) ++ wake_up(&dev->wait_pg); ++} ++ ++/** + * mei_me_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number +@@ -729,6 +776,8 @@ irqreturn_t mei_me_irq_thread_handler(in + goto end; + } + ++ 
mei_me_pg_intr(dev); ++ + /* check if we need to start the dev */ + if (!mei_host_is_ready(dev)) { + if (mei_hw_is_ready(dev)) { +@@ -765,9 +814,10 @@ irqreturn_t mei_me_irq_thread_handler(in + /* + * During PG handshake only allowed write is the replay to the + * PG exit message, so block calling write function +- * if the pg state is not idle ++ * if the pg event is in PG handshake + */ +- if (dev->pg_event == MEI_PG_EVENT_IDLE) { ++ if (dev->pg_event != MEI_PG_EVENT_WAIT && ++ dev->pg_event != MEI_PG_EVENT_RECEIVED) { + rets = mei_irq_write_handler(dev, &complete_list); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } +@@ -792,6 +842,7 @@ static const struct mei_hw_ops mei_me_hw + .hw_config = mei_me_hw_config, + .hw_start = mei_me_hw_start, + ++ .pg_in_transition = mei_me_pg_in_transition, + .pg_is_enabled = mei_me_pg_is_enabled, + + .intr_clear = mei_me_intr_clear, +--- a/drivers/misc/mei/hw-txe.c ++++ b/drivers/misc/mei/hw-txe.c +@@ -302,6 +302,18 @@ int mei_txe_aliveness_set_sync(struct me + } + + /** ++ * mei_txe_pg_in_transition - is device now in pg transition ++ * ++ * @dev: the device structure ++ * ++ * Return: true if in pg transition, false otherwise ++ */ ++static bool mei_txe_pg_in_transition(struct mei_device *dev) ++{ ++ return dev->pg_event == MEI_PG_EVENT_WAIT; ++} ++ ++/** + * mei_txe_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure +@@ -1138,6 +1150,7 @@ static const struct mei_hw_ops mei_txe_h + .hw_config = mei_txe_hw_config, + .hw_start = mei_txe_hw_start, + ++ .pg_in_transition = mei_txe_pg_in_transition, + .pg_is_enabled = mei_txe_pg_is_enabled, + + .intr_clear = mei_txe_intr_clear, +--- a/drivers/misc/mei/mei_dev.h ++++ b/drivers/misc/mei/mei_dev.h +@@ -269,6 +269,7 @@ struct mei_cl { + + * @fw_status : get fw status registers + * @pg_state : power gating state of the device ++ * @pg_in_transition : is device now in pg transition + * @pg_is_enabled : is power gating enabled + + * @intr_clear : clear pending interrupts +@@ -298,6 +299,7 @@ struct mei_hw_ops { + + int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + enum mei_pg_state (*pg_state)(struct mei_device *dev); ++ bool (*pg_in_transition)(struct mei_device *dev); + bool (*pg_is_enabled)(struct mei_device *dev); + + void (*intr_clear)(struct mei_device *dev); +@@ -396,11 +398,15 @@ struct mei_cl_device { + * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition + * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete + * @MEI_PG_EVENT_RECEIVED: the driver received pg event ++ * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt ++ * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt + */ + enum mei_pg_event { + MEI_PG_EVENT_IDLE, + MEI_PG_EVENT_WAIT, + MEI_PG_EVENT_RECEIVED, ++ MEI_PG_EVENT_INTR_WAIT, ++ MEI_PG_EVENT_INTR_RECEIVED, + }; + + /** +@@ -727,6 +733,11 @@ static inline enum mei_pg_state mei_pg_s + return dev->ops->pg_state(dev); + } + ++static inline bool mei_pg_in_transition(struct mei_device *dev) ++{ ++ return dev->ops->pg_in_transition(dev); ++} ++ + static inline bool mei_pg_is_enabled(struct mei_device *dev) + { + return dev->ops->pg_is_enabled(dev); diff --git a/queue-4.0/mei-txe-reduce-suspend-resume-time.patch b/queue-4.0/mei-txe-reduce-suspend-resume-time.patch new file mode 100644 index 00000000000..d98a1ed4653 --- /dev/null +++ b/queue-4.0/mei-txe-reduce-suspend-resume-time.patch @@ -0,0 +1,67 @@ +From fe292283c23329218e384bffc6cb4bfa3fd92277 Mon Sep 17 00:00:00 
2001 +From: Tomas Winkler +Date: Tue, 14 Apr 2015 10:27:26 +0300 +Subject: mei: txe: reduce suspend/resume time + +From: Tomas Winkler + +commit fe292283c23329218e384bffc6cb4bfa3fd92277 upstream. + +HW has to be in known state before the initialisation +sequence is started. The polling step for settling aliveness +was set to 200ms while in practise this can be done in up to 30msecs. + +Signed-off-by: Tomas Winkler +Signed-off-by: Barak Yoresh +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/misc/mei/hw-txe.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/drivers/misc/mei/hw-txe.c ++++ b/drivers/misc/mei/hw-txe.c +@@ -16,6 +16,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -218,26 +219,25 @@ static u32 mei_txe_aliveness_get(struct + * + * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * +- * Return: > 0 if the expected value was received, -ETIME otherwise ++ * Return: 0 if the expected value was received, -ETIME otherwise + */ + static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) + { + struct mei_txe_hw *hw = to_txe_hw(dev); +- int t = 0; ++ ktime_t stop, start; + ++ start = ktime_get(); ++ stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); + do { + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) { + dev->pg_event = MEI_PG_EVENT_IDLE; +- dev_dbg(dev->dev, +- "aliveness settled after %d msecs\n", t); +- return t; ++ dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", ++ ktime_to_us(ktime_sub(ktime_get(), start))); ++ return 0; + } +- mutex_unlock(&dev->device_lock); +- msleep(MSEC_PER_SEC / 5); +- mutex_lock(&dev->device_lock); +- t += MSEC_PER_SEC / 5; +- } while (t < SEC_ALIVENESS_WAIT_TIMEOUT); ++ usleep_range(20, 50); ++ } while (ktime_before(ktime_get(), stop)); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_err(dev->dev, "aliveness timed out\n"); diff --git a/queue-4.0/mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch b/queue-4.0/mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch new file mode 100644 index 00000000000..a3c810c6bb7 --- /dev/null +++ b/queue-4.0/mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch @@ -0,0 +1,105 @@ +From c5f3b1a51a591c18c8b33983908e7fdda6ae417e Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Wed, 24 Jun 2015 16:58:26 -0700 +Subject: mm: kmemleak: allow safe memory scanning during kmemleak disabling + +From: Catalin Marinas + +commit c5f3b1a51a591c18c8b33983908e7fdda6ae417e upstream. + +The kmemleak scanning thread can run for minutes. Callbacks like +kmemleak_free() are allowed during this time, the race being taken care +of by the object->lock spinlock. Such lock also prevents a memory block +from being freed or unmapped while it is being scanned by blocking the +kmemleak_free() -> ... -> __delete_object() function until the lock is +released in scan_object(). + +When a kmemleak error occurs (e.g. it fails to allocate its metadata), +kmemleak_enabled is set and __delete_object() is no longer called on +freed objects. If kmemleak_scan is running at the same time, +kmemleak_free() no longer waits for the object scanning to complete, +allowing the corresponding memory block to be freed or unmapped (in the +case of vfree()). This leads to kmemleak_scan potentially triggering a +page fault. 
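+
+The fix, shown in full in the diff below, boils down to a second enable
+flag and a strict ordering in the cleanup path; condensed (error and
+boot-time paths elided):
+
+  /* kmemleak_do_cleanup() */
+  stop_scan_thread();
+  /*
+   * Only after the scan thread has stopped is it safe to stop tracking
+   * frees; kthread_stop() orders this store after the scanner's last
+   * object access.
+   */
+  kmemleak_free_enabled = 0;
+
+  /* kmemleak_free() is now gated on the new flag, not kmemleak_enabled */
+  if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
+          delete_object_full((unsigned long)ptr);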
+ +This patch separates the kmemleak_free() enabling/disabling from the +overall kmemleak_enabled nob so that we can defer the disabling of the +object freeing tracking until the scanning thread completed. The +kmemleak_free_part() is deliberately ignored by this patch since this is +only called during boot before the scanning thread started. + +Signed-off-by: Catalin Marinas +Reported-by: Vignesh Radhakrishnan +Tested-by: Vignesh Radhakrishnan +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/kmemleak.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/mm/kmemleak.c ++++ b/mm/kmemleak.c +@@ -195,6 +195,8 @@ static struct kmem_cache *scan_area_cach + + /* set if tracing memory operations is enabled */ + static int kmemleak_enabled; ++/* same as above but only for the kmemleak_free() callback */ ++static int kmemleak_free_enabled; + /* set in the late_initcall if there were no errors */ + static int kmemleak_initialized; + /* enables or disables early logging of the memory operations */ +@@ -942,7 +944,7 @@ void __ref kmemleak_free(const void *ptr + { + pr_debug("%s(0x%p)\n", __func__, ptr); + +- if (kmemleak_enabled && ptr && !IS_ERR(ptr)) ++ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) + delete_object_full((unsigned long)ptr); + else if (kmemleak_early_log) + log_early(KMEMLEAK_FREE, ptr, 0, 0); +@@ -982,7 +984,7 @@ void __ref kmemleak_free_percpu(const vo + + pr_debug("%s(0x%p)\n", __func__, ptr); + +- if (kmemleak_enabled && ptr && !IS_ERR(ptr)) ++ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) + for_each_possible_cpu(cpu) + delete_object_full((unsigned long)per_cpu_ptr(ptr, + cpu)); +@@ -1750,6 +1752,13 @@ static void kmemleak_do_cleanup(struct w + mutex_lock(&scan_mutex); + stop_scan_thread(); + ++ /* ++ * Once the scan thread has stopped, it is safe to no longer track ++ * object freeing. Ordering of the scan thread stopping and the memory ++ * accesses below is guaranteed by the kthread_stop() function. ++ */ ++ kmemleak_free_enabled = 0; ++ + if (!kmemleak_found_leaks) + __kmemleak_do_cleanup(); + else +@@ -1776,6 +1785,8 @@ static void kmemleak_disable(void) + /* check whether it is too early for a kernel thread */ + if (kmemleak_initialized) + schedule_work(&cleanup_work); ++ else ++ kmemleak_free_enabled = 0; + + pr_info("Kernel memory leak detector disabled\n"); + } +@@ -1840,8 +1851,10 @@ void __init kmemleak_init(void) + if (kmemleak_error) { + local_irq_restore(flags); + return; +- } else ++ } else { + kmemleak_enabled = 1; ++ kmemleak_free_enabled = 1; ++ } + local_irq_restore(flags); + + /* diff --git a/queue-4.0/mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch b/queue-4.0/mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch new file mode 100644 index 00000000000..688294ece1c --- /dev/null +++ b/queue-4.0/mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch @@ -0,0 +1,122 @@ +From 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 Mon Sep 17 00:00:00 2001 +From: Larry Finger +Date: Wed, 24 Jun 2015 16:58:51 -0700 +Subject: mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc() + +From: Larry Finger + +commit 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 upstream. + +Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the +following INFO splat is logged: + + =============================== + [ INFO: suspicious RCU usage. 
] + 4.1.0-rc7-next-20150612 #1 Not tainted + ------------------------------- + kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section! + other info that might help us debug this: + rcu_scheduler_active = 1, debug_locks = 0 + 3 locks held by systemd/1: + #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1f/0x40 + #1: (rcu_read_lock_bh){......}, at: [] ipv6_add_addr+0x62/0x540 + #2: (addrconf_hash_lock){+...+.}, at: [] ipv6_add_addr+0x184/0x540 + stack backtrace: + CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1 + Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20 04/17/2014 + Call Trace: + dump_stack+0x4c/0x6e + lockdep_rcu_suspicious+0xe7/0x120 + ___might_sleep+0x1d5/0x1f0 + __might_sleep+0x4d/0x90 + kmem_cache_alloc+0x47/0x250 + create_object+0x39/0x2e0 + kmemleak_alloc_percpu+0x61/0xe0 + pcpu_alloc+0x370/0x630 + +Additional backtrace lines are truncated. In addition, the above splat +is followed by several "BUG: sleeping function called from invalid +context at mm/slub.c:1268" outputs. As suggested by Martin KaFai Lau, +these are the clue to the fix. Routine kmemleak_alloc_percpu() always +uses GFP_KERNEL for its allocations, whereas it should follow the gfp +from its callers. + +Reviewed-by: Catalin Marinas +Reviewed-by: Kamalesh Babulal +Acked-by: Martin KaFai Lau +Signed-off-by: Larry Finger +Cc: Martin KaFai Lau +Cc: Catalin Marinas +Cc: Tejun Heo +Cc: Christoph Lameter +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/kmemleak.h | 6 ++++-- + mm/kmemleak.c | 9 +++++---- + mm/percpu.c | 2 +- + 3 files changed, 10 insertions(+), 7 deletions(-) + +--- a/include/linux/kmemleak.h ++++ b/include/linux/kmemleak.h +@@ -28,7 +28,8 @@ + extern void kmemleak_init(void) __ref; + extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) __ref; +-extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref; ++extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, ++ gfp_t gfp) __ref; + extern void kmemleak_free(const void *ptr) __ref; + extern void kmemleak_free_part(const void *ptr, size_t size) __ref; + extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; +@@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recurs + gfp_t gfp) + { + } +-static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) ++static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, ++ gfp_t gfp) + { + } + static inline void kmemleak_free(const void *ptr) +--- a/mm/kmemleak.c ++++ b/mm/kmemleak.c +@@ -909,12 +909,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); + * kmemleak_alloc_percpu - register a newly allocated __percpu object + * @ptr: __percpu pointer to beginning of the object + * @size: size of the object ++ * @gfp: flags used for kmemleak internal memory allocations + * + * This function is called from the kernel percpu allocator when a new object +- * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL +- * allocation. ++ * (memory block) is allocated (alloc_percpu). 
+ */ +-void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) ++void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, ++ gfp_t gfp) + { + unsigned int cpu; + +@@ -927,7 +928,7 @@ void __ref kmemleak_alloc_percpu(const v + if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + for_each_possible_cpu(cpu) + create_object((unsigned long)per_cpu_ptr(ptr, cpu), +- size, 0, GFP_KERNEL); ++ size, 0, gfp); + else if (kmemleak_early_log) + log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0); + } +--- a/mm/percpu.c ++++ b/mm/percpu.c +@@ -1030,7 +1030,7 @@ area_found: + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); + + ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); +- kmemleak_alloc_percpu(ptr, size); ++ kmemleak_alloc_percpu(ptr, size, gfp); + return ptr; + + fail_unlock: diff --git a/queue-4.0/mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch b/queue-4.0/mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch new file mode 100644 index 00000000000..8c53ff549bd --- /dev/null +++ b/queue-4.0/mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch @@ -0,0 +1,156 @@ +From 0867a57c4f80a566dda1bac975b42fcd857cb489 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Wed, 24 Jun 2015 16:58:48 -0700 +Subject: mm, thp: respect MPOL_PREFERRED policy with non-local node + +From: Vlastimil Babka + +commit 0867a57c4f80a566dda1bac975b42fcd857cb489 upstream. + +Since commit 077fcf116c8c ("mm/thp: allocate transparent hugepages on +local node"), we handle THP allocations on page fault in a special way - +for non-interleave memory policies, the allocation is only attempted on +the node local to the current CPU, if the policy's nodemask allows the +node. + +This is motivated by the assumption that THP benefits cannot offset the +cost of remote accesses, so it's better to fallback to base pages on the +local node (which might still be available, while huge pages are not due +to fragmentation) than to allocate huge pages on a remote node. + +The nodemask check prevents us from violating e.g. MPOL_BIND policies +where the local node is not among the allowed nodes. However, the +current implementation can still give surprising results for the +MPOL_PREFERRED policy when the preferred node is different than the +current CPU's local node. + +In such case we should honor the preferred node and not use the local +node, which is what this patch does. If hugepage allocation on the +preferred node fails, we fall back to base pages and don't try other +nodes, with the same motivation as is done for the local node hugepage +allocations. The patch also moves the MPOL_INTERLEAVE check around to +simplify the hugepage specific test. + +The difference can be demonstrated using in-tree transhuge-stress test +on the following 2-node machine where half memory on one node was +occupied to show the difference. 
+ +> numactl --hardware +available: 2 nodes (0-1) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 24 25 26 27 28 29 30 31 32 33 34 35 +node 0 size: 7878 MB +node 0 free: 3623 MB +node 1 cpus: 12 13 14 15 16 17 18 19 20 21 22 23 36 37 38 39 40 41 42 43 44 45 46 47 +node 1 size: 8045 MB +node 1 free: 7818 MB +node distances: +node 0 1 + 0: 10 21 + 1: 21 10 + +Before the patch: +> numactl -p0 -C0 ./transhuge-stress +transhuge-stress: 2.197 s/loop, 0.276 ms/page, 7249.168 MiB/s 7962 succeed, 0 failed, 1786 different pages + +> numactl -p0 -C12 ./transhuge-stress +transhuge-stress: 2.962 s/loop, 0.372 ms/page, 5376.172 MiB/s 7962 succeed, 0 failed, 3873 different pages + +Number of successful THP allocations corresponds to free memory on node 0 in +the first case and node 1 in the second case, i.e. -p parameter is ignored and +cpu binding "wins". + +After the patch: +> numactl -p0 -C0 ./transhuge-stress +transhuge-stress: 2.183 s/loop, 0.274 ms/page, 7295.516 MiB/s 7962 succeed, 0 failed, 1760 different pages + +> numactl -p0 -C12 ./transhuge-stress +transhuge-stress: 2.878 s/loop, 0.361 ms/page, 5533.638 MiB/s 7962 succeed, 0 failed, 1750 different pages + +> numactl -p1 -C0 ./transhuge-stress +transhuge-stress: 4.628 s/loop, 0.581 ms/page, 3440.893 MiB/s 7962 succeed, 0 failed, 3918 different pages + +The -p parameter is respected regardless of cpu binding. + +> numactl -C0 ./transhuge-stress +transhuge-stress: 2.202 s/loop, 0.277 ms/page, 7230.003 MiB/s 7962 succeed, 0 failed, 1750 different pages + +> numactl -C12 ./transhuge-stress +transhuge-stress: 3.020 s/loop, 0.379 ms/page, 5273.324 MiB/s 7962 succeed, 0 failed, 3916 different pages + +Without -p parameter, hugepage restriction to CPU-local node works as before. + +Fixes: 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node") +Signed-off-by: Vlastimil Babka +Cc: Aneesh Kumar K.V +Acked-by: David Rientjes +Cc: Kirill A. Shutemov +Cc: Andrea Arcangeli +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 38 ++++++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 16 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1971,35 +1971,41 @@ retry_cpuset: + pol = get_vma_policy(vma, addr); + cpuset_mems_cookie = read_mems_allowed_begin(); + +- if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage && +- pol->mode != MPOL_INTERLEAVE)) { ++ if (pol->mode == MPOL_INTERLEAVE) { ++ unsigned nid; ++ ++ nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); ++ mpol_cond_put(pol); ++ page = alloc_page_interleave(gfp, order, nid); ++ goto out; ++ } ++ ++ if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { ++ int hpage_node = node; ++ + /* + * For hugepage allocation and non-interleave policy which +- * allows the current node, we only try to allocate from the +- * current node and don't fall back to other nodes, as the +- * cost of remote accesses would likely offset THP benefits. ++ * allows the current node (or other explicitly preferred ++ * node) we only try to allocate from the current/preferred ++ * node and don't fall back to other nodes, as the cost of ++ * remote accesses would likely offset THP benefits. + * + * If the policy is interleave, or does not allow the current + * node in its nodemask, we allocate the standard way. 
+ */ ++ if (pol->mode == MPOL_PREFERRED && ++ !(pol->flags & MPOL_F_LOCAL)) ++ hpage_node = pol->v.preferred_node; ++ + nmask = policy_nodemask(gfp, pol); +- if (!nmask || node_isset(node, *nmask)) { ++ if (!nmask || node_isset(hpage_node, *nmask)) { + mpol_cond_put(pol); +- page = alloc_pages_exact_node(node, ++ page = alloc_pages_exact_node(hpage_node, + gfp | __GFP_THISNODE, order); + goto out; + } + } + +- if (pol->mode == MPOL_INTERLEAVE) { +- unsigned nid; +- +- nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); +- mpol_cond_put(pol); +- page = alloc_page_interleave(gfp, order, nid); +- goto out; +- } +- + nmask = policy_nodemask(gfp, pol); + zl = policy_zonelist(gfp, pol, node); + mpol_cond_put(pol); diff --git a/queue-4.0/series b/queue-4.0/series index f1e0d424d10..a76a213ca06 100644 --- a/queue-4.0/series +++ b/queue-4.0/series @@ -20,3 +20,14 @@ acpi-init-switch-over-platform-to-the-acpi-mode-later.patch acpi-pm-add-missing-pm_generic_complete-invocation.patch iio-accel-kxcjk-1013-add-the-kxcj9000-acpi-id.patch tools-selftests-fix-clean-target-with-make-3.81.patch +arc-add-smp-barriers-around-atomics-per-documentation-atomic_ops.txt.patch +arc-add-compiler-barrier-to-llsc-based-cmpxchg.patch +mei-me-wait-for-power-gating-exit-confirmation.patch +mei-txe-reduce-suspend-resume-time.patch +arm64-do-not-attempt-to-use-init_mm-in-reset_context.patch +arm64-entry-fix-context-tracking-for-el0_sp_pc.patch +arm64-mm-fix-freeing-of-the-wrong-memmap-entries-with-sparsemem_vmemmap.patch +arm64-vdso-work-around-broken-elf-toolchains-in-makefile.patch +mm-kmemleak-allow-safe-memory-scanning-during-kmemleak-disabling.patch +mm-kmemleak_alloc_percpu-should-follow-the-gfp-from-per_alloc.patch +mm-thp-respect-mpol_preferred-policy-with-non-local-node.patch