From: Greg Kroah-Hartman
Date: Mon, 20 Jun 2016 17:49:00 +0000 (-0700)
Subject: 4.6-stable patches
X-Git-Tag: v3.14.73~24
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2d8824bfdd837df10ee6767981638ed2356f7c40;p=thirdparty%2Fkernel%2Fstable-queue.git

4.6-stable patches

added patches:
    sparc-harden-signal-return-frame-checks.patch
    sparc64-fix-return-from-trap-window-fill-crashes.patch
    sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
    sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
---

diff --git a/queue-4.6/series b/queue-4.6/series
index 6d8a6339992..ff0c26fb18a 100644
--- a/queue-4.6/series
+++ b/queue-4.6/series
@@ -61,3 +61,7 @@ x86-entry-traps-don-t-force-in_interrupt-to-return-true-in-ist-handlers.patch
 proc-prevent-stacking-filesystems-on-top.patch
 sched-panic-on-corrupted-stack-end.patch
 fix-d_walk-non-delayed-__d_free-race.patch
+sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
+sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
+sparc-harden-signal-return-frame-checks.patch
+sparc64-fix-return-from-trap-window-fill-crashes.patch
diff --git a/queue-4.6/sparc-harden-signal-return-frame-checks.patch b/queue-4.6/sparc-harden-signal-return-frame-checks.patch
new file mode 100644
index 00000000000..6bf366a2aad
--- /dev/null
+++ b/queue-4.6/sparc-harden-signal-return-frame-checks.patch
@@ -0,0 +1,315 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Sat, 28 May 2016 21:21:31 -0700
+Subject: sparc: Harden signal return frame checks.
+
+From: "David S. Miller"
+
+[ Upstream commit d11c2a0de2824395656cf8ed15811580c9dd38aa ]
+
+All signal frames must be at least 16-byte aligned, because that is
+the alignment we explicitly create when we build signal return stack
+frames.
+
+All stack pointers must be at least 8-byte aligned.
+
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/kernel/signal32.c | 46 ++++++++++++++++++++++++++---------------
+ arch/sparc/kernel/signal_32.c | 41 +++++++++++++++++++++++-------------
+ arch/sparc/kernel/signal_64.c | 31 +++++++++++++++++----------
+ arch/sparc/kernel/sigutil_32.c | 9 +++++++-
+ arch/sparc/kernel/sigutil_64.c | 10 +++++++-
+ 5 files changed, 92 insertions(+), 45 deletions(-)
+
+--- a/arch/sparc/kernel/signal32.c
++++ b/arch/sparc/kernel/signal32.c
+@@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *
+ return 0;
+ }
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) ||
++ ((unsigned long)fp) > 0x100000000ULL - fplen)
++ return true;
++ return false;
++}
++
+ void do_sigreturn32(struct pt_regs *regs)
+ {
+ struct signal_frame32 __user *sf;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+- unsigned int psr;
++ unsigned int psr, ufp;
+ unsigned int pc, npc;
+ sigset_t set;
+ compat_sigset_t seta;
+@@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs
+ sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+- if (get_user(pc, &sf->info.si_regs.pc) ||
++ if (__get_user(pc, &sf->info.si_regs.pc) ||
+ __get_user(npc, &sf->info.si_regs.npc))
+ goto segv;
+
+@@ -227,7 +244,7 @@ segv:
+ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+ {
+ struct rt_signal_frame32 __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+ sigset_t set;
+@@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct
+ sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv;
+
+- if (get_user(pc, &sf->regs.pc) ||
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
++ goto segv;
++
++ if (__get_user(pc, &sf->regs.pc) ||
+ __get_user(npc, &sf->regs.npc))
+ goto segv;
+
+@@ -307,14 +329,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen)
+- return 1;
+- return 0;
+-}
+-
+ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp;
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -60,10 +60,22 @@ struct rt_signal_frame {
+ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7)))
+ #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static inline bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
++ return true;
++
++ return false;
++}
++
+ asmlinkage void do_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long up_psr, pc, npc, ufp;
+ struct signal_frame __user *sf;
+- unsigned long up_psr, pc, npc;
+ sigset_t set;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+@@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_r
+ sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv_and_exit;
+
+- if (((unsigned long) sf) & 3)
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv_and_exit;
++
++ if (ufp & 0x7)
+ goto segv_and_exit;
+
+ err = __get_user(pc, &sf->info.si_regs.pc);
+@@ -127,7 +142,7 @@ segv_and_exit:
+ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_signal_frame __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct p
+
+ synchronize_user_stack();
+ sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 0x03))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+ err = __get_user(pc, &sf->regs.pc);
+@@ -178,15 +198,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static inline int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen))
+- return 1;
+-
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP];
+--- a/arch/sparc/kernel/signal_64.c
++++ b/arch/sparc/kernel/signal_64.c
+@@ -234,6 +234,17 @@ do_sigsegv:
+ goto out;
+ }
+
++/* Checks if the fp is valid. We always build rt signal frames which
++ * are 16-byte aligned, therefore we can always enforce that the
++ * restore frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp)
++{
++ if (((unsigned long) fp) & 15)
++ return true;
++ return false;
++}
++
+ struct rt_signal_frame {
+ struct sparc_stackf ss;
+ siginfo_t info;
+@@ -246,8 +257,8 @@ struct rt_signal_frame {
+
+ void do_rt_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long tpc, tnpc, tstate, ufp;
+ struct rt_signal_frame __user *sf;
+- unsigned long tpc, tnpc, tstate;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *reg
+ (regs->u_regs [UREG_FP] + STACK_BIAS);
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (((unsigned long) sf) & 3)
++ if (invalid_frame_pointer(sf))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+ goto segv;
+
+- err = get_user(tpc, &sf->regs.tpc);
++ if ((ufp + STACK_BIAS) & 0x7)
++ goto segv;
++
++ err = __get_user(tpc, &sf->regs.tpc);
+ err |= __get_user(tnpc, &sf->regs.tnpc);
+ if (test_thread_flag(TIF_32BIT)) {
+ tpc &= 0xffffffff;
+@@ -308,14 +325,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp)
+-{
+- if (((unsigned long) fp) & 15)
+- return 1;
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+--- a/arch/sparc/kernel/sigutil_32.c
++++ b/arch/sparc/kernel/sigutil_32.c
+@@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs,
+ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ int err;
++
++ if (((unsigned long) fpu) & 3)
++ return -EFAULT;
++
+ #ifdef CONFIG_SMP
+ if (test_tsk_thread_flag(current, TIF_USEDFPU))
+ regs->psr &= ~PSR_EF;
+@@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 3)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
+--- a/arch/sparc/kernel/sigutil_64.c
++++ b/arch/sparc/kernel/sigutil_64.c
+@@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *re
+ unsigned long fprs;
+ int err;
+
+- err = __get_user(fprs, &fpu->si_fprs);
++ if (((unsigned long) fpu) & 7)
++ return -EFAULT;
++
++ err = get_user(fprs, &fpu->si_fprs);
+ fprs_write(0);
+ regs->tstate &= ~TSTATE_PEF;
+ if (fprs & FPRS_DL)
+@@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 7)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
diff --git a/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch b/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch
new file mode 100644
index 00000000000..b7d73863b9a
--- /dev/null
+++ b/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch
@@ -0,0 +1,355 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Sat, 28 May 2016 20:41:12 -0700
+Subject: sparc64: Fix return from trap window fill crashes.
+
+From: "David S. Miller"
+
+[ Upstream commit 7cafc0b8bf130f038b0ec2dcdd6a9de6dc59b65a ]
+
+We must handle data access exception as well as memory address unaligned
+exceptions from return from trap window fill faults, not just normal
+TLB misses.
+
+Otherwise we can get an OOPS that looks like this:
+
+ld-linux.so.2(36808): Kernel bad sw trap 5 [#1]
+CPU: 1 PID: 36808 Comm: ld-linux.so.2 Not tainted 4.6.0 #34
+task: fff8000303be5c60 ti: fff8000301344000 task.ti: fff8000301344000
+TSTATE: 0000004410001601 TPC: 0000000000a1a784 TNPC: 0000000000a1a788 Y: 00000002 Not tainted
+TPC:
+g0: fff8000024fc8248 g1: 0000000000db04dc g2: 0000000000000000 g3: 0000000000000001
+g4: fff8000303be5c60 g5: fff800030e672000 g6: fff8000301344000 g7: 0000000000000001
+o0: 0000000000b95ee8 o1: 000000000000012b o2: 0000000000000000 o3: 0000000200b9b358
+o4: 0000000000000000 o5: fff8000301344040 sp: fff80003013475c1 ret_pc: 0000000000a1a77c
+RPC:
+l0: 00000000000007ff l1: 0000000000000000 l2: 000000000000005f l3: 0000000000000000
+l4: fff8000301347e98 l5: fff8000024ff3060 l6: 0000000000000000 l7: 0000000000000000
+i0: fff8000301347f60 i1: 0000000000102400 i2: 0000000000000000 i3: 0000000000000000
+i4: 0000000000000000 i5: 0000000000000000 i6: fff80003013476a1 i7: 0000000000404d4c
+I7:
+Call Trace:
+ [0000000000404d4c] user_rtt_fill_fixup+0x6c/0x7c
+
+The window trap handlers are slightly clever, the trap table entries for them are
+composed of two pieces of code. First comes the code that actually performs
+the window fill or spill trap handling, and then there are three instructions at
+the end which are for exception processing.
+
+The userland register window fill handler is:
+
+ add %sp, STACK_BIAS + 0x00, %g1; \
+ ldxa [%g1 + %g0] ASI, %l0; \
+ mov 0x08, %g2; \
+ mov 0x10, %g3; \
+ ldxa [%g1 + %g2] ASI, %l1; \
+ mov 0x18, %g5; \
+ ldxa [%g1 + %g3] ASI, %l2; \
+ ldxa [%g1 + %g5] ASI, %l3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %l4; \
+ ldxa [%g1 + %g2] ASI, %l5; \
+ ldxa [%g1 + %g3] ASI, %l6; \
+ ldxa [%g1 + %g5] ASI, %l7; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i0; \
+ ldxa [%g1 + %g2] ASI, %i1; \
+ ldxa [%g1 + %g3] ASI, %i2; \
+ ldxa [%g1 + %g5] ASI, %i3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i4; \
+ ldxa [%g1 + %g2] ASI, %i5; \
+ ldxa [%g1 + %g3] ASI, %i6; \
+ ldxa [%g1 + %g5] ASI, %i7; \
+ restored; \
+ retry; nop; nop; nop; nop; \
+ b,a,pt %xcc, fill_fixup_dax; \
+ b,a,pt %xcc, fill_fixup_mna; \
+ b,a,pt %xcc, fill_fixup;
+
+And the way this works is that if any of those memory accesses
+generate an exception, the exception handler can revector to one of
+those final three branch instructions depending upon which kind of
+exception the memory access took. In this way, the fault handler
+doesn't have to know if it was a spill or a fill that it's handling
+the fault for. It just always branches to the last instruction in
+the parent trap's handler.
+
+For example, for a regular fault, the code goes:
+
+winfix_trampoline:
+ rdpr %tpc, %g3
+ or %g3, 0x7c, %g3
+ wrpr %g3, %tnpc
+ done
+
+All window trap handlers are 0x80 aligned, so if we "or" 0x7c into the
+trap time program counter, we'll get that final instruction in the
+trap handler.
+
+On return from trap, we have to pull the register window in but we do
+this by hand instead of just executing a "restore" instruction for
+several reasons. The largest being that from Niagara and onward we
+simply don't have enough levels in the trap stack to fully resolve all
+possible exception cases of a window fault when we are already at
+trap level 1 (which we enter to get ready to return from the original
+trap).
+
+This is executed inline via the FILL_*_RTRAP handlers. rtrap_64.S's
+code branches directly to these to do the window fill by hand if
+necessary. Now if you look at them, we'll see at the end:
+
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+And oops, all three cases are handled like a fault.
+
+This doesn't work because each of these trap types (data access
+exception, memory address unaligned, and faults) store their auxiliary
+info in different registers to pass on to the C handler which does the
+real work.
+
+So in the case where the stack was unaligned, the unaligned trap
+handler sets up the arg registers one way, and then we branched to
+the fault handler which expects them setup another way.
+
+So the FAULT_TYPE_* value ends up basically being garbage, and
+randomly would generate the backtrace seen above.
+
+Reported-by: Nick Alcock
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/head_64.h | 4 +
+ arch/sparc/include/asm/ttable.h | 8 +--
+ arch/sparc/kernel/Makefile | 1
+ arch/sparc/kernel/rtrap_64.S | 59 +++--------------------
+ arch/sparc/kernel/urtt_fill.S | 98 +++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 117 insertions(+), 53 deletions(-)
+ create mode 100644 arch/sparc/kernel/urtt_fill.S
+
+--- a/arch/sparc/include/asm/head_64.h
++++ b/arch/sparc/include/asm/head_64.h
+@@ -15,6 +15,10 @@
+
+ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
+
++#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
++#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
++#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
++
+ #define __CHEETAH_ID 0x003e0014
+ #define __JALAPENO_ID 0x003e0016
+ #define __SERRANO_ID 0x003e0022
+--- a/arch/sparc/include/asm/ttable.h
++++ b/arch/sparc/include/asm/ttable.h
+@@ -589,8 +589,8 @@ user_rtt_fill_64bit: \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+@@ -652,8 +652,8 @@ user_rtt_fill_32bit: \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+--- a/arch/sparc/kernel/Makefile
++++ b/arch/sparc/kernel/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
+ CFLAGS_REMOVE_pcr.o := -pg
+ endif
+
++obj-$(CONFIG_SPARC64) += urtt_fill.o
+ obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
+ obj-$(CONFIG_SPARC32) += etrap_32.o
+ obj-$(CONFIG_SPARC32) += rtrap_32.o
+--- a/arch/sparc/kernel/rtrap_64.S
++++ b/arch/sparc/kernel/rtrap_64.S
+@@ -14,10 +14,6 @@
+ #include
+ #include
+
+-#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+-#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+-#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+-
+ #ifdef CONFIG_CONTEXT_TRACKING
+ # define SCHEDULE_USER schedule_user
+ #else
+@@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + P
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+-user_rtt_fill_fixup:
+- rdpr %cwp, %g1
+- add %g1, 1, %g1
+- wrpr %g1, 0x0, %cwp
+-
+- rdpr %wstate, %g2
+- sll %g2, 3, %g2
+- wrpr %g2, 0x0, %wstate
+-
+- /* We know %canrestore and %otherwin are both zero. */
+-
+- sethi %hi(sparc64_kern_pri_context), %g2
+- ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+- mov PRIMARY_CONTEXT, %g1
+-
+-661: stxa %g2, [%g1] ASI_DMMU
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- stxa %g2, [%g1] ASI_MMU
+- .previous
+-
+- sethi %hi(KERNBASE), %g1
+- flush %g1
++user_rtt_fill_fixup_dax:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 1, %g3
++
++user_rtt_fill_fixup_mna:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 2, %g3
+
+- or %g4, FAULT_CODE_WINFIXUP, %g4
+- stb %g4, [%g6 + TI_FAULT_CODE]
+- stx %g5, [%g6 + TI_FAULT_ADDR]
+-
+- mov %g6, %l1
+- wrpr %g0, 0x0, %tl
+-
+-661: nop
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- SET_GL(0)
+- .previous
+-
+- wrpr %g0, RTRAP_PSTATE, %pstate
+-
+- mov %l1, %g6
+- ldx [%g6 + TI_TASK], %g4
+- LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+- call do_sparc64_fault
+- add %sp, PTREGS_OFF, %o0
+- ba,pt %xcc, rtrap
+- nop
++user_rtt_fill_fixup:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ clr %g3
+
+ user_rtt_pre_restore:
+ add %g1, 1, %g1
+--- /dev/null
++++ b/arch/sparc/kernel/urtt_fill.S
+@@ -0,0 +1,98 @@
++#include
++#include
++#include
++#include
++#include
++
++ .text
++ .align 8
++ .globl user_rtt_fill_fixup_common
++user_rtt_fill_fixup_common:
++ rdpr %cwp, %g1
++ add %g1, 1, %g1
++ wrpr %g1, 0x0, %cwp
++
++ rdpr %wstate, %g2
++ sll %g2, 3, %g2
++ wrpr %g2, 0x0, %wstate
++
++ /* We know %canrestore and %otherwin are both zero. */
++
++ sethi %hi(sparc64_kern_pri_context), %g2
++ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
++ mov PRIMARY_CONTEXT, %g1
++
++661: stxa %g2, [%g1] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %g2, [%g1] ASI_MMU
++ .previous
++
++ sethi %hi(KERNBASE), %g1
++ flush %g1
++
++ mov %g4, %l4
++ mov %g5, %l5
++ brnz,pn %g3, 1f
++ mov %g3, %l3
++
++ or %g4, FAULT_CODE_WINFIXUP, %g4
++ stb %g4, [%g6 + TI_FAULT_CODE]
++ stx %g5, [%g6 + TI_FAULT_ADDR]
++1:
++ mov %g6, %l1
++ wrpr %g0, 0x0, %tl
++
++661: nop
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ SET_GL(0)
++ .previous
++
++ wrpr %g0, RTRAP_PSTATE, %pstate
++
++ mov %l1, %g6
++ ldx [%g6 + TI_TASK], %g4
++ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
++
++ brnz,pn %l3, 1f
++ nop
++
++ call do_sparc64_fault
++ add %sp, PTREGS_OFF, %o0
++ ba,pt %xcc, rtrap
++ nop
++
++1: cmp %g3, 2
++ bne,pn %xcc, 2f
++ nop
++
++ sethi %hi(tlb_type), %g1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ mov %l4, %o2
++ call sun4v_do_mna
++ mov %l5, %o1
++ ba,a,pt %xcc, rtrap
++1: mov %l4, %o1
++ mov %l5, %o2
++ call mem_address_unaligned
++ nop
++ ba,a,pt %xcc, rtrap
++
++2: sethi %hi(tlb_type), %g1
++ mov %l4, %o1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ mov %l5, %o2
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ call sun4v_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
++
++1: call spitfire_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
diff --git a/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch b/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
new file mode 100644
index 00000000000..4312bdc5b7f
--- /dev/null
+++ b/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
@@ -0,0 +1,349 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: Nitin Gupta
+Date: Wed, 30 Mar 2016 11:17:13 -0700
+Subject: sparc64: Reduce TLB flushes during hugepte changes
+
+From: Nitin Gupta
+
+[ Upstream commit 24e49ee3d76b70853a96520e46b8837e5eae65b2 ]
+
+During hugepage map/unmap, TSB and TLB flushes are currently
+issued at every PAGE_SIZE'd boundary which is unnecessary.
+We now issue the flush at REAL_HPAGE_SIZE boundaries only.
+
+Without this patch workloads which unmap a large hugepage
+backed VMA region get CPU lockups due to excessive TLB
+flush calls.
+
+Orabug: 22365539, 22643230, 22995196
+
+Signed-off-by: Nitin Gupta
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/pgtable_64.h | 43 ++++++++++++++++++++++++++---------
+ arch/sparc/include/asm/tlbflush_64.h | 3 +-
+ arch/sparc/mm/hugetlbpage.c | 33 ++++++++++++++++++++++----
+ arch/sparc/mm/init_64.c | 12 ---------
+ arch/sparc/mm/tlb.c | 25 ++++++++++++++------
+ arch/sparc/mm/tsb.c | 32 +++++++++++++-------------
+ 6 files changed, 97 insertions(+), 51 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(
+ #define pgprot_noncached pgprot_noncached
+
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline pte_t pte_mkhuge(pte_t pte)
++static inline unsigned long __pte_huge_mask(void)
+ {
+ unsigned long mask;
+
+@@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+- return __pte(pte_val(pte) | mask);
++ return mask;
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++ return __pte(pte_val(pte) | __pte_huge_mask());
++}
++
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return !!(pte_val(pte) & __pte_huge_mask());
+ }
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ {
+@@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd
+ return __pmd(pte_val(pte));
+ }
+ #endif
++#else
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return false;
++}
+ #endif
+
+ static inline pte_t pte_mkdirty(pte_t pte)
+@@ -858,6 +874,19 @@ static inline unsigned long pud_pfn(pud_
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm);
+
++static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
++ pte_t *ptep, pte_t orig, int fullmm)
++{
++ /* It is more efficient to let flush_tlb_kernel_range()
++ * handle init_mm tlb flushes.
++ *
++ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
++ * and SUN4V pte layout, so this inline test is fine.
++ */
++ if (likely(mm != &init_mm) && pte_accessible(mm, orig))
++ tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
++}
++
+ #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+@@ -874,15 +903,7 @@ static inline void __set_pte_at(struct m
+ pte_t orig = *ptep;
+
+ *ptep = pte;
+-
+- /* It is more efficient to let flush_tlb_kernel_range()
+- * handle init_mm tlb flushes.
+- *
+- * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+- * and SUN4V pte layout, so this inline test is fine.
+- */ +- if (likely(mm != &init_mm) && pte_accessible(mm, orig)) +- tlb_batch_add(mm, addr, ptep, orig, fullmm); ++ maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm); + } + + #define set_pte_at(mm,addr,ptep,pte) \ +--- a/arch/sparc/include/asm/tlbflush_64.h ++++ b/arch/sparc/include/asm/tlbflush_64.h +@@ -8,6 +8,7 @@ + #define TLB_BATCH_NR 192 + + struct tlb_batch { ++ bool huge; + struct mm_struct *mm; + unsigned long tlb_nr; + unsigned long active; +@@ -16,7 +17,7 @@ struct tlb_batch { + + void flush_tsb_kernel_range(unsigned long start, unsigned long end); + void flush_tsb_user(struct tlb_batch *tb); +-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); ++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge); + + /* TLB flush operations. */ + +--- a/arch/sparc/mm/hugetlbpage.c ++++ b/arch/sparc/mm/hugetlbpage.c +@@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *m + pte_t *ptep, pte_t entry) + { + int i; ++ pte_t orig[2]; ++ unsigned long nptes; + + if (!pte_present(*ptep) && pte_present(entry)) + mm->context.huge_pte_count++; + + addr &= HPAGE_MASK; +- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { +- set_pte_at(mm, addr, ptep, entry); ++ ++ nptes = 1 << HUGETLB_PAGE_ORDER; ++ orig[0] = *ptep; ++ orig[1] = *(ptep + nptes / 2); ++ for (i = 0; i < nptes; i++) { ++ *ptep = entry; + ptep++; + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } ++ ++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0); ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0); + } + + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, +@@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_ + { + pte_t entry; + int i; ++ unsigned long nptes; + + entry = *ptep; + if (pte_present(entry)) + mm->context.huge_pte_count--; + + addr &= HPAGE_MASK; +- +- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { +- pte_clear(mm, addr, ptep); ++ nptes = 1 << HUGETLB_PAGE_ORDER; ++ for (i = 0; i < nptes; i++) { ++ *ptep = __pte(0UL); + addr += PAGE_SIZE; + ptep++; + } + ++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0); ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0); ++ + return entry; + } + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(stru + tsb_insert(tsb, tag, tte); + } + +-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +-static inline bool is_hugetlb_pte(pte_t pte) +-{ +- if ((tlb_type == hypervisor && +- (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || +- (tlb_type != hypervisor && +- (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) +- return true; +- return false; +-} +-#endif +- + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) + { + struct mm_struct *mm; +--- a/arch/sparc/mm/tlb.c ++++ b/arch/sparc/mm/tlb.c +@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void) + } + + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, +- bool exec) ++ bool exec, bool huge) + { + struct tlb_batch *tb = &get_cpu_var(tlb_batch); + unsigned long nr; +@@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_ + } + + if (!tb->active) { +- flush_tsb_user_page(mm, vaddr); ++ 
+ global_flush_tlb_page(mm, vaddr);
+ goto out;
+ }
+
+- if (nr == 0)
++ if (nr == 0) {
+ tb->mm = mm;
++ tb->huge = huge;
++ }
++
++ if (tb->huge != huge) {
++ flush_tlb_pending();
++ tb->huge = huge;
++ nr = 0;
++ }
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+@@ -104,6 +112,8 @@ out:
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+ {
++ bool huge = is_hugetlb_pte(orig);
++
+ if (tlb_type != hypervisor &&
+ pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+@@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm,
+
+ no_cache_flush:
+ if (!fullmm)
+- tlb_batch_add_one(mm, vaddr, pte_exec(orig));
++ tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm
+ if (pte_val(*pte) & _PAGE_VALID) {
+ bool exec = pte_exec(*pte);
+
+- tlb_batch_add_one(mm, vaddr, exec);
++ tlb_batch_add_one(mm, vaddr, exec, false);
+ }
+ pte++;
+ vaddr += PAGE_SIZE;
+@@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, un
+ pte_t orig_pte = __pte(pmd_val(orig));
+ bool exec = pte_exec(orig_pte);
+
+- tlb_batch_add_one(mm, addr, exec);
+- tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
++ tlb_batch_add_one(mm, addr, exec, true);
++ tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
++ true);
+ } else {
+ tlb_batch_pmd_scan(mm, addr, orig);
+ }
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -76,14 +76,15 @@ void flush_tsb_user(struct tlb_batch *tb
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+-
++ if (!tb->huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+@@ -94,20 +95,21 @@ void flush_tsb_user(struct tlb_batch *tb
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+ {
+ unsigned long nentries, base, flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+-
++ if (!huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
diff --git a/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch b/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
new file mode 100644
index 00000000000..fa6ea3721fa
--- /dev/null
+++ b/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
@@ -0,0 +1,54 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Wed, 25 May 2016 12:51:20 -0700
+Subject: sparc64: Take ctx_alloc_lock properly in hugetlb_setup().
+
+From: "David S. Miller"
+
+[ Upstream commit 9ea46abe22550e3366ff7cee2f8391b35b12f730 ]
+
+On cheetahplus chips we take the ctx_alloc_lock in order to
+modify the TLB lookup parameters for the indexed TLBs, which
+are stored in the context register.
+
+This is called with interrupts disabled, however ctx_alloc_lock
+is an IRQ safe lock, therefore we must take acquire/release it
+properly with spin_{lock,unlock}_irq().
+
+Reported-by: Meelis Roos
+Tested-by: Meelis Roos
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/mm/init_64.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -2824,9 +2824,10 @@ void hugetlb_setup(struct pt_regs *regs)
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
++ bool need_context_reload = false;
+ unsigned long ctx;
+
+- spin_lock(&ctx_alloc_lock);
++ spin_lock_irq(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+@@ -2845,9 +2846,12 @@ void hugetlb_setup(struct pt_regs *regs)
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+- on_each_cpu(context_reload, mm, 0);
++ need_context_reload = true;
+ }
+- spin_unlock(&ctx_alloc_lock);
++ spin_unlock_irq(&ctx_alloc_lock);
++
++ if (need_context_reload)
++ on_each_cpu(context_reload, mm, 0);
+ }
+ }
+ #endif
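
The alignment rules the hardening patch enforces reduce to a few lines of C. The following is a minimal userspace sketch of those checks, assuming a 4 GB address limit for compat tasks; the names mirror the kernel's invalid_frame_pointer, but nothing below is kernel code or part of the patches above.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical userspace illustration of the two checks the patch
	 * installs; illustrative only, not the kernel implementation.
	 */
	static bool invalid_frame_pointer(const void *fp, unsigned int fplen)
	{
		/* Signal return frames are always built 16-byte aligned, so a
		 * sigreturn frame pointer that is not 16-byte aligned is
		 * garbage from the user.
		 */
		if (((uintptr_t) fp) & 15)
			return true;
		/* For compat (32-bit) tasks the frame must also fit below 4 GB. */
		if ((uint64_t) (uintptr_t) fp > 0x100000000ULL - fplen)
			return true;
		return false;
	}

	static bool invalid_saved_sp(uint64_t ufp)
	{
		/* All stack pointers must be at least 8-byte aligned. */
		return (ufp & 0x7) != 0;
	}

	int main(void)
	{
		printf("%d\n", invalid_frame_pointer((void *) 0xf0000400UL, 512)); /* 0: aligned, in range */
		printf("%d\n", invalid_frame_pointer((void *) 0xf0000404UL, 512)); /* 1: not 16-byte aligned */
		printf("%d\n", invalid_saved_sp(0xf0000402));                      /* 1: not 8-byte aligned */
		return 0;
	}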