proc-prevent-stacking-filesystems-on-top.patch
sched-panic-on-corrupted-stack-end.patch
fix-d_walk-non-delayed-__d_free-race.patch
+sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
+sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
+sparc-harden-signal-return-frame-checks.patch
+sparc64-fix-return-from-trap-window-fill-crashes.patch
--- /dev/null
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 28 May 2016 21:21:31 -0700
+Subject: sparc: Harden signal return frame checks.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit d11c2a0de2824395656cf8ed15811580c9dd38aa ]
+
+All signal frames must be at least 16-byte aligned, because that is
+the alignment we explicitly create when we build signal return stack
+frames.
+
+All stack pointers must be at least 8-byte aligned.
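+
+In C terms, the invariant the new checks enforce is roughly the
+following (a simplified, hypothetical helper; the real
+invalid_frame_pointer() implementations below also bound the frame
+against the address-space limit):
+
+	/* fp: signal frame address, sp: stack pointer saved in it */
+	static bool frame_aligned(unsigned long fp, unsigned long sp)
+	{
+		return (fp & 15UL) == 0 &&	/* 16-byte aligned frame */
+		       (sp & 7UL) == 0;		/* 8-byte aligned SP */
+	}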
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/signal32.c | 46 ++++++++++++++++++++++++++---------------
+ arch/sparc/kernel/signal_32.c | 41 +++++++++++++++++++++++-------------
+ arch/sparc/kernel/signal_64.c | 31 +++++++++++++++++----------
+ arch/sparc/kernel/sigutil_32.c | 9 +++++++-
+ arch/sparc/kernel/sigutil_64.c | 10 +++++++-
+ 5 files changed, 92 insertions(+), 45 deletions(-)
+
+--- a/arch/sparc/kernel/signal32.c
++++ b/arch/sparc/kernel/signal32.c
+@@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *
+ return 0;
+ }
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) ||
++ ((unsigned long)fp) > 0x100000000ULL - fplen)
++ return true;
++ return false;
++}
++
+ void do_sigreturn32(struct pt_regs *regs)
+ {
+ struct signal_frame32 __user *sf;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+- unsigned int psr;
++ unsigned int psr, ufp;
+ unsigned int pc, npc;
+ sigset_t set;
+ compat_sigset_t seta;
+@@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs
+ sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+- if (get_user(pc, &sf->info.si_regs.pc) ||
++ if (__get_user(pc, &sf->info.si_regs.pc) ||
+ __get_user(npc, &sf->info.si_regs.npc))
+ goto segv;
+
+@@ -227,7 +244,7 @@ segv:
+ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+ {
+ struct rt_signal_frame32 __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+ sigset_t set;
+@@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct
+ sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv;
+
+- if (get_user(pc, &sf->regs.pc) ||
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
++ goto segv;
++
++ if (__get_user(pc, &sf->regs.pc) ||
+ __get_user(npc, &sf->regs.npc))
+ goto segv;
+
+@@ -307,14 +329,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen)
+- return 1;
+- return 0;
+-}
+-
+ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp;
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -60,10 +60,22 @@ struct rt_signal_frame {
+ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7)))
+ #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static inline bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
++ return true;
++
++ return false;
++}
++
+ asmlinkage void do_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long up_psr, pc, npc, ufp;
+ struct signal_frame __user *sf;
+- unsigned long up_psr, pc, npc;
+ sigset_t set;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+@@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_r
+ sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv_and_exit;
+
+- if (((unsigned long) sf) & 3)
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv_and_exit;
++
++ if (ufp & 0x7)
+ goto segv_and_exit;
+
+ err = __get_user(pc, &sf->info.si_regs.pc);
+@@ -127,7 +142,7 @@ segv_and_exit:
+ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_signal_frame __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct p
+
+ synchronize_user_stack();
+ sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 0x03))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+ err = __get_user(pc, &sf->regs.pc);
+@@ -178,15 +198,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static inline int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen))
+- return 1;
+-
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP];
+--- a/arch/sparc/kernel/signal_64.c
++++ b/arch/sparc/kernel/signal_64.c
+@@ -234,6 +234,17 @@ do_sigsegv:
+ goto out;
+ }
+
++/* Checks if the fp is valid. We always build rt signal frames which
++ * are 16-byte aligned, therefore we can always enforce that the
++ * restore frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp)
++{
++ if (((unsigned long) fp) & 15)
++ return true;
++ return false;
++}
++
+ struct rt_signal_frame {
+ struct sparc_stackf ss;
+ siginfo_t info;
+@@ -246,8 +257,8 @@ struct rt_signal_frame {
+
+ void do_rt_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long tpc, tnpc, tstate, ufp;
+ struct rt_signal_frame __user *sf;
+- unsigned long tpc, tnpc, tstate;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *reg
+ (regs->u_regs [UREG_FP] + STACK_BIAS);
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (((unsigned long) sf) & 3)
++ if (invalid_frame_pointer(sf))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+ goto segv;
+
+- err = get_user(tpc, &sf->regs.tpc);
++ if ((ufp + STACK_BIAS) & 0x7)
++ goto segv;
++
++ err = __get_user(tpc, &sf->regs.tpc);
+ err |= __get_user(tnpc, &sf->regs.tnpc);
+ if (test_thread_flag(TIF_32BIT)) {
+ tpc &= 0xffffffff;
+@@ -308,14 +325,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp)
+-{
+- if (((unsigned long) fp) & 15)
+- return 1;
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+--- a/arch/sparc/kernel/sigutil_32.c
++++ b/arch/sparc/kernel/sigutil_32.c
+@@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs,
+ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ int err;
++
++ if (((unsigned long) fpu) & 3)
++ return -EFAULT;
++
+ #ifdef CONFIG_SMP
+ if (test_tsk_thread_flag(current, TIF_USEDFPU))
+ regs->psr &= ~PSR_EF;
+@@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 3)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
+--- a/arch/sparc/kernel/sigutil_64.c
++++ b/arch/sparc/kernel/sigutil_64.c
+@@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *re
+ unsigned long fprs;
+ int err;
+
+- err = __get_user(fprs, &fpu->si_fprs);
++ if (((unsigned long) fpu) & 7)
++ return -EFAULT;
++
++ err = get_user(fprs, &fpu->si_fprs);
+ fprs_write(0);
+ regs->tstate &= ~TSTATE_PEF;
+ if (fprs & FPRS_DL)
+@@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 7)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
--- /dev/null
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 28 May 2016 20:41:12 -0700
+Subject: sparc64: Fix return from trap window fill crashes.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 7cafc0b8bf130f038b0ec2dcdd6a9de6dc59b65a ]
+
+We must handle data access exception as well as memory address unaligned
+exceptions from return from trap window fill faults, not just normal
+TLB misses.
+
+Otherwise we can get an OOPS that looks like this:
+
+ld-linux.so.2(36808): Kernel bad sw trap 5 [#1]
+CPU: 1 PID: 36808 Comm: ld-linux.so.2 Not tainted 4.6.0 #34
+task: fff8000303be5c60 ti: fff8000301344000 task.ti: fff8000301344000
+TSTATE: 0000004410001601 TPC: 0000000000a1a784 TNPC: 0000000000a1a788 Y: 00000002 Not tainted
+TPC: <do_sparc64_fault+0x5c4/0x700>
+g0: fff8000024fc8248 g1: 0000000000db04dc g2: 0000000000000000 g3: 0000000000000001
+g4: fff8000303be5c60 g5: fff800030e672000 g6: fff8000301344000 g7: 0000000000000001
+o0: 0000000000b95ee8 o1: 000000000000012b o2: 0000000000000000 o3: 0000000200b9b358
+o4: 0000000000000000 o5: fff8000301344040 sp: fff80003013475c1 ret_pc: 0000000000a1a77c
+RPC: <do_sparc64_fault+0x5bc/0x700>
+l0: 00000000000007ff l1: 0000000000000000 l2: 000000000000005f l3: 0000000000000000
+l4: fff8000301347e98 l5: fff8000024ff3060 l6: 0000000000000000 l7: 0000000000000000
+i0: fff8000301347f60 i1: 0000000000102400 i2: 0000000000000000 i3: 0000000000000000
+i4: 0000000000000000 i5: 0000000000000000 i6: fff80003013476a1 i7: 0000000000404d4c
+I7: <user_rtt_fill_fixup+0x6c/0x7c>
+Call Trace:
+ [0000000000404d4c] user_rtt_fill_fixup+0x6c/0x7c
+
+The window trap handlers are slightly clever: the trap table entries
+for them are composed of two pieces of code.  First comes the code
+that actually performs the window fill or spill trap handling, and
+then there are three instructions at the end which are for exception
+processing.
+
+The userland register window fill handler is:
+
+ add %sp, STACK_BIAS + 0x00, %g1; \
+ ldxa [%g1 + %g0] ASI, %l0; \
+ mov 0x08, %g2; \
+ mov 0x10, %g3; \
+ ldxa [%g1 + %g2] ASI, %l1; \
+ mov 0x18, %g5; \
+ ldxa [%g1 + %g3] ASI, %l2; \
+ ldxa [%g1 + %g5] ASI, %l3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %l4; \
+ ldxa [%g1 + %g2] ASI, %l5; \
+ ldxa [%g1 + %g3] ASI, %l6; \
+ ldxa [%g1 + %g5] ASI, %l7; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i0; \
+ ldxa [%g1 + %g2] ASI, %i1; \
+ ldxa [%g1 + %g3] ASI, %i2; \
+ ldxa [%g1 + %g5] ASI, %i3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i4; \
+ ldxa [%g1 + %g2] ASI, %i5; \
+ ldxa [%g1 + %g3] ASI, %i6; \
+ ldxa [%g1 + %g5] ASI, %i7; \
+ restored; \
+ retry; nop; nop; nop; nop; \
+ b,a,pt %xcc, fill_fixup_dax; \
+ b,a,pt %xcc, fill_fixup_mna; \
+ b,a,pt %xcc, fill_fixup;
+
+And the way this works is that if any of those memory accesses
+generate an exception, the exception handler can revector to one of
+those final three branch instructions depending upon which kind of
+exception the memory access took. In this way, the fault handler
+doesn't have to know if it was a spill or a fill that it's handling
+the fault for. It just always branches to the last instruction in
+the parent trap's handler.
+
+For example, for a regular fault, the code goes:
+
+winfix_trampoline:
+ rdpr %tpc, %g3
+ or %g3, 0x7c, %g3
+ wrpr %g3, %tnpc
+ done
+
+All window trap handlers are 0x80 aligned, so if we "or" 0x7c into the
+trap time program counter, we'll get that final instruction in the
+trap handler.
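+
+As an illustration with made-up addresses: a handler entry at
+0x100080 spans 0x80 bytes, i.e. 32 four-byte instructions, so the
+last one sits at offset 0x7c.  A fault taken at TPC 0x100090 yields
+0x100090 | 0x7c == 0x1000fc, the final branch instruction.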
+
+On return from trap, we have to pull the register window in but we do
+this by hand instead of just executing a "restore" instruction for
+several reasons.  The largest is that from Niagara and onward we
+simply don't have enough levels in the trap stack to fully resolve all
+possible exception cases of a window fault when we are already at
+trap level 1 (which we enter to get ready to return from the original
+trap).
+
+This is executed inline via the FILL_*_RTRAP handlers. rtrap_64.S's
+code branches directly to these to do the window fill by hand if
+necessary. Now if you look at them, we'll see at the end:
+
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+And oops, all three cases are handled like a fault.
+
+This doesn't work because each of these trap types (data access
+exception, memory address unaligned, and fault) stores its auxiliary
+info in different registers to pass on to the C handler which does the
+real work.
+
+So in the case where the stack was unaligned, the unaligned trap
+handler sets up the arg registers one way, and then we branch to
+the fault handler, which expects them set up another way.
+
+So the FAULT_TYPE_* value ends up basically being garbage and would
+randomly generate the backtrace seen above.
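+
+Conceptually, the common fixup added below dispatches on a small
+trap-type code instead (a C-style sketch of the control flow, not
+the actual assembly; argument details simplified):
+
+	switch (g3) {		/* 0 = fault, 1 = dax, 2 = mna */
+	case 0: do_sparc64_fault(regs); break;
+	case 1: data_access_exception(regs, sfsr, addr); break;
+	case 2: mem_address_unaligned(regs, sfsr, addr); break;
+	}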
+
+Reported-by: Nick Alcock <nix@esperi.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/head_64.h | 4 +
+ arch/sparc/include/asm/ttable.h | 8 +--
+ arch/sparc/kernel/Makefile | 1
+ arch/sparc/kernel/rtrap_64.S | 59 +++--------------------
+ arch/sparc/kernel/urtt_fill.S | 98 +++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 117 insertions(+), 53 deletions(-)
+ create mode 100644 arch/sparc/kernel/urtt_fill.S
+
+--- a/arch/sparc/include/asm/head_64.h
++++ b/arch/sparc/include/asm/head_64.h
+@@ -15,6 +15,10 @@
+
+ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
+
++#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
++#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
++#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
++
+ #define __CHEETAH_ID 0x003e0014
+ #define __JALAPENO_ID 0x003e0016
+ #define __SERRANO_ID 0x003e0022
+--- a/arch/sparc/include/asm/ttable.h
++++ b/arch/sparc/include/asm/ttable.h
+@@ -589,8 +589,8 @@ user_rtt_fill_64bit: \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+@@ -652,8 +652,8 @@ user_rtt_fill_32bit: \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+--- a/arch/sparc/kernel/Makefile
++++ b/arch/sparc/kernel/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
+ CFLAGS_REMOVE_pcr.o := -pg
+ endif
+
++obj-$(CONFIG_SPARC64) += urtt_fill.o
+ obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
+ obj-$(CONFIG_SPARC32) += etrap_32.o
+ obj-$(CONFIG_SPARC32) += rtrap_32.o
+--- a/arch/sparc/kernel/rtrap_64.S
++++ b/arch/sparc/kernel/rtrap_64.S
+@@ -14,10 +14,6 @@
+ #include <asm/visasm.h>
+ #include <asm/processor.h>
+
+-#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+-#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+-#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+-
+ #ifdef CONFIG_CONTEXT_TRACKING
+ # define SCHEDULE_USER schedule_user
+ #else
+@@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + P
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+-user_rtt_fill_fixup:
+- rdpr %cwp, %g1
+- add %g1, 1, %g1
+- wrpr %g1, 0x0, %cwp
+-
+- rdpr %wstate, %g2
+- sll %g2, 3, %g2
+- wrpr %g2, 0x0, %wstate
+-
+- /* We know %canrestore and %otherwin are both zero. */
+-
+- sethi %hi(sparc64_kern_pri_context), %g2
+- ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+- mov PRIMARY_CONTEXT, %g1
+-
+-661: stxa %g2, [%g1] ASI_DMMU
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- stxa %g2, [%g1] ASI_MMU
+- .previous
+-
+- sethi %hi(KERNBASE), %g1
+- flush %g1
++user_rtt_fill_fixup_dax:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 1, %g3
++
++user_rtt_fill_fixup_mna:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 2, %g3
+
+- or %g4, FAULT_CODE_WINFIXUP, %g4
+- stb %g4, [%g6 + TI_FAULT_CODE]
+- stx %g5, [%g6 + TI_FAULT_ADDR]
+-
+- mov %g6, %l1
+- wrpr %g0, 0x0, %tl
+-
+-661: nop
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- SET_GL(0)
+- .previous
+-
+- wrpr %g0, RTRAP_PSTATE, %pstate
+-
+- mov %l1, %g6
+- ldx [%g6 + TI_TASK], %g4
+- LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+- call do_sparc64_fault
+- add %sp, PTREGS_OFF, %o0
+- ba,pt %xcc, rtrap
+- nop
++user_rtt_fill_fixup:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ clr %g3
+
+ user_rtt_pre_restore:
+ add %g1, 1, %g1
+--- /dev/null
++++ b/arch/sparc/kernel/urtt_fill.S
+@@ -0,0 +1,98 @@
++#include <asm/thread_info.h>
++#include <asm/trap_block.h>
++#include <asm/spitfire.h>
++#include <asm/ptrace.h>
++#include <asm/head.h>
++
++ .text
++ .align 8
++ .globl user_rtt_fill_fixup_common
++user_rtt_fill_fixup_common:
++ rdpr %cwp, %g1
++ add %g1, 1, %g1
++ wrpr %g1, 0x0, %cwp
++
++ rdpr %wstate, %g2
++ sll %g2, 3, %g2
++ wrpr %g2, 0x0, %wstate
++
++ /* We know %canrestore and %otherwin are both zero. */
++
++ sethi %hi(sparc64_kern_pri_context), %g2
++ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
++ mov PRIMARY_CONTEXT, %g1
++
++661: stxa %g2, [%g1] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %g2, [%g1] ASI_MMU
++ .previous
++
++ sethi %hi(KERNBASE), %g1
++ flush %g1
++
++ mov %g4, %l4
++ mov %g5, %l5
++ brnz,pn %g3, 1f
++ mov %g3, %l3
++
++ or %g4, FAULT_CODE_WINFIXUP, %g4
++ stb %g4, [%g6 + TI_FAULT_CODE]
++ stx %g5, [%g6 + TI_FAULT_ADDR]
++1:
++ mov %g6, %l1
++ wrpr %g0, 0x0, %tl
++
++661: nop
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ SET_GL(0)
++ .previous
++
++ wrpr %g0, RTRAP_PSTATE, %pstate
++
++ mov %l1, %g6
++ ldx [%g6 + TI_TASK], %g4
++ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
++
++ brnz,pn %l3, 1f
++ nop
++
++ call do_sparc64_fault
++ add %sp, PTREGS_OFF, %o0
++ ba,pt %xcc, rtrap
++ nop
++
++1: cmp %g3, 2
++ bne,pn %xcc, 2f
++ nop
++
++ sethi %hi(tlb_type), %g1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ mov %l4, %o2
++ call sun4v_do_mna
++ mov %l5, %o1
++ ba,a,pt %xcc, rtrap
++1: mov %l4, %o1
++ mov %l5, %o2
++ call mem_address_unaligned
++ nop
++ ba,a,pt %xcc, rtrap
++
++2: sethi %hi(tlb_type), %g1
++ mov %l4, %o1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ mov %l5, %o2
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ call sun4v_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
++
++1: call spitfire_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
--- /dev/null
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+Date: Wed, 30 Mar 2016 11:17:13 -0700
+Subject: sparc64: Reduce TLB flushes during hugepte changes
+
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+
+[ Upstream commit 24e49ee3d76b70853a96520e46b8837e5eae65b2 ]
+
+During hugepage map/unmap, TSB and TLB flushes are currently
+issued at every PAGE_SIZE boundary, which is unnecessary.
+We now issue the flush at REAL_HPAGE_SIZE boundaries only.
+
+Without this patch, workloads that unmap a large hugepage-backed
+VMA region get CPU lockups due to excessive TLB flush calls.
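+
+To put numbers on it (assuming the usual sparc64 sizes): with 8MB
+hugepages built from two 4MB REAL_HPAGE_SIZE mappings over an 8K
+base page size, one hugepage covers 1024 ptes, so the old loop could
+queue on the order of a thousand flushes where the new code issues
+exactly two, one per 4MB half.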
+
+Orabug: 22365539, 22643230, 22995196
+
+Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/pgtable_64.h | 43 ++++++++++++++++++++++++++---------
+ arch/sparc/include/asm/tlbflush_64.h | 3 +-
+ arch/sparc/mm/hugetlbpage.c | 33 ++++++++++++++++++++++----
+ arch/sparc/mm/init_64.c | 12 ---------
+ arch/sparc/mm/tlb.c | 25 ++++++++++++++------
+ arch/sparc/mm/tsb.c | 32 +++++++++++++-------------
+ 6 files changed, 97 insertions(+), 51 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(
+ #define pgprot_noncached pgprot_noncached
+
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline pte_t pte_mkhuge(pte_t pte)
++static inline unsigned long __pte_huge_mask(void)
+ {
+ unsigned long mask;
+
+@@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+- return __pte(pte_val(pte) | mask);
++ return mask;
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++ return __pte(pte_val(pte) | __pte_huge_mask());
++}
++
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return !!(pte_val(pte) & __pte_huge_mask());
+ }
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ {
+@@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd
+ return __pmd(pte_val(pte));
+ }
+ #endif
++#else
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return false;
++}
+ #endif
+
+ static inline pte_t pte_mkdirty(pte_t pte)
+@@ -858,6 +874,19 @@ static inline unsigned long pud_pfn(pud_
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm);
+
++static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
++ pte_t *ptep, pte_t orig, int fullmm)
++{
++ /* It is more efficient to let flush_tlb_kernel_range()
++ * handle init_mm tlb flushes.
++ *
++ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
++ * and SUN4V pte layout, so this inline test is fine.
++ */
++ if (likely(mm != &init_mm) && pte_accessible(mm, orig))
++ tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
++}
++
+ #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+@@ -874,15 +903,7 @@ static inline void __set_pte_at(struct m
+ pte_t orig = *ptep;
+
+ *ptep = pte;
+-
+- /* It is more efficient to let flush_tlb_kernel_range()
+- * handle init_mm tlb flushes.
+- *
+- * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+- * and SUN4V pte layout, so this inline test is fine.
+- */
+- if (likely(mm != &init_mm) && pte_accessible(mm, orig))
+- tlb_batch_add(mm, addr, ptep, orig, fullmm);
++ maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm);
+ }
+
+ #define set_pte_at(mm,addr,ptep,pte) \
+--- a/arch/sparc/include/asm/tlbflush_64.h
++++ b/arch/sparc/include/asm/tlbflush_64.h
+@@ -8,6 +8,7 @@
+ #define TLB_BATCH_NR 192
+
+ struct tlb_batch {
++ bool huge;
+ struct mm_struct *mm;
+ unsigned long tlb_nr;
+ unsigned long active;
+@@ -16,7 +17,7 @@ struct tlb_batch {
+
+ void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+ void flush_tsb_user(struct tlb_batch *tb);
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr);
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge);
+
+ /* TLB flush operations. */
+
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *m
+ pte_t *ptep, pte_t entry)
+ {
+ int i;
++ pte_t orig[2];
++ unsigned long nptes;
+
+ if (!pte_present(*ptep) && pte_present(entry))
+ mm->context.huge_pte_count++;
+
+ addr &= HPAGE_MASK;
+- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+- set_pte_at(mm, addr, ptep, entry);
++
++ nptes = 1 << HUGETLB_PAGE_ORDER;
++ orig[0] = *ptep;
++ orig[1] = *(ptep + nptes / 2);
++ for (i = 0; i < nptes; i++) {
++ *ptep = entry;
+ ptep++;
+ addr += PAGE_SIZE;
+ pte_val(entry) += PAGE_SIZE;
+ }
++
++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0);
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0);
+ }
+
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+@@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_
+ {
+ pte_t entry;
+ int i;
++ unsigned long nptes;
+
+ entry = *ptep;
+ if (pte_present(entry))
+ mm->context.huge_pte_count--;
+
+ addr &= HPAGE_MASK;
+-
+- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+- pte_clear(mm, addr, ptep);
++ nptes = 1 << HUGETLB_PAGE_ORDER;
++ for (i = 0; i < nptes; i++) {
++ *ptep = __pte(0UL);
+ addr += PAGE_SIZE;
+ ptep++;
+ }
+
++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
++
+ return entry;
+ }
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(stru
+ tsb_insert(tsb, tag, tte);
+ }
+
+-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline bool is_hugetlb_pte(pte_t pte)
+-{
+- if ((tlb_type == hypervisor &&
+- (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
+- (tlb_type != hypervisor &&
+- (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U))
+- return true;
+- return false;
+-}
+-#endif
+-
+ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+ {
+ struct mm_struct *mm;
+--- a/arch/sparc/mm/tlb.c
++++ b/arch/sparc/mm/tlb.c
+@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void)
+ }
+
+ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+- bool exec)
++ bool exec, bool huge)
+ {
+ struct tlb_batch *tb = &get_cpu_var(tlb_batch);
+ unsigned long nr;
+@@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_
+ }
+
+ if (!tb->active) {
+- flush_tsb_user_page(mm, vaddr);
++ flush_tsb_user_page(mm, vaddr, huge);
+ global_flush_tlb_page(mm, vaddr);
+ goto out;
+ }
+
+- if (nr == 0)
++ if (nr == 0) {
+ tb->mm = mm;
++ tb->huge = huge;
++ }
++
++ if (tb->huge != huge) {
++ flush_tlb_pending();
++ tb->huge = huge;
++ nr = 0;
++ }
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+@@ -104,6 +112,8 @@ out:
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+ {
++ bool huge = is_hugetlb_pte(orig);
++
+ if (tlb_type != hypervisor &&
+ pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+@@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm,
+
+ no_cache_flush:
+ if (!fullmm)
+- tlb_batch_add_one(mm, vaddr, pte_exec(orig));
++ tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm
+ if (pte_val(*pte) & _PAGE_VALID) {
+ bool exec = pte_exec(*pte);
+
+- tlb_batch_add_one(mm, vaddr, exec);
++ tlb_batch_add_one(mm, vaddr, exec, false);
+ }
+ pte++;
+ vaddr += PAGE_SIZE;
+@@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, un
+ pte_t orig_pte = __pte(pmd_val(orig));
+ bool exec = pte_exec(orig_pte);
+
+- tlb_batch_add_one(mm, addr, exec);
+- tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
++ tlb_batch_add_one(mm, addr, exec, true);
++ tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
++ true);
+ } else {
+ tlb_batch_pmd_scan(mm, addr, orig);
+ }
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -76,14 +76,15 @@ void flush_tsb_user(struct tlb_batch *tb
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+-
++ if (!tb->huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+@@ -94,20 +95,21 @@ void flush_tsb_user(struct tlb_batch *tb
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+ {
+ unsigned long nentries, base, flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+-
++ if (!huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
--- /dev/null
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 25 May 2016 12:51:20 -0700
+Subject: sparc64: Take ctx_alloc_lock properly in hugetlb_setup().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 9ea46abe22550e3366ff7cee2f8391b35b12f730 ]
+
+On cheetahplus chips we take the ctx_alloc_lock in order to
+modify the TLB lookup parameters for the indexed TLBs, which
+are stored in the context register.
+
+This is called with interrupts disabled; however, ctx_alloc_lock
+is an IRQ-safe lock, so we must acquire/release it properly with
+spin_{lock,unlock}_irq().
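+
+The shape of the fix, as a C sketch (condensed from the hunk below;
+the cross-call is deferred until the IRQ-disabled lock is dropped):
+
+	spin_lock_irq(&ctx_alloc_lock);
+	/* ... recompute mm->context.sparc64_ctx_val ... */
+	need_context_reload = true;
+	spin_unlock_irq(&ctx_alloc_lock);
+
+	if (need_context_reload)
+		on_each_cpu(context_reload, mm, 0);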
+
+Reported-by: Meelis Roos <mroos@linux.ee>
+Tested-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -2824,9 +2824,10 @@ void hugetlb_setup(struct pt_regs *regs)
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
++ bool need_context_reload = false;
+ unsigned long ctx;
+
+- spin_lock(&ctx_alloc_lock);
++ spin_lock_irq(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+@@ -2845,9 +2846,12 @@ void hugetlb_setup(struct pt_regs *regs)
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+- on_each_cpu(context_reload, mm, 0);
++ need_context_reload = true;
+ }
+- spin_unlock(&ctx_alloc_lock);
++ spin_unlock_irq(&ctx_alloc_lock);
++
++ if (need_context_reload)
++ on_each_cpu(context_reload, mm, 0);
+ }
+ }
+ #endif