From: Greg Kroah-Hartman
Date: Mon, 20 Jun 2016 17:49:00 +0000 (-0700)
Subject: 4.6-stable patches
X-Git-Tag: v3.14.73~24
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2d8824bfdd837df10ee6767981638ed2356f7c40;p=thirdparty%2Fkernel%2Fstable-queue.git

4.6-stable patches

added patches:
    sparc-harden-signal-return-frame-checks.patch
    sparc64-fix-return-from-trap-window-fill-crashes.patch
    sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
    sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
---

diff --git a/queue-4.6/series b/queue-4.6/series
index 6d8a6339992..ff0c26fb18a 100644
--- a/queue-4.6/series
+++ b/queue-4.6/series
@@ -61,3 +61,7 @@ x86-entry-traps-don-t-force-in_interrupt-to-return-true-in-ist-handlers.patch
 proc-prevent-stacking-filesystems-on-top.patch
 sched-panic-on-corrupted-stack-end.patch
 fix-d_walk-non-delayed-__d_free-race.patch
+sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
+sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
+sparc-harden-signal-return-frame-checks.patch
+sparc64-fix-return-from-trap-window-fill-crashes.patch
diff --git a/queue-4.6/sparc-harden-signal-return-frame-checks.patch b/queue-4.6/sparc-harden-signal-return-frame-checks.patch
new file mode 100644
index 00000000000..6bf366a2aad
--- /dev/null
+++ b/queue-4.6/sparc-harden-signal-return-frame-checks.patch
@@ -0,0 +1,315 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Sat, 28 May 2016 21:21:31 -0700
+Subject: sparc: Harden signal return frame checks.
+
+From: "David S. Miller"
+
+[ Upstream commit d11c2a0de2824395656cf8ed15811580c9dd38aa ]
+
+All signal frames must be at least 16-byte aligned, because that is
+the alignment we explicitly create when we build signal return stack
+frames.
+
+All stack pointers must be at least 8-byte aligned.
+
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/kernel/signal32.c | 46 ++++++++++++++++++++++++++---------------
+ arch/sparc/kernel/signal_32.c | 41 +++++++++++++++++++++++-------------
+ arch/sparc/kernel/signal_64.c | 31 +++++++++++++++++----------
+ arch/sparc/kernel/sigutil_32.c | 9 +++++++-
+ arch/sparc/kernel/sigutil_64.c | 10 +++++++-
+ 5 files changed, 92 insertions(+), 45 deletions(-)
+
+--- a/arch/sparc/kernel/signal32.c
++++ b/arch/sparc/kernel/signal32.c
+@@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *
+ return 0;
+ }
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) ||
++ ((unsigned long)fp) > 0x100000000ULL - fplen)
++ return true;
++ return false;
++}
++
+ void do_sigreturn32(struct pt_regs *regs)
+ {
+ struct signal_frame32 __user *sf;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+- unsigned int psr;
++ unsigned int psr, ufp;
+ unsigned int pc, npc;
+ sigset_t set;
+ compat_sigset_t seta;
+@@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs
+ sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+- if (get_user(pc, &sf->info.si_regs.pc) ||
++ if (__get_user(pc, &sf->info.si_regs.pc) ||
+ __get_user(npc, &sf->info.si_regs.npc))
+ goto segv;
+
+@@ -227,7 +244,7 @@ segv:
+ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+ {
+ struct rt_signal_frame32 __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+ sigset_t set;
+@@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct
+ sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv;
+
+- if (get_user(pc, &sf->regs.pc) ||
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
++ goto segv;
++
++ if (__get_user(pc, &sf->regs.pc) ||
+ __get_user(npc, &sf->regs.npc))
+ goto segv;
+
+@@ -307,14 +329,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen)
+- return 1;
+- return 0;
+-}
+-
+ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp;
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -60,10 +60,22 @@ struct rt_signal_frame {
+ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7)))
+ #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static inline bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
++ return true;
++
++ return false;
++}
++
+ asmlinkage void do_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long up_psr, pc, npc, ufp;
+ struct signal_frame __user *sf;
+- unsigned long up_psr, pc, npc;
+ sigset_t set;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+@@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_r
+ sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv_and_exit;
+
+- if (((unsigned long) sf) & 3)
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv_and_exit;
++
++ if (ufp & 0x7)
+ goto segv_and_exit;
+
+ err = __get_user(pc, &sf->info.si_regs.pc);
+@@ -127,7 +142,7 @@ segv_and_exit:
+ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_signal_frame __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct p
+
+ synchronize_user_stack();
+ sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 0x03))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+ err = __get_user(pc, &sf->regs.pc);
+@@ -178,15 +198,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static inline int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen))
+- return 1;
+-
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP];
+--- a/arch/sparc/kernel/signal_64.c
++++ b/arch/sparc/kernel/signal_64.c
+@@ -234,6 +234,17 @@ do_sigsegv:
+ goto out;
+ }
+
++/* Checks if the fp is valid. We always build rt signal frames which
++ * are 16-byte aligned, therefore we can always enforce that the
++ * restore frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp)
++{
++ if (((unsigned long) fp) & 15)
++ return true;
++ return false;
++}
++
+ struct rt_signal_frame {
+ struct sparc_stackf ss;
+ siginfo_t info;
+@@ -246,8 +257,8 @@ struct rt_signal_frame {
+
+ void do_rt_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long tpc, tnpc, tstate, ufp;
+ struct rt_signal_frame __user *sf;
+- unsigned long tpc, tnpc, tstate;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *reg
+ (regs->u_regs [UREG_FP] + STACK_BIAS);
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (((unsigned long) sf) & 3)
++ if (invalid_frame_pointer(sf))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+ goto segv;
+
+- err = get_user(tpc, &sf->regs.tpc);
++ if ((ufp + STACK_BIAS) & 0x7)
++ goto segv;
++
++ err = __get_user(tpc, &sf->regs.tpc);
+ err |= __get_user(tnpc, &sf->regs.tnpc);
+ if (test_thread_flag(TIF_32BIT)) {
+ tpc &= 0xffffffff;
+@@ -308,14 +325,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp)
+-{
+- if (((unsigned long) fp) & 15)
+- return 1;
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+--- a/arch/sparc/kernel/sigutil_32.c
++++ b/arch/sparc/kernel/sigutil_32.c
+@@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs,
+ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ int err;
++
++ if (((unsigned long) fpu) & 3)
++ return -EFAULT;
++
+ #ifdef CONFIG_SMP
+ if (test_tsk_thread_flag(current, TIF_USEDFPU))
+ regs->psr &= ~PSR_EF;
+@@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 3)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
+--- a/arch/sparc/kernel/sigutil_64.c
++++ b/arch/sparc/kernel/sigutil_64.c
+@@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *re
+ unsigned long fprs;
+ int err;
+
+- err = __get_user(fprs, &fpu->si_fprs);
++ if (((unsigned long) fpu) & 7)
++ return -EFAULT;
++
++ err = get_user(fprs, &fpu->si_fprs);
+ fprs_write(0);
+ regs->tstate &= ~TSTATE_PEF;
+ if (fprs & FPRS_DL)
+@@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 7)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
diff --git a/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch b/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch
new file mode 100644
index 00000000000..b7d73863b9a
--- /dev/null
+++ b/queue-4.6/sparc64-fix-return-from-trap-window-fill-crashes.patch
@@ -0,0 +1,355 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Sat, 28 May 2016 20:41:12 -0700
+Subject: sparc64: Fix return from trap window fill crashes.
+
+From: "David S. Miller"
+
+[ Upstream commit 7cafc0b8bf130f038b0ec2dcdd6a9de6dc59b65a ]
+
+We must handle data access exception as well as memory address unaligned
+exceptions from return from trap window fill faults, not just normal
+TLB misses.
+
+Otherwise we can get an OOPS that looks like this:
+
+ld-linux.so.2(36808): Kernel bad sw trap 5 [#1]
+CPU: 1 PID: 36808 Comm: ld-linux.so.2 Not tainted 4.6.0 #34
+task: fff8000303be5c60 ti: fff8000301344000 task.ti: fff8000301344000
+TSTATE: 0000004410001601 TPC: 0000000000a1a784 TNPC: 0000000000a1a788 Y: 00000002 Not tainted
+TPC:
+g0: fff8000024fc8248 g1: 0000000000db04dc g2: 0000000000000000 g3: 0000000000000001
+g4: fff8000303be5c60 g5: fff800030e672000 g6: fff8000301344000 g7: 0000000000000001
+o0: 0000000000b95ee8 o1: 000000000000012b o2: 0000000000000000 o3: 0000000200b9b358
+o4: 0000000000000000 o5: fff8000301344040 sp: fff80003013475c1 ret_pc: 0000000000a1a77c
+RPC:
+l0: 00000000000007ff l1: 0000000000000000 l2: 000000000000005f l3: 0000000000000000
+l4: fff8000301347e98 l5: fff8000024ff3060 l6: 0000000000000000 l7: 0000000000000000
+i0: fff8000301347f60 i1: 0000000000102400 i2: 0000000000000000 i3: 0000000000000000
+i4: 0000000000000000 i5: 0000000000000000 i6: fff80003013476a1 i7: 0000000000404d4c
+I7:
+Call Trace:
+ [0000000000404d4c] user_rtt_fill_fixup+0x6c/0x7c
+
+The window trap handlers are slightly clever, the trap table entries for them are
+composed of two pieces of code. First comes the code that actually performs
+the window fill or spill trap handling, and then there are three instructions at
+the end which are for exception processing.
+
+The userland register window fill handler is:
+
+ add %sp, STACK_BIAS + 0x00, %g1; \
+ ldxa [%g1 + %g0] ASI, %l0; \
+ mov 0x08, %g2; \
+ mov 0x10, %g3; \
+ ldxa [%g1 + %g2] ASI, %l1; \
+ mov 0x18, %g5; \
+ ldxa [%g1 + %g3] ASI, %l2; \
+ ldxa [%g1 + %g5] ASI, %l3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %l4; \
+ ldxa [%g1 + %g2] ASI, %l5; \
+ ldxa [%g1 + %g3] ASI, %l6; \
+ ldxa [%g1 + %g5] ASI, %l7; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i0; \
+ ldxa [%g1 + %g2] ASI, %i1; \
+ ldxa [%g1 + %g3] ASI, %i2; \
+ ldxa [%g1 + %g5] ASI, %i3; \
+ add %g1, 0x20, %g1; \
+ ldxa [%g1 + %g0] ASI, %i4; \
+ ldxa [%g1 + %g2] ASI, %i5; \
+ ldxa [%g1 + %g3] ASI, %i6; \
+ ldxa [%g1 + %g5] ASI, %i7; \
+ restored; \
+ retry; nop; nop; nop; nop; \
+ b,a,pt %xcc, fill_fixup_dax; \
+ b,a,pt %xcc, fill_fixup_mna; \
+ b,a,pt %xcc, fill_fixup;
+
+And the way this works is that if any of those memory accesses
+generate an exception, the exception handler can revector to one of
+those final three branch instructions depending upon which kind of
+exception the memory access took. In this way, the fault handler
+doesn't have to know if it was a spill or a fill that it's handling
+the fault for. It just always branches to the last instruction in
+the parent trap's handler.
+
+For example, for a regular fault, the code goes:
+
+winfix_trampoline:
+ rdpr %tpc, %g3
+ or %g3, 0x7c, %g3
+ wrpr %g3, %tnpc
+ done
+
+All window trap handlers are 0x80 aligned, so if we "or" 0x7c into the
+trap time program counter, we'll get that final instruction in the
+trap handler.
+
+On return from trap, we have to pull the register window in but we do
+this by hand instead of just executing a "restore" instruction for
+several reasons. The largest being that from Niagara and onward we
+simply don't have enough levels in the trap stack to fully resolve all
+possible exception cases of a window fault when we are already at
+trap level 1 (which we enter to get ready to return from the original
+trap).
+
+This is executed inline via the FILL_*_RTRAP handlers. rtrap_64.S's
+code branches directly to these to do the window fill by hand if
+necessary. Now if you look at them, we'll see at the end:
+
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+And oops, all three cases are handled like a fault.
+
+This doesn't work because each of these trap types (data access
+exception, memory address unaligned, and faults) store their auxiliary
+info in different registers to pass on to the C handler which does the
+real work.
+
+So in the case where the stack was unaligned, the unaligned trap
+handler sets up the arg registers one way, and then we branched to
+the fault handler which expects them setup another way.
+
+So the FAULT_TYPE_* value ends up basically being garbage, and
+randomly would generate the backtrace seen above.
+
+Reported-by: Nick Alcock
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/head_64.h | 4 +
+ arch/sparc/include/asm/ttable.h | 8 +--
+ arch/sparc/kernel/Makefile | 1
+ arch/sparc/kernel/rtrap_64.S | 59 +++--------------------
+ arch/sparc/kernel/urtt_fill.S | 98 +++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 117 insertions(+), 53 deletions(-)
+ create mode 100644 arch/sparc/kernel/urtt_fill.S
+
+--- a/arch/sparc/include/asm/head_64.h
++++ b/arch/sparc/include/asm/head_64.h
+@@ -15,6 +15,10 @@
+
+ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
+
++#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
++#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
++#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
++
+ #define __CHEETAH_ID 0x003e0014
+ #define __JALAPENO_ID 0x003e0016
+ #define __SERRANO_ID 0x003e0022
+--- a/arch/sparc/include/asm/ttable.h
++++ b/arch/sparc/include/asm/ttable.h
+@@ -589,8 +589,8 @@ user_rtt_fill_64bit: \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+@@ -652,8 +652,8 @@ user_rtt_fill_32bit: \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+--- a/arch/sparc/kernel/Makefile
++++ b/arch/sparc/kernel/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
+ CFLAGS_REMOVE_pcr.o := -pg
+ endif
+
++obj-$(CONFIG_SPARC64) += urtt_fill.o
+ obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
+ obj-$(CONFIG_SPARC32) += etrap_32.o
+ obj-$(CONFIG_SPARC32) += rtrap_32.o
+--- a/arch/sparc/kernel/rtrap_64.S
++++ b/arch/sparc/kernel/rtrap_64.S
+@@ -14,10 +14,6 @@
+ #include
+ #include
+
+-#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+-#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+-#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+-
+ #ifdef CONFIG_CONTEXT_TRACKING
+ # define SCHEDULE_USER schedule_user
+ #else
+@@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + P
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+-user_rtt_fill_fixup:
+- rdpr %cwp, %g1
+- add %g1, 1, %g1
+- wrpr %g1, 0x0, %cwp
+-
+- rdpr %wstate, %g2
+- sll %g2, 3, %g2
+- wrpr %g2, 0x0, %wstate
+-
+- /* We know %canrestore and %otherwin are both zero. */
+-
+- sethi %hi(sparc64_kern_pri_context), %g2
+- ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+- mov PRIMARY_CONTEXT, %g1
+-
+-661: stxa %g2, [%g1] ASI_DMMU
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- stxa %g2, [%g1] ASI_MMU
+- .previous
+-
+- sethi %hi(KERNBASE), %g1
+- flush %g1
++user_rtt_fill_fixup_dax:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 1, %g3
++
++user_rtt_fill_fixup_mna:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 2, %g3
+
+- or %g4, FAULT_CODE_WINFIXUP, %g4
+- stb %g4, [%g6 + TI_FAULT_CODE]
+- stx %g5, [%g6 + TI_FAULT_ADDR]
+-
+- mov %g6, %l1
+- wrpr %g0, 0x0, %tl
+-
+-661: nop
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- SET_GL(0)
+- .previous
+-
+- wrpr %g0, RTRAP_PSTATE, %pstate
+-
+- mov %l1, %g6
+- ldx [%g6 + TI_TASK], %g4
+- LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+- call do_sparc64_fault
+- add %sp, PTREGS_OFF, %o0
+- ba,pt %xcc, rtrap
+- nop
++user_rtt_fill_fixup:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ clr %g3
+
+ user_rtt_pre_restore:
+ add %g1, 1, %g1
+--- /dev/null
++++ b/arch/sparc/kernel/urtt_fill.S
+@@ -0,0 +1,98 @@
++#include
++#include
++#include
++#include
++#include
++
++ .text
++ .align 8
++ .globl user_rtt_fill_fixup_common
++user_rtt_fill_fixup_common:
++ rdpr %cwp, %g1
++ add %g1, 1, %g1
++ wrpr %g1, 0x0, %cwp
++
++ rdpr %wstate, %g2
++ sll %g2, 3, %g2
++ wrpr %g2, 0x0, %wstate
++
++ /* We know %canrestore and %otherwin are both zero. */
++
++ sethi %hi(sparc64_kern_pri_context), %g2
++ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
++ mov PRIMARY_CONTEXT, %g1
++
++661: stxa %g2, [%g1] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %g2, [%g1] ASI_MMU
++ .previous
++
++ sethi %hi(KERNBASE), %g1
++ flush %g1
++
++ mov %g4, %l4
++ mov %g5, %l5
++ brnz,pn %g3, 1f
++ mov %g3, %l3
++
++ or %g4, FAULT_CODE_WINFIXUP, %g4
++ stb %g4, [%g6 + TI_FAULT_CODE]
++ stx %g5, [%g6 + TI_FAULT_ADDR]
++1:
++ mov %g6, %l1
++ wrpr %g0, 0x0, %tl
++
++661: nop
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ SET_GL(0)
++ .previous
++
++ wrpr %g0, RTRAP_PSTATE, %pstate
++
++ mov %l1, %g6
++ ldx [%g6 + TI_TASK], %g4
++ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
++
++ brnz,pn %l3, 1f
++ nop
++
++ call do_sparc64_fault
++ add %sp, PTREGS_OFF, %o0
++ ba,pt %xcc, rtrap
++ nop
++
++1: cmp %g3, 2
++ bne,pn %xcc, 2f
++ nop
++
++ sethi %hi(tlb_type), %g1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ mov %l4, %o2
++ call sun4v_do_mna
++ mov %l5, %o1
++ ba,a,pt %xcc, rtrap
++1: mov %l4, %o1
++ mov %l5, %o2
++ call mem_address_unaligned
++ nop
++ ba,a,pt %xcc, rtrap
++
++2: sethi %hi(tlb_type), %g1
++ mov %l4, %o1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ mov %l5, %o2
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ call sun4v_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
++
++1: call spitfire_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
diff --git a/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch b/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
new file mode 100644
index 00000000000..4312bdc5b7f
--- /dev/null
+++ b/queue-4.6/sparc64-reduce-tlb-flushes-during-hugepte-changes.patch
@@ -0,0 +1,349 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: Nitin Gupta
+Date: Wed, 30 Mar 2016 11:17:13 -0700
+Subject: sparc64: Reduce TLB flushes during hugepte changes
+
+From: Nitin Gupta
+
+[ Upstream commit 24e49ee3d76b70853a96520e46b8837e5eae65b2 ]
+
+During hugepage map/unmap, TSB and TLB flushes are currently
+issued at every PAGE_SIZE'd boundary which is unnecessary.
+We now issue the flush at REAL_HPAGE_SIZE boundaries only.
+
+Without this patch workloads which unmap a large hugepage
+backed VMA region get CPU lockups due to excessive TLB
+flush calls.
+
+Orabug: 22365539, 22643230, 22995196
+
+Signed-off-by: Nitin Gupta
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/pgtable_64.h | 43 ++++++++++++++++++++++++++---------
+ arch/sparc/include/asm/tlbflush_64.h | 3 +-
+ arch/sparc/mm/hugetlbpage.c | 33 ++++++++++++++++++++++----
+ arch/sparc/mm/init_64.c | 12 ---------
+ arch/sparc/mm/tlb.c | 25 ++++++++++++++------
+ arch/sparc/mm/tsb.c | 32 +++++++++++++-------------
+ 6 files changed, 97 insertions(+), 51 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(
+ #define pgprot_noncached pgprot_noncached
+
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline pte_t pte_mkhuge(pte_t pte)
++static inline unsigned long __pte_huge_mask(void)
+ {
+ unsigned long mask;
+
+@@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+- return __pte(pte_val(pte) | mask);
++ return mask;
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++ return __pte(pte_val(pte) | __pte_huge_mask());
++}
++
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return !!(pte_val(pte) & __pte_huge_mask());
+ }
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ {
+@@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd
+ return __pmd(pte_val(pte));
+ }
+ #endif
++#else
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return false;
++}
+ #endif
+
+ static inline pte_t pte_mkdirty(pte_t pte)
+@@ -858,6 +874,19 @@ static inline unsigned long pud_pfn(pud_
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm);
+
++static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
++ pte_t *ptep, pte_t orig, int fullmm)
++{
++ /* It is more efficient to let flush_tlb_kernel_range()
++ * handle init_mm tlb flushes.
++ *
++ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
++ * and SUN4V pte layout, so this inline test is fine.
++ */
++ if (likely(mm != &init_mm) && pte_accessible(mm, orig))
++ tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
++}
++
+ #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+@@ -874,15 +903,7 @@ static inline void __set_pte_at(struct m
+ pte_t orig = *ptep;
+
+ *ptep = pte;
+-
+- /* It is more efficient to let flush_tlb_kernel_range()
+- * handle init_mm tlb flushes.
+- *
+- * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+- * and SUN4V pte layout, so this inline test is fine.
+- */ +- if (likely(mm != &init_mm) && pte_accessible(mm, orig)) +- tlb_batch_add(mm, addr, ptep, orig, fullmm); ++ maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm); + } + + #define set_pte_at(mm,addr,ptep,pte) \ +--- a/arch/sparc/include/asm/tlbflush_64.h ++++ b/arch/sparc/include/asm/tlbflush_64.h +@@ -8,6 +8,7 @@ + #define TLB_BATCH_NR 192 + + struct tlb_batch { ++ bool huge; + struct mm_struct *mm; + unsigned long tlb_nr; + unsigned long active; +@@ -16,7 +17,7 @@ struct tlb_batch { + + void flush_tsb_kernel_range(unsigned long start, unsigned long end); + void flush_tsb_user(struct tlb_batch *tb); +-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); ++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge); + + /* TLB flush operations. */ + +--- a/arch/sparc/mm/hugetlbpage.c ++++ b/arch/sparc/mm/hugetlbpage.c +@@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *m + pte_t *ptep, pte_t entry) + { + int i; ++ pte_t orig[2]; ++ unsigned long nptes; + + if (!pte_present(*ptep) && pte_present(entry)) + mm->context.huge_pte_count++; + + addr &= HPAGE_MASK; +- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { +- set_pte_at(mm, addr, ptep, entry); ++ ++ nptes = 1 << HUGETLB_PAGE_ORDER; ++ orig[0] = *ptep; ++ orig[1] = *(ptep + nptes / 2); ++ for (i = 0; i < nptes; i++) { ++ *ptep = entry; + ptep++; + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } ++ ++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0); ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0); + } + + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, +@@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_ + { + pte_t entry; + int i; ++ unsigned long nptes; + + entry = *ptep; + if (pte_present(entry)) + mm->context.huge_pte_count--; + + addr &= HPAGE_MASK; +- +- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { +- pte_clear(mm, addr, ptep); ++ nptes = 1 << HUGETLB_PAGE_ORDER; ++ for (i = 0; i < nptes; i++) { ++ *ptep = __pte(0UL); + addr += PAGE_SIZE; + ptep++; + } + ++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0); ++ addr -= REAL_HPAGE_SIZE; ++ ptep -= nptes / 2; ++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0); ++ + return entry; + } + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(stru + tsb_insert(tsb, tag, tte); + } + +-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +-static inline bool is_hugetlb_pte(pte_t pte) +-{ +- if ((tlb_type == hypervisor && +- (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || +- (tlb_type != hypervisor && +- (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) +- return true; +- return false; +-} +-#endif +- + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) + { + struct mm_struct *mm; +--- a/arch/sparc/mm/tlb.c ++++ b/arch/sparc/mm/tlb.c +@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void) + } + + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, +- bool exec) ++ bool exec, bool huge) + { + struct tlb_batch *tb = &get_cpu_var(tlb_batch); + unsigned long nr; +@@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_ + } + + if (!tb->active) { +- flush_tsb_user_page(mm, vaddr); ++ 
+ global_flush_tlb_page(mm, vaddr);
+ goto out;
+ }
+
+- if (nr == 0)
++ if (nr == 0) {
+ tb->mm = mm;
++ tb->huge = huge;
++ }
++
++ if (tb->huge != huge) {
++ flush_tlb_pending();
++ tb->huge = huge;
++ nr = 0;
++ }
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+@@ -104,6 +112,8 @@ out:
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+ {
++ bool huge = is_hugetlb_pte(orig);
++
+ if (tlb_type != hypervisor &&
+ pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+@@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm,
+
+ no_cache_flush:
+ if (!fullmm)
+- tlb_batch_add_one(mm, vaddr, pte_exec(orig));
++ tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm
+ if (pte_val(*pte) & _PAGE_VALID) {
+ bool exec = pte_exec(*pte);
+
+- tlb_batch_add_one(mm, vaddr, exec);
++ tlb_batch_add_one(mm, vaddr, exec, false);
+ }
+ pte++;
+ vaddr += PAGE_SIZE;
+@@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, un
+ pte_t orig_pte = __pte(pmd_val(orig));
+ bool exec = pte_exec(orig_pte);
+
+- tlb_batch_add_one(mm, addr, exec);
+- tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
++ tlb_batch_add_one(mm, addr, exec, true);
++ tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
++ true);
+ } else {
+ tlb_batch_pmd_scan(mm, addr, orig);
+ }
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -76,14 +76,15 @@ void flush_tsb_user(struct tlb_batch *tb
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+-
++ if (!tb->huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+@@ -94,20 +95,21 @@ void flush_tsb_user(struct tlb_batch *tb
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+ {
+ unsigned long nentries, base, flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+-
++ if (!huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
diff --git a/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch b/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
new file mode 100644
index 00000000000..fa6ea3721fa
--- /dev/null
+++ b/queue-4.6/sparc64-take-ctx_alloc_lock-properly-in-hugetlb_setup.patch
@@ -0,0 +1,54 @@
+From foo@baz Mon Jun 20 10:48:40 PDT 2016
+From: "David S. Miller"
+Date: Wed, 25 May 2016 12:51:20 -0700
+Subject: sparc64: Take ctx_alloc_lock properly in hugetlb_setup().
+
+From: "David S. Miller"
+
+[ Upstream commit 9ea46abe22550e3366ff7cee2f8391b35b12f730 ]
+
+On cheetahplus chips we take the ctx_alloc_lock in order to
+modify the TLB lookup parameters for the indexed TLBs, which
+are stored in the context register.
+
+This is called with interrupts disabled, however ctx_alloc_lock
+is an IRQ safe lock, therefore we must take acquire/release it
+properly with spin_{lock,unlock}_irq().
+
+Reported-by: Meelis Roos
+Tested-by: Meelis Roos
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/mm/init_64.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -2824,9 +2824,10 @@ void hugetlb_setup(struct pt_regs *regs)
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
++ bool need_context_reload = false;
+ unsigned long ctx;
+
+- spin_lock(&ctx_alloc_lock);
++ spin_lock_irq(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+@@ -2845,9 +2846,12 @@ void hugetlb_setup(struct pt_regs *regs)
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+- on_each_cpu(context_reload, mm, 0);
++ need_context_reload = true;
+ }
+- spin_unlock(&ctx_alloc_lock);
++ spin_unlock_irq(&ctx_alloc_lock);
++
++ if (need_context_reload)
++ on_each_cpu(context_reload, mm, 0);
+ }
+ }
+ #endif
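
The alignment rules the hardening patch enforces reduce to a few lines of C. The following is a minimal userspace sketch of those checks, assuming a 4 GB address limit for compat tasks; the names mirror the kernel's invalid_frame_pointer, but nothing below is kernel code or part of the patches above.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical userspace illustration of the two checks the patch
	 * installs; illustrative only, not the kernel implementation.
	 */
	static bool invalid_frame_pointer(const void *fp, unsigned int fplen)
	{
		/* Signal return frames are always built 16-byte aligned, so a
		 * sigreturn frame pointer that is not 16-byte aligned is
		 * garbage from the user.
		 */
		if (((uintptr_t) fp) & 15)
			return true;
		/* For compat (32-bit) tasks the frame must also fit below 4 GB. */
		if ((uint64_t) (uintptr_t) fp > 0x100000000ULL - fplen)
			return true;
		return false;
	}

	static bool invalid_saved_sp(uint64_t ufp)
	{
		/* All stack pointers must be at least 8-byte aligned. */
		return (ufp & 0x7) != 0;
	}

	int main(void)
	{
		printf("%d\n", invalid_frame_pointer((void *) 0xf0000400UL, 512)); /* 0: aligned, in range */
		printf("%d\n", invalid_frame_pointer((void *) 0xf0000404UL, 512)); /* 1: not 16-byte aligned */
		printf("%d\n", invalid_saved_sp(0xf0000402));                      /* 1: not 8-byte aligned */
		return 0;
	}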