From: Greg Kroah-Hartman
Date: Sat, 19 Nov 2016 08:53:22 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.4.34~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=69951f1f0d3350c5a0252e92f690aeb20d5a528b;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
      sparc-don-t-leak-context-bits-into-thread-fault_address.patch
      sparc-handle-negative-offsets-in-arch_jump_label_transform.patch
      sparc-serial-sunhv-fix-a-double-lock-bug.patch
      sparc64-convert-copy_in_user-to-accurate-exception-reporting.patch
      sparc64-convert-gencopy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-convert-ng2copy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-convert-ng4copy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-convert-ngcopy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-convert-u1copy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-convert-u3copy_-from-to-_user-to-accurate-exception-reporting.patch
      sparc64-delete-__ret_efault.patch
      sparc64-delete-now-unused-user-copy-assembler-helpers.patch
      sparc64-delete-now-unused-user-copy-fixup-functions.patch
      sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-code.patch
      sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-cross-call-code.patch
      sparc64-fix-instruction-count-in-comment-for-__hypervisor_flush_tlb_pending.patch
      sparc64-handle-extremely-large-kernel-tlb-range-flushes-more-gracefully.patch
      sparc64-handle-extremely-large-kernel-tsb-range-flushes-sanely.patch
      sparc64-mm-fix-base-tsb-sizing-when-hugetlb-pages-are-used.patch
      sparc64-prepare-to-move-to-more-saner-user-copy-exception-handling.patch
---

diff --git a/queue-4.4/series b/queue-4.4/series
index 3b9e550195d..af9108da7e5 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -15,3 +15,23 @@ net-__skb_flow_dissect-must-cap-its-return-value.patch
 ipv4-use-new_gw-for-redirect-neigh-lookup.patch
 tcp-take-care-of-truncations-done-by-sk_filter.patch
 tty-prevent-ldisc-drivers-from-re-using-stale-tty-fields.patch
+sparc-don-t-leak-context-bits-into-thread-fault_address.patch
+sparc-serial-sunhv-fix-a-double-lock-bug.patch
+sparc64-mm-fix-base-tsb-sizing-when-hugetlb-pages-are-used.patch
+sparc-handle-negative-offsets-in-arch_jump_label_transform.patch
+sparc64-handle-extremely-large-kernel-tsb-range-flushes-sanely.patch
+sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-code.patch
+sparc64-fix-instruction-count-in-comment-for-__hypervisor_flush_tlb_pending.patch
+sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-cross-call-code.patch
+sparc64-handle-extremely-large-kernel-tlb-range-flushes-more-gracefully.patch
+sparc64-delete-__ret_efault.patch
+sparc64-prepare-to-move-to-more-saner-user-copy-exception-handling.patch
+sparc64-convert-copy_in_user-to-accurate-exception-reporting.patch
+sparc64-convert-gencopy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-convert-u1copy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-convert-ng4copy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-convert-ngcopy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-convert-ng2copy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-convert-u3copy_-from-to-_user-to-accurate-exception-reporting.patch
+sparc64-delete-now-unused-user-copy-assembler-helpers.patch
+sparc64-delete-now-unused-user-copy-fixup-functions.patch
diff --git a/queue-4.4/sparc-don-t-leak-context-bits-into-thread-fault_address.patch
b/queue-4.4/sparc-don-t-leak-context-bits-into-thread-fault_address.patch new file mode 100644 index 00000000000..1ba3ec54bd3 --- /dev/null +++ b/queue-4.4/sparc-don-t-leak-context-bits-into-thread-fault_address.patch @@ -0,0 +1,116 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Wed, 27 Jul 2016 17:50:26 -0700 +Subject: sparc: Don't leak context bits into thread->fault_address + +From: "David S. Miller" + + +[ Upstream commit 4f6deb8cbab532a8d7250bc09234c1795ecb5e2c ] + +On pre-Niagara systems, we fetch the fault address on data TLB +exceptions from the TLB_TAG_ACCESS register. But this register also +contains the context ID assosciated with the fault in the low 13 bits +of the register value. + +This propagates into current_thread_info()->fault_address and can +cause trouble later on. + +So clear the low 13-bits out of the TLB_TAG_ACCESS value in the cases +where it matters. + +Reported-by: Mikulas Patocka +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/dtlb_prot.S | 4 ++-- + arch/sparc/kernel/ktlb.S | 12 ++++++++++++ + arch/sparc/kernel/tsb.S | 12 ++++++++++-- + 3 files changed, 24 insertions(+), 4 deletions(-) + +--- a/arch/sparc/kernel/dtlb_prot.S ++++ b/arch/sparc/kernel/dtlb_prot.S +@@ -25,13 +25,13 @@ + + /* PROT ** ICACHE line 2: More real fault processing */ + ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 ++ srlx %g5, PAGE_SHIFT, %g5 ++ sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits + bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault + nop + nop +- nop +- nop + + /* PROT ** ICACHE line 3: Unused... */ + nop +--- a/arch/sparc/kernel/ktlb.S ++++ b/arch/sparc/kernel/ktlb.S +@@ -20,6 +20,10 @@ kvmap_itlb: + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + ++ /* The kernel executes in context zero, therefore we do not ++ * need to clear the context ID bits out of %g4 here. ++ */ ++ + /* sun4v_itlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +@@ -128,6 +132,10 @@ kvmap_dtlb: + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + ++ /* The kernel executes in context zero, therefore we do not ++ * need to clear the context ID bits out of %g4 here. ++ */ ++ + /* sun4v_dtlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +@@ -251,6 +259,10 @@ kvmap_dtlb_longpath: + nop + .previous + ++ /* The kernel executes in context zero, therefore we do not ++ * need to clear the context ID bits out of %g5 here. ++ */ ++ + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline +--- a/arch/sparc/kernel/tsb.S ++++ b/arch/sparc/kernel/tsb.S +@@ -29,13 +29,17 @@ + */ + tsb_miss_dtlb: + mov TLB_TAG_ACCESS, %g4 ++ ldxa [%g4] ASI_DMMU, %g4 ++ srlx %g4, PAGE_SHIFT, %g4 + ba,pt %xcc, tsb_miss_page_table_walk +- ldxa [%g4] ASI_DMMU, %g4 ++ sllx %g4, PAGE_SHIFT, %g4 + + tsb_miss_itlb: + mov TLB_TAG_ACCESS, %g4 ++ ldxa [%g4] ASI_IMMU, %g4 ++ srlx %g4, PAGE_SHIFT, %g4 + ba,pt %xcc, tsb_miss_page_table_walk +- ldxa [%g4] ASI_IMMU, %g4 ++ sllx %g4, PAGE_SHIFT, %g4 + + /* At this point we have: + * %g1 -- PAGE_SIZE TSB entry address +@@ -284,6 +288,10 @@ tsb_do_dtlb_fault: + nop + .previous + ++ /* Clear context ID bits. 
*/ ++ srlx %g5, PAGE_SHIFT, %g5 ++ sllx %g5, PAGE_SHIFT, %g5 ++ + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline diff --git a/queue-4.4/sparc-handle-negative-offsets-in-arch_jump_label_transform.patch b/queue-4.4/sparc-handle-negative-offsets-in-arch_jump_label_transform.patch new file mode 100644 index 00000000000..db85f5504c2 --- /dev/null +++ b/queue-4.4/sparc-handle-negative-offsets-in-arch_jump_label_transform.patch @@ -0,0 +1,59 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: James Clarke +Date: Mon, 24 Oct 2016 19:49:25 +0100 +Subject: sparc: Handle negative offsets in arch_jump_label_transform + +From: James Clarke + + +[ Upstream commit 9d9fa230206a3aea6ef451646c97122f04777983 ] + +Additionally, if the offset will overflow the immediate for a ba,pt +instruction, fall back on a standard ba to get an extra 3 bits. + +Signed-off-by: James Clarke +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/jump_label.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +--- a/arch/sparc/kernel/jump_label.c ++++ b/arch/sparc/kernel/jump_label.c +@@ -13,19 +13,30 @@ + void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) + { +- u32 val; + u32 *insn = (u32 *) (unsigned long) entry->code; ++ u32 val; + + if (type == JUMP_LABEL_JMP) { + s32 off = (s32)entry->target - (s32)entry->code; ++ bool use_v9_branch = false; ++ ++ BUG_ON(off & 3); + + #ifdef CONFIG_SPARC64 +- /* ba,pt %xcc, . + (off << 2) */ +- val = 0x10680000 | ((u32) off >> 2); +-#else +- /* ba . + (off << 2) */ +- val = 0x10800000 | ((u32) off >> 2); ++ if (off <= 0xfffff && off >= -0x100000) ++ use_v9_branch = true; + #endif ++ if (use_v9_branch) { ++ /* WDISP19 - target is . + immed << 2 */ ++ /* ba,pt %xcc, . + off */ ++ val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); ++ } else { ++ /* WDISP22 - target is . + immed << 2 */ ++ BUG_ON(off > 0x7fffff); ++ BUG_ON(off < -0x800000); ++ /* ba . + off */ ++ val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); ++ } + } else { + val = 0x01000000; + } diff --git a/queue-4.4/sparc-serial-sunhv-fix-a-double-lock-bug.patch b/queue-4.4/sparc-serial-sunhv-fix-a-double-lock-bug.patch new file mode 100644 index 00000000000..954edbabb1a --- /dev/null +++ b/queue-4.4/sparc-serial-sunhv-fix-a-double-lock-bug.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: Dan Carpenter +Date: Fri, 15 Jul 2016 14:17:33 +0300 +Subject: sparc: serial: sunhv: fix a double lock bug + +From: Dan Carpenter + + +[ Upstream commit 344e3c7734d5090b148c19ac6539b8947fed6767 ] + +We accidentally take the "port->lock" twice in a row. This old code +was supposed to be deleted. + +Fixes: e58e241c1788 ('sparc: serial: Clean up the locking for -rt') +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/sunhv.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/drivers/tty/serial/sunhv.c ++++ b/drivers/tty/serial/sunhv.c +@@ -490,12 +490,6 @@ static void sunhv_console_write_bychar(s + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); +- if (port->sysrq) { +- locked = 0; +- } else if (oops_in_progress) { +- locked = spin_trylock(&port->lock); +- } else +- spin_lock(&port->lock); + + for (i = 0; i < n; i++) { + if (*s == '\n') diff --git a/queue-4.4/sparc64-convert-copy_in_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-copy_in_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..4c6c459152f --- /dev/null +++ b/queue-4.4/sparc64-convert-copy_in_user-to-accurate-exception-reporting.patch @@ -0,0 +1,101 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 15 Aug 2016 15:08:18 -0700 +Subject: sparc64: Convert copy_in_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit 0096ac9f47b1a2e851b3165d44065d18e5f13d58 ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/copy_in_user.S | 35 +++++++++++++++++++++++++---------- + 1 file changed, 25 insertions(+), 10 deletions(-) + +--- a/arch/sparc/lib/copy_in_user.S ++++ b/arch/sparc/lib/copy_in_user.S +@@ -8,18 +8,33 @@ + + #define XCC xcc + +-#define EX(x,y) \ ++#define EX(x,y,z) \ + 98: x,y; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, z; \ + .text; \ + .align 4; + ++#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) ++#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) ++#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) ++ + .register %g2,#scratch + .register %g3,#scratch + + .text ++__retl_o4_plus_8: ++ add %o4, %o2, %o4 ++ retl ++ add %o4, 8, %o0 ++__retl_o2_plus_4: ++ retl ++ add %o2, 4, %o0 ++__retl_o2_plus_1: ++ retl ++ add %o2, 1, %o0 ++ + .align 32 + + /* Don't try to get too fancy here, just nice and +@@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=s + andn %o2, 0x7, %o4 + and %o2, 0x7, %o2 + 1: subcc %o4, 0x8, %o4 +- EX(ldxa [%o1] %asi, %o5) +- EX(stxa %o5, [%o0] %asi) ++ EX_O4(ldxa [%o1] %asi, %o5) ++ EX_O4(stxa %o5, [%o0] %asi) + add %o1, 0x8, %o1 + bgu,pt %XCC, 1b + add %o0, 0x8, %o0 +@@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=s + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 +- EX(lduwa [%o1] %asi, %o5) +- EX(stwa %o5, [%o0] %asi) ++ EX_O2_4(lduwa [%o1] %asi, %o5) ++ EX_O2_4(stwa %o5, [%o0] %asi) + add %o1, 0x4, %o1 + add %o0, 0x4, %o0 + 1: cmp %o2, 0 +@@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=s + + 82: + subcc %o2, 4, %o2 +- EX(lduwa [%o1] %asi, %g1) +- EX(stwa %g1, [%o0] %asi) ++ EX_O2_4(lduwa [%o1] %asi, %g1) ++ EX_O2_4(stwa %g1, [%o0] %asi) + add %o1, 4, %o1 + bgu,pt %XCC, 82b + add %o0, 4, %o0 +@@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=s + .align 32 + 90: + subcc %o2, 1, %o2 +- EX(lduba [%o1] %asi, %g1) +- EX(stba %g1, [%o0] %asi) ++ EX_O2_1(lduba [%o1] %asi, %g1) ++ EX_O2_1(stba %g1, [%o0] %asi) + add %o1, 1, %o1 + bgu,pt %XCC, 90b + add %o0, 1, %o0 diff --git a/queue-4.4/sparc64-convert-gencopy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-gencopy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 
100644 index 00000000000..29867a26166 --- /dev/null +++ b/queue-4.4/sparc64-convert-gencopy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,154 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 15 Aug 2016 15:26:38 -0700 +Subject: sparc64: Convert GENcopy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit d0796b555ba60c22eb41ae39a8362156cb08eee9 ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/GENcopy_from_user.S | 4 +-- + arch/sparc/lib/GENcopy_to_user.S | 4 +-- + arch/sparc/lib/GENmemcpy.S | 48 ++++++++++++++++++++++++++----------- + 3 files changed, 38 insertions(+), 18 deletions(-) + +--- a/arch/sparc/lib/GENcopy_from_user.S ++++ b/arch/sparc/lib/GENcopy_from_user.S +@@ -3,11 +3,11 @@ + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/GENcopy_to_user.S ++++ b/arch/sparc/lib/GENcopy_to_user.S +@@ -3,11 +3,11 @@ + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/GENmemcpy.S ++++ b/arch/sparc/lib/GENmemcpy.S +@@ -4,21 +4,18 @@ + */ + + #ifdef __KERNEL__ ++#include + #define GLOBAL_SPARE %g7 + #else + #define GLOBAL_SPARE %g5 + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x +-#endif +- +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x ++#define EX_ST(x,y) x + #endif + + #ifndef LOAD +@@ -45,6 +42,29 @@ + .register %g3,#scratch + + .text ++ ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++ENTRY(GEN_retl_o4_1) ++ add %o4, %o2, %o4 ++ retl ++ add %o4, 1, %o0 ++ENDPROC(GEN_retl_o4_1) ++ENTRY(GEN_retl_g1_8) ++ add %g1, %o2, %g1 ++ retl ++ add %g1, 8, %o0 ++ENDPROC(GEN_retl_g1_8) ++ENTRY(GEN_retl_o2_4) ++ retl ++ add %o2, 4, %o0 ++ENDPROC(GEN_retl_o2_4) ++ENTRY(GEN_retl_o2_1) ++ retl ++ add %o2, 1, %o0 ++ENDPROC(GEN_retl_o2_1) ++#endif ++ + .align 64 + + .globl FUNC_NAME +@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %g0, %o4, %o4 + sub %o2, %o4, %o2 + 1: subcc %o4, 1, %o4 +- EX_LD(LOAD(ldub, %o1, %g1)) +- EX_ST(STORE(stb, %g1, %o0)) ++ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) ++ EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 +@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andn %o2, 0x7, %g1 + sub %o2, %g1, %o2 + 1: subcc %g1, 0x8, %g1 +- EX_LD(LOAD(ldx, %o1, %g2)) +- EX_ST(STORE(stx, %g2, %o0)) ++ EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) ++ EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) + add %o1, 0x8, %o1 + bne,pt %XCC, 1b + add %o0, 0x8, %o0 +@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 1: + subcc %o2, 4, %o2 +- EX_LD(LOAD(lduw, %o1, %g1)) +- EX_ST(STORE(stw, %g1, %o1 + %o3)) ++ EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) ++ EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + .align 32 + 90: + subcc %o2, 1, %o2 +- EX_LD(LOAD(ldub, %o1, %g1)) +- 
EX_ST(STORE(stb, %g1, %o1 + %o3)) ++ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) ++ EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl diff --git a/queue-4.4/sparc64-convert-ng2copy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-ng2copy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..a773bbd7525 --- /dev/null +++ b/queue-4.4/sparc64-convert-ng2copy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,477 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 20:46:44 -0700 +Subject: sparc64: Convert NG2copy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit e93704e4464fdc191f73fce35129c18de2ebf95d ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/NG2copy_from_user.S | 8 - + arch/sparc/lib/NG2copy_to_user.S | 8 - + arch/sparc/lib/NG2memcpy.S | 228 +++++++++++++++++++++++-------------- + 3 files changed, 153 insertions(+), 91 deletions(-) + +--- a/arch/sparc/lib/NG2copy_from_user.S ++++ b/arch/sparc/lib/NG2copy_from_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_LD_FP(x) \ ++#define EX_LD_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG2copy_to_user.S ++++ b/arch/sparc/lib/NG2copy_to_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_ST_FP(x) \ ++#define EX_ST_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG2memcpy.S ++++ b/arch/sparc/lib/NG2memcpy.S +@@ -4,6 +4,7 @@ + */ + + #ifdef __KERNEL__ ++#include + #include + #include + #define GLOBAL_SPARE %g7 +@@ -32,21 +33,17 @@ + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + #ifndef EX_LD_FP +-#define EX_LD_FP(x) x ++#define EX_LD_FP(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x ++#define EX_ST(x,y) x + #endif + #ifndef EX_ST_FP +-#define EX_ST_FP(x) x +-#endif +- +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x ++#define EX_ST_FP(x,y) x + #endif + + #ifndef LOAD +@@ -140,45 +137,110 @@ + fsrc2 %x6, %f12; \ + fsrc2 %x7, %f14; + #define FREG_LOAD_1(base, x0) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) + #define FREG_LOAD_2(base, x0, x1) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); + #define FREG_LOAD_3(base, x0, x1, x2) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ +- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); + #define FREG_LOAD_4(base, x0, x1, x2, x3) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ +- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ +- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); + #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ +- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ +- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ +- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); + #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ +- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ +- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ +- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ +- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), 
NG2_retl_o2_plus_g1); + #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ +- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ +- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ +- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ +- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ +- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ +- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ +- EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); ++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ ++ EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); + + .register %g2,#scratch + .register %g3,#scratch + + .text ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++__restore_fp: ++ VISExitHalf ++__restore_asi: ++ retl ++ wr %g0, ASI_AIUS, %asi ++ENTRY(NG2_retl_o2) ++ ba,pt %xcc, __restore_asi ++ mov %o2, %o0 ++ENDPROC(NG2_retl_o2) ++ENTRY(NG2_retl_o2_plus_1) ++ ba,pt %xcc, __restore_asi ++ add %o2, 1, %o0 ++ENDPROC(NG2_retl_o2_plus_1) ++ENTRY(NG2_retl_o2_plus_4) ++ ba,pt %xcc, __restore_asi ++ add %o2, 4, %o0 ++ENDPROC(NG2_retl_o2_plus_4) ++ENTRY(NG2_retl_o2_plus_8) ++ ba,pt %xcc, __restore_asi ++ add %o2, 8, %o0 ++ENDPROC(NG2_retl_o2_plus_8) ++ENTRY(NG2_retl_o2_plus_o4_plus_1) ++ add %o4, 1, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG2_retl_o2_plus_o4_plus_1) ++ENTRY(NG2_retl_o2_plus_o4_plus_8) ++ add %o4, 8, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG2_retl_o2_plus_o4_plus_8) ++ENTRY(NG2_retl_o2_plus_o4_plus_16) ++ add %o4, 16, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG2_retl_o2_plus_o4_plus_16) ++ENTRY(NG2_retl_o2_plus_g1_fp) ++ ba,pt %xcc, __restore_fp ++ add %o2, %g1, %o0 ++ENDPROC(NG2_retl_o2_plus_g1_fp) ++ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) ++ add %g1, 64, %g1 ++ ba,pt %xcc, __restore_fp ++ add %o2, %g1, %o0 ++ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) ++ENTRY(NG2_retl_o2_plus_g1_plus_1) ++ add %g1, 1, %g1 ++ ba,pt %xcc, __restore_asi ++ add %o2, %g1, %o0 ++ENDPROC(NG2_retl_o2_plus_g1_plus_1) ++ENTRY(NG2_retl_o2_and_7_plus_o4) ++ and %o2, 7, %o2 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG2_retl_o2_and_7_plus_o4) ++ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) ++ and %o2, 7, %o2 ++ add %o4, 8, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) ++#endif ++ + .align 64 + + .globl FUNC_NAME +@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %g0, %o4, %o4 ! 
bytes to align dst + sub %o2, %o4, %o2 + 1: subcc %o4, 1, %o4 +- EX_LD(LOAD(ldub, %o1, %g1)) +- EX_ST(STORE(stb, %g1, %o0)) ++ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) ++ EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 +@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + nop + /* fall through for 0 < low bits < 8 */ + 110: sub %o4, 64, %g2 +- EX_LD_FP(LOAD_BLK(%g2, %f0)) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++ EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 120: sub %o4, 56, %g2 + FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 130: sub %o4, 48, %g2 + FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_6(f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 140: sub %o4, 40, %g2 + FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_5(f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 150: sub %o4, 32, %g2 + FREG_LOAD_4(%g2, f0, f2, f4, f6) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_4(f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 160: sub %o4, 24, %g2 + FREG_LOAD_3(%g2, f0, f2, f4) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) +- 
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_3(f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 170: sub %o4, 16, %g2 + FREG_LOAD_2(%g2, f0, f2) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_2(f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 180: sub %o4, 8, %g2 + FREG_LOAD_1(%g2, f0) +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +- EX_LD_FP(LOAD_BLK(%o4, %f16)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) ++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) + FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) + FREG_MOVE_1(f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 +@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + nop + + 190: +-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) ++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + subcc %g1, 64, %g1 +- EX_LD_FP(LOAD_BLK(%o4, %f0)) +- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) ++ EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) ++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) +@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 + 1: subcc %o4, 0x10, %o4 +- EX_LD(LOAD(ldx, %o1, %o5)) ++ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) + add %o1, 0x08, %o1 +- EX_LD(LOAD(ldx, %o1, %g1)) ++ EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) + sub %o1, 0x08, %o1 +- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) ++ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) + add %o1, 0x8, %o1 +- EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) ++ EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + 73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 +- EX_LD(LOAD(ldx, %o1, %o5)) +- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) ++ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) ++ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) + add %o1, 0x8, %o1 + 1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 +- EX_LD(LOAD(lduw, %o1, %o5)) +- EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) ++ EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) ++ EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) + add %o1, 0x4, %o1 + 1: cmp %o2, 0 + be,pt %XCC, 85f +@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %o2, %g1, %o2 + + 1: subcc %g1, 1, %g1 +- EX_LD(LOAD(ldub, %o1, %o5)) +- EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) ++ EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) ++ EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 8: mov 64, GLOBAL_SPARE + andn %o1, 0x7, %o1 +- EX_LD(LOAD(ldx, %o1, %g2)) ++ EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) + sub GLOBAL_SPARE, %g1, GLOBAL_SPARE + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 + 
1: add %o1, 0x8, %o1 +- EX_LD(LOAD(ldx, %o1, %g3)) ++ EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) + subcc %o4, 0x8, %o4 + srlx %g3, GLOBAL_SPARE, %o5 + or %o5, %g2, %o5 +- EX_ST(STORE(stx, %o5, %o0)) ++ EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 +@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 1: + subcc %o2, 4, %o2 +- EX_LD(LOAD(lduw, %o1, %g1)) +- EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) ++ EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) ++ EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + .align 32 + 90: + subcc %o2, 1, %o2 +- EX_LD(LOAD(ldub, %o1, %g1)) +- EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) ++ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) ++ EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl diff --git a/queue-4.4/sparc64-convert-ng4copy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-ng4copy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..b7958d31828 --- /dev/null +++ b/queue-4.4/sparc64-convert-ng4copy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,534 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 18:58:05 -0700 +Subject: sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit 95707704800988093a9b9a27e0f2f67f5b4bf2fa ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/NG4copy_from_user.S | 8 - + arch/sparc/lib/NG4copy_to_user.S | 8 - + arch/sparc/lib/NG4memcpy.S | 294 ++++++++++++++++++++++++++++--------- + 3 files changed, 231 insertions(+), 79 deletions(-) + +--- a/arch/sparc/lib/NG4copy_from_user.S ++++ b/arch/sparc/lib/NG4copy_from_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x, y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_LD_FP(x) \ ++#define EX_LD_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG4copy_to_user.S ++++ b/arch/sparc/lib/NG4copy_to_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_ST_FP(x) \ ++#define EX_ST_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_asi_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG4memcpy.S ++++ b/arch/sparc/lib/NG4memcpy.S +@@ -4,6 +4,7 @@ + */ + + #ifdef __KERNEL__ ++#include + #include + #include + #define GLOBAL_SPARE %g7 +@@ -46,22 +47,19 @@ + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + #ifndef EX_LD_FP +-#define EX_LD_FP(x) x ++#define EX_LD_FP(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x ++#define EX_ST(x,y) x + #endif + #ifndef EX_ST_FP +-#define EX_ST_FP(x) x ++#define EX_ST_FP(x,y) x + #endif + +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x +-#endif + + #ifndef LOAD + #define LOAD(type,addr,dest) type [addr], dest +@@ -94,6 +92,158 @@ + .register %g3,#scratch + + .text ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++__restore_asi_fp: ++ VISExitHalf ++__restore_asi: ++ retl ++ wr %g0, ASI_AIUS, %asi ++ ++ENTRY(NG4_retl_o2) ++ ba,pt %xcc, __restore_asi ++ mov %o2, %o0 ++ENDPROC(NG4_retl_o2) ++ENTRY(NG4_retl_o2_plus_1) ++ ba,pt %xcc, __restore_asi ++ add %o2, 1, %o0 ++ENDPROC(NG4_retl_o2_plus_1) ++ENTRY(NG4_retl_o2_plus_4) ++ ba,pt %xcc, __restore_asi ++ add %o2, 4, %o0 ++ENDPROC(NG4_retl_o2_plus_4) ++ENTRY(NG4_retl_o2_plus_o5) ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5) ++ENTRY(NG4_retl_o2_plus_o5_plus_4) ++ add %o5, 4, %o5 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5_plus_4) ++ENTRY(NG4_retl_o2_plus_o5_plus_8) ++ add %o5, 8, %o5 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5_plus_8) ++ENTRY(NG4_retl_o2_plus_o5_plus_16) ++ add %o5, 16, %o5 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5_plus_16) ++ENTRY(NG4_retl_o2_plus_o5_plus_24) ++ add %o5, 24, %o5 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5_plus_24) ++ENTRY(NG4_retl_o2_plus_o5_plus_32) ++ add %o5, 32, %o5 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o5, %o0 ++ENDPROC(NG4_retl_o2_plus_o5_plus_32) ++ENTRY(NG4_retl_o2_plus_g1) ++ ba,pt %xcc, __restore_asi ++ add %o2, %g1, %o0 ++ENDPROC(NG4_retl_o2_plus_g1) ++ENTRY(NG4_retl_o2_plus_g1_plus_1) ++ add %g1, 1, %g1 ++ ba,pt %xcc, __restore_asi ++ add %o2, %g1, %o0 ++ENDPROC(NG4_retl_o2_plus_g1_plus_1) ++ENTRY(NG4_retl_o2_plus_g1_plus_8) ++ add %g1, 8, %g1 ++ ba,pt %xcc, __restore_asi ++ add %o2, %g1, %o0 ++ENDPROC(NG4_retl_o2_plus_g1_plus_8) ++ENTRY(NG4_retl_o2_plus_o4) ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4) ++ENTRY(NG4_retl_o2_plus_o4_plus_8) ++ add %o4, 8, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_8) ++ENTRY(NG4_retl_o2_plus_o4_plus_16) ++ add %o4, 16, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_16) ++ENTRY(NG4_retl_o2_plus_o4_plus_24) ++ add %o4, 24, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_24) ++ENTRY(NG4_retl_o2_plus_o4_plus_32) ++ add %o4, 32, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_32) ++ENTRY(NG4_retl_o2_plus_o4_plus_40) ++ add %o4, 40, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 
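Roughly speaking, each of these per-call-site fixup stubs hands back the exact number of bytes that were not copied: %o2 carries the bytes that never entered the current copy loop, a loop counter such as %o4 carries what is still left in that loop, and the trailing constant in the stub name covers the part of the current iteration whose stores had not yet completed when the access faulted. A minimal C model of that arithmetic, with invented variable names standing in for the registers (they are not kernel symbols), might look like:

    #include <stddef.h>

    /* Sketch of what a stub such as NG4_retl_o2_plus_o4_plus_40 returns in
     * %o0: the tail outside the unrolled loop, plus what is left of the
     * loop, plus the bytes of the faulting iteration never stored.
     */
    static size_t ng4_remaining_sketch(size_t tail_bytes,      /* %o2 */
                                       size_t loop_bytes_left, /* %o4 */
                                       size_t iter_not_stored) /* +40, +48, ... */
    {
            return tail_bytes + loop_bytes_left + iter_not_stored;
    }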
++ENDPROC(NG4_retl_o2_plus_o4_plus_40) ++ENTRY(NG4_retl_o2_plus_o4_plus_48) ++ add %o4, 48, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_48) ++ENTRY(NG4_retl_o2_plus_o4_plus_56) ++ add %o4, 56, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_56) ++ENTRY(NG4_retl_o2_plus_o4_plus_64) ++ add %o4, 64, %o4 ++ ba,pt %xcc, __restore_asi ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_64) ++ENTRY(NG4_retl_o2_plus_o4_fp) ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) ++ add %o4, 8, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) ++ add %o4, 16, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) ++ add %o4, 24, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) ++ add %o4, 32, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) ++ add %o4, 40, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) ++ add %o4, 48, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) ++ add %o4, 56, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) ++ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) ++ add %o4, 64, %o4 ++ ba,pt %xcc, __restore_asi_fp ++ add %o2, %o4, %o0 ++ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) ++#endif + .align 64 + + .globl FUNC_NAME +@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + brz,pt %g1, 51f + sub %o2, %g1, %o2 + +-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) ++ ++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b +- EX_ST(STORE(stb, %g2, %o0 - 0x01)) ++ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) + + 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) + LOAD(prefetch, %o1 + 0x080, #n_reads_strong) +@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + brz,pt %g1, .Llarge_aligned + sub %o2, %g1, %o2 + +-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) ++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) + add %o1, 8, %o1 + subcc %g1, 8, %g1 + add %o0, 8, %o0 + bne,pt %icc, 1b +- EX_ST(STORE(stx, %g2, %o0 - 0x08)) ++ EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) + + .Llarge_aligned: + /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + +-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) ++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) + add %o1, 0x40, %o1 +- EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) ++ EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) + subcc %o4, 0x40, %o4 +- EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) +- EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) +- EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) +- EX_ST(STORE_INIT(%g1, %o0)) ++ EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) ++ EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) + add %o0, 0x08, 
%o0 +- EX_ST(STORE_INIT(%g2, %o0)) ++ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) + add %o0, 0x08, %o0 +- EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) +- EX_ST(STORE_INIT(%g3, %o0)) ++ EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) ++ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) + add %o0, 0x08, %o0 +- EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) +- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) ++ EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) ++ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) + add %o0, 0x08, %o0 +- EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) +- EX_ST(STORE_INIT(%o5, %o0)) ++ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) ++ EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) + add %o0, 0x08, %o0 +- EX_ST(STORE_INIT(%g2, %o0)) ++ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) + add %o0, 0x08, %o0 +- EX_ST(STORE_INIT(%g3, %o0)) ++ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) + add %o0, 0x08, %o0 +- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) ++ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) +@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %o2, %o4, %o2 + alignaddr %o1, %g0, %g1 + add %o1, %o4, %o1 +- EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) +-1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) ++1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) + subcc %o4, 0x40, %o4 +- EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) +- EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) +- EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) +- EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) +- EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) +- EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) + faligndata %f0, %f2, %f16 +- EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) ++ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) + faligndata %f2, %f4, %f18 + add %g1, 0x40, %g1 + faligndata %f4, %f6, %f20 +@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 +- EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) +- EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) +- EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) +- EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) +- EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) +- EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) +- EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) +- EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) ++ EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) ++ EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) ++ EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) ++ EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) ++ EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) ++ EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) ++ EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) ++ EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) + add %o0, 0x40, %o0 + bne,pt %icc, 1b + 
LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andncc %o2, 0x20 - 1, %o5 + be,pn %icc, 2f + sub %o2, %o5, %o2 +-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +- EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) +- EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) +- EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) ++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) ++ EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) ++ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) ++ EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) + add %o1, 0x20, %o1 + subcc %o5, 0x20, %o5 +- EX_ST(STORE(stx, %g1, %o0 + 0x00)) +- EX_ST(STORE(stx, %g2, %o0 + 0x08)) +- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) +- EX_ST(STORE(stx, %o4, %o0 + 0x18)) ++ EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) ++ EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) ++ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) ++ EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) + bne,pt %icc, 1b + add %o0, 0x20, %o0 + 2: andcc %o2, 0x18, %o5 + be,pt %icc, 3f + sub %o2, %o5, %o2 +-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) ++ ++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + add %o1, 0x08, %o1 + add %o0, 0x08, %o0 + subcc %o5, 0x08, %o5 + bne,pt %icc, 1b +- EX_ST(STORE(stx, %g1, %o0 - 0x08)) ++ EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) + 3: brz,pt %o2, .Lexit + cmp %o2, 0x04 + bl,pn %icc, .Ltiny + nop +- EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) ++ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) + add %o1, 0x04, %o1 + add %o0, 0x04, %o0 + subcc %o2, 0x04, %o2 + bne,pn %icc, .Ltiny +- EX_ST(STORE(stw, %g1, %o0 - 0x04)) ++ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) + ba,a,pt %icc, .Lexit + .Lmedium_unaligned: + /* First get dest 8 byte aligned. 
*/ +@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + brz,pt %g1, 2f + sub %o2, %g1, %o2 + +-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) ++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b +- EX_ST(STORE(stb, %g2, %o0 - 0x01)) ++ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) + 2: + and %o1, 0x7, %g1 + brz,pn %g1, .Lmedium_noprefetch +@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + mov 64, %g2 + sub %g2, %g1, %g2 + andn %o1, 0x7, %o1 +- EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) ++ EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) + sllx %o4, %g1, %o4 + andn %o2, 0x08 - 1, %o5 + sub %o2, %o5, %o2 +-1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) ++1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) + add %o1, 0x08, %o1 + subcc %o5, 0x08, %o5 + srlx %g3, %g2, GLOBAL_SPARE + or GLOBAL_SPARE, %o4, GLOBAL_SPARE +- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) ++ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + sllx %g3, %g1, %o4 +@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + ba,pt %icc, .Lsmall_unaligned + + .Ltiny: +- EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) ++ EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit +- EX_ST(STORE(stb, %g1, %o0 + 0x00)) +- EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) ++ EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) ++ EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit +- EX_ST(STORE(stb, %g1, %o0 + 0x01)) +- EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) ++ EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) ++ EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) + ba,pt %icc, .Lexit +- EX_ST(STORE(stb, %g1, %o0 + 0x02)) ++ EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) + + .Lsmall: + andcc %g2, 0x3, %g0 +@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andn %o2, 0x4 - 1, %o5 + sub %o2, %o5, %o2 + 1: +- EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) ++ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + add %o1, 0x04, %o1 + subcc %o5, 0x04, %o5 + add %o0, 0x04, %o0 + bne,pt %icc, 1b +- EX_ST(STORE(stw, %g1, %o0 - 0x04)) ++ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) + brz,pt %o2, .Lexit + nop + ba,a,pt %icc, .Ltiny + + .Lsmall_unaligned: +-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) ++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) + add %o1, 1, %o1 + add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %icc, 1b +- EX_ST(STORE(stb, %g1, %o0 - 0x01)) ++ EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) + ba,a,pt %icc, .Lexit + .size FUNC_NAME, .-FUNC_NAME diff --git a/queue-4.4/sparc64-convert-ngcopy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-ngcopy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..6df42d6ad18 --- /dev/null +++ b/queue-4.4/sparc64-convert-ngcopy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,444 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 19:32:12 -0700 +Subject: sparc64: Convert NGcopy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit 7ae3aaf53f1695877ccd5ebbc49ea65991e41f1e ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/NGcopy_from_user.S | 4 + arch/sparc/lib/NGcopy_to_user.S | 4 + arch/sparc/lib/NGmemcpy.S | 233 +++++++++++++++++++++++++------------- + 3 files changed, 162 insertions(+), 79 deletions(-) + +--- a/arch/sparc/lib/NGcopy_from_user.S ++++ b/arch/sparc/lib/NGcopy_from_user.S +@@ -3,11 +3,11 @@ + * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __ret_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NGcopy_to_user.S ++++ b/arch/sparc/lib/NGcopy_to_user.S +@@ -3,11 +3,11 @@ + * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __ret_mone_asi;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NGmemcpy.S ++++ b/arch/sparc/lib/NGmemcpy.S +@@ -4,6 +4,7 @@ + */ + + #ifdef __KERNEL__ ++#include + #include + #include + #define GLOBAL_SPARE %g7 +@@ -27,15 +28,11 @@ + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x +-#endif +- +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x ++#define EX_ST(x,y) x + #endif + + #ifndef LOAD +@@ -79,6 +76,92 @@ + .register %g3,#scratch + + .text ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++__restore_asi: ++ ret ++ wr %g0, ASI_AIUS, %asi ++ restore ++ENTRY(NG_ret_i2_plus_i4_plus_1) ++ ba,pt %xcc, __restore_asi ++ add %i2, %i5, %i0 ++ENDPROC(NG_ret_i2_plus_i4_plus_1) ++ENTRY(NG_ret_i2_plus_g1) ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1) ++ENTRY(NG_ret_i2_plus_g1_minus_8) ++ sub %g1, 8, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_8) ++ENTRY(NG_ret_i2_plus_g1_minus_16) ++ sub %g1, 16, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_16) ++ENTRY(NG_ret_i2_plus_g1_minus_24) ++ sub %g1, 24, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_24) ++ENTRY(NG_ret_i2_plus_g1_minus_32) ++ sub %g1, 32, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_32) ++ENTRY(NG_ret_i2_plus_g1_minus_40) ++ sub %g1, 40, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_40) ++ENTRY(NG_ret_i2_plus_g1_minus_48) ++ sub %g1, 48, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_48) ++ENTRY(NG_ret_i2_plus_g1_minus_56) ++ sub %g1, 56, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_minus_56) ++ENTRY(NG_ret_i2_plus_i4) ++ ba,pt %xcc, __restore_asi ++ add %i2, %i4, %i0 ++ENDPROC(NG_ret_i2_plus_i4) ++ENTRY(NG_ret_i2_plus_i4_minus_8) ++ sub %i4, 8, %i4 ++ ba,pt %xcc, __restore_asi ++ add %i2, %i4, %i0 ++ENDPROC(NG_ret_i2_plus_i4_minus_8) ++ENTRY(NG_ret_i2_plus_8) ++ ba,pt %xcc, __restore_asi ++ add %i2, 8, %i0 ++ENDPROC(NG_ret_i2_plus_8) ++ENTRY(NG_ret_i2_plus_4) ++ ba,pt %xcc, __restore_asi ++ add %i2, 4, %i0 ++ENDPROC(NG_ret_i2_plus_4) ++ENTRY(NG_ret_i2_plus_1) ++ ba,pt %xcc, __restore_asi ++ add %i2, 1, %i0 ++ENDPROC(NG_ret_i2_plus_1) ++ENTRY(NG_ret_i2_plus_g1_plus_1) ++ add %g1, 1, %g1 ++ ba,pt %xcc, __restore_asi ++ add %i2, %g1, %i0 ++ENDPROC(NG_ret_i2_plus_g1_plus_1) ++ENTRY(NG_ret_i2) ++ ba,pt %xcc, __restore_asi ++ mov %i2, %i0 ++ENDPROC(NG_ret_i2) 
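The point of returning this exact residue shows up in the generic copy_from_user() path: the caller can zero precisely the part of the kernel destination buffer that was never written, instead of guessing at it. A self-contained C sketch of that contract follows; the function and parameter names are illustrative only, not the kernel's:

    #include <stddef.h>
    #include <string.h>

    /* Stand-in for the assembler copy routine: returns the number of bytes
     * it did NOT copy (0 on full success).  A fault after 'copied_ok' bytes
     * is simulated so the caller's use of the value can be shown.
     */
    static size_t raw_user_copy_sketch(void *dst, const void *src,
                                       size_t len, size_t copied_ok)
    {
            size_t n = len < copied_ok ? len : copied_ok;

            memcpy(dst, src, n);
            return len - n;         /* exact remainder, as the fixup stubs report */
    }

    static size_t copy_from_user_sketch(void *dst, const void *src,
                                        size_t len, size_t copied_ok)
    {
            size_t left = raw_user_copy_sketch(dst, src, len, copied_ok);

            if (left)               /* zero only the tail that was never written */
                    memset((char *)dst + (len - left), 0, left);
            return left;
    }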
++ENTRY(NG_ret_i2_and_7_plus_i4) ++ and %i2, 7, %i2 ++ ba,pt %xcc, __restore_asi ++ add %i2, %i4, %i0 ++ENDPROC(NG_ret_i2_and_7_plus_i4) ++#endif ++ + .align 64 + + .globl FUNC_NAME +@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + sub %g0, %i4, %i4 ! bytes to align dst + sub %i2, %i4, %i2 + 1: subcc %i4, 1, %i4 +- EX_LD(LOAD(ldub, %i1, %g1)) +- EX_ST(STORE(stb, %g1, %o0)) ++ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) ++ EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) + add %i1, 1, %i1 + bne,pt %XCC, 1b + add %o0, 1, %o0 +@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + and %i4, 0x7, GLOBAL_SPARE + sll GLOBAL_SPARE, 3, GLOBAL_SPARE + mov 64, %i5 +- EX_LD(LOAD_TWIN(%i1, %g2, %g3)) ++ EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) + sub %i5, GLOBAL_SPARE, %i5 + mov 16, %o4 + mov 32, %o5 +@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + srlx WORD3, PRE_SHIFT, TMP; \ + or WORD2, TMP, WORD2; + +-8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) ++8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) + MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) + LOAD(prefetch, %i1 + %i3, #one_read) + +- EX_ST(STORE_INIT(%g2, %o0 + 0x00)) +- EX_ST(STORE_INIT(%g3, %o0 + 0x08)) ++ EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) ++ EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + +- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) ++ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) + MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%o2, %o0 + 0x10)) +- EX_ST(STORE_INIT(%o3, %o0 + 0x18)) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + +- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) ++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%g2, %o0 + 0x20)) +- EX_ST(STORE_INIT(%g3, %o0 + 0x28)) ++ EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) ++ EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + +- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) ++ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) + add %i1, 64, %i1 + MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%o2, %o0 + 0x30)) +- EX_ST(STORE_INIT(%o3, %o0 + 0x38)) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) + + subcc %g1, 64, %g1 + bne,pt %XCC, 8b +@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + ba,pt %XCC, 60f + add %i1, %i4, %i1 + +-9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) ++9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) + MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) + LOAD(prefetch, %i1 + %i3, #one_read) + +- EX_ST(STORE_INIT(%g3, %o0 + 0x00)) +- EX_ST(STORE_INIT(%o2, %o0 + 0x08)) ++ EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + +- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) ++ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) + MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%o3, %o0 + 0x10)) +- EX_ST(STORE_INIT(%g2, %o0 + 0x18)) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + +- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) ++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), 
NG_ret_i2_plus_g1_minus_32) + MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%g3, %o0 + 0x20)) +- EX_ST(STORE_INIT(%o2, %o0 + 0x28)) ++ EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + +- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) ++ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) + add %i1, 64, %i1 + MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) + +- EX_ST(STORE_INIT(%o3, %o0 + 0x30)) +- EX_ST(STORE_INIT(%g2, %o0 + 0x38)) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) ++ EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) + + subcc %g1, 64, %g1 + bne,pt %XCC, 9b +@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + * one twin load ahead, then add 8 back into source when + * we finish the loop. + */ +- EX_LD(LOAD_TWIN(%i1, %o4, %o5)) ++ EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) + mov 16, %o7 + mov 32, %g2 + mov 48, %g3 + mov 64, %o1 +-1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) ++1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) + LOAD(prefetch, %i1 + %o1, #one_read) +- EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line +- EX_ST(STORE_INIT(%o2, %o0 + 0x08)) +- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) +- EX_ST(STORE_INIT(%o3, %o0 + 0x10)) +- EX_ST(STORE_INIT(%o4, %o0 + 0x18)) +- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) +- EX_ST(STORE_INIT(%o5, %o0 + 0x20)) +- EX_ST(STORE_INIT(%o2, %o0 + 0x28)) +- EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) ++ EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line ++ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) ++ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) ++ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) ++ EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) ++ EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) + add %i1, 64, %i1 +- EX_ST(STORE_INIT(%o3, %o0 + 0x30)) +- EX_ST(STORE_INIT(%o4, %o0 + 0x38)) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) ++ EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 +@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + mov 32, %g2 + mov 48, %g3 + mov 64, %o1 +-1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) +- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) ++1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) ++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) + LOAD(prefetch, %i1 + %o1, #one_read) +- EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line +- EX_ST(STORE_INIT(%o5, %o0 + 0x08)) +- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) +- EX_ST(STORE_INIT(%o2, %o0 + 0x10)) +- EX_ST(STORE_INIT(%o3, %o0 + 0x18)) +- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) ++ EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! 
initializes cache line ++ EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) ++ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) ++ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + add %i1, 64, %i1 +- EX_ST(STORE_INIT(%o4, %o0 + 0x20)) +- EX_ST(STORE_INIT(%o5, %o0 + 0x28)) +- EX_ST(STORE_INIT(%o2, %o0 + 0x30)) +- EX_ST(STORE_INIT(%o3, %o0 + 0x38)) ++ EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) ++ EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) ++ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) ++ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 +@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + andn %i2, 0xf, %i4 + and %i2, 0xf, %i2 + 1: subcc %i4, 0x10, %i4 +- EX_LD(LOAD(ldx, %i1, %o4)) ++ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) + add %i1, 0x08, %i1 +- EX_LD(LOAD(ldx, %i1, %g1)) ++ EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) + sub %i1, 0x08, %i1 +- EX_ST(STORE(stx, %o4, %i1 + %i3)) ++ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) + add %i1, 0x8, %i1 +- EX_ST(STORE(stx, %g1, %i1 + %i3)) ++ EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) + bgu,pt %XCC, 1b + add %i1, 0x8, %i1 + 73: andcc %i2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %i2, 0x8, %i2 +- EX_LD(LOAD(ldx, %i1, %o4)) +- EX_ST(STORE(stx, %o4, %i1 + %i3)) ++ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) ++ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) + add %i1, 0x8, %i1 + 1: andcc %i2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %i2, 0x4, %i2 +- EX_LD(LOAD(lduw, %i1, %i5)) +- EX_ST(STORE(stw, %i5, %i1 + %i3)) ++ EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) ++ EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) + add %i1, 0x4, %i1 + 1: cmp %i2, 0 + be,pt %XCC, 85f +@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + sub %i2, %g1, %i2 + + 1: subcc %g1, 1, %g1 +- EX_LD(LOAD(ldub, %i1, %i5)) +- EX_ST(STORE(stb, %i5, %i1 + %i3)) ++ EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) ++ EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) + bgu,pt %icc, 1b + add %i1, 1, %i1 + +@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + + 8: mov 64, %i3 + andn %i1, 0x7, %i1 +- EX_LD(LOAD(ldx, %i1, %g2)) ++ EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) + sub %i3, %g1, %i3 + andn %i2, 0x7, %i4 + sllx %g2, %g1, %g2 + 1: add %i1, 0x8, %i1 +- EX_LD(LOAD(ldx, %i1, %g3)) ++ EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) + subcc %i4, 0x8, %i4 + srlx %g3, %i3, %i5 + or %i5, %g2, %i5 +- EX_ST(STORE(stx, %i5, %o0)) ++ EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 +@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + + 1: + subcc %i2, 4, %i2 +- EX_LD(LOAD(lduw, %i1, %g1)) +- EX_ST(STORE(stw, %g1, %i1 + %i3)) ++ EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) ++ EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) + bgu,pt %XCC, 1b + add %i1, 4, %i1 + +@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len + .align 32 + 90: + subcc %i2, 1, %i2 +- EX_LD(LOAD(ldub, %i1, %g1)) +- EX_ST(STORE(stb, %g1, %i1 + %i3)) ++ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) ++ EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) + bgu,pt %XCC, 90b + add %i1, 1, %i1 + ret diff --git 
a/queue-4.4/sparc64-convert-u1copy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-u1copy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..fd0308e424d --- /dev/null +++ b/queue-4.4/sparc64-convert-u1copy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,655 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 15 Aug 2016 16:07:50 -0700 +Subject: sparc64: Convert U1copy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit cb736fdbb208eb3420f1a2eb2bfc024a6e9dcada ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/U1copy_from_user.S | 8 + arch/sparc/lib/U1copy_to_user.S | 8 + arch/sparc/lib/U1memcpy.S | 343 +++++++++++++++++++++++++------------- + 3 files changed, 236 insertions(+), 123 deletions(-) + +--- a/arch/sparc/lib/U1copy_from_user.S ++++ b/arch/sparc/lib/U1copy_from_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_LD_FP(x) \ ++#define EX_LD_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_fp;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U1copy_to_user.S ++++ b/arch/sparc/lib/U1copy_to_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_ST_FP(x) \ ++#define EX_ST_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_fp;\ ++ .word 98b, y; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U1memcpy.S ++++ b/arch/sparc/lib/U1memcpy.S +@@ -5,6 +5,7 @@ + */ + + #ifdef __KERNEL__ ++#include + #include + #include + #define GLOBAL_SPARE g7 +@@ -23,21 +24,17 @@ + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + #ifndef EX_LD_FP +-#define EX_LD_FP(x) x ++#define EX_LD_FP(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x ++#define EX_ST(x,y) x + #endif + #ifndef EX_ST_FP +-#define EX_ST_FP(x) x +-#endif +- +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x ++#define EX_ST_FP(x,y) x + #endif + + #ifndef LOAD +@@ -78,53 +75,169 @@ + faligndata %f7, %f8, %f60; \ + faligndata %f8, %f9, %f62; + +-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ +- EX_LD_FP(LOAD_BLK(%src, %fdest)); \ +- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ +- add %src, 0x40, %src; \ +- subcc %len, 0x40, %len; \ +- be,pn %xcc, jmptgt; \ +- add %dest, 0x40, %dest; \ +- +-#define LOOP_CHUNK1(src, dest, len, branch_dest) \ +- MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) +-#define LOOP_CHUNK2(src, dest, len, branch_dest) \ +- MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) +-#define LOOP_CHUNK3(src, dest, len, branch_dest) \ +- MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) ++#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ ++ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ ++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ ++ add %src, 0x40, %src; \ ++ subcc 
%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ ++ be,pn %xcc, jmptgt; \ ++ add %dest, 0x40, %dest; \ ++ ++#define LOOP_CHUNK1(src, dest, branch_dest) \ ++ MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) ++#define LOOP_CHUNK2(src, dest, branch_dest) \ ++ MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) ++#define LOOP_CHUNK3(src, dest, branch_dest) \ ++ MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) + + #define DO_SYNC membar #Sync; + #define STORE_SYNC(dest, fsrc) \ +- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ ++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %dest, 0x40, %dest; \ + DO_SYNC + + #define STORE_JUMP(dest, fsrc, target) \ +- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ ++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ + add %dest, 0x40, %dest; \ + ba,pt %xcc, target; \ + nop; + +-#define FINISH_VISCHUNK(dest, f0, f1, left) \ +- subcc %left, 8, %left;\ +- bl,pn %xcc, 95f; \ +- faligndata %f0, %f1, %f48; \ +- EX_ST_FP(STORE(std, %f48, %dest)); \ ++#define FINISH_VISCHUNK(dest, f0, f1) \ ++ subcc %g3, 8, %g3; \ ++ bl,pn %xcc, 95f; \ ++ faligndata %f0, %f1, %f48; \ ++ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ + add %dest, 8, %dest; + +-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +- subcc %left, 8, %left; \ +- bl,pn %xcc, 95f; \ ++#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ++ subcc %g3, 8, %g3; \ ++ bl,pn %xcc, 95f; \ + fsrc2 %f0, %f1; + +-#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ +- UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ ++#define UNEVEN_VISCHUNK(dest, f0, f1) \ ++ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + ba,a,pt %xcc, 93f; + + .register %g2,#scratch + .register %g3,#scratch + + .text ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++ENTRY(U1_g1_1_fp) ++ VISExitHalf ++ add %g1, 1, %g1 ++ add %g1, %g2, %g1 ++ retl ++ add %g1, %o2, %o0 ++ENDPROC(U1_g1_1_fp) ++ENTRY(U1_g2_0_fp) ++ VISExitHalf ++ retl ++ add %g2, %o2, %o0 ++ENDPROC(U1_g2_0_fp) ++ENTRY(U1_g2_8_fp) ++ VISExitHalf ++ add %g2, 8, %g2 ++ retl ++ add %g2, %o2, %o0 ++ENDPROC(U1_g2_8_fp) ++ENTRY(U1_gs_0_fp) ++ VISExitHalf ++ add %GLOBAL_SPARE, %g3, %o0 ++ retl ++ add %o0, %o2, %o0 ++ENDPROC(U1_gs_0_fp) ++ENTRY(U1_gs_80_fp) ++ VISExitHalf ++ add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE ++ add %GLOBAL_SPARE, %g3, %o0 ++ retl ++ add %o0, %o2, %o0 ++ENDPROC(U1_gs_80_fp) ++ENTRY(U1_gs_40_fp) ++ VISExitHalf ++ add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE ++ add %GLOBAL_SPARE, %g3, %o0 ++ retl ++ add %o0, %o2, %o0 ++ENDPROC(U1_gs_40_fp) ++ENTRY(U1_g3_0_fp) ++ VISExitHalf ++ retl ++ add %g3, %o2, %o0 ++ENDPROC(U1_g3_0_fp) ++ENTRY(U1_g3_8_fp) ++ VISExitHalf ++ add %g3, 8, %g3 ++ retl ++ add %g3, %o2, %o0 ++ENDPROC(U1_g3_8_fp) ++ENTRY(U1_o2_0_fp) ++ VISExitHalf ++ retl ++ mov %o2, %o0 ++ENDPROC(U1_o2_0_fp) ++ENTRY(U1_o2_1_fp) ++ VISExitHalf ++ retl ++ add %o2, 1, %o0 ++ENDPROC(U1_o2_1_fp) ++ENTRY(U1_gs_0) ++ VISExitHalf ++ retl ++ add %GLOBAL_SPARE, %o2, %o0 ++ENDPROC(U1_gs_0) ++ENTRY(U1_gs_8) ++ VISExitHalf ++ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE ++ retl ++ add %GLOBAL_SPARE, 0x8, %o0 ++ENDPROC(U1_gs_8) ++ENTRY(U1_gs_10) ++ VISExitHalf ++ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE ++ retl ++ add %GLOBAL_SPARE, 0x10, %o0 ++ENDPROC(U1_gs_10) ++ENTRY(U1_o2_0) ++ retl ++ mov %o2, %o0 ++ENDPROC(U1_o2_0) ++ENTRY(U1_o2_8) ++ retl ++ add %o2, 8, %o0 ++ENDPROC(U1_o2_8) ++ENTRY(U1_o2_4) ++ retl ++ add %o2, 4, %o0 ++ENDPROC(U1_o2_4) ++ENTRY(U1_o2_1) ++ retl ++ add %o2, 1, %o0 ++ENDPROC(U1_o2_1) ++ENTRY(U1_g1_0) ++ retl ++ add %g1, %o2, %o0 ++ENDPROC(U1_g1_0) ++ENTRY(U1_g1_1) ++ add %g1, 1, %g1 ++ retl ++ add %g1, %o2, %o0 ++ENDPROC(U1_g1_1) 
++ENTRY(U1_gs_0_o2_adj) ++ and %o2, 7, %o2 ++ retl ++ add %GLOBAL_SPARE, %o2, %o0 ++ENDPROC(U1_gs_0_o2_adj) ++ENTRY(U1_gs_8_o2_adj) ++ and %o2, 7, %o2 ++ add %GLOBAL_SPARE, 8, %GLOBAL_SPARE ++ retl ++ add %GLOBAL_SPARE, %o2, %o0 ++ENDPROC(U1_gs_8_o2_adj) ++#endif ++ + .align 64 + + .globl FUNC_NAME +@@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + and %g2, 0x38, %g2 + + 1: subcc %g1, 0x1, %g1 +- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) +- EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) ++ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) ++ EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) + bgu,pt %XCC, 1b + add %o1, 0x1, %o1 + +@@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + be,pt %icc, 3f + alignaddr %o1, %g0, %o1 + +- EX_LD_FP(LOAD(ldd, %o1, %f4)) +-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) ++ EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) ++1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f4, %f6, %f0 +- EX_ST_FP(STORE(std, %f0, %o0)) ++ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) + be,pn %icc, 3f + add %o0, 0x8, %o0 + +- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f6, %f4, %f0 +- EX_ST_FP(STORE(std, %f0, %o0)) ++ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) + bne,pt %icc, 1b + add %o0, 0x8, %o0 + +@@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + add %g1, %GLOBAL_SPARE, %g1 + subcc %o2, %g3, %o2 + +- EX_LD_FP(LOAD_BLK(%o1, %f0)) ++ EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) + add %o1, 0x40, %o1 + add %g1, %g3, %g1 +- EX_LD_FP(LOAD_BLK(%o1, %f16)) ++ EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) + add %o1, 0x40, %o1 + sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE +- EX_LD_FP(LOAD_BLK(%o1, %f32)) ++ EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) + add %o1, 0x40, %o1 + + /* There are 8 instances of the unrolled loop, +@@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + .align 64 + 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f0, %f2, %f48 + 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) +@@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 56f) + + 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f2, %f4, %f48 + 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) +@@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 57f) + + 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f4, %f6, %f48 + 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) +@@ 
-303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 58f) + + 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f6, %f8, %f48 + 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) +@@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 59f) + + 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f8, %f10, %f48 + 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) +@@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 60f) + + 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f10, %f12, %f48 + 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) +@@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 61f) + + 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f12, %f14, %f48 + 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) +@@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + STORE_JUMP(o0, f48, 62f) + + 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) +- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ++ LOOP_CHUNK1(o1, o0, 1f) + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) +- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) ++ LOOP_CHUNK2(o1, o0, 2f) + FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) +- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) ++ LOOP_CHUNK3(o1, o0, 3f) + ba,pt %xcc, 1b+4 + faligndata %f14, %f16, %f48 + 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) +@@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) + STORE_JUMP(o0, f48, 63f) + +-40: FINISH_VISCHUNK(o0, f0, f2, g3) +-41: FINISH_VISCHUNK(o0, f2, f4, g3) +-42: FINISH_VISCHUNK(o0, f4, f6, g3) +-43: FINISH_VISCHUNK(o0, f6, f8, g3) +-44: FINISH_VISCHUNK(o0, f8, f10, g3) +-45: FINISH_VISCHUNK(o0, f10, f12, g3) +-46: FINISH_VISCHUNK(o0, f12, f14, g3) +-47: UNEVEN_VISCHUNK(o0, f14, f0, g3) +-48: FINISH_VISCHUNK(o0, f16, f18, g3) +-49: FINISH_VISCHUNK(o0, f18, f20, g3) +-50: FINISH_VISCHUNK(o0, f20, f22, g3) +-51: FINISH_VISCHUNK(o0, f22, f24, g3) +-52: FINISH_VISCHUNK(o0, f24, f26, g3) +-53: FINISH_VISCHUNK(o0, f26, f28, g3) +-54: FINISH_VISCHUNK(o0, f28, f30, g3) +-55: UNEVEN_VISCHUNK(o0, f30, f0, g3) +-56: FINISH_VISCHUNK(o0, f32, f34, g3) +-57: 
FINISH_VISCHUNK(o0, f34, f36, g3) +-58: FINISH_VISCHUNK(o0, f36, f38, g3) +-59: FINISH_VISCHUNK(o0, f38, f40, g3) +-60: FINISH_VISCHUNK(o0, f40, f42, g3) +-61: FINISH_VISCHUNK(o0, f42, f44, g3) +-62: FINISH_VISCHUNK(o0, f44, f46, g3) +-63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) ++40: FINISH_VISCHUNK(o0, f0, f2) ++41: FINISH_VISCHUNK(o0, f2, f4) ++42: FINISH_VISCHUNK(o0, f4, f6) ++43: FINISH_VISCHUNK(o0, f6, f8) ++44: FINISH_VISCHUNK(o0, f8, f10) ++45: FINISH_VISCHUNK(o0, f10, f12) ++46: FINISH_VISCHUNK(o0, f12, f14) ++47: UNEVEN_VISCHUNK(o0, f14, f0) ++48: FINISH_VISCHUNK(o0, f16, f18) ++49: FINISH_VISCHUNK(o0, f18, f20) ++50: FINISH_VISCHUNK(o0, f20, f22) ++51: FINISH_VISCHUNK(o0, f22, f24) ++52: FINISH_VISCHUNK(o0, f24, f26) ++53: FINISH_VISCHUNK(o0, f26, f28) ++54: FINISH_VISCHUNK(o0, f28, f30) ++55: UNEVEN_VISCHUNK(o0, f30, f0) ++56: FINISH_VISCHUNK(o0, f32, f34) ++57: FINISH_VISCHUNK(o0, f34, f36) ++58: FINISH_VISCHUNK(o0, f36, f38) ++59: FINISH_VISCHUNK(o0, f38, f40) ++60: FINISH_VISCHUNK(o0, f40, f42) ++61: FINISH_VISCHUNK(o0, f42, f44) ++62: FINISH_VISCHUNK(o0, f44, f46) ++63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +-93: EX_LD_FP(LOAD(ldd, %o1, %f2)) ++93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) + add %o1, 8, %o1 + subcc %g3, 8, %g3 + faligndata %f0, %f2, %f8 +- EX_ST_FP(STORE(std, %f8, %o0)) ++ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + bl,pn %xcc, 95f + add %o0, 8, %o0 +- EX_LD_FP(LOAD(ldd, %o1, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) + add %o1, 8, %o1 + subcc %g3, 8, %g3 + faligndata %f2, %f0, %f8 +- EX_ST_FP(STORE(std, %f8, %o0)) ++ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + bge,pt %xcc, 93b + add %o0, 8, %o0 + + 95: brz,pt %o2, 2f + mov %g1, %o1 + +-1: EX_LD_FP(LOAD(ldub, %o1, %o3)) ++1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) + add %o1, 1, %o1 + subcc %o2, 1, %o2 +- EX_ST_FP(STORE(stb, %o3, %o0)) ++ EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) + bne,pt %xcc, 1b + add %o0, 1, %o0 + +@@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 72: andn %o2, 0xf, %GLOBAL_SPARE + and %o2, 0xf, %o2 +-1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) +- EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) ++1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) ++ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) + subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE +- EX_ST(STORE(stx, %o5, %o1 + %o3)) ++ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) + add %o1, 0x8, %o1 +- EX_ST(STORE(stx, %g1, %o1 + %o3)) ++ EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + 73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop +- EX_LD(LOAD(ldx, %o1, %o5)) ++ EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) + sub %o2, 0x8, %o2 +- EX_ST(STORE(stx, %o5, %o1 + %o3)) ++ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) + add %o1, 0x8, %o1 + 1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop +- EX_LD(LOAD(lduw, %o1, %o5)) ++ EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) + sub %o2, 0x4, %o2 +- EX_ST(STORE(stw, %o5, %o1 + %o3)) ++ EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) + add %o1, 0x4, %o1 + 1: cmp %o2, 0 + be,pt %XCC, 85f +@@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +-1: EX_LD(LOAD(ldub, %o1, %o5)) ++1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) + subcc %g1, 1, %g1 +- EX_ST(STORE(stb, %o5, %o1 + %o3)) ++ EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +@@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 8: mov 64, %o3 + andn %o1, 0x7, %o1 +- EX_LD(LOAD(ldx, %o1, %g2)) ++ EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) + sub %o3, %g1, %o3 + andn %o2, 0x7, 
%GLOBAL_SPARE + sllx %g2, %g1, %g2 +-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) ++1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) + subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE + add %o1, 0x8, %o1 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 +- EX_ST(STORE(stx, %o5, %o0)) ++ EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 +@@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +-1: EX_LD(LOAD(lduw, %o1, %g1)) ++1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) + subcc %o2, 4, %o2 +- EX_ST(STORE(stw, %g1, %o1 + %o3)) ++ EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +@@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + mov EX_RETVAL(%o4), %o0 + + .align 32 +-90: EX_LD(LOAD(ldub, %o1, %g1)) ++90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) + subcc %o2, 1, %o2 +- EX_ST(STORE(stb, %g1, %o1 + %o3)) ++ EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl diff --git a/queue-4.4/sparc64-convert-u3copy_-from-to-_user-to-accurate-exception-reporting.patch b/queue-4.4/sparc64-convert-u3copy_-from-to-_user-to-accurate-exception-reporting.patch new file mode 100644 index 00000000000..6aecf172c5e --- /dev/null +++ b/queue-4.4/sparc64-convert-u3copy_-from-to-_user-to-accurate-exception-reporting.patch @@ -0,0 +1,501 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 21:20:35 -0700 +Subject: sparc64: Convert U3copy_{from,to}_user to accurate exception reporting. + +From: "David S. Miller" + + +[ Upstream commit ee841d0aff649164080e445e84885015958d8ff4 ] + +Report the exact number of bytes which have not been successfully +copied when an exception occurs, using the running remaining length. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/U3copy_from_user.S | 8 - + arch/sparc/lib/U3copy_to_user.S | 8 - + arch/sparc/lib/U3memcpy.S | 227 +++++++++++++++++++++++++------------- + 3 files changed, 162 insertions(+), 81 deletions(-) + +--- a/arch/sparc/lib/U3copy_from_user.S ++++ b/arch/sparc/lib/U3copy_from_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +-#define EX_LD(x) \ ++#define EX_LD(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_LD_FP(x) \ ++#define EX_LD_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U3copy_to_user.S ++++ b/arch/sparc/lib/U3copy_to_user.S +@@ -3,19 +3,19 @@ + * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) + */ + +-#define EX_ST(x) \ ++#define EX_ST(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone; \ ++ .word 98b, y; \ + .text; \ + .align 4; + +-#define EX_ST_FP(x) \ ++#define EX_ST_FP(x,y) \ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_mone_fp;\ ++ .word 98b, y##_fp; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U3memcpy.S ++++ b/arch/sparc/lib/U3memcpy.S +@@ -4,6 +4,7 @@ + */ + + #ifdef __KERNEL__ ++#include + #include + #include + #define GLOBAL_SPARE %g7 +@@ -22,21 +23,17 @@ + #endif + + #ifndef EX_LD +-#define EX_LD(x) x ++#define EX_LD(x,y) x + #endif + #ifndef EX_LD_FP +-#define EX_LD_FP(x) x ++#define EX_LD_FP(x,y) x + #endif + + #ifndef EX_ST +-#define EX_ST(x) x ++#define EX_ST(x,y) x + #endif + #ifndef EX_ST_FP +-#define EX_ST_FP(x) x +-#endif +- +-#ifndef EX_RETVAL +-#define EX_RETVAL(x) x ++#define EX_ST_FP(x,y) x + #endif + + #ifndef LOAD +@@ -77,6 +74,87 @@ + */ + + .text ++#ifndef EX_RETVAL ++#define EX_RETVAL(x) x ++__restore_fp: ++ VISExitHalf ++ retl ++ nop ++ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) ++ add %g1, 1, %g1 ++ add %g2, %g1, %g2 ++ ba,pt %xcc, __restore_fp ++ add %o2, %g2, %o0 ++ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) ++ENTRY(U3_retl_o2_plus_g2_fp) ++ ba,pt %xcc, __restore_fp ++ add %o2, %g2, %o0 ++ENDPROC(U3_retl_o2_plus_g2_fp) ++ENTRY(U3_retl_o2_plus_g2_plus_8_fp) ++ add %g2, 8, %g2 ++ ba,pt %xcc, __restore_fp ++ add %o2, %g2, %o0 ++ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) ++ENTRY(U3_retl_o2) ++ retl ++ mov %o2, %o0 ++ENDPROC(U3_retl_o2) ++ENTRY(U3_retl_o2_plus_1) ++ retl ++ add %o2, 1, %o0 ++ENDPROC(U3_retl_o2_plus_1) ++ENTRY(U3_retl_o2_plus_4) ++ retl ++ add %o2, 4, %o0 ++ENDPROC(U3_retl_o2_plus_4) ++ENTRY(U3_retl_o2_plus_8) ++ retl ++ add %o2, 8, %o0 ++ENDPROC(U3_retl_o2_plus_8) ++ENTRY(U3_retl_o2_plus_g1_plus_1) ++ add %g1, 1, %g1 ++ retl ++ add %o2, %g1, %o0 ++ENDPROC(U3_retl_o2_plus_g1_plus_1) ++ENTRY(U3_retl_o2_fp) ++ ba,pt %xcc, __restore_fp ++ mov %o2, %o0 ++ENDPROC(U3_retl_o2_fp) ++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) ++ sll %o3, 6, %o3 ++ add %o3, 0x80, %o3 ++ ba,pt %xcc, __restore_fp ++ add %o2, %o3, %o0 ++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) ++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) ++ sll %o3, 6, %o3 ++ add %o3, 0x40, %o3 ++ ba,pt %xcc, __restore_fp ++ add %o2, %o3, %o0 ++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) ++ENTRY(U3_retl_o2_plus_GS_plus_0x10) ++ add GLOBAL_SPARE, 0x10, GLOBAL_SPARE ++ retl ++ add %o2, GLOBAL_SPARE, %o0 ++ENDPROC(U3_retl_o2_plus_GS_plus_0x10) ++ENTRY(U3_retl_o2_plus_GS_plus_0x08) ++ add GLOBAL_SPARE, 0x08, GLOBAL_SPARE ++ retl ++ add %o2, GLOBAL_SPARE, %o0 ++ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ++ENTRY(U3_retl_o2_and_7_plus_GS) ++ and %o2, 7, %o2 ++ retl ++ add %o2, GLOBAL_SPARE, %o2 ++ENDPROC(U3_retl_o2_and_7_plus_GS) ++ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) ++ add GLOBAL_SPARE, 8, GLOBAL_SPARE ++ and %o2, 7, %o2 ++ retl ++ add %o2, GLOBAL_SPARE, %o2 ++ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) ++#endif ++ + .align 64 + + /* The cheetah's flexible spine, oversized liver, enlarged heart, +@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + and %g2, 0x38, %g2 + + 1: subcc %g1, 0x1, %g1 +- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) +- EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) ++ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) ++ EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) + bgu,pt %XCC, 1b + add %o1, 0x1, %o1 + +@@ -138,20 +216,20 
@@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + be,pt %icc, 3f + alignaddr %o1, %g0, %o1 + +- EX_LD_FP(LOAD(ldd, %o1, %f4)) +-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) ++ EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) ++1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f4, %f6, %f0 +- EX_ST_FP(STORE(std, %f0, %o0)) ++ EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) + be,pn %icc, 3f + add %o0, 0x8, %o0 + +- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f6, %f4, %f2 +- EX_ST_FP(STORE(std, %f2, %o0)) ++ EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) + bne,pt %icc, 1b + add %o0, 0x8, %o0 + +@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + LOAD(prefetch, %o1 + 0x080, #one_read) + LOAD(prefetch, %o1 + 0x0c0, #one_read) + LOAD(prefetch, %o1 + 0x100, #one_read) +- EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) + LOAD(prefetch, %o1 + 0x140, #one_read) +- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) + LOAD(prefetch, %o1 + 0x180, #one_read) +- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) + LOAD(prefetch, %o1 + 0x1c0, #one_read) + faligndata %f0, %f2, %f16 +- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) + faligndata %f2, %f4, %f18 +- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) + faligndata %f4, %f6, %f20 +- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) + faligndata %f6, %f8, %f22 + +- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) + faligndata %f8, %f10, %f24 +- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) + faligndata %f10, %f12, %f26 +- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) + + subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE + add %o1, 0x40, %o1 +@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + .align 64 + 1: +- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) + faligndata %f12, %f14, %f28 +- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) + faligndata %f14, %f0, %f30 +- EX_ST_FP(STORE_BLK(%f16, %o0)) +- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) ++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f0, %f2, %f16 + add %o0, 0x40, %o0 + +- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f2, %f4, %f18 +- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f4, %f6, %f20 +- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) + subcc %o3, 0x01, %o3 + faligndata %f6, %f8, %f22 +- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) + + faligndata %f8, %f10, %f24 +- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), 
U3_retl_o2_plus_o3_sll_6_plus_0x80) + LOAD(prefetch, %o1 + 0x1c0, #one_read) + faligndata %f10, %f12, %f26 + bg,pt %XCC, 1b +@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + /* Finally we copy the last full 64-byte block. */ + 2: +- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) + faligndata %f12, %f14, %f28 +- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) + faligndata %f14, %f0, %f30 +- EX_ST_FP(STORE_BLK(%f16, %o0)) +- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) ++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f0, %f2, %f16 +- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f2, %f4, %f18 +- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f4, %f6, %f20 +- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f6, %f8, %f22 +- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) + faligndata %f8, %f10, %f24 + cmp %g1, 0 + be,pt %XCC, 1f + add %o0, 0x40, %o0 +- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) + 1: faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 +- EX_ST_FP(STORE_BLK(%f16, %o0)) ++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) + add %o0, 0x40, %o0 + add %o1, 0x40, %o1 + membar #Sync +@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + sub %o2, %g2, %o2 + be,a,pt %XCC, 1f +- EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) + +-1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) ++1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f0, %f2, %f8 +- EX_ST_FP(STORE(std, %f8, %o0)) ++ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) + be,pn %XCC, 2f + add %o0, 0x8, %o0 +- EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) ++ EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f2, %f0, %f8 +- EX_ST_FP(STORE(std, %f8, %o0)) ++ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) + bne,pn %XCC, 1b + add %o0, 0x8, %o0 + +@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andcc %o2, 0x8, %g0 + be,pt %icc, 1f + nop +- EX_LD(LOAD(ldx, %o1, %o5)) +- EX_ST(STORE(stx, %o5, %o1 + %o3)) ++ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) ++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) + add %o1, 0x8, %o1 ++ sub %o2, 8, %o2 + + 1: andcc %o2, 0x4, %g0 + be,pt %icc, 1f + nop +- EX_LD(LOAD(lduw, %o1, %o5)) +- EX_ST(STORE(stw, %o5, %o1 + %o3)) ++ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) ++ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) + add %o1, 0x4, %o1 ++ sub %o2, 4, %o2 + + 1: andcc %o2, 0x2, %g0 + be,pt %icc, 1f + nop +- EX_LD(LOAD(lduh, %o1, %o5)) +- EX_ST(STORE(sth, %o5, %o1 + %o3)) ++ EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) ++ EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) + add %o1, 0x2, %o1 ++ sub %o2, 2, %o2 + + 1: andcc %o2, 0x1, %g0 + be,pt %icc, 85f + nop +- EX_LD(LOAD(ldub, %o1, %o5)) ++ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) + 
ba,pt %xcc, 85f +- EX_ST(STORE(stb, %o5, %o1 + %o3)) ++ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) + + .align 64 + 70: /* 16 < len <= 64 */ +@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + andn %o2, 0xf, GLOBAL_SPARE + and %o2, 0xf, %o2 + 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE +- EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) +- EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +- EX_ST(STORE(stx, %o5, %o1 + %o3)) ++ EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) ++ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) ++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) + add %o1, 0x8, %o1 +- EX_ST(STORE(stx, %g1, %o1 + %o3)) ++ EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + 73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 +- EX_LD(LOAD(ldx, %o1, %o5)) +- EX_ST(STORE(stx, %o5, %o1 + %o3)) ++ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) ++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) + add %o1, 0x8, %o1 + 1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 +- EX_LD(LOAD(lduw, %o1, %o5)) +- EX_ST(STORE(stw, %o5, %o1 + %o3)) ++ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) ++ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) + add %o1, 0x4, %o1 + 1: cmp %o2, 0 + be,pt %XCC, 85f +@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + sub %o2, %g1, %o2 + + 1: subcc %g1, 1, %g1 +- EX_LD(LOAD(ldub, %o1, %o5)) +- EX_ST(STORE(stb, %o5, %o1 + %o3)) ++ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) ++ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 8: mov 64, %o3 + andn %o1, 0x7, %o1 +- EX_LD(LOAD(ldx, %o1, %g2)) ++ EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) + sub %o3, %g1, %o3 + andn %o2, 0x7, GLOBAL_SPARE + sllx %g2, %g1, %g2 +-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) ++1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) + subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE + add %o1, 0x8, %o1 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 +- EX_ST(STORE(stx, %o5, %o0)) ++ EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 +@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + + 1: + subcc %o2, 4, %o2 +- EX_LD(LOAD(lduw, %o1, %g1)) +- EX_ST(STORE(stw, %g1, %o1 + %o3)) ++ EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) ++ EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + .align 32 + 90: + subcc %o2, 1, %o2 +- EX_LD(LOAD(ldub, %o1, %g1)) +- EX_ST(STORE(stb, %g1, %o1 + %o3)) ++ EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) ++ EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl diff --git a/queue-4.4/sparc64-delete-__ret_efault.patch b/queue-4.4/sparc64-delete-__ret_efault.patch new file mode 100644 index 00000000000..9ef88f94399 --- /dev/null +++ b/queue-4.4/sparc64-delete-__ret_efault.patch @@ -0,0 +1,110 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Wed, 10 Aug 2016 14:41:33 -0700 +Subject: sparc64: Delete __ret_efault. + +From: "David S. Miller" + + +[ Upstream commit aa95ce361ed95c72ac42dcb315166bce5cf1a014 ] + +It is completely unused. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/uaccess_64.h | 41 ------------------------------------ + arch/sparc/kernel/head_64.S | 7 ------ + arch/sparc/kernel/sparc_ksyms_64.c | 1 + 3 files changed, 1 insertion(+), 48 deletions(-) + +--- a/arch/sparc/include/asm/uaccess_64.h ++++ b/arch/sparc/include/asm/uaccess_64.h +@@ -98,7 +98,6 @@ struct exception_table_entry { + unsigned int insn, fixup; + }; + +-void __ret_efault(void); + void __retl_efault(void); + + /* Uh, these should become the main single-value transfer routines.. +@@ -179,20 +178,6 @@ int __put_user_bad(void); + __gu_ret; \ + }) + +-#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \ +- register unsigned long __gu_val __asm__ ("l1"); \ +- switch (size) { \ +- case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ +- case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ +- case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \ +- case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \ +- default: \ +- if (__get_user_bad()) \ +- return retval; \ +- } \ +- data = (__force type) __gu_val; \ +-}) +- + #define __get_user_asm(x, size, addr, ret) \ + __asm__ __volatile__( \ + "/* Get user asm, inline. */\n" \ +@@ -214,32 +199,6 @@ __asm__ __volatile__( \ + : "=r" (ret), "=r" (x) : "r" (__m(addr)), \ + "i" (-EFAULT)) + +-#define __get_user_asm_ret(x, size, addr, retval) \ +-if (__builtin_constant_p(retval) && retval == -EFAULT) \ +- __asm__ __volatile__( \ +- "/* Get user asm ret, inline. */\n" \ +- "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ +- ".section __ex_table,\"a\"\n\t" \ +- ".align 4\n\t" \ +- ".word 1b,__ret_efault\n\n\t" \ +- ".previous\n\t" \ +- : "=r" (x) : "r" (__m(addr))); \ +-else \ +- __asm__ __volatile__( \ +- "/* Get user asm ret, inline. */\n" \ +- "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ +- ".section .fixup,#alloc,#execinstr\n\t" \ +- ".align 4\n" \ +- "3:\n\t" \ +- "ret\n\t" \ +- " restore %%g0, %2, %%o0\n\n\t" \ +- ".previous\n\t" \ +- ".section __ex_table,\"a\"\n\t" \ +- ".align 4\n\t" \ +- ".word 1b, 3b\n\n\t" \ +- ".previous\n\t" \ +- : "=r" (x) : "r" (__m(addr)), "i" (retval)) +- + int __get_user_bad(void); + + unsigned long __must_check ___copy_from_user(void *to, +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -922,12 +922,7 @@ prom_tba: .xword 0 + tlb_type: .word 0 /* Must NOT end up in BSS */ + .section ".fixup",#alloc,#execinstr + +- .globl __ret_efault, __retl_efault, __ret_one, __retl_one +-ENTRY(__ret_efault) +- ret +- restore %g0, -EFAULT, %o0 +-ENDPROC(__ret_efault) +- ++ .globl __retl_efault, __ret_one, __retl_one + ENTRY(__retl_efault) + retl + mov -EFAULT, %o0 +--- a/arch/sparc/kernel/sparc_ksyms_64.c ++++ b/arch/sparc/kernel/sparc_ksyms_64.c +@@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user); + EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); + + /* from head_64.S */ +-EXPORT_SYMBOL(__ret_efault); + EXPORT_SYMBOL(tlb_type); + EXPORT_SYMBOL(sun4v_chip_type); + EXPORT_SYMBOL(prom_root_node); diff --git a/queue-4.4/sparc64-delete-now-unused-user-copy-assembler-helpers.patch b/queue-4.4/sparc64-delete-now-unused-user-copy-assembler-helpers.patch new file mode 100644 index 00000000000..f9ed763ebc0 --- /dev/null +++ b/queue-4.4/sparc64-delete-now-unused-user-copy-assembler-helpers.patch @@ -0,0 +1,57 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 21:22:27 -0700 +Subject: sparc64: Delete now unused user copy assembler helpers. + +From: "David S. 
Miller" + + +[ Upstream commit 614da3d9685b67917cab48c8452fd8bf93de0867 ] + +All of __ret{,l}_mone{_asi,_fp,_asi_fpu} are now unused. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/head_64.S | 30 ------------------------------ + 1 file changed, 30 deletions(-) + +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -927,36 +927,6 @@ ENTRY(__retl_efault) + mov -EFAULT, %o0 + ENDPROC(__retl_efault) + +-ENTRY(__retl_mone) +- retl +- mov -1, %o0 +-ENDPROC(__retl_mone) +- +-ENTRY(__retl_mone_fp) +- VISExitHalf +- retl +- mov 1, %o0 +-ENDPROC(__retl_mone_fp) +- +-ENTRY(__ret_mone_asi) +- wr %g0, ASI_AIUS, %asi +- ret +- restore %g0, 1, %o0 +-ENDPROC(__ret_mone_asi) +- +-ENTRY(__retl_mone_asi) +- wr %g0, ASI_AIUS, %asi +- retl +- mov 1, %o0 +-ENDPROC(__retl_mone_asi) +- +-ENTRY(__retl_mone_asi_fp) +- wr %g0, ASI_AIUS, %asi +- VISExitHalf +- retl +- mov 1, %o0 +-ENDPROC(__retl_mone_asi_fp) +- + ENTRY(__retl_o1) + retl + mov %o1, %o0 diff --git a/queue-4.4/sparc64-delete-now-unused-user-copy-fixup-functions.patch b/queue-4.4/sparc64-delete-now-unused-user-copy-fixup-functions.patch new file mode 100644 index 00000000000..b6a674f0957 --- /dev/null +++ b/queue-4.4/sparc64-delete-now-unused-user-copy-fixup-functions.patch @@ -0,0 +1,172 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 24 Oct 2016 21:25:31 -0700 +Subject: sparc64: Delete now unused user copy fixup functions. + +From: "David S. Miller" + + +[ Upstream commit 0fd0ff01d4c3c01e7fe69b762ee1a13236639acc ] + +Now that all of the user copy routines are converted to return +accurate residual lengths when an exception occurs, we no longer need +the broken fixup routines. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/uaccess_64.h | 34 +---------------- + arch/sparc/lib/Makefile | 2 - + arch/sparc/lib/user_fixup.c | 71 ------------------------------------ + 3 files changed, 4 insertions(+), 103 deletions(-) + delete mode 100644 arch/sparc/lib/user_fixup.c + +--- a/arch/sparc/include/asm/uaccess_64.h ++++ b/arch/sparc/include/asm/uaccess_64.h +@@ -204,58 +204,30 @@ int __get_user_bad(void); + unsigned long __must_check ___copy_from_user(void *to, + const void __user *from, + unsigned long size); +-unsigned long copy_from_user_fixup(void *to, const void __user *from, +- unsigned long size); + static inline unsigned long __must_check + copy_from_user(void *to, const void __user *from, unsigned long size) + { +- unsigned long ret = ___copy_from_user(to, from, size); +- +- if (unlikely(ret)) { +- if ((long)ret < 0) +- ret = copy_from_user_fixup(to, from, size); +- return ret; +- } +- +- return ret; ++ return ___copy_from_user(to, from, size); + } + #define __copy_from_user copy_from_user + + unsigned long __must_check ___copy_to_user(void __user *to, + const void *from, + unsigned long size); +-unsigned long copy_to_user_fixup(void __user *to, const void *from, +- unsigned long size); + static inline unsigned long __must_check + copy_to_user(void __user *to, const void *from, unsigned long size) + { +- unsigned long ret = ___copy_to_user(to, from, size); +- +- if (unlikely(ret)) { +- if ((long)ret < 0) +- ret = copy_to_user_fixup(to, from, size); +- return ret; +- } +- return ret; ++ return ___copy_to_user(to, from, size); + } + #define __copy_to_user copy_to_user + + unsigned long __must_check ___copy_in_user(void __user *to, + const void __user *from, + unsigned long size); +-unsigned long 
copy_in_user_fixup(void __user *to, void __user *from, +- unsigned long size); + static inline unsigned long __must_check + copy_in_user(void __user *to, void __user *from, unsigned long size) + { +- unsigned long ret = ___copy_in_user(to, from, size); +- +- if (unlikely(ret)) { +- if ((long)ret < 0) +- ret = copy_in_user_fixup(to, from, size); +- return ret; +- } +- return ret; ++ return ___copy_in_user(to, from, size); + } + #define __copy_in_user copy_in_user + +--- a/arch/sparc/lib/Makefile ++++ b/arch/sparc/lib/Makefile +@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4 + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o + lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o + +-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o ++lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o + lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o + + obj-$(CONFIG_SPARC64) += iomap.o +--- a/arch/sparc/lib/user_fixup.c ++++ /dev/null +@@ -1,71 +0,0 @@ +-/* user_fixup.c: Fix up user copy faults. +- * +- * Copyright (C) 2004 David S. Miller +- */ +- +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-/* Calculating the exact fault address when using +- * block loads and stores can be very complicated. +- * +- * Instead of trying to be clever and handling all +- * of the cases, just fix things up simply here. +- */ +- +-static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) +-{ +- unsigned long fault_addr = current_thread_info()->fault_address; +- unsigned long end = start + size; +- +- if (fault_addr < start || fault_addr >= end) { +- *offset = 0; +- } else { +- *offset = fault_addr - start; +- size = end - fault_addr; +- } +- return size; +-} +- +-unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) +-{ +- unsigned long offset; +- +- size = compute_size((unsigned long) from, size, &offset); +- if (likely(size)) +- memset(to + offset, 0, size); +- +- return size; +-} +-EXPORT_SYMBOL(copy_from_user_fixup); +- +-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) +-{ +- unsigned long offset; +- +- return compute_size((unsigned long) to, size, &offset); +-} +-EXPORT_SYMBOL(copy_to_user_fixup); +- +-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) +-{ +- unsigned long fault_addr = current_thread_info()->fault_address; +- unsigned long start = (unsigned long) to; +- unsigned long end = start + size; +- +- if (fault_addr >= start && fault_addr < end) +- return end - fault_addr; +- +- start = (unsigned long) from; +- end = start + size; +- if (fault_addr >= start && fault_addr < end) +- return end - fault_addr; +- +- return size; +-} +-EXPORT_SYMBOL(copy_in_user_fixup); diff --git a/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-code.patch b/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-code.patch new file mode 100644 index 00000000000..77ef4fe44ad --- /dev/null +++ b/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-code.patch @@ -0,0 +1,197 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Tue, 25 Oct 2016 16:23:26 -0700 +Subject: sparc64: Fix illegal relative branches in hypervisor patched TLB code. + +From: "David S. 
Miller" + + +[ Upstream commit b429ae4d5b565a71dfffd759dfcd4f6c093ced94 ] + +When we copy code over to patch another piece of code, we can only use +PC-relative branches that target code within that piece of code. + +Such PC-relative branches cannot be made to external symbols because +the patch moves the location of the code and thus modifies the +relative address of external symbols. + +Use an absolute jmpl to fix this problem. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/ultra.S | 65 +++++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 51 insertions(+), 14 deletions(-) + +--- a/arch/sparc/mm/ultra.S ++++ b/arch/sparc/mm/ultra.S +@@ -30,7 +30,7 @@ + .text + .align 32 + .globl __flush_tlb_mm +-__flush_tlb_mm: /* 18 insns */ ++__flush_tlb_mm: /* 19 insns */ + /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ + ldxa [%o1] ASI_DMMU, %g2 + cmp %g2, %o0 +@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ + + .align 32 + .globl __flush_tlb_pending +-__flush_tlb_pending: /* 26 insns */ ++__flush_tlb_pending: /* 27 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + rdpr %pstate, %g7 + sllx %o1, 3, %o1 +@@ -113,7 +113,7 @@ __flush_tlb_pending: /* 26 insns */ + + .align 32 + .globl __flush_tlb_kernel_range +-__flush_tlb_kernel_range: /* 16 insns */ ++__flush_tlb_kernel_range: /* 19 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f +@@ -131,6 +131,9 @@ __flush_tlb_kernel_range: /* 16 insns */ + retl + nop + nop ++ nop ++ nop ++ nop + + __spitfire_flush_tlb_mm_slow: + rdpr %pstate, %g1 +@@ -309,19 +312,28 @@ __hypervisor_tlb_tl0_error: + ret + restore + +-__hypervisor_flush_tlb_mm: /* 10 insns */ ++__hypervisor_flush_tlb_mm: /* 19 insns */ + mov %o0, %o2 /* ARG2: mmu context */ + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP +- brnz,pn %o0, __hypervisor_tlb_tl0_error ++ brnz,pn %o0, 1f + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop ++1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 ++ jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop + +-__hypervisor_flush_tlb_page: /* 11 insns */ ++__hypervisor_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ +@@ -330,10 +342,21 @@ __hypervisor_flush_tlb_page: /* 11 insns + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP +- brnz,pn %o0, __hypervisor_tlb_tl0_error ++ brnz,pn %o0, 1f + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop ++1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 ++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop + + __hypervisor_flush_tlb_pending: /* 16 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ +@@ -347,14 +370,25 @@ __hypervisor_flush_tlb_pending: /* 16 in + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP +- brnz,pn %o0, __hypervisor_tlb_tl0_error ++ brnz,pn %o0, 1f + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g1, 1b + nop + retl + nop ++1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 ++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop + +-__hypervisor_flush_tlb_kernel_range: /* 16 insns */ ++__hypervisor_flush_tlb_kernel_range: /* 19 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f 
+@@ -366,12 +400,15 @@ __hypervisor_flush_tlb_kernel_range: /* + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP +- brnz,pn %o0, __hypervisor_tlb_tl0_error ++ brnz,pn %o0, 3f + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g2, 1b + sub %g2, %g3, %g2 + 2: retl + nop ++3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 ++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 ++ nop + + #ifdef DCACHE_ALIASING_POSSIBLE + /* XXX Niagara and friends have an 8K cache, so no aliasing is +@@ -819,28 +856,28 @@ hypervisor_patch_cachetlbops: + sethi %hi(__hypervisor_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 + call tlb_patch_one +- mov 10, %o2 ++ mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one +- mov 11, %o2 ++ mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__hypervisor_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 + call tlb_patch_one +- mov 16, %o2 ++ mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 + call tlb_patch_one +- mov 16, %o2 ++ mov 19, %o2 + + #ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 diff --git a/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-cross-call-code.patch b/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-cross-call-code.patch new file mode 100644 index 00000000000..79e4fc710f2 --- /dev/null +++ b/queue-4.4/sparc64-fix-illegal-relative-branches-in-hypervisor-patched-tlb-cross-call-code.patch @@ -0,0 +1,166 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Wed, 26 Oct 2016 10:20:14 -0700 +Subject: sparc64: Fix illegal relative branches in hypervisor patched TLB cross-call code. + +From: "David S. Miller" + + +[ Upstream commit a236441bb69723032db94128761a469030c3fe6d ] + +Just like the non-cross-call TLB flush handlers, the cross-call ones need +to avoid doing PC-relative branches outside of their code blocks. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/ultra.S | 42 ++++++++++++++++++++++++++++++------------ + 1 file changed, 30 insertions(+), 12 deletions(-) + +--- a/arch/sparc/mm/ultra.S ++++ b/arch/sparc/mm/ultra.S +@@ -484,7 +484,7 @@ cheetah_patch_cachetlbops: + */ + .align 32 + .globl xcall_flush_tlb_mm +-xcall_flush_tlb_mm: /* 21 insns */ ++xcall_flush_tlb_mm: /* 24 insns */ + mov PRIMARY_CONTEXT, %g2 + ldxa [%g2] ASI_DMMU, %g3 + srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 +@@ -506,9 +506,12 @@ xcall_flush_tlb_mm: /* 21 insns */ + nop + nop + nop ++ nop ++ nop ++ nop + + .globl xcall_flush_tlb_page +-xcall_flush_tlb_page: /* 17 insns */ ++xcall_flush_tlb_page: /* 20 insns */ + /* %g5=context, %g1=vaddr */ + mov PRIMARY_CONTEXT, %g4 + ldxa [%g4] ASI_DMMU, %g2 +@@ -527,9 +530,12 @@ xcall_flush_tlb_page: /* 17 insns */ + retry + nop + nop ++ nop ++ nop ++ nop + + .globl xcall_flush_tlb_kernel_range +-xcall_flush_tlb_kernel_range: /* 25 insns */ ++xcall_flush_tlb_kernel_range: /* 28 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 +@@ -555,6 +561,9 @@ xcall_flush_tlb_kernel_range: /* 25 insn + nop + nop + nop ++ nop ++ nop ++ nop + + /* This runs in a very controlled environment, so we do + * not need to worry about BH races etc. +@@ -737,7 +746,7 @@ __hypervisor_tlb_xcall_error: + ba,a,pt %xcc, rtrap + + .globl __hypervisor_xcall_flush_tlb_mm +-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ ++__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ + /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ + mov %o0, %g2 + mov %o1, %g3 +@@ -751,7 +760,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 i + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov HV_FAST_MMU_DEMAP_CTX, %g6 +- brnz,pn %o0, __hypervisor_tlb_xcall_error ++ brnz,pn %o0, 1f + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 +@@ -760,9 +769,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 i + mov %g7, %o5 + membar #Sync + retry ++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 ++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 ++ nop + + .globl __hypervisor_xcall_flush_tlb_page +-__hypervisor_xcall_flush_tlb_page: /* 17 insns */ ++__hypervisor_xcall_flush_tlb_page: /* 20 insns */ + /* %g5=ctx, %g1=vaddr */ + mov %o0, %g2 + mov %o1, %g3 +@@ -774,16 +786,19 @@ __hypervisor_xcall_flush_tlb_page: /* 17 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 +- brnz,a,pn %o0, __hypervisor_tlb_xcall_error ++ brnz,a,pn %o0, 1f + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 + membar #Sync + retry ++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 ++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 ++ nop + + .globl __hypervisor_xcall_flush_tlb_kernel_range +-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ ++__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ + /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 +@@ -800,7 +815,7 @@ __hypervisor_xcall_flush_tlb_kernel_rang + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 +- brnz,pn %o0, __hypervisor_tlb_xcall_error ++ brnz,pn %o0, 1f + mov %o0, %g5 + sethi %hi(PAGE_SIZE), %o2 + brnz,pt %g3, 1b +@@ -810,6 +825,9 @@ __hypervisor_xcall_flush_tlb_kernel_rang + mov %g7, %o2 + membar #Sync + retry ++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 ++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 ++ nop + + /* These just get rescheduled to PIL vectors. 
*/ + .globl xcall_call_function +@@ -894,21 +912,21 @@ hypervisor_patch_cachetlbops: + sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 + call tlb_patch_one +- mov 21, %o2 ++ mov 24, %o2 + + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 + call tlb_patch_one +- mov 17, %o2 ++ mov 20, %o2 + + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one +- mov 25, %o2 ++ mov 28, %o2 + #endif /* CONFIG_SMP */ + + ret diff --git a/queue-4.4/sparc64-fix-instruction-count-in-comment-for-__hypervisor_flush_tlb_pending.patch b/queue-4.4/sparc64-fix-instruction-count-in-comment-for-__hypervisor_flush_tlb_pending.patch new file mode 100644 index 00000000000..6f3a594d4bb --- /dev/null +++ b/queue-4.4/sparc64-fix-instruction-count-in-comment-for-__hypervisor_flush_tlb_pending.patch @@ -0,0 +1,29 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Wed, 26 Oct 2016 10:08:22 -0700 +Subject: sparc64: Fix instruction count in comment for __hypervisor_flush_tlb_pending. + +From: "David S. Miller" + + +[ Upstream commit 830cda3f9855ff092b0e9610346d110846fc497c ] + +Noticed by James Clarke. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/ultra.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/sparc/mm/ultra.S ++++ b/arch/sparc/mm/ultra.S +@@ -358,7 +358,7 @@ __hypervisor_flush_tlb_page: /* 22 insns + nop + nop + +-__hypervisor_flush_tlb_pending: /* 16 insns */ ++__hypervisor_flush_tlb_pending: /* 27 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + sllx %o1, 3, %g1 + mov %o2, %g2 diff --git a/queue-4.4/sparc64-handle-extremely-large-kernel-tlb-range-flushes-more-gracefully.patch b/queue-4.4/sparc64-handle-extremely-large-kernel-tlb-range-flushes-more-gracefully.patch new file mode 100644 index 00000000000..07f60cf9163 --- /dev/null +++ b/queue-4.4/sparc64-handle-extremely-large-kernel-tlb-range-flushes-more-gracefully.patch @@ -0,0 +1,462 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Thu, 27 Oct 2016 09:04:54 -0700 +Subject: sparc64: Handle extremely large kernel TLB range flushes more gracefully. + +From: "David S. Miller" + + +[ Upstream commit a74ad5e660a9ee1d071665e7e8ad822784a2dc7f ] + +When the vmalloc area gets fragmented, and because the firmware +mapping area sits between where modules live and the vmalloc area, we +can sometimes receive requests for enormous kernel TLB range flushes. + +When this happens the cpu just spins flushing billions of pages and +this triggers the NMI watchdog and other problems. + +We took care of this on the TSB side by doing a linear scan of the +table once we pass a certain threshold. + +Do something similar for the TLB flush, however we are limited by +the TLB flush facilities provided by the different chip variants. + +First of all we use an (mostly arbitrary) cut-off of 256K which is +about 32 pages. This can be tuned in the future. + +The huge range code path for each chip works as follows: + +1) On spitfire we flush all non-locked TLB entries using diagnostic + acceses. + +2) On cheetah we use the "flush all" TLB flush. 
+ +3) On sun4v/hypervisor we do a TLB context flush on context 0, which + unlike previous chips does not remove "permanent" or locked + entries. + +We could probably do something better on spitfire, such as limiting +the flush to kernel TLB entries or even doing range comparisons. +However that probably isn't worth it since those chips are old and +the TLB only had 64 entries. + +Reported-by: James Clarke +Tested-by: James Clarke +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/ultra.S | 283 ++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 228 insertions(+), 55 deletions(-) + +--- a/arch/sparc/mm/ultra.S ++++ b/arch/sparc/mm/ultra.S +@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 27 insns */ + + .align 32 + .globl __flush_tlb_kernel_range +-__flush_tlb_kernel_range: /* 19 insns */ ++__flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f ++ sub %o1, %o0, %o3 ++ srlx %o3, 18, %o4 ++ brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow + sethi %hi(PAGE_SIZE), %o4 +- sub %o1, %o0, %o3 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! Nucleus + 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP +@@ -134,6 +136,38 @@ __flush_tlb_kernel_range: /* 19 insns */ + nop + nop + nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ ++__spitfire_flush_tlb_kernel_range_slow: ++ mov 63 * 8, %o4 ++1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 ++ andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ ++ bne,pn %xcc, 2f ++ mov TLB_TAG_ACCESS, %o3 ++ stxa %g0, [%o3] ASI_IMMU ++ stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS ++ membar #Sync ++2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 ++ andcc %o3, 0x40, %g0 ++ bne,pn %xcc, 2f ++ mov TLB_TAG_ACCESS, %o3 ++ stxa %g0, [%o3] ASI_DMMU ++ stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS ++ membar #Sync ++2: sub %o4, 8, %o4 ++ brgez,pt %o4, 1b ++ nop ++ retl ++ nop + + __spitfire_flush_tlb_mm_slow: + rdpr %pstate, %g1 +@@ -288,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns + retl + wrpr %g7, 0x0, %pstate + ++__cheetah_flush_tlb_kernel_range: /* 31 insns */ ++ /* %o0=start, %o1=end */ ++ cmp %o0, %o1 ++ be,pn %xcc, 2f ++ sub %o1, %o0, %o3 ++ srlx %o3, 18, %o4 ++ brnz,pn %o4, 3f ++ sethi %hi(PAGE_SIZE), %o4 ++ sub %o3, %o4, %o3 ++ or %o0, 0x20, %o0 ! 
Nucleus ++1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP ++ stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP ++ membar #Sync ++ brnz,pt %o3, 1b ++ sub %o3, %o4, %o3 ++2: sethi %hi(KERNBASE), %o3 ++ flush %o3 ++ retl ++ nop ++3: mov 0x80, %o4 ++ stxa %g0, [%o4] ASI_DMMU_DEMAP ++ membar #Sync ++ stxa %g0, [%o4] ASI_IMMU_DEMAP ++ membar #Sync ++ retl ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ + #ifdef DCACHE_ALIASING_POSSIBLE + __cheetah_flush_dcache_page: /* 11 insns */ + sethi %hi(PAGE_OFFSET), %g1 +@@ -388,13 +456,15 @@ __hypervisor_flush_tlb_pending: /* 27 in + nop + nop + +-__hypervisor_flush_tlb_kernel_range: /* 19 insns */ ++__hypervisor_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f +- sethi %hi(PAGE_SIZE), %g3 +- mov %o0, %g1 +- sub %o1, %g1, %g2 ++ sub %o1, %o0, %g2 ++ srlx %g2, 18, %g3 ++ brnz,pn %g3, 4f ++ mov %o0, %g1 ++ sethi %hi(PAGE_SIZE), %g3 + sub %g2, %g3, %g2 + 1: add %g1, %g2, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ +@@ -409,6 +479,16 @@ __hypervisor_flush_tlb_kernel_range: /* + 3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop ++4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ ++ mov 0, %o1 /* ARG1: CPU lists unimplemented */ ++ mov 0, %o2 /* ARG2: mmu context == nucleus */ ++ mov HV_MMU_ALL, %o3 /* ARG3: flags */ ++ mov HV_FAST_MMU_DEMAP_CTX, %o5 ++ ta HV_FAST_TRAP ++ brnz,pn %o0, 3b ++ mov HV_FAST_MMU_DEMAP_CTX, %o1 ++ retl ++ nop + + #ifdef DCACHE_ALIASING_POSSIBLE + /* XXX Niagara and friends have an 8K cache, so no aliasing is +@@ -431,43 +511,6 @@ tlb_patch_one: + retl + nop + +- .globl cheetah_patch_cachetlbops +-cheetah_patch_cachetlbops: +- save %sp, -128, %sp +- +- sethi %hi(__flush_tlb_mm), %o0 +- or %o0, %lo(__flush_tlb_mm), %o0 +- sethi %hi(__cheetah_flush_tlb_mm), %o1 +- or %o1, %lo(__cheetah_flush_tlb_mm), %o1 +- call tlb_patch_one +- mov 19, %o2 +- +- sethi %hi(__flush_tlb_page), %o0 +- or %o0, %lo(__flush_tlb_page), %o0 +- sethi %hi(__cheetah_flush_tlb_page), %o1 +- or %o1, %lo(__cheetah_flush_tlb_page), %o1 +- call tlb_patch_one +- mov 22, %o2 +- +- sethi %hi(__flush_tlb_pending), %o0 +- or %o0, %lo(__flush_tlb_pending), %o0 +- sethi %hi(__cheetah_flush_tlb_pending), %o1 +- or %o1, %lo(__cheetah_flush_tlb_pending), %o1 +- call tlb_patch_one +- mov 27, %o2 +- +-#ifdef DCACHE_ALIASING_POSSIBLE +- sethi %hi(__flush_dcache_page), %o0 +- or %o0, %lo(__flush_dcache_page), %o0 +- sethi %hi(__cheetah_flush_dcache_page), %o1 +- or %o1, %lo(__cheetah_flush_dcache_page), %o1 +- call tlb_patch_one +- mov 11, %o2 +-#endif /* DCACHE_ALIASING_POSSIBLE */ +- +- ret +- restore +- + #ifdef CONFIG_SMP + /* These are all called by the slaves of a cross call, at + * trap level 1, with interrupts fully disabled. +@@ -535,13 +578,15 @@ xcall_flush_tlb_page: /* 20 insns */ + nop + + .globl xcall_flush_tlb_kernel_range +-xcall_flush_tlb_kernel_range: /* 28 insns */ ++xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 +- add %g2, 1, %g2 ++ srlx %g3, 18, %g2 ++ brnz,pn %g2, 2f ++ add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! 
Nucleus + 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP +@@ -550,11 +595,25 @@ xcall_flush_tlb_kernel_range: /* 28 insn + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +- nop +- nop +- nop +- nop +- nop ++2: mov 63 * 8, %g1 ++1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 ++ andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ ++ bne,pn %xcc, 2f ++ mov TLB_TAG_ACCESS, %g2 ++ stxa %g0, [%g2] ASI_IMMU ++ stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS ++ membar #Sync ++2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 ++ andcc %g2, 0x40, %g0 ++ bne,pn %xcc, 2f ++ mov TLB_TAG_ACCESS, %g2 ++ stxa %g0, [%g2] ASI_DMMU ++ stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS ++ membar #Sync ++2: sub %g1, 8, %g1 ++ brgez,pt %g1, 1b ++ nop ++ retry + nop + nop + nop +@@ -683,6 +742,52 @@ xcall_fetch_glob_pmu_n4: + + retry + ++__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ ++ sethi %hi(PAGE_SIZE - 1), %g2 ++ or %g2, %lo(PAGE_SIZE - 1), %g2 ++ andn %g1, %g2, %g1 ++ andn %g7, %g2, %g7 ++ sub %g7, %g1, %g3 ++ srlx %g3, 18, %g2 ++ brnz,pn %g2, 2f ++ add %g2, 1, %g2 ++ sub %g3, %g2, %g3 ++ or %g1, 0x20, %g1 ! Nucleus ++1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP ++ stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP ++ membar #Sync ++ brnz,pt %g3, 1b ++ sub %g3, %g2, %g3 ++ retry ++2: mov 0x80, %g2 ++ stxa %g0, [%g2] ASI_DMMU_DEMAP ++ membar #Sync ++ stxa %g0, [%g2] ASI_IMMU_DEMAP ++ membar #Sync ++ retry ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ nop ++ + #ifdef DCACHE_ALIASING_POSSIBLE + .align 32 + .globl xcall_flush_dcache_page_cheetah +@@ -798,18 +903,20 @@ __hypervisor_xcall_flush_tlb_page: /* 20 + nop + + .globl __hypervisor_xcall_flush_tlb_kernel_range +-__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ ++__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ + /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 ++ srlx %g3, 18, %g7 + add %g2, 1, %g2 + sub %g3, %g2, %g3 + mov %o0, %g2 + mov %o1, %g4 +- mov %o2, %g7 ++ brnz,pn %g7, 2f ++ mov %o2, %g7 + 1: add %g1, %g3, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ +@@ -820,7 +927,7 @@ __hypervisor_xcall_flush_tlb_kernel_rang + sethi %hi(PAGE_SIZE), %o2 + brnz,pt %g3, 1b + sub %g3, %o2, %g3 +- mov %g2, %o0 ++5: mov %g2, %o0 + mov %g4, %o1 + mov %g7, %o2 + membar #Sync +@@ -828,6 +935,20 @@ __hypervisor_xcall_flush_tlb_kernel_rang + 1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop ++2: mov %o3, %g1 ++ mov %o5, %g3 ++ mov 0, %o0 /* ARG0: CPU lists unimplemented */ ++ mov 0, %o1 /* ARG1: CPU lists unimplemented */ ++ mov 0, %o2 /* ARG2: mmu context == nucleus */ ++ mov HV_MMU_ALL, %o3 /* ARG3: flags */ ++ mov HV_FAST_MMU_DEMAP_CTX, %o5 ++ ta HV_FAST_TRAP ++ mov %g1, %o3 ++ brz,pt %o0, 5b ++ mov %g3, %o5 ++ mov HV_FAST_MMU_DEMAP_CTX, %g6 ++ ba,pt %xcc, 1b ++ clr %g5 + + /* These just get rescheduled to PIL vectors. 
*/ + .globl xcall_call_function +@@ -864,6 +985,58 @@ xcall_kgdb_capture: + + #endif /* CONFIG_SMP */ + ++ .globl cheetah_patch_cachetlbops ++cheetah_patch_cachetlbops: ++ save %sp, -128, %sp ++ ++ sethi %hi(__flush_tlb_mm), %o0 ++ or %o0, %lo(__flush_tlb_mm), %o0 ++ sethi %hi(__cheetah_flush_tlb_mm), %o1 ++ or %o1, %lo(__cheetah_flush_tlb_mm), %o1 ++ call tlb_patch_one ++ mov 19, %o2 ++ ++ sethi %hi(__flush_tlb_page), %o0 ++ or %o0, %lo(__flush_tlb_page), %o0 ++ sethi %hi(__cheetah_flush_tlb_page), %o1 ++ or %o1, %lo(__cheetah_flush_tlb_page), %o1 ++ call tlb_patch_one ++ mov 22, %o2 ++ ++ sethi %hi(__flush_tlb_pending), %o0 ++ or %o0, %lo(__flush_tlb_pending), %o0 ++ sethi %hi(__cheetah_flush_tlb_pending), %o1 ++ or %o1, %lo(__cheetah_flush_tlb_pending), %o1 ++ call tlb_patch_one ++ mov 27, %o2 ++ ++ sethi %hi(__flush_tlb_kernel_range), %o0 ++ or %o0, %lo(__flush_tlb_kernel_range), %o0 ++ sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 ++ or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 ++ call tlb_patch_one ++ mov 31, %o2 ++ ++#ifdef DCACHE_ALIASING_POSSIBLE ++ sethi %hi(__flush_dcache_page), %o0 ++ or %o0, %lo(__flush_dcache_page), %o0 ++ sethi %hi(__cheetah_flush_dcache_page), %o1 ++ or %o1, %lo(__cheetah_flush_dcache_page), %o1 ++ call tlb_patch_one ++ mov 11, %o2 ++#endif /* DCACHE_ALIASING_POSSIBLE */ ++ ++#ifdef CONFIG_SMP ++ sethi %hi(xcall_flush_tlb_kernel_range), %o0 ++ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 ++ sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 ++ or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 ++ call tlb_patch_one ++ mov 44, %o2 ++#endif /* CONFIG_SMP */ ++ ++ ret ++ restore + + .globl hypervisor_patch_cachetlbops + hypervisor_patch_cachetlbops: +@@ -895,7 +1068,7 @@ hypervisor_patch_cachetlbops: + sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 + call tlb_patch_one +- mov 19, %o2 ++ mov 31, %o2 + + #ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 +@@ -926,7 +1099,7 @@ hypervisor_patch_cachetlbops: + sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one +- mov 28, %o2 ++ mov 44, %o2 + #endif /* CONFIG_SMP */ + + ret diff --git a/queue-4.4/sparc64-handle-extremely-large-kernel-tsb-range-flushes-sanely.patch b/queue-4.4/sparc64-handle-extremely-large-kernel-tsb-range-flushes-sanely.patch new file mode 100644 index 00000000000..2582e306ed5 --- /dev/null +++ b/queue-4.4/sparc64-handle-extremely-large-kernel-tsb-range-flushes-sanely.patch @@ -0,0 +1,55 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Tue, 25 Oct 2016 19:43:17 -0700 +Subject: sparc64: Handle extremely large kernel TSB range flushes sanely. + +From: "David S. Miller" + + +[ Upstream commit 849c498766060a16aad5b0e0d03206726e7d2fa4 ] + +If the number of pages we are flushing is more than twice the number +of entries in the TSB, just scan the TSB table for matches rather +than probing each and every page in the range. + +Based upon a patch and report by James Clarke. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/tsb.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/arch/sparc/mm/tsb.c ++++ b/arch/sparc/mm/tsb.c +@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned l + return (tag == (vaddr >> 22)); + } + ++static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) ++{ ++ unsigned long idx; ++ ++ for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { ++ struct tsb *ent = &swapper_tsb[idx]; ++ unsigned long match = idx << 13; ++ ++ match |= (ent->tag << 22); ++ if (match >= start && match < end) ++ ent->tag = (1UL << TSB_TAG_INVALID_BIT); ++ } ++} ++ + /* TSB flushes need only occur on the processor initiating the address + * space modification, not on each cpu the address space has run on. + * Only the TLB flush needs that treatment. +@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned lon + { + unsigned long v; + ++ if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) ++ return flush_tsb_kernel_range_scan(start, end); ++ + for (v = start; v < end; v += PAGE_SIZE) { + unsigned long hash = tsb_hash(v, PAGE_SHIFT, + KERNEL_TSB_NENTRIES); diff --git a/queue-4.4/sparc64-mm-fix-base-tsb-sizing-when-hugetlb-pages-are-used.patch b/queue-4.4/sparc64-mm-fix-base-tsb-sizing-when-hugetlb-pages-are-used.patch new file mode 100644 index 00000000000..143376cc2d7 --- /dev/null +++ b/queue-4.4/sparc64-mm-fix-base-tsb-sizing-when-hugetlb-pages-are-used.patch @@ -0,0 +1,158 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: Mike Kravetz +Date: Fri, 15 Jul 2016 13:08:42 -0700 +Subject: sparc64 mm: Fix base TSB sizing when hugetlb pages are used + +From: Mike Kravetz + + +[ Upstream commit af1b1a9b36b8f9d583d4b4f90dd8946ed0cd4bd0 ] + +do_sparc64_fault() calculates both the base and huge page RSS sizes and +uses this information in calls to tsb_grow(). The calculation for base +page TSB size is not correct if the task uses hugetlb pages. hugetlb +pages are not accounted for in RSS, therefore the call to get_mm_rss(mm) +does not include hugetlb pages. However, the number of pages based on +huge_pte_count (which does include hugetlb pages) is subtracted from +this value. This will result in an artificially small and often negative +RSS calculation. The base TSB size is then often set to max_tsb_size +as the passed RSS is unsigned, so a negative value looks really big. + +THP pages are also accounted for in huge_pte_count, and THP pages are +accounted for in RSS so the calculation in do_sparc64_fault() is correct +if a task only uses THP pages. + +A single huge_pte_count is not sufficient for TSB sizing if both hugetlb +and THP pages can be used. Instead of a single counter, use two: one +for hugetlb and one for THP. + +Signed-off-by: Mike Kravetz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_64.h | 3 ++- + arch/sparc/mm/fault_64.c | 6 +++--- + arch/sparc/mm/hugetlbpage.c | 4 ++-- + arch/sparc/mm/init_64.c | 3 ++- + arch/sparc/mm/tlb.c | 4 ++-- + arch/sparc/mm/tsb.c | 14 ++++++++------ + 6 files changed, 19 insertions(+), 15 deletions(-) + +--- a/arch/sparc/include/asm/mmu_64.h ++++ b/arch/sparc/include/asm/mmu_64.h +@@ -92,7 +92,8 @@ struct tsb_config { + typedef struct { + spinlock_t lock; + unsigned long sparc64_ctx_val; +- unsigned long huge_pte_count; ++ unsigned long hugetlb_pte_count; ++ unsigned long thp_pte_count; + struct tsb_config tsb_block[MM_NUM_TSBS]; + struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; + } mm_context_t; +--- a/arch/sparc/mm/fault_64.c ++++ b/arch/sparc/mm/fault_64.c +@@ -479,14 +479,14 @@ good_area: + up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); +-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) ++ mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); + #endif + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) + tsb_grow(mm, MM_TSB_BASE, mm_rss); + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- mm_rss = mm->context.huge_pte_count; ++ mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) +--- a/arch/sparc/mm/hugetlbpage.c ++++ b/arch/sparc/mm/hugetlbpage.c +@@ -180,7 +180,7 @@ void set_huge_pte_at(struct mm_struct *m + unsigned long nptes; + + if (!pte_present(*ptep) && pte_present(entry)) +- mm->context.huge_pte_count++; ++ mm->context.hugetlb_pte_count++; + + addr &= HPAGE_MASK; + +@@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_ + + entry = *ptep; + if (pte_present(entry)) +- mm->context.huge_pte_count--; ++ mm->context.hugetlb_pte_count--; + + addr &= HPAGE_MASK; + nptes = 1 << HUGETLB_PAGE_ORDER; +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -346,7 +346,8 @@ void update_mmu_cache(struct vm_area_str + spin_lock_irqsave(&mm->context.lock, flags); + + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) ++ if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && ++ is_hugetlb_pte(pte)) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, + address, pte_val(pte)); + else +--- a/arch/sparc/mm/tlb.c ++++ b/arch/sparc/mm/tlb.c +@@ -175,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, un + + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) +- mm->context.huge_pte_count++; ++ mm->context.thp_pte_count++; + else +- mm->context.huge_pte_count--; ++ mm->context.thp_pte_count--; + + /* Do not try to allocate the TSB hash table if we + * don't have one already. 
We have various locks held +--- a/arch/sparc/mm/tsb.c ++++ b/arch/sparc/mm/tsb.c +@@ -470,7 +470,7 @@ retry_tsb_alloc: + int init_new_context(struct task_struct *tsk, struct mm_struct *mm) + { + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- unsigned long huge_pte_count; ++ unsigned long total_huge_pte_count; + #endif + unsigned int i; + +@@ -479,12 +479,14 @@ int init_new_context(struct task_struct + mm->context.sparc64_ctx_val = 0UL; + + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- /* We reset it to zero because the fork() page copying ++ /* We reset them to zero because the fork() page copying + * will re-increment the counters as the parent PTEs are + * copied into the child address space. + */ +- huge_pte_count = mm->context.huge_pte_count; +- mm->context.huge_pte_count = 0; ++ total_huge_pte_count = mm->context.hugetlb_pte_count + ++ mm->context.thp_pte_count; ++ mm->context.hugetlb_pte_count = 0; ++ mm->context.thp_pte_count = 0; + #endif + + /* copy_mm() copies over the parent's mm_struct before calling +@@ -500,8 +502,8 @@ int init_new_context(struct task_struct + tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +- if (unlikely(huge_pte_count)) +- tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); ++ if (unlikely(total_huge_pte_count)) ++ tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + #endif + + if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/queue-4.4/sparc64-prepare-to-move-to-more-saner-user-copy-exception-handling.patch b/queue-4.4/sparc64-prepare-to-move-to-more-saner-user-copy-exception-handling.patch new file mode 100644 index 00000000000..62dad284256 --- /dev/null +++ b/queue-4.4/sparc64-prepare-to-move-to-more-saner-user-copy-exception-handling.patch @@ -0,0 +1,360 @@ +From foo@baz Sat Nov 19 09:52:37 CET 2016 +From: "David S. Miller" +Date: Mon, 15 Aug 2016 14:47:54 -0700 +Subject: sparc64: Prepare to move to more saner user copy exception handling. + +From: "David S. Miller" + + +[ Upstream commit 83a17d2661674d8c198adc0e183418f72aabab79 ] + +The fixup helper function mechanism for handling user copy fault +handling is not %100 accurrate, and can never be made so. + +We are going to transition the code to return the running return +return length, which is always kept track in one or more registers +of each of these routines. + +In order to convert them one by one, we have to allow the existing +behavior to continue functioning. + +Therefore make all the copy code that wants the fixup helper to be +used return negative one. + +After all of the user copy routines have been converted, this logic +and the fixup helpers themselves can be removed completely. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/uaccess_64.h | 21 +++++++++++++++------ + arch/sparc/kernel/head_64.S | 23 +++++++++++------------ + arch/sparc/lib/GENcopy_from_user.S | 2 +- + arch/sparc/lib/GENcopy_to_user.S | 2 +- + arch/sparc/lib/NG2copy_from_user.S | 4 ++-- + arch/sparc/lib/NG2copy_to_user.S | 4 ++-- + arch/sparc/lib/NG4copy_from_user.S | 4 ++-- + arch/sparc/lib/NG4copy_to_user.S | 4 ++-- + arch/sparc/lib/NGcopy_from_user.S | 2 +- + arch/sparc/lib/NGcopy_to_user.S | 2 +- + arch/sparc/lib/U1copy_from_user.S | 4 ++-- + arch/sparc/lib/U1copy_to_user.S | 4 ++-- + arch/sparc/lib/U3copy_from_user.S | 4 ++-- + arch/sparc/lib/U3copy_to_user.S | 4 ++-- + arch/sparc/lib/copy_in_user.S | 2 +- + 15 files changed, 47 insertions(+), 39 deletions(-) + +--- a/arch/sparc/include/asm/uaccess_64.h ++++ b/arch/sparc/include/asm/uaccess_64.h +@@ -211,8 +211,11 @@ copy_from_user(void *to, const void __us + { + unsigned long ret = ___copy_from_user(to, from, size); + +- if (unlikely(ret)) +- ret = copy_from_user_fixup(to, from, size); ++ if (unlikely(ret)) { ++ if ((long)ret < 0) ++ ret = copy_from_user_fixup(to, from, size); ++ return ret; ++ } + + return ret; + } +@@ -228,8 +231,11 @@ copy_to_user(void __user *to, const void + { + unsigned long ret = ___copy_to_user(to, from, size); + +- if (unlikely(ret)) +- ret = copy_to_user_fixup(to, from, size); ++ if (unlikely(ret)) { ++ if ((long)ret < 0) ++ ret = copy_to_user_fixup(to, from, size); ++ return ret; ++ } + return ret; + } + #define __copy_to_user copy_to_user +@@ -244,8 +250,11 @@ copy_in_user(void __user *to, void __use + { + unsigned long ret = ___copy_in_user(to, from, size); + +- if (unlikely(ret)) +- ret = copy_in_user_fixup(to, from, size); ++ if (unlikely(ret)) { ++ if ((long)ret < 0) ++ ret = copy_in_user_fixup(to, from, size); ++ return ret; ++ } + return ret; + } + #define __copy_in_user copy_in_user +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -922,41 +922,40 @@ prom_tba: .xword 0 + tlb_type: .word 0 /* Must NOT end up in BSS */ + .section ".fixup",#alloc,#execinstr + +- .globl __retl_efault, __ret_one, __retl_one + ENTRY(__retl_efault) + retl + mov -EFAULT, %o0 + ENDPROC(__retl_efault) + +-ENTRY(__retl_one) ++ENTRY(__retl_mone) + retl +- mov 1, %o0 +-ENDPROC(__retl_one) ++ mov -1, %o0 ++ENDPROC(__retl_mone) + +-ENTRY(__retl_one_fp) ++ENTRY(__retl_mone_fp) + VISExitHalf + retl + mov 1, %o0 +-ENDPROC(__retl_one_fp) ++ENDPROC(__retl_mone_fp) + +-ENTRY(__ret_one_asi) ++ENTRY(__ret_mone_asi) + wr %g0, ASI_AIUS, %asi + ret + restore %g0, 1, %o0 +-ENDPROC(__ret_one_asi) ++ENDPROC(__ret_mone_asi) + +-ENTRY(__retl_one_asi) ++ENTRY(__retl_mone_asi) + wr %g0, ASI_AIUS, %asi + retl + mov 1, %o0 +-ENDPROC(__retl_one_asi) ++ENDPROC(__retl_mone_asi) + +-ENTRY(__retl_one_asi_fp) ++ENTRY(__retl_mone_asi_fp) + wr %g0, ASI_AIUS, %asi + VISExitHalf + retl + mov 1, %o0 +-ENDPROC(__retl_one_asi_fp) ++ENDPROC(__retl_mone_asi_fp) + + ENTRY(__retl_o1) + retl +--- a/arch/sparc/lib/GENcopy_from_user.S ++++ b/arch/sparc/lib/GENcopy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/GENcopy_to_user.S ++++ b/arch/sparc/lib/GENcopy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG2copy_from_user.S ++++ 
b/arch/sparc/lib/NG2copy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi;\ ++ .word 98b, __retl_mone_asi;\ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi_fp;\ ++ .word 98b, __retl_mone_asi_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG2copy_to_user.S ++++ b/arch/sparc/lib/NG2copy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi;\ ++ .word 98b, __retl_mone_asi;\ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi_fp;\ ++ .word 98b, __retl_mone_asi_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG4copy_from_user.S ++++ b/arch/sparc/lib/NG4copy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi;\ ++ .word 98b, __retl_mone_asi;\ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi_fp;\ ++ .word 98b, __retl_mone_asi_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NG4copy_to_user.S ++++ b/arch/sparc/lib/NG4copy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi;\ ++ .word 98b, __retl_mone_asi;\ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_asi_fp;\ ++ .word 98b, __retl_mone_asi_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NGcopy_from_user.S ++++ b/arch/sparc/lib/NGcopy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __ret_one_asi;\ ++ .word 98b, __ret_mone_asi;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/NGcopy_to_user.S ++++ b/arch/sparc/lib/NGcopy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __ret_one_asi;\ ++ .word 98b, __ret_mone_asi;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U1copy_from_user.S ++++ b/arch/sparc/lib/U1copy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_fp;\ ++ .word 98b, __retl_mone_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U1copy_to_user.S ++++ b/arch/sparc/lib/U1copy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_fp;\ ++ .word 98b, __retl_mone_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U3copy_from_user.S ++++ b/arch/sparc/lib/U3copy_from_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_fp;\ ++ .word 98b, __retl_mone_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/U3copy_to_user.S ++++ b/arch/sparc/lib/U3copy_to_user.S +@@ -7,7 +7,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; + +@@ -15,7 +15,7 @@ + 98: x; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one_fp;\ 
++ .word 98b, __retl_mone_fp;\ + .text; \ + .align 4; + +--- a/arch/sparc/lib/copy_in_user.S ++++ b/arch/sparc/lib/copy_in_user.S +@@ -12,7 +12,7 @@ + 98: x,y; \ + .section __ex_table,"a";\ + .align 4; \ +- .word 98b, __retl_one; \ ++ .word 98b, __retl_mone; \ + .text; \ + .align 4; +
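
The two "extremely large kernel ... range flushes" patches above share one idea: past a size cutoff, stop probing the range page by page and instead make a single bounded pass, either a linear scan of the fixed-size kernel TSB or, on the TLB side, one demap-all / context-0 flush, so the cost stops growing with the size of the region being flushed. Below is a minimal standalone C sketch of that cutoff. It is only an illustration under assumed names (SIM_PAGE_SHIFT, SIM_TSB_ENTRIES, sim_tsb, flush_range); it is not kernel code and does not model the hardware demap operations.

#include <stdio.h>

#define SIM_PAGE_SHIFT   13                 /* 8K pages, as on sparc64 */
#define SIM_PAGE_SIZE    (1UL << SIM_PAGE_SHIFT)
#define SIM_TSB_ENTRIES  512UL              /* size of the simulated table */

struct sim_entry { unsigned long vaddr; int valid; };
static struct sim_entry sim_tsb[SIM_TSB_ENTRIES];

static void flush_one(unsigned long va)
{
	unsigned long idx = (va >> SIM_PAGE_SHIFT) % SIM_TSB_ENTRIES;

	if (sim_tsb[idx].valid && sim_tsb[idx].vaddr == va)
		sim_tsb[idx].valid = 0;
}

static void flush_range(unsigned long start, unsigned long end)
{
	unsigned long npages = (end - start) >> SIM_PAGE_SHIFT;
	unsigned long i, v;

	if (npages >= 2 * SIM_TSB_ENTRIES) {
		/* Huge request: one pass over the table, cost bounded by its size. */
		for (i = 0; i < SIM_TSB_ENTRIES; i++)
			if (sim_tsb[i].valid &&
			    sim_tsb[i].vaddr >= start && sim_tsb[i].vaddr < end)
				sim_tsb[i].valid = 0;
		return;
	}

	/* Small request: probe every page, cost grows with the range instead. */
	for (v = start; v < end; v += SIM_PAGE_SIZE)
		flush_one(v);
}

int main(void)
{
	sim_tsb[1].vaddr = 1UL << SIM_PAGE_SHIFT;   /* pretend page 1 is cached */
	sim_tsb[1].valid = 1;

	flush_range(0, 1UL << 30);                  /* 128K pages: takes the scan path */
	printf("entry 1 valid after flush: %d\n", sim_tsb[1].valid);
	return 0;
}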
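
The "prepare to move to more saner user copy exception handling" patch above changes the interim contract between the copy routines and their callers: a still-unconverted routine reports a fault as (unsigned long)-1, which the caller recognises with a signed comparison and answers by running the old byte-counting fixup pass, while already-converted routines return the exact number of bytes left uncopied. A small standalone C sketch of that caller-side check follows; all names in it (raw_copy_sim, fixup_sim, copy_sim) are invented for illustration and are not the kernel's.

#include <stdio.h>

/* Pretend low-level copier: 0 on success, a remainder, or -1UL for an
 * old-style fault that still needs the fixup pass. */
static unsigned long raw_copy_sim(void *dst, const void *src, unsigned long size)
{
	(void)dst; (void)src; (void)size;
	return (unsigned long)-1;       /* simulate an unconverted routine faulting */
}

/* Slow path standing in for the copy_*_user_fixup() helpers: recompute the remainder. */
static unsigned long fixup_sim(void *dst, const void *src, unsigned long size)
{
	(void)dst; (void)src;
	return size;                    /* pretend nothing was copied */
}

static unsigned long copy_sim(void *dst, const void *src, unsigned long size)
{
	unsigned long ret = raw_copy_sim(dst, src, size);

	if (ret) {
		/* Negative sentinel: legacy routine, ask the fixup for the count. */
		if ((long)ret < 0)
			ret = fixup_sim(dst, src, size);
		return ret;             /* an accurate remainder either way */
	}
	return 0;
}

int main(void)
{
	char dst[16];
	const char src[16] = "hello";

	printf("bytes not copied: %lu\n", copy_sim(dst, src, sizeof(dst)));
	return 0;
}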