From: Greg Kroah-Hartman
Date: Tue, 28 Oct 2014 03:25:11 +0000 (+0800)
Subject: 3.14-stable patches
X-Git-Tag: v3.10.59~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a24680070280b25b7fd2ce9065049a426e9ca37b;p=thirdparty%2Fkernel%2Fstable-queue.git

3.14-stable patches

added patches:
    sparc-let-memset-return-the-address-argument.patch
    sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch
    sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch
    sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch
    sparc64-correctly-recognise-m6-and-m7-cpu-type.patch
    sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch
    sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch
    sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch
    sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch
    sparc64-find_node-adjustment.patch
    sparc64-fix-corrupted-thread-fault-code.patch
    sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
    sparc64-fix-hibernation-code-refrence-to-page_offset.patch
    sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch
    sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch
    sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch
    sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch
    sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch
    sparc64-implement-__get_user_pages_fast.patch
    sparc64-increase-max_phys_address_bits-to-53.patch
    sparc64-increase-size-of-boot-string-to-1024-bytes.patch
    sparc64-kill-unnecessary-tables-and-increase-max_banks.patch
    sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch
    sparc64-sparse-irq.patch
    sparc64-sun4v-tlb-error-power-off-events.patch
    sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch
    sparc64-switch-to-4-level-page-tables.patch
    sparc64-t5-pmu.patch
    sparc64-use-kernel-page-tables-for-vmemmap.patch
---

diff --git a/queue-3.14/series b/queue-3.14/series
index 2f7ff4e4ea4..d17fd3a5755 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -69,3 +69,32 @@ arm-mvebu-netgear-rn2120-use-hardware-bch-ecc.patch
 arm-mvebu-netgear-rn102-use-hardware-bch-ecc.patch
 ecryptfs-avoid-to-access-null-pointer-when-write-metadata-in-xattr.patch
 xfs-ensure-wb_sync_all-writeback-handles-partial-pages-correctly.patch
+sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch
+sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch
+sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch
+sparc64-sun4v-tlb-error-power-off-events.patch
+sparc64-fix-corrupted-thread-fault-code.patch
+sparc64-find_node-adjustment.patch
+sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch
+sparc-let-memset-return-the-address-argument.patch
+sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch
+sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch
+sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
+sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch
+sparc64-fix-hibernation-code-refrence-to-page_offset.patch
+sparc64-correctly-recognise-m6-and-m7-cpu-type.patch
+sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch
+sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch
+sparc64-t5-pmu.patch
+sparc64-switch-to-4-level-page-tables.patch
+sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch
+sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch
+sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch
+sparc64-use-kernel-page-tables-for-vmemmap.patch
+sparc64-increase-max_phys_address_bits-to-53.patch
+sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch
+sparc64-sparse-irq.patch
+sparc64-kill-unnecessary-tables-and-increase-max_banks.patch
+sparc64-increase-size-of-boot-string-to-1024-bytes.patch
+sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch
+sparc64-implement-__get_user_pages_fast.patch
diff --git a/queue-3.14/sparc-let-memset-return-the-address-argument.patch b/queue-3.14/sparc-let-memset-return-the-address-argument.patch
new file mode 100644
index 00000000000..b4c9c817864
--- /dev/null
+++ b/queue-3.14/sparc-let-memset-return-the-address-argument.patch
@@ -0,0 +1,75 @@
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Andreas Larsson
+Date: Fri, 29 Aug 2014 17:08:21 +0200
+Subject: sparc: Let memset return the address argument
+
+From: Andreas Larsson
+
+[ Upstream commit 74cad25c076a2f5253312c2fe82d1a4daecc1323 ]
+
+This makes memset follow the standard (instead of returning 0 on success). This
+is needed when certain versions of gcc optimizes around memset calls and assume
+that the address argument is preserved in %o0.
+
+Signed-off-by: Andreas Larsson
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/lib/memset.S | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/sparc/lib/memset.S
++++ b/arch/sparc/lib/memset.S
+@@ -3,8 +3,9 @@
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+- * Returns 0, if ok, and number of bytes not yet set if exception
+- * occurs and we were called as clear_user.
++ * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
++ * number of bytes not yet set if exception occurs and we were called as
++ * clear_user.
+ */
+
+ #include
+@@ -65,6 +66,8 @@ __bzero_begin:
+ .globl __memset_start, __memset_end
+ __memset_start:
+ memset:
++ mov %o0, %g1
++ mov 1, %g4
+ and %o1, 0xff, %g3
+ sll %g3, 8, %g2
+ or %g3, %g2, %g3
+@@ -89,6 +92,7 @@ memset:
+ sub %o0, %o2, %o0
+
+ __bzero:
++ clr %g4
+ mov %g0, %g3
+ 1:
+ cmp %o1, 7
+@@ -151,8 +155,8 @@ __bzero:
+ bne,a 8f
+ EX(stb %g3, [%o0], and %o1, 1)
+ 8:
+- retl
+- clr %o0
++ b 0f
++ nop
+ 7:
+ be 13b
+ orcc %o1, 0, %g0
+@@ -164,6 +168,12 @@ __bzero:
+ bne 8b
+ EX(stb %g3, [%o0 - 1], add %o1, 1)
+ 0:
++ andcc %g4, 1, %g0
++ be 5f
++ nop
++ retl
++ mov %g1, %o0
++5:
+ retl
+ clr %o0
+ __memset_end:
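[ Editorial aside: the commit message above is terse about why the return
  value matters. ISO C guarantees that memset() returns its first argument,
  so gcc is free to keep reusing the return register (%o0 on sparc) as the
  buffer pointer after the call instead of holding a separate copy. A minimal
  C sketch of the caller pattern that breaks; the function names here are
  made up for illustration and are not part of the patch:

	#include <string.h>
	#include <stdio.h>

	/* gcc may lower this to roughly:
	 *     call memset        ! buffer pointer already in %o0
	 *     stb ..., [%o0]     ! keep using %o0 as the buffer afterwards
	 * because the standard says memset() returns its first argument.
	 * If the assembly implementation returns 0 instead, the caller
	 * ends up dereferencing a bogus pointer.
	 */
	static char *zero_buf(char *buf, size_t len)
	{
		char *p = memset(buf, 0, len);	/* must equal buf */
		p[0] = 'A';			/* faults if memset returned 0 */
		return p;
	}

	int main(void)
	{
		char name[16];
		printf("%s\n", zero_buf(name, sizeof(name)));
		return 0;
	}
]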
diff --git a/queue-3.14/sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch b/queue-3.14/sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch
new file mode 100644
index 00000000000..5f4a197ed90
--- /dev/null
+++ b/queue-3.14/sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Daniel Hellstrom
+Date: Wed, 10 Sep 2014 14:17:52 +0200
+Subject: sparc32: dma_alloc_coherent must honour gfp flags
+
+From: Daniel Hellstrom
+
+[ Upstream commit d1105287aabe88dbb3af825140badaa05cf0442c ]
+
+dma_zalloc_coherent() calls dma_alloc_coherent(__GFP_ZERO)
+but the sparc32 implementations sbus_alloc_coherent() and
+pci32_alloc_coherent() doesn't take the gfp flags into
+account.
+
+Tested on the SPARC32/LEON GRETH Ethernet driver which fails
+due to dma_alloc_coherent(__GFP_ZERO) returns non zeroed
+pages.
+
+Signed-off-by: Daniel Hellstrom
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/kernel/ioport.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/kernel/ioport.c
++++ b/arch/sparc/kernel/ioport.c
+@@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct
+ }
+
+ order = get_order(len_total);
+- if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0)
++ va = __get_free_pages(gfp, order);
++ if (va == 0)
+ goto err_nopages;
+
+ if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
+@@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct
+ }
+
+ order = get_order(len_total);
+- va = (void *) __get_free_pages(GFP_KERNEL, order);
++ va = (void *) __get_free_pages(gfp, order);
+ if (va == NULL) {
+ printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
+ goto err_nopages;
diff --git a/queue-3.14/sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch b/queue-3.14/sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch
new file mode 100644
index 00000000000..4e13e2f0cc3
--- /dev/null
+++ b/queue-3.14/sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch
@@ -0,0 +1,129 @@
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller"
+Date: Wed, 17 Sep 2014 10:14:56 -0700
+Subject: sparc64: Adjust KTSB assembler to support larger physical addresses.
+
+From: "David S. Miller"
+
+[ Upstream commit 8c82dc0e883821c098c8b0b130ffebabf9aab5df ]
+
+As currently coded the KTSB accesses in the kernel only support up to
+47 bits of physical addressing.
+
+Adjust the instruction and patching sequence in order to support
+arbitrary 64 bits addresses.
+
+Signed-off-by: David S. Miller
+Acked-by: Bob Picco
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/tsb.h | 30 ++++++++++++------------------
+ arch/sparc/mm/init_64.c | 28 +++++++++++++++++++++++++---
+ 2 files changed, 37 insertions(+), 21 deletions(-)
+
+--- a/arch/sparc/include/asm/tsb.h
++++ b/arch/sparc/include/asm/tsb.h
+@@ -256,8 +256,6 @@ extern struct tsb_phys_patch_entry __tsb
+ (KERNEL_TSB_SIZE_BYTES / 16)
+ #define KERNEL_TSB4M_NENTRIES 4096
+
+-#define KTSB_PHYS_SHIFT 15
+-
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+ *
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+-661: sethi %hi(swapper_tsb), REG1; \
+- or REG1, %lo(swapper_tsb), REG1; \
++661: sethi %uhi(swapper_tsb), REG1; \
++ sethi %hi(swapper_tsb), REG2; \
++ or REG1, %ulo(swapper_tsb), REG1; \
++ or REG2, %lo(swapper_tsb), REG2; \
+ .section .swapper_tsb_phys_patch, "ax"; \
+ .word 661b; \
+ .previous; \
+-661: nop; \
+- .section .tsb_ldquad_phys_patch, "ax"; \
+- .word 661b; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- .previous; \
++ sllx REG1, 32, REG1; \
++ or REG1, REG2, REG1; \
+ srlx VADDR, PAGE_SHIFT, REG2; \
+ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+@@ -291,17 +287,15 @@ extern struct tsb_phys_patch_entry __tsb
+ * we can make use of that for the index computation.
+ */ + #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ +-661: sethi %hi(swapper_4m_tsb), REG1; \ +- or REG1, %lo(swapper_4m_tsb), REG1; \ ++661: sethi %uhi(swapper_4m_tsb), REG1; \ ++ sethi %hi(swapper_4m_tsb), REG2; \ ++ or REG1, %ulo(swapper_4m_tsb), REG1; \ ++ or REG2, %lo(swapper_4m_tsb), REG2; \ + .section .swapper_4m_tsb_phys_patch, "ax"; \ + .word 661b; \ + .previous; \ +-661: nop; \ +- .section .tsb_ldquad_phys_patch, "ax"; \ +- .word 661b; \ +- sllx REG1, KTSB_PHYS_SHIFT, REG1; \ +- sllx REG1, KTSB_PHYS_SHIFT, REG1; \ +- .previous; \ ++ sllx REG1, 32, REG1; \ ++ or REG1, REG2, REG1; \ + and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1727,19 +1727,41 @@ static void __init tsb_phys_patch(void) + static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; + extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + ++/* The swapper TSBs are loaded with a base sequence of: ++ * ++ * sethi %uhi(SYMBOL), REG1 ++ * sethi %hi(SYMBOL), REG2 ++ * or REG1, %ulo(SYMBOL), REG1 ++ * or REG2, %lo(SYMBOL), REG2 ++ * sllx REG1, 32, REG1 ++ * or REG1, REG2, REG1 ++ * ++ * When we use physical addressing for the TSB accesses, we patch the ++ * first four instructions in the above sequence. ++ */ ++ + static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) + { +- pa >>= KTSB_PHYS_SHIFT; ++ unsigned long high_bits, low_bits; ++ ++ high_bits = (pa >> 32) & 0xffffffff; ++ low_bits = (pa >> 0) & 0xffffffff; + + while (start < end) { + unsigned int *ia = (unsigned int *)(unsigned long)*start; + +- ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); ++ ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10); + __asm__ __volatile__("flush %0" : : "r" (ia)); + +- ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); ++ ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10); + __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + ++ ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff); ++ __asm__ __volatile__("flush %0" : : "r" (ia + 2)); ++ ++ ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff); ++ __asm__ __volatile__("flush %0" : : "r" (ia + 3)); ++ + start++; + } + } diff --git a/queue-3.14/sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch b/queue-3.14/sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch new file mode 100644 index 00000000000..56eb57a3cb1 --- /dev/null +++ b/queue-3.14/sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch @@ -0,0 +1,152 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Sat, 27 Sep 2014 11:05:21 -0700 +Subject: sparc64: Adjust vmalloc region size based upon available virtual address bits. + +From: "David S. Miller" + +[ Upstream commit bb4e6e85daa52a9f6210fa06a5ec6269598a202b ] + +In order to accomodate embedded per-cpu allocation with large numbers +of cpus and numa nodes, we have to use as much virtual address space +as possible for the vmalloc region. Otherwise we can get things like: + +PERCPU: max_distance=0x380001c10000 too large for vmalloc space 0xff00000000 + +So, once we select a value for PAGE_OFFSET, derive the size of the +vmalloc region based upon that. + +Signed-off-by: David S. 
Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/page_64.h | 1 - + arch/sparc/include/asm/pgtable_64.h | 9 +++++---- + arch/sparc/kernel/ktlb.S | 8 ++++---- + arch/sparc/mm/init_64.c | 30 +++++++++++++++++++----------- + 4 files changed, 28 insertions(+), 20 deletions(-) + +--- a/arch/sparc/include/asm/page_64.h ++++ b/arch/sparc/include/asm/page_64.h +@@ -117,7 +117,6 @@ extern unsigned long sparc64_va_hole_bot + + #include + +-#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) + extern unsigned long PAGE_OFFSET; + + #endif /* !(__ASSEMBLY__) */ +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -40,10 +40,7 @@ + #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) + #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) + #define VMALLOC_START _AC(0x0000000100000000,UL) +-#define VMALLOC_END _AC(0x0000010000000000,UL) +-#define VMEMMAP_BASE _AC(0x0000010000000000,UL) +- +-#define vmemmap ((struct page *)VMEMMAP_BASE) ++#define VMEMMAP_BASE VMALLOC_END + + /* PMD_SHIFT determines the size of the area a second-level page + * table can map +@@ -81,6 +78,10 @@ + + #ifndef __ASSEMBLY__ + ++extern unsigned long VMALLOC_END; ++ ++#define vmemmap ((struct page *)VMEMMAP_BASE) ++ + #include + + bool kern_addr_valid(unsigned long addr); +--- a/arch/sparc/kernel/ktlb.S ++++ b/arch/sparc/kernel/ktlb.S +@@ -199,8 +199,8 @@ kvmap_dtlb_nonlinear: + + #ifdef CONFIG_SPARSEMEM_VMEMMAP + /* Do not use the TSB for vmemmap. */ +- mov (VMEMMAP_BASE >> 40), %g5 +- sllx %g5, 40, %g5 ++ sethi %hi(VMEMMAP_BASE), %g5 ++ ldx [%g5 + %lo(VMEMMAP_BASE)], %g5 + cmp %g4,%g5 + bgeu,pn %xcc, kvmap_vmemmap + nop +@@ -212,8 +212,8 @@ kvmap_dtlb_tsbmiss: + sethi %hi(MODULES_VADDR), %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_dtlb_longpath +- mov (VMALLOC_END >> 40), %g5 +- sllx %g5, 40, %g5 ++ sethi %hi(VMALLOC_END), %g5 ++ ldx [%g5 + %lo(VMALLOC_END)], %g5 + cmp %g4, %g5 + bgeu,pn %xcc, kvmap_dtlb_longpath + nop +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1363,25 +1363,24 @@ static unsigned long max_phys_bits = 40; + + bool kern_addr_valid(unsigned long addr) + { +- unsigned long above = ((long)addr) >> max_phys_bits; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +- if (above != 0 && above != -1UL) +- return false; +- +- if (addr >= (unsigned long) KERNBASE && +- addr < (unsigned long)&_end) +- return true; +- +- if (addr >= PAGE_OFFSET) { ++ if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + ++ if ((addr >> max_phys_bits) != 0UL) ++ return false; ++ + return pfn_valid(pa >> PAGE_SHIFT); + } + ++ if (addr >= (unsigned long) KERNBASE && ++ addr < (unsigned long)&_end) ++ return true; ++ + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; +@@ -1650,6 +1649,9 @@ unsigned long __init find_ecache_flush_s + unsigned long PAGE_OFFSET; + EXPORT_SYMBOL(PAGE_OFFSET); + ++unsigned long VMALLOC_END = 0x0000010000000000UL; ++EXPORT_SYMBOL(VMALLOC_END); ++ + unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; + unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; + +@@ -1706,10 +1708,16 @@ static void __init setup_page_offset(voi + prom_halt(); + } + +- PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); ++ PAGE_OFFSET = sparc64_va_hole_top; ++ VMALLOC_END = ((sparc64_va_hole_bottom >> 1) + ++ (sparc64_va_hole_bottom >> 2)); + +- pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", ++ pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); ++ pr_info("MM: 
VMALLOC [0x%016lx --> 0x%016lx]\n", ++ VMALLOC_START, VMALLOC_END); ++ pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n", ++ VMEMMAP_BASE, VMEMMAP_BASE << 1); + } + + static void __init tsb_phys_patch(void) diff --git a/queue-3.14/sparc64-correctly-recognise-m6-and-m7-cpu-type.patch b/queue-3.14/sparc64-correctly-recognise-m6-and-m7-cpu-type.patch new file mode 100644 index 00000000000..734fd2fdeb5 --- /dev/null +++ b/queue-3.14/sparc64-correctly-recognise-m6-and-m7-cpu-type.patch @@ -0,0 +1,79 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: Allen Pais +Date: Mon, 8 Sep 2014 11:48:53 +0530 +Subject: sparc64: correctly recognise M6 and M7 cpu type + +From: Allen Pais + +The following patch adds support for correctly +recognising M6 and M7 cpu type. + +Signed-off-by: Allen Pais +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/spitfire.h | 2 ++ + arch/sparc/kernel/cpu.c | 12 ++++++++++++ + arch/sparc/kernel/head_64.S | 12 ++++++++++++ + 3 files changed, 26 insertions(+) + +--- a/arch/sparc/include/asm/spitfire.h ++++ b/arch/sparc/include/asm/spitfire.h +@@ -45,6 +45,8 @@ + #define SUN4V_CHIP_NIAGARA3 0x03 + #define SUN4V_CHIP_NIAGARA4 0x04 + #define SUN4V_CHIP_NIAGARA5 0x05 ++#define SUN4V_CHIP_SPARC_M6 0x06 ++#define SUN4V_CHIP_SPARC_M7 0x07 + #define SUN4V_CHIP_SPARC64X 0x8a + #define SUN4V_CHIP_UNKNOWN 0xff + +--- a/arch/sparc/kernel/cpu.c ++++ b/arch/sparc/kernel/cpu.c +@@ -493,6 +493,18 @@ static void __init sun4v_cpu_probe(void) + sparc_pmu_type = "niagara5"; + break; + ++ case SUN4V_CHIP_SPARC_M6: ++ sparc_cpu_type = "SPARC-M6"; ++ sparc_fpu_type = "SPARC-M6 integrated FPU"; ++ sparc_pmu_type = "sparc-m6"; ++ break; ++ ++ case SUN4V_CHIP_SPARC_M7: ++ sparc_cpu_type = "SPARC-M7"; ++ sparc_fpu_type = "SPARC-M7 integrated FPU"; ++ sparc_pmu_type = "sparc-m7"; ++ break; ++ + case SUN4V_CHIP_SPARC64X: + sparc_cpu_type = "SPARC64-X"; + sparc_fpu_type = "SPARC64-X integrated FPU"; +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -427,6 +427,12 @@ sun4v_chip_type: + cmp %g2, '5' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA5, %g4 ++ cmp %g2, '6' ++ be,pt %xcc, 5f ++ mov SUN4V_CHIP_SPARC_M6, %g4 ++ cmp %g2, '7' ++ be,pt %xcc, 5f ++ mov SUN4V_CHIP_SPARC_M7, %g4 + ba,pt %xcc, 49f + nop + +@@ -585,6 +591,12 @@ niagara_tlb_fixup: + cmp %g1, SUN4V_CHIP_NIAGARA5 + be,pt %xcc, niagara4_patch + nop ++ cmp %g1, SUN4V_CHIP_SPARC_M6 ++ be,pt %xcc, niagara4_patch ++ nop ++ cmp %g1, SUN4V_CHIP_SPARC_M7 ++ be,pt %xcc, niagara4_patch ++ nop + + call generic_patch_copyops + nop diff --git a/queue-3.14/sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch b/queue-3.14/sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch new file mode 100644 index 00000000000..7a976b74901 --- /dev/null +++ b/queue-3.14/sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: Allen Pais +Date: Mon, 8 Sep 2014 11:48:55 +0530 +Subject: sparc64: cpu hardware caps support for sparc M6 and M7 + +From: Allen Pais + +Signed-off-by: Allen Pais +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/setup_64.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/sparc/kernel/setup_64.c ++++ b/arch/sparc/kernel/setup_64.c +@@ -500,12 +500,16 @@ static void __init init_sparc64_elf_hwca + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC64X) + cap |= HWCAP_SPARC_BLKINIT; + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC64X) + cap |= HWCAP_SPARC_N2; + } +@@ -533,6 +537,8 @@ static void __init init_sparc64_elf_hwca + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC64X) + cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | + AV_SPARC_ASI_BLK_INIT | +@@ -540,6 +546,8 @@ static void __init init_sparc64_elf_hwca + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || ++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC64X) + cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | + AV_SPARC_FMAF); diff --git a/queue-3.14/sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch b/queue-3.14/sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch new file mode 100644 index 00000000000..eeed90b7a0a --- /dev/null +++ b/queue-3.14/sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch @@ -0,0 +1,99 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Fri, 26 Sep 2014 21:58:33 -0700 +Subject: sparc64: Define VA hole at run time, rather than at compile time. + +From: "David S. Miller" + +[ Upstream commit 4397bed080598001e88f612deb8b080bb1cc2322 ] + +Now that we use 4-level page tables, we can provide up to 53-bits of +virtual address space to the user. + +Adjust the VA hole based upon the capabilities of the cpu type probed. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/page_64.h | 15 ++++----------- + arch/sparc/mm/init_64.c | 21 +++++++++++++++++++++ + 2 files changed, 25 insertions(+), 11 deletions(-) + +--- a/arch/sparc/include/asm/page_64.h ++++ b/arch/sparc/include/asm/page_64.h +@@ -102,21 +102,14 @@ typedef unsigned long pgprot_t; + + typedef pte_t *pgtable_t; + +-/* These two values define the virtual address space range in which we +- * must forbid 64-bit user processes from making mappings. It used to +- * represent precisely the virtual address space hole present in most +- * early sparc64 chips including UltraSPARC-I. But now it also is +- * further constrained by the limits of our page tables, which is +- * 43-bits of virtual address. 
+- */ +-#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL) +-#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL) ++extern unsigned long sparc64_va_hole_top; ++extern unsigned long sparc64_va_hole_bottom; + + /* The next two defines specify the actual exclusion region we + * enforce, wherein we use a 4GB red zone on each side of the VA hole. + */ +-#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL)) +-#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL)) ++#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL)) ++#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL)) + + #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ + _AC(0x0000000070000000,UL) : \ +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1624,25 +1624,46 @@ static void __init page_offset_shift_pat + } + } + ++unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; ++unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; ++ + static void __init setup_page_offset(void) + { + unsigned long max_phys_bits = 40; + + if (tlb_type == cheetah || tlb_type == cheetah_plus) { ++ /* Cheetah/Panther support a full 64-bit virtual ++ * address, so we can use all that our page tables ++ * support. ++ */ ++ sparc64_va_hole_top = 0xfff0000000000000UL; ++ sparc64_va_hole_bottom = 0x0010000000000000UL; ++ + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: ++ /* T1 and T2 support 48-bit virtual addresses. */ ++ sparc64_va_hole_top = 0xffff800000000000UL; ++ sparc64_va_hole_bottom = 0x0000800000000000UL; ++ + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: ++ /* T3 supports 48-bit virtual addresses. */ ++ sparc64_va_hole_top = 0xffff800000000000UL; ++ sparc64_va_hole_bottom = 0x0000800000000000UL; ++ + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + default: ++ /* T4 and later support 52-bit virtual addresses. */ ++ sparc64_va_hole_top = 0xfff8000000000000UL; ++ sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 47; + break; + } diff --git a/queue-3.14/sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch b/queue-3.14/sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch new file mode 100644 index 00000000000..7cf9bb1cc88 --- /dev/null +++ b/queue-3.14/sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Sat, 18 Oct 2014 23:12:33 -0400 +Subject: sparc64: Do not define thread fpregs save area as zero-length array. + +From: "David S. Miller" + +[ Upstream commit e2653143d7d79a49f1a961aeae1d82612838b12c ] + +This breaks the stack end corruption detection facility. + +What that facility does it write a magic value to "end_of_stack()" +and checking to see if it gets overwritten. + +"end_of_stack()" is "task_thread_info(p) + 1", which for sparc64 is +the beginning of the FPU register save area. + +So once the user uses the FPU, the magic value is overwritten and the +debug checks trigger. + +Fix this by making the size explicit. + +Due to the size we use for the fpsaved[], gsr[], and xfsr[] arrays we +are limited to 7 levels of FPU state saves. So each FPU register set +is 256 bytes, allocate 256 * 7 for the fpregs area. + +Reported-by: Meelis Roos +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/thread_info_64.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/sparc/include/asm/thread_info_64.h ++++ b/arch/sparc/include/asm/thread_info_64.h +@@ -63,7 +63,8 @@ struct thread_info { + struct pt_regs *kern_una_regs; + unsigned int kern_una_insn; + +- unsigned long fpregs[0] __attribute__ ((aligned(64))); ++ unsigned long fpregs[(7 * 256) / sizeof(unsigned long)] ++ __attribute__ ((aligned(64))); + }; + + #endif /* !(__ASSEMBLY__) */ diff --git a/queue-3.14/sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch b/queue-3.14/sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch new file mode 100644 index 00000000000..908641e24f7 --- /dev/null +++ b/queue-3.14/sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Mon, 11 Aug 2014 20:45:01 -0700 +Subject: sparc64: Do not disable interrupts in nmi_cpu_busy() + +From: "David S. Miller" + +[ Upstream commit 58556104e9cd0107a7a8d2692cf04ef31669f6e4 ] + +nmi_cpu_busy() is a SMP function call that just makes sure that all of the +cpus are spinning using cpu cycles while the NMI test runs. + +It does not need to disable IRQs because we just care about NMIs executing +which will even with 'normal' IRQs disabled. + +It is not legal to enable hard IRQs in a SMP cross call, in fact this bug +triggers the BUG check in irq_work_run_list(): + + BUG_ON(!irqs_disabled()); + +Because now irq_work_run() is invoked from the tail of +generic_smp_call_function_single_interrupt(). + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/nmi.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/sparc/kernel/nmi.c ++++ b/arch/sparc/kernel/nmi.c +@@ -141,7 +141,6 @@ static inline unsigned int get_nmi_count + + static __init void nmi_cpu_busy(void *data) + { +- local_irq_enable_in_hardirq(); + while (endflag == 0) + mb(); + } diff --git a/queue-3.14/sparc64-find_node-adjustment.patch b/queue-3.14/sparc64-find_node-adjustment.patch new file mode 100644 index 00000000000..096a9ae0c17 --- /dev/null +++ b/queue-3.14/sparc64-find_node-adjustment.patch @@ -0,0 +1,102 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: bob picco +Date: Tue, 16 Sep 2014 09:28:15 -0400 +Subject: sparc64: find_node adjustment + +From: bob picco + +[ Upstream commit 3dee9df54836d5f844f3d58281d3f3e6331b467f ] + +We have seen an issue with guest boot into LDOM that causes early boot failures +because of no matching rules for node identitity of the memory. I analyzed this +on my T4 and concluded there might not be a solution. I saw the issue in +mainline too when booting into the control/primary domain - with guests +configured. Note, this could be a firmware bug on some older machines. + +I'll provide a full explanation of the issues below. Should we not find a +matching BEST latency group for a real address (RA) then we will assume node 0. +On the T4-2 here with the information provided I can't see an alternative. + +Technically the LDOM shown below should match the MBLOCK to the +favorable latency group. However other factors must be considered too. Were +the memory controllers configured "fine" grained interleave or "coarse" +grain interleaved - T4. Also should a "group" MD node be considered a NUMA +node? + +There has to be at least one Machine Description (MD) "group" and hence one +NUMA node. 
The group can have one or more latency groups (lg) - more than one +memory controller. The current code chooses the smallest latency as the most +favorable per group. The latency and lg information is in MLGROUP below. +MBLOCK is the base and size of the RAs for the machine as fetched from OBP +/memory "available" property. My machine has one MBLOCK but more would be +possible - with holes? + +For a T4-2 the following information has been gathered: +with LDOM guest +MEMBLOCK configuration: + memory size = 0x27f870000 + memory.cnt = 0x3 + memory[0x0] [0x00000020400000-0x0000029fc67fff], 0x27f868000 bytes + memory[0x1] [0x0000029fd8a000-0x0000029fd8bfff], 0x2000 bytes + memory[0x2] [0x0000029fd92000-0x0000029fd97fff], 0x6000 bytes + reserved.cnt = 0x2 + reserved[0x0] [0x00000020800000-0x000000216c15c0], 0xec15c1 bytes + reserved[0x1] [0x00000024800000-0x0000002c180c1e], 0x7980c1f bytes +MBLOCK[0]: base[20000000] size[280000000] offset[0] +(note: "base" and "size" reported in "MBLOCK" encompass the "memory[X]" values) +(note: (RA + offset) & mask = val is the formula to detect a match for the +memory controller. should there be no match for find_node node, a return +value of -1 resulted for the node - BAD) + +There is one group. It has these forward links +MLGROUP[1]: node[545] latency[1f7e8] match[200000000] mask[200000000] +MLGROUP[2]: node[54d] latency[2de60] match[0] mask[200000000] +NUMA NODE[0]: node[545] mask[200000000] val[200000000] (latency[1f7e8]) +(note: "val" is the best lg's (smallest latency) "match") + +no LDOM guest - bare metal +MEMBLOCK configuration: + memory size = 0xfdf2d0000 + memory.cnt = 0x3 + memory[0x0] [0x00000020400000-0x00000fff6adfff], 0xfdf2ae000 bytes + memory[0x1] [0x00000fff6d2000-0x00000fff6e7fff], 0x16000 bytes + memory[0x2] [0x00000fff766000-0x00000fff771fff], 0xc000 bytes + reserved.cnt = 0x2 + reserved[0x0] [0x00000020800000-0x00000021a04580], 0x1204581 bytes + reserved[0x1] [0x00000024800000-0x0000002c7d29fc], 0x7fd29fd bytes +MBLOCK[0]: base[20000000] size[fe0000000] offset[0] + +there are two groups +group node[16d5] +MLGROUP[0]: node[1765] latency[1f7e8] match[0] mask[200000000] +MLGROUP[3]: node[177d] latency[2de60] match[200000000] mask[200000000] +NUMA NODE[0]: node[1765] mask[200000000] val[0] (latency[1f7e8]) +group node[171d] +MLGROUP[2]: node[1775] latency[2de60] match[0] mask[200000000] +MLGROUP[1]: node[176d] latency[1f7e8] match[200000000] mask[200000000] +NUMA NODE[1]: node[176d] mask[200000000] val[200000000] (latency[1f7e8]) +(note: for this two "group" bare metal machine, 1/2 memory is in group one's +lg and 1/2 memory is in group two's lg). + +Cc: sparclinux@vger.kernel.org +Signed-off-by: Bob Picco +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -838,7 +838,10 @@ static int find_node(unsigned long addr) + if ((addr & p->mask) == p->val) + return i; + } +- return -1; ++ /* The following condition has been observed on LDOM guests.*/ ++ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" ++ " rule. 
Some physical memory will be owned by node 0."); ++ return 0; + } + + static u64 memblock_nid_range(u64 start, u64 end, int *nid) diff --git a/queue-3.14/sparc64-fix-corrupted-thread-fault-code.patch b/queue-3.14/sparc64-fix-corrupted-thread-fault-code.patch new file mode 100644 index 00000000000..31bd1d22009 --- /dev/null +++ b/queue-3.14/sparc64-fix-corrupted-thread-fault-code.patch @@ -0,0 +1,76 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Sat, 18 Oct 2014 23:03:09 -0400 +Subject: sparc64: Fix corrupted thread fault code. + +From: "David S. Miller" + +[ Upstream commit 84bd6d8b9c0f06b3f188efb479c77e20f05e9a8a ] + +Every path that ends up at do_sparc64_fault() must install a valid +FAULT_CODE_* bitmask in the per-thread fault code byte. + +Two paths leading to the label winfix_trampoline (which expects the +FAULT_CODE_* mask in register %g4) were not doing so: + +1) For pre-hypervisor TLB protection violation traps, if we took + the 'winfix_trampoline' path we wouldn't have %g4 initialized + with the FAULT_CODE_* value yet. Resulting in using the + TLB_TAG_ACCESS register address value instead. + +2) In the TSB miss path, when we notice that we are going to use a + hugepage mapping, but we haven't allocated the hugepage TSB yet, we + still have to take the window fixup case into consideration and + in that particular path we leave %g4 not setup properly. + +Errors on this sort were largely invisible previously, but after +commit 4ccb9272892c33ef1c19a783cfa87103b30c2784 ("sparc64: sun4v TLB +error power off events") we now have a fault_code mask bit +(FAULT_CODE_BAD_RA) that triggers due to this bug. + +FAULT_CODE_BAD_RA triggers because this bit is set in TLB_TAG_ACCESS +(see #1 above) and thus we get seemingly random bus errors triggered +for user processes. + +Fixes: 4ccb9272892c ("sparc64: sun4v TLB error power off events") +Reported-by: Meelis Roos +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/dtlb_prot.S | 6 +++--- + arch/sparc/kernel/tsb.S | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/sparc/kernel/dtlb_prot.S ++++ b/arch/sparc/kernel/dtlb_prot.S +@@ -24,11 +24,11 @@ + mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr + + /* PROT ** ICACHE line 2: More real fault processing */ ++ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 + bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup +- ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 +- ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 +- nop ++ ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault ++ nop + nop + nop + nop +--- a/arch/sparc/kernel/tsb.S ++++ b/arch/sparc/kernel/tsb.S +@@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath: + nop + .previous + +- rdpr %tl, %g3 +- cmp %g3, 1 ++ rdpr %tl, %g7 ++ cmp %g7, 1 + bne,pn %xcc, winfix_trampoline +- nop ++ mov %g3, %g4 + ba,pt %xcc, etrap + rd %pc, %g7 + call hugetlb_setup diff --git a/queue-3.14/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch b/queue-3.14/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch new file mode 100644 index 00000000000..40f4105e7f5 --- /dev/null +++ b/queue-3.14/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch @@ -0,0 +1,107 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Tue, 14 Oct 2014 19:37:58 -0700 +Subject: sparc64: Fix FPU register corruption with AES crypto offload. + +From: "David S. 
Miller" + +[ Upstream commit f4da3628dc7c32a59d1fb7116bb042e6f436d611 ] + +The AES loops in arch/sparc/crypto/aes_glue.c use a scheme where the +key material is preloaded into the FPU registers, and then we loop +over and over doing the crypt operation, reusing those pre-cooked key +registers. + +There are intervening blkcipher*() calls between the crypt operation +calls. And those might perform memcpy() and thus also try to use the +FPU. + +The sparc64 kernel FPU usage mechanism is designed to allow such +recursive uses, but with a catch. + +There has to be a trap between the two FPU using threads of control. + +The mechanism works by, when the FPU is already in use by the kernel, +allocating a slot for FPU saving at trap time. Then if, within the +trap handler, we try to use the FPU registers, the pre-trap FPU +register state is saved into the slot. Then at trap return time we +notice this and restore the pre-trap FPU state. + +Over the long term there are various more involved ways we can make +this work, but for a quick fix let's take advantage of the fact that +the situation where this happens is very limited. + +All sparc64 chips that support the crypto instructiosn also are using +the Niagara4 memcpy routine, and that routine only uses the FPU for +large copies where we can't get the source aligned properly to a +multiple of 8 bytes. + +We look to see if the FPU is already in use in this context, and if so +we use the non-large copy path which only uses integer registers. + +Furthermore, we also limit this special logic to when we are doing +kernel copy, rather than a user copy. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/visasm.h | 8 ++++++++ + arch/sparc/lib/NG4memcpy.S | 14 +++++++++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +--- a/arch/sparc/include/asm/visasm.h ++++ b/arch/sparc/include/asm/visasm.h +@@ -39,6 +39,14 @@ + 297: wr %o5, FPRS_FEF, %fprs; \ + 298: + ++#define VISEntryHalfFast(fail_label) \ ++ rd %fprs, %o5; \ ++ andcc %o5, FPRS_FEF, %g0; \ ++ be,pt %icc, 297f; \ ++ nop; \ ++ ba,a,pt %xcc, fail_label; \ ++297: wr %o5, FPRS_FEF, %fprs; ++ + #define VISExitHalf \ + wr %o5, 0, %fprs; + +--- a/arch/sparc/lib/NG4memcpy.S ++++ b/arch/sparc/lib/NG4memcpy.S +@@ -41,6 +41,10 @@ + #endif + #endif + ++#if !defined(EX_LD) && !defined(EX_ST) ++#define NON_USER_COPY ++#endif ++ + #ifndef EX_LD + #define EX_LD(x) x + #endif +@@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + mov EX_RETVAL(%o3), %o0 + + .Llarge_src_unaligned: ++#ifdef NON_USER_COPY ++ VISEntryHalfFast(.Lmedium_vis_entry_fail) ++#else ++ VISEntryHalf ++#endif + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 +- VISEntryHalf + alignaddr %o1, %g0, %g1 + add %o1, %o4, %o1 + EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) +@@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len + nop + ba,a,pt %icc, .Lmedium_unaligned + ++#ifdef NON_USER_COPY ++.Lmedium_vis_entry_fail: ++ or %o0, %o1, %g2 ++#endif + .Lmedium: + LOAD(prefetch, %o1 + 0x40, #n_reads_strong) + andcc %g2, 0x7, %g0 diff --git a/queue-3.14/sparc64-fix-hibernation-code-refrence-to-page_offset.patch b/queue-3.14/sparc64-fix-hibernation-code-refrence-to-page_offset.patch new file mode 100644 index 00000000000..1c6c6ac2cd4 --- /dev/null +++ b/queue-3.14/sparc64-fix-hibernation-code-refrence-to-page_offset.patch @@ -0,0 +1,29 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Wed, 24 Sep 2014 21:05:30 -0700 +Subject: sparc64: Fix hibernation code refrence to PAGE_OFFSET. 
+ +From: "David S. Miller" + +We changed PAGE_OFFSET to be a variable rather than a constant, +but this reference here in the hibernate assembler got missed. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/power/hibernate_asm.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/sparc/power/hibernate_asm.S ++++ b/arch/sparc/power/hibernate_asm.S +@@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume) + nop + + /* Write PAGE_OFFSET to %g7 */ +- sethi %uhi(PAGE_OFFSET), %g7 +- sllx %g7, 32, %g7 ++ sethi %hi(PAGE_OFFSET), %g7 ++ ldx [%g7 + %lo(PAGE_OFFSET)], %g7 + + setuw (PAGE_SIZE-8), %g3 + diff --git a/queue-3.14/sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch b/queue-3.14/sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch new file mode 100644 index 00000000000..98dd5562574 --- /dev/null +++ b/queue-3.14/sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch @@ -0,0 +1,90 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Fri, 10 Oct 2014 15:49:16 -0400 +Subject: sparc64: Fix lockdep warnings on reboot on Ultra-5 + +From: "David S. Miller" + +[ Upstream commit bdcf81b658ebc4c2640c3c2c55c8b31c601b6996 ] + +Inconsistently, the raw_* IRQ routines do not interact with and update +the irqflags tracing and lockdep state, whereas the raw_* spinlock +interfaces do. + +This causes problems in p1275_cmd_direct() because we disable hardirqs +by hand using raw_local_irq_restore() and then do a raw_spin_lock() +which triggers a lockdep trace because the CPU's hw IRQ state doesn't +match IRQ tracing's internal software copy of that state. + +The CPU's irqs are disabled, yet current->hardirqs_enabled is true. + +==================== +reboot: Restarting system +------------[ cut here ]------------ +WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3536 check_flags+0x7c/0x240() +DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) +Modules linked in: openpromfs +CPU: 0 PID: 1 Comm: systemd-shutdow Tainted: G W 3.17.0-dirty #145 +Call Trace: + [000000000045919c] warn_slowpath_common+0x5c/0xa0 + [0000000000459210] warn_slowpath_fmt+0x30/0x40 + [000000000048f41c] check_flags+0x7c/0x240 + [0000000000493280] lock_acquire+0x20/0x1c0 + [0000000000832b70] _raw_spin_lock+0x30/0x60 + [000000000068f2fc] p1275_cmd_direct+0x1c/0x60 + [000000000068ed28] prom_reboot+0x28/0x40 + [000000000043610c] machine_restart+0x4c/0x80 + [000000000047d2d4] kernel_restart+0x54/0x80 + [000000000047d618] SyS_reboot+0x138/0x200 + [00000000004060b4] linux_sparc_syscall32+0x34/0x60 +---[ end trace 5c439fe81c05a100 ]--- +possible reason: unannotated irqs-off. +irq event stamp: 2010267 +hardirqs last enabled at (2010267): [<000000000049a358>] vprintk_emit+0x4b8/0x580 +hardirqs last disabled at (2010266): [<0000000000499f08>] vprintk_emit+0x68/0x580 +softirqs last enabled at (2010046): [<000000000045d278>] __do_softirq+0x378/0x4a0 +softirqs last disabled at (2010039): [<000000000042bf08>] do_softirq_own_stack+0x28/0x40 +Resetting ... +==================== + +Use local_* variables of the hw IRQ interfaces so that IRQ tracing sees +all of our changes. + +Reported-by: Meelis Roos +Tested-by: Meelis Roos +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/prom/p1275.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/arch/sparc/prom/p1275.c ++++ b/arch/sparc/prom/p1275.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -36,8 +37,8 @@ void p1275_cmd_direct(unsigned long *arg + { + unsigned long flags; + +- raw_local_save_flags(flags); +- raw_local_irq_restore((unsigned long)PIL_NMI); ++ local_save_flags(flags); ++ local_irq_restore((unsigned long)PIL_NMI); + raw_spin_lock(&prom_entry_lock); + + prom_world(1); +@@ -45,7 +46,7 @@ void p1275_cmd_direct(unsigned long *arg + prom_world(0); + + raw_spin_unlock(&prom_entry_lock); +- raw_local_irq_restore(flags); ++ local_irq_restore(flags); + } + + void prom_cif_init(void *cif_handler, void *cif_stack) diff --git a/queue-3.14/sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch b/queue-3.14/sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch new file mode 100644 index 00000000000..7640f487bb6 --- /dev/null +++ b/queue-3.14/sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Mon, 11 Aug 2014 15:38:46 -0700 +Subject: sparc64: Fix pcr_ops initialization and usage bugs. + +From: "David S. Miller" + +[ Upstream commit 8bccf5b313180faefce38e0d1140f76e0f327d28 ] + +Christopher reports that perf_event_print_debug() can crash in uniprocessor +builds. The crash is due to pcr_ops being NULL. + +This happens because pcr_arch_init() is only invoked by smp_cpus_done() which +only executes in SMP builds. + +init_hw_perf_events() is closely intertwined with pcr_ops being setup properly, +therefore: + +1) Call pcr_arch_init() early on from init_hw_perf_events(), instead of + from smp_cpus_done(). + +2) Do not hook up a PMU type if pcr_ops is NULL after pcr_arch_init(). + +3) Move init_hw_perf_events to a later initcall so that it we will be + sure to invoke pcr_arch_init() after all cpus are brought up. + +Finally, guard the one naked sequence of pcr_ops dereferences in +__global_pmu_self() with an appropriate NULL check. + +Reported-by: Christopher Alexander Tobias Schulze +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/perf_event.c | 7 +++++-- + arch/sparc/kernel/process_64.c | 3 +++ + arch/sparc/kernel/smp_64.c | 1 - + 3 files changed, 8 insertions(+), 3 deletions(-) + +--- a/arch/sparc/kernel/perf_event.c ++++ b/arch/sparc/kernel/perf_event.c +@@ -1671,9 +1671,12 @@ static bool __init supported_pmu(void) + + int __init init_hw_perf_events(void) + { ++ int err; ++ + pr_info("Performance events: "); + +- if (!supported_pmu()) { ++ err = pcr_arch_init(); ++ if (err || !supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return 0; + } +@@ -1685,7 +1688,7 @@ int __init init_hw_perf_events(void) + + return 0; + } +-early_initcall(init_hw_perf_events); ++pure_initcall(init_hw_perf_events); + + void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +--- a/arch/sparc/kernel/process_64.c ++++ b/arch/sparc/kernel/process_64.c +@@ -306,6 +306,9 @@ static void __global_pmu_self(int this_c + struct global_pmu_snapshot *pp; + int i, num; + ++ if (!pcr_ops) ++ return; ++ + pp = &global_cpu_snapshot[this_cpu].pmu; + + num = 1; +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -1395,7 +1395,6 @@ void __cpu_die(unsigned int cpu) + + void __init smp_cpus_done(unsigned int max_cpus) + { +- pcr_arch_init(); + } + + void smp_send_reschedule(int cpu) diff --git a/queue-3.14/sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch b/queue-3.14/sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch new file mode 100644 index 00000000000..54773ed4236 --- /dev/null +++ b/queue-3.14/sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch @@ -0,0 +1,952 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Wed, 24 Sep 2014 20:56:11 -0700 +Subject: sparc64: Fix physical memory management regressions with large max_phys_bits. + +From: "David S. Miller" + +[ Upstream commit 0dd5b7b09e13dae32869371e08e1048349fd040c ] + +If max_phys_bits needs to be > 43 (f.e. for T4 chips), things like +DEBUG_PAGEALLOC stop working because the 3-level page tables only +can cover up to 43 bits. + +Another problem is that when we increased MAX_PHYS_ADDRESS_BITS up to +47, several statically allocated tables became enormous. + +Compounding this is that we will need to support up to 49 bits of +physical addressing for M7 chips. + +The two tables in question are sparc64_valid_addr_bitmap and +kpte_linear_bitmap. + +The first holds a bitmap, with 1 bit for each 4MB chunk of physical +memory, indicating whether that chunk actually exists in the machine +and is valid. + +The second table is a set of 2-bit values which tell how large of a +mapping (4MB, 256MB, 2GB, 16GB, respectively) we can use at each 256MB +chunk of ram in the system. + +These tables are huge and take up an enormous amount of the BSS +section of the sparc64 kernel image. Specifically, the +sparc64_valid_addr_bitmap is 4MB, and the kpte_linear_bitmap is 128K. + +So let's solve the space wastage and the DEBUG_PAGEALLOC problem +at the same time, by using the kernel page tables (as designed) to +manage this information. + +We have to keep using large mappings when DEBUG_PAGEALLOC is disabled, +and we do this by encoding huge PMDs and PUDs. + +On a T4-2 with 256GB of ram the kernel page table takes up 16K with +DEBUG_PAGEALLOC disabled and 256MB with it enabled. 
Furthermore, this +memory is dynamically allocated at run time rather than coded +statically into the kernel image. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/page_64.h | 3 + arch/sparc/include/asm/pgtable_64.h | 55 ++--- + arch/sparc/include/asm/tsb.h | 47 +++- + arch/sparc/kernel/ktlb.S | 108 --------- + arch/sparc/kernel/vmlinux.lds.S | 5 + arch/sparc/mm/init_64.c | 393 +++++++++++++++--------------------- + arch/sparc/mm/init_64.h | 7 + 7 files changed, 244 insertions(+), 374 deletions(-) + +--- a/arch/sparc/include/asm/page_64.h ++++ b/arch/sparc/include/asm/page_64.h +@@ -128,9 +128,6 @@ extern unsigned long PAGE_OFFSET; + */ + #define MAX_PHYS_ADDRESS_BITS 47 + +-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap +- * and kpte_linear_bitmap. +- */ + #define ILOG2_4MB 22 + #define ILOG2_256MB 28 + +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -79,22 +79,7 @@ + + #include + +-extern unsigned long sparc64_valid_addr_bitmap[]; +- +-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +-static inline bool __kern_addr_valid(unsigned long paddr) +-{ +- if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL) +- return false; +- return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap); +-} +- +-static inline bool kern_addr_valid(unsigned long addr) +-{ +- unsigned long paddr = __pa(addr); +- +- return __kern_addr_valid(paddr); +-} ++bool kern_addr_valid(unsigned long addr); + + /* Entries per page directory level. */ + #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) +@@ -122,6 +107,7 @@ static inline bool kern_addr_valid(unsig + #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ + #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ + #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ ++#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE + + /* Advertise support for _PAGE_SPECIAL */ + #define __HAVE_ARCH_PTE_SPECIAL +@@ -668,26 +654,26 @@ static inline unsigned long pmd_large(pm + return pte_val(pte) & _PAGE_PMD_HUGE; + } + +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +-static inline unsigned long pmd_young(pmd_t pmd) ++static inline unsigned long pmd_pfn(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_young(pte); ++ return pte_pfn(pte); + } + +-static inline unsigned long pmd_write(pmd_t pmd) ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++static inline unsigned long pmd_young(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_write(pte); ++ return pte_young(pte); + } + +-static inline unsigned long pmd_pfn(pmd_t pmd) ++static inline unsigned long pmd_write(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_pfn(pte); ++ return pte_write(pte); + } + + static inline unsigned long pmd_trans_huge(pmd_t pmd) +@@ -781,18 +767,15 @@ static inline int pmd_present(pmd_t pmd) + * the top bits outside of the range of any physical address size we + * support are clear as well. We also validate the physical itself. 
+ */ +-#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \ +- !__kern_addr_valid(pmd_val(pmd))) ++#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK) + + #define pud_none(pud) (!pud_val(pud)) + +-#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \ +- !__kern_addr_valid(pud_val(pud))) ++#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) + + #define pgd_none(pgd) (!pgd_val(pgd)) + +-#define pgd_bad(pgd) ((pgd_val(pgd) & ~PAGE_MASK) || \ +- !__kern_addr_valid(pgd_val(pgd))) ++#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, +@@ -835,6 +818,20 @@ static inline unsigned long __pmd_page(p + #define pgd_present(pgd) (pgd_val(pgd) != 0U) + #define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + ++static inline unsigned long pud_large(pud_t pud) ++{ ++ pte_t pte = __pte(pud_val(pud)); ++ ++ return pte_val(pte) & _PAGE_PMD_HUGE; ++} ++ ++static inline unsigned long pud_pfn(pud_t pud) ++{ ++ pte_t pte = __pte(pud_val(pud)); ++ ++ return pte_pfn(pte); ++} ++ + /* Same in both SUN4V and SUN4U. */ + #define pte_none(pte) (!pte_val(pte)) + +--- a/arch/sparc/include/asm/tsb.h ++++ b/arch/sparc/include/asm/tsb.h +@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb + sub TSB, 0x8, TSB; \ + TSB_STORE(TSB, TAG); + +- /* Do a kernel page table walk. Leaves physical PTE pointer in +- * REG1. Jumps to FAIL_LABEL on early page table walk termination. +- * VADDR will not be clobbered, but REG2 will. ++ /* Do a kernel page table walk. Leaves valid PTE value in ++ * REG1. Jumps to FAIL_LABEL on early page table walk ++ * termination. VADDR will not be clobbered, but REG2 will. ++ * ++ * There are two masks we must apply to propagate bits from ++ * the virtual address into the PTE physical address field ++ * when dealing with huge pages. This is because the page ++ * table boundaries do not match the huge page size(s) the ++ * hardware supports. ++ * ++ * In these cases we propagate the bits that are below the ++ * page table level where we saw the huge page mapping, but ++ * are still within the relevant physical bits for the huge ++ * page size in question. So for PMD mappings (which fall on ++ * bit 23, for 8MB per PMD) we must propagate bit 22 for a ++ * 4MB huge page. For huge PUDs (which fall on bit 33, for ++ * 8GB per PUD), we have to accomodate 256MB and 2GB huge ++ * pages. So for those we propagate bits 32 to 28. 
+ */ + #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ + sethi %hi(swapper_pg_dir), REG1; \ +@@ -150,15 +165,35 @@ extern struct tsb_phys_patch_entry __tsb + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ +- sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ ++ sethi %uhi(_PAGE_PUD_HUGE), REG2; \ ++ brz,pn REG1, FAIL_LABEL; \ ++ sllx REG2, 32, REG2; \ ++ andcc REG1, REG2, %g0; \ ++ sethi %hi(0xf8000000), REG2; \ ++ bne,pt %xcc, 697f; \ ++ sllx REG2, 1, REG2; \ ++ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ ++ sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ +- sllx VADDR, 64 - PMD_SHIFT, REG2; \ ++ sllx REG2, 32, REG2; \ ++ andcc REG1, REG2, %g0; \ ++ be,pn %xcc, 698f; \ ++ sethi %hi(0x400000), REG2; \ ++697: brgez,pn REG1, FAIL_LABEL; \ ++ andn REG1, REG2, REG1; \ ++ and VADDR, REG2, REG2; \ ++ ba,pt %xcc, 699f; \ ++ or REG1, REG2, REG1; \ ++698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ +- add REG1, REG2, REG1; ++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ ++ brgez,pn REG1, FAIL_LABEL; \ ++ nop; \ ++699: + + /* PMD has been loaded into REG1, interpret the value, seeing + * if it is a HUGE PMD or a normal one. If it is not valid +--- a/arch/sparc/kernel/ktlb.S ++++ b/arch/sparc/kernel/ktlb.S +@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) + + TSB_LOCK_TAG(%g1, %g2, %g7) +- +- /* Load and check PTE. */ +- ldxa [%g5] ASI_PHYS_USE_EC, %g5 +- mov 1, %g7 +- sllx %g7, TSB_TAG_INVALID_BIT, %g7 +- brgez,a,pn %g5, kvmap_itlb_longpath +- TSB_STORE(%g1, %g7) +- + TSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ +@@ -118,6 +110,12 @@ kvmap_dtlb_obp: + ba,pt %xcc, kvmap_dtlb_load + nop + ++kvmap_linear_early: ++ sethi %hi(kern_linear_pte_xor), %g7 ++ ldx [%g7 + %lo(kern_linear_pte_xor)], %g2 ++ ba,pt %xcc, kvmap_dtlb_tsb4m_load ++ xor %g2, %g4, %g5 ++ + .align 32 + kvmap_dtlb_tsb4m_load: + TSB_LOCK_TAG(%g1, %g2, %g7) +@@ -146,105 +144,17 @@ kvmap_dtlb_4v: + /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ + KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + #endif +- /* TSB entry address left in %g1, lookup linear PTE. +- * Must preserve %g1 and %g6 (TAG). +- */ +-kvmap_dtlb_tsb4m_miss: +- /* Clear the PAGE_OFFSET top virtual bits, shift +- * down to get PFN, and make sure PFN is in range. +- */ +-661: sllx %g4, 0, %g5 +- .section .page_offset_shift_patch, "ax" +- .word 661b +- .previous +- +- /* Check to see if we know about valid memory at the 4MB +- * chunk this physical address will reside within. ++ /* Linear mapping TSB lookup failed. Fallthrough to kernel ++ * page table based lookup. + */ +-661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2 +- .section .page_offset_shift_patch, "ax" +- .word 661b +- .previous +- +- brnz,pn %g2, kvmap_dtlb_longpath +- nop +- +- /* This unconditional branch and delay-slot nop gets patched +- * by the sethi sequence once the bitmap is properly setup. 
+- */ +- .globl valid_addr_bitmap_insn +-valid_addr_bitmap_insn: +- ba,pt %xcc, 2f +- nop +- .subsection 2 +- .globl valid_addr_bitmap_patch +-valid_addr_bitmap_patch: +- sethi %hi(sparc64_valid_addr_bitmap), %g7 +- or %g7, %lo(sparc64_valid_addr_bitmap), %g7 +- .previous +- +-661: srlx %g5, ILOG2_4MB, %g2 +- .section .page_offset_shift_patch, "ax" +- .word 661b +- .previous +- +- srlx %g2, 6, %g5 +- and %g2, 63, %g2 +- sllx %g5, 3, %g5 +- ldx [%g7 + %g5], %g5 +- mov 1, %g7 +- sllx %g7, %g2, %g7 +- andcc %g5, %g7, %g0 +- be,pn %xcc, kvmap_dtlb_longpath +- +-2: sethi %hi(kpte_linear_bitmap), %g2 +- +- /* Get the 256MB physical address index. */ +-661: sllx %g4, 0, %g5 +- .section .page_offset_shift_patch, "ax" +- .word 661b +- .previous +- +- or %g2, %lo(kpte_linear_bitmap), %g2 +- +-661: srlx %g5, ILOG2_256MB, %g5 +- .section .page_offset_shift_patch, "ax" +- .word 661b +- .previous +- +- and %g5, (32 - 1), %g7 +- +- /* Divide by 32 to get the offset into the bitmask. */ +- srlx %g5, 5, %g5 +- add %g7, %g7, %g7 +- sllx %g5, 3, %g5 +- +- /* kern_linear_pte_xor[(mask >> shift) & 3)] */ +- ldx [%g2 + %g5], %g2 +- srlx %g2, %g7, %g7 +- sethi %hi(kern_linear_pte_xor), %g5 +- and %g7, 3, %g7 +- or %g5, %lo(kern_linear_pte_xor), %g5 +- sllx %g7, 3, %g7 +- ldx [%g5 + %g7], %g2 +- + .globl kvmap_linear_patch + kvmap_linear_patch: +- ba,pt %xcc, kvmap_dtlb_tsb4m_load +- xor %g2, %g4, %g5 ++ ba,a,pt %xcc, kvmap_linear_early + + kvmap_dtlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + + TSB_LOCK_TAG(%g1, %g2, %g7) +- +- /* Load and check PTE. */ +- ldxa [%g5] ASI_PHYS_USE_EC, %g5 +- mov 1, %g7 +- sllx %g7, TSB_TAG_INVALID_BIT, %g7 +- brgez,a,pn %g5, kvmap_dtlb_longpath +- TSB_STORE(%g1, %g7) +- + TSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ +--- a/arch/sparc/kernel/vmlinux.lds.S ++++ b/arch/sparc/kernel/vmlinux.lds.S +@@ -122,11 +122,6 @@ SECTIONS + *(.swapper_4m_tsb_phys_patch) + __swapper_4m_tsb_phys_patch_end = .; + } +- .page_offset_shift_patch : { +- __page_offset_shift_patch = .; +- *(.page_offset_shift_patch) +- __page_offset_shift_patch_end = .; +- } + .popc_3insn_patch : { + __popc_3insn_patch = .; + *(.popc_3insn_patch) +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -73,7 +73,6 @@ unsigned long kern_linear_pte_xor[4] __r + * 'cpu' properties, but we need to have this table setup before the + * MDESC is initialized. + */ +-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + + #ifndef CONFIG_DEBUG_PAGEALLOC + /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. +@@ -82,6 +81,7 @@ unsigned long kpte_linear_bitmap[KPTE_BI + */ + extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; + #endif ++extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + + static unsigned long cpu_pgsz_mask; + +@@ -163,10 +163,6 @@ static void __init read_obp_memory(const + cmp_p64, NULL); + } + +-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / +- sizeof(unsigned long)]; +-EXPORT_SYMBOL(sparc64_valid_addr_bitmap); +- + /* Kernel physical address base and size in bytes. 
*/ + unsigned long kern_base __read_mostly; + unsigned long kern_size __read_mostly; +@@ -1363,9 +1359,145 @@ static unsigned long __init bootmem_init + static struct linux_prom64_registers pall[MAX_BANKS] __initdata; + static int pall_ents __initdata; + +-#ifdef CONFIG_DEBUG_PAGEALLOC ++static unsigned long max_phys_bits = 40; ++ ++bool kern_addr_valid(unsigned long addr) ++{ ++ unsigned long above = ((long)addr) >> max_phys_bits; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ if (above != 0 && above != -1UL) ++ return false; ++ ++ if (addr >= (unsigned long) KERNBASE && ++ addr < (unsigned long)&_end) ++ return true; ++ ++ if (addr >= PAGE_OFFSET) { ++ unsigned long pa = __pa(addr); ++ ++ return pfn_valid(pa >> PAGE_SHIFT); ++ } ++ ++ pgd = pgd_offset_k(addr); ++ if (pgd_none(*pgd)) ++ return 0; ++ ++ pud = pud_offset(pgd, addr); ++ if (pud_none(*pud)) ++ return 0; ++ ++ if (pud_large(*pud)) ++ return pfn_valid(pud_pfn(*pud)); ++ ++ pmd = pmd_offset(pud, addr); ++ if (pmd_none(*pmd)) ++ return 0; ++ ++ if (pmd_large(*pmd)) ++ return pfn_valid(pmd_pfn(*pmd)); ++ ++ pte = pte_offset_kernel(pmd, addr); ++ if (pte_none(*pte)) ++ return 0; ++ ++ return pfn_valid(pte_pfn(*pte)); ++} ++EXPORT_SYMBOL(kern_addr_valid); ++ ++static unsigned long __ref kernel_map_hugepud(unsigned long vstart, ++ unsigned long vend, ++ pud_t *pud) ++{ ++ const unsigned long mask16gb = (1UL << 34) - 1UL; ++ u64 pte_val = vstart; ++ ++ /* Each PUD is 8GB */ ++ if ((vstart & mask16gb) || ++ (vend - vstart <= mask16gb)) { ++ pte_val ^= kern_linear_pte_xor[2]; ++ pud_val(*pud) = pte_val | _PAGE_PUD_HUGE; ++ ++ return vstart + PUD_SIZE; ++ } ++ ++ pte_val ^= kern_linear_pte_xor[3]; ++ pte_val |= _PAGE_PUD_HUGE; ++ ++ vend = vstart + mask16gb + 1UL; ++ while (vstart < vend) { ++ pud_val(*pud) = pte_val; ++ ++ pte_val += PUD_SIZE; ++ vstart += PUD_SIZE; ++ pud++; ++ } ++ return vstart; ++} ++ ++static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend, ++ bool guard) ++{ ++ if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE) ++ return true; ++ ++ return false; ++} ++ ++static unsigned long __ref kernel_map_hugepmd(unsigned long vstart, ++ unsigned long vend, ++ pmd_t *pmd) ++{ ++ const unsigned long mask256mb = (1UL << 28) - 1UL; ++ const unsigned long mask2gb = (1UL << 31) - 1UL; ++ u64 pte_val = vstart; ++ ++ /* Each PMD is 8MB */ ++ if ((vstart & mask256mb) || ++ (vend - vstart <= mask256mb)) { ++ pte_val ^= kern_linear_pte_xor[0]; ++ pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE; ++ ++ return vstart + PMD_SIZE; ++ } ++ ++ if ((vstart & mask2gb) || ++ (vend - vstart <= mask2gb)) { ++ pte_val ^= kern_linear_pte_xor[1]; ++ pte_val |= _PAGE_PMD_HUGE; ++ vend = vstart + mask256mb + 1UL; ++ } else { ++ pte_val ^= kern_linear_pte_xor[2]; ++ pte_val |= _PAGE_PMD_HUGE; ++ vend = vstart + mask2gb + 1UL; ++ } ++ ++ while (vstart < vend) { ++ pmd_val(*pmd) = pte_val; ++ ++ pte_val += PMD_SIZE; ++ vstart += PMD_SIZE; ++ pmd++; ++ } ++ ++ return vstart; ++} ++ ++static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend, ++ bool guard) ++{ ++ if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE) ++ return true; ++ ++ return false; ++} ++ + static unsigned long __ref kernel_map_range(unsigned long pstart, +- unsigned long pend, pgprot_t prot) ++ unsigned long pend, pgprot_t prot, ++ bool use_huge) + { + unsigned long vstart = PAGE_OFFSET + pstart; + unsigned long vend = PAGE_OFFSET + pend; +@@ -1395,15 +1527,23 @@ static unsigned long __ref kernel_map_ra + if 
(pud_none(*pud)) { + pmd_t *new; + ++ if (kernel_can_map_hugepud(vstart, vend, use_huge)) { ++ vstart = kernel_map_hugepud(vstart, vend, pud); ++ continue; ++ } + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, vstart); +- if (!pmd_present(*pmd)) { ++ if (pmd_none(*pmd)) { + pte_t *new; + ++ if (kernel_can_map_hugepmd(vstart, vend, use_huge)) { ++ vstart = kernel_map_hugepmd(vstart, vend, pmd); ++ continue; ++ } + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pmd_populate_kernel(&init_mm, pmd, new); +@@ -1426,100 +1566,34 @@ static unsigned long __ref kernel_map_ra + return alloc_bytes; + } + +-extern unsigned int kvmap_linear_patch[1]; +-#endif /* CONFIG_DEBUG_PAGEALLOC */ +- +-static void __init kpte_set_val(unsigned long index, unsigned long val) +-{ +- unsigned long *ptr = kpte_linear_bitmap; +- +- val <<= ((index % (BITS_PER_LONG / 2)) * 2); +- ptr += (index / (BITS_PER_LONG / 2)); +- +- *ptr |= val; +-} +- +-static const unsigned long kpte_shift_min = 28; /* 256MB */ +-static const unsigned long kpte_shift_max = 34; /* 16GB */ +-static const unsigned long kpte_shift_incr = 3; +- +-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, +- unsigned long shift) ++static void __init flush_all_kernel_tsbs(void) + { +- unsigned long size = (1UL << shift); +- unsigned long mask = (size - 1UL); +- unsigned long remains = end - start; +- unsigned long val; +- +- if (remains < size || (start & mask)) +- return start; +- +- /* VAL maps: +- * +- * shift 28 --> kern_linear_pte_xor index 1 +- * shift 31 --> kern_linear_pte_xor index 2 +- * shift 34 --> kern_linear_pte_xor index 3 +- */ +- val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; +- +- remains &= ~mask; +- if (shift != kpte_shift_max) +- remains = size; +- +- while (remains) { +- unsigned long index = start >> kpte_shift_min; ++ int i; + +- kpte_set_val(index, val); ++ for (i = 0; i < KERNEL_TSB_NENTRIES; i++) { ++ struct tsb *ent = &swapper_tsb[i]; + +- start += 1UL << kpte_shift_min; +- remains -= 1UL << kpte_shift_min; ++ ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } ++#ifndef CONFIG_DEBUG_PAGEALLOC ++ for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) { ++ struct tsb *ent = &swapper_4m_tsb[i]; + +- return start; +-} +- +-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +-{ +- unsigned long smallest_size, smallest_mask; +- unsigned long s; +- +- smallest_size = (1UL << kpte_shift_min); +- smallest_mask = (smallest_size - 1UL); +- +- while (start < end) { +- unsigned long orig_start = start; +- +- for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { +- start = kpte_mark_using_shift(start, end, s); +- +- if (start != orig_start) +- break; +- } +- +- if (start == orig_start) +- start = (start + smallest_size) & ~smallest_mask; ++ ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } ++#endif + } + +-static void __init init_kpte_bitmap(void) +-{ +- unsigned long i; +- +- for (i = 0; i < pall_ents; i++) { +- unsigned long phys_start, phys_end; +- +- phys_start = pall[i].phys_addr; +- phys_end = phys_start + pall[i].reg_size; +- +- mark_kpte_bitmap(phys_start, phys_end); +- } +-} ++extern unsigned int kvmap_linear_patch[1]; + + static void __init kernel_physical_mapping_init(void) + { +-#ifdef CONFIG_DEBUG_PAGEALLOC + unsigned long i, mem_alloced = 0UL; ++ bool use_huge = true; + ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ use_huge = false; ++#endif + 
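++ /* Editor's note, not part of the upstream patch: use_huge is
++ * forced off under DEBUG_PAGEALLOC because kernel_map_pages()
++ * below has to map and unmap individual base pages, which the
++ * huge linear mappings built here would not allow.
++ */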
for (i = 0; i < pall_ents; i++) { + unsigned long phys_start, phys_end; + +@@ -1527,7 +1601,7 @@ static void __init kernel_physical_mappi + phys_end = phys_start + pall[i].reg_size; + + mem_alloced += kernel_map_range(phys_start, phys_end, +- PAGE_KERNEL); ++ PAGE_KERNEL, use_huge); + } + + printk("Allocated %ld bytes for kernel page tables.\n", +@@ -1536,8 +1610,9 @@ static void __init kernel_physical_mappi + kvmap_linear_patch[0] = 0x01000000; /* nop */ + flushi(&kvmap_linear_patch[0]); + ++ flush_all_kernel_tsbs(); ++ + __flush_tlb_all(); +-#endif + } + + #ifdef CONFIG_DEBUG_PAGEALLOC +@@ -1547,7 +1622,7 @@ void kernel_map_pages(struct page *page, + unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); + + kernel_map_range(phys_start, phys_end, +- (enable ? PAGE_KERNEL : __pgprot(0))); ++ (enable ? PAGE_KERNEL : __pgprot(0)), false); + + flush_tsb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); +@@ -1575,62 +1650,11 @@ unsigned long __init find_ecache_flush_s + unsigned long PAGE_OFFSET; + EXPORT_SYMBOL(PAGE_OFFSET); + +-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) +-{ +- unsigned long final_shift; +- unsigned int val = *insn; +- unsigned int cnt; +- +- /* We are patching in ilog2(max_supported_phys_address), and +- * we are doing so in a manner similar to a relocation addend. +- * That is, we are adding the shift value to whatever value +- * is in the shift instruction count field already. +- */ +- cnt = (val & 0x3f); +- val &= ~0x3f; +- +- /* If we are trying to shift >= 64 bits, clear the destination +- * register. This can happen when phys_bits ends up being equal +- * to MAX_PHYS_ADDRESS_BITS. +- */ +- final_shift = (cnt + (64 - phys_bits)); +- if (final_shift >= 64) { +- unsigned int rd = (val >> 25) & 0x1f; +- +- val = 0x80100000 | (rd << 25); +- } else { +- val |= final_shift; +- } +- *insn = val; +- +- __asm__ __volatile__("flush %0" +- : /* no outputs */ +- : "r" (insn)); +-} +- +-static void __init page_offset_shift_patch(unsigned long phys_bits) +-{ +- extern unsigned int __page_offset_shift_patch; +- extern unsigned int __page_offset_shift_patch_end; +- unsigned int *p; +- +- p = &__page_offset_shift_patch; +- while (p < &__page_offset_shift_patch_end) { +- unsigned int *insn = (unsigned int *)(unsigned long)*p; +- +- page_offset_shift_patch_one(insn, phys_bits); +- +- p++; +- } +-} +- + unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; + unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; + + static void __init setup_page_offset(void) + { +- unsigned long max_phys_bits = 40; +- + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + /* Cheetah/Panther support a full 64-bit virtual + * address, so we can use all that our page tables +@@ -1679,8 +1703,6 @@ static void __init setup_page_offset(voi + + pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); +- +- page_offset_shift_patch(max_phys_bits); + } + + static void __init tsb_phys_patch(void) +@@ -1725,7 +1747,6 @@ static void __init tsb_phys_patch(void) + #define NUM_KTSB_DESCR 1 + #endif + static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; +-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + + /* The swapper TSBs are loaded with a base sequence of: + * +@@ -2024,11 +2045,9 @@ void __init paging_init(void) + + pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t)); + pud_set(&swapper_pud_dir[0], pmd); +- ++ + inherit_prom_mappings(); + +- init_kpte_bitmap(); +- + /* Ok, we can use our TLB 
miss and window trap handlers safely. */ + setup_tba(); + +@@ -2135,70 +2154,6 @@ int page_in_phys_avail(unsigned long pad + return 0; + } + +-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; +-static int pavail_rescan_ents __initdata; +- +-/* Certain OBP calls, such as fetching "available" properties, can +- * claim physical memory. So, along with initializing the valid +- * address bitmap, what we do here is refetch the physical available +- * memory list again, and make sure it provides at least as much +- * memory as 'pavail' does. +- */ +-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) +-{ +- int i; +- +- read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); +- +- for (i = 0; i < pavail_ents; i++) { +- unsigned long old_start, old_end; +- +- old_start = pavail[i].phys_addr; +- old_end = old_start + pavail[i].reg_size; +- while (old_start < old_end) { +- int n; +- +- for (n = 0; n < pavail_rescan_ents; n++) { +- unsigned long new_start, new_end; +- +- new_start = pavail_rescan[n].phys_addr; +- new_end = new_start + +- pavail_rescan[n].reg_size; +- +- if (new_start <= old_start && +- new_end >= (old_start + PAGE_SIZE)) { +- set_bit(old_start >> ILOG2_4MB, bitmap); +- goto do_next_page; +- } +- } +- +- prom_printf("mem_init: Lost memory in pavail\n"); +- prom_printf("mem_init: OLD start[%lx] size[%lx]\n", +- pavail[i].phys_addr, +- pavail[i].reg_size); +- prom_printf("mem_init: NEW start[%lx] size[%lx]\n", +- pavail_rescan[i].phys_addr, +- pavail_rescan[i].reg_size); +- prom_printf("mem_init: Cannot continue, aborting.\n"); +- prom_halt(); +- +- do_next_page: +- old_start += PAGE_SIZE; +- } +- } +-} +- +-static void __init patch_tlb_miss_handler_bitmap(void) +-{ +- extern unsigned int valid_addr_bitmap_insn[]; +- extern unsigned int valid_addr_bitmap_patch[]; +- +- valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; +- mb(); +- valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; +- flushi(&valid_addr_bitmap_insn[0]); +-} +- + static void __init register_page_bootmem_info(void) + { + #ifdef CONFIG_NEED_MULTIPLE_NODES +@@ -2211,18 +2166,6 @@ static void __init register_page_bootmem + } + void __init mem_init(void) + { +- unsigned long addr, last; +- +- addr = PAGE_OFFSET + kern_base; +- last = PAGE_ALIGN(kern_size) + addr; +- while (addr < last) { +- set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap); +- addr += PAGE_SIZE; +- } +- +- setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); +- patch_tlb_miss_handler_bitmap(); +- + high_memory = __va(last_valid_pfn << PAGE_SHIFT); + + register_page_bootmem_info(); +--- a/arch/sparc/mm/init_64.h ++++ b/arch/sparc/mm/init_64.h +@@ -8,15 +8,8 @@ + */ + + #define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) +-#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) +-#define KPTE_BITMAP_BYTES \ +- ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) +-#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) +-#define VALID_ADDR_BITMAP_BYTES \ +- ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) + + extern unsigned long kern_linear_pte_xor[4]; +-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + extern unsigned int sparc64_highest_unlocked_tlb_ent; + extern unsigned long sparc64_kern_pri_context; + extern unsigned long sparc64_kern_pri_nuc_bits; diff --git a/queue-3.14/sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch 
b/queue-3.14/sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch
new file mode 100644
index 00000000000..df79296925b
--- /dev/null
+++ b/queue-3.14/sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch
@@ -0,0 +1,320 @@
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller"
+Date: Thu, 23 Oct 2014 12:58:13 -0700
+Subject: sparc64: Fix register corruption in top-most kernel stack frame during boot.
+
+From: "David S. Miller"
+
+[ Upstream commit ef3e035c3a9b81da8a778bc333d10637acf6c199 ]
+
+Meelis Roos reported that kernels built with gcc-4.9 do not boot; we
+eventually narrowed this down to only impacting machines using
+UltraSPARC-III and derivative cpus.
+
+The crash happens right when the first user process is spawned:
+
+[ 54.451346] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
+[ 54.451346]
+[ 54.571516] CPU: 1 PID: 1 Comm: init Not tainted 3.16.0-rc2-00211-gd7933ab #96
+[ 54.666431] Call Trace:
+[ 54.698453] [0000000000762f8c] panic+0xb0/0x224
+[ 54.759071] [000000000045cf68] do_exit+0x948/0x960
+[ 54.823123] [000000000042cbc0] fault_in_user_windows+0xe0/0x100
+[ 54.902036] [0000000000404ad0] __handle_user_windows+0x0/0x10
+[ 54.978662] Press Stop-A (L1-A) to return to the boot prom
+[ 55.050713] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
+
+Further investigation showed that compiling only per_cpu_patch() with
+an older compiler fixes the boot.
+
+Detailed analysis showed that the function is not being miscompiled by
+gcc-4.9, but it is using a different register allocation ordering.
+
+With the gcc-4.9 compiled function, something during the code patching
+causes some of the %i* input registers to get corrupted. Perhaps
+we have a TLB miss path into the firmware that is deep enough to
+cause a register window spill and subsequent restore when we get
+back from the TLB miss trap.
+
+Let's plug this up by doing two things:
+
+1) Stop using the firmware stack for client interface calls into
+ the firmware. Just use the kernel's stack.
+
+2) As soon as we can, call into a new function "start_early_boot()"
+ to put a one-register-window buffer between the firmware's
+ deepest stack frame and the top-most initial kernel one.
+
+Reported-by: Meelis Roos
+Tested-by: Meelis Roos
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/oplib_64.h | 3 +-
+ arch/sparc/include/asm/setup.h | 4 +++
+ arch/sparc/kernel/entry.h | 3 --
+ arch/sparc/kernel/head_64.S | 40 +++-----------------------------------
+ arch/sparc/kernel/hvtramp.S | 1
+ arch/sparc/kernel/setup_64.c | 28 +++++++++++++++++++-------
+ arch/sparc/kernel/trampoline_64.S | 12 ++++++-----
+ arch/sparc/prom/cif.S | 5 +---
+ arch/sparc/prom/init_64.c | 6 ++---
+ arch/sparc/prom/p1275.c | 2 -
+ 10 files changed, 42 insertions(+), 62 deletions(-)
+
+--- a/arch/sparc/include/asm/oplib_64.h
++++ b/arch/sparc/include/asm/oplib_64.h
+@@ -62,7 +62,8 @@ struct linux_mem_p1275 {
+ /* You must call prom_init() before using any of the library services,
+ * preferably as early as possible. Pass it the romvec pointer.
+ */
+-extern void prom_init(void *cif_handler, void *cif_stack);
++extern void prom_init(void *cif_handler);
++extern void prom_init_report(void);
+
+ /* Boot argument acquisition, returns the boot command line string.
*/ + extern char *prom_getbootargs(void); +--- a/arch/sparc/include/asm/setup.h ++++ b/arch/sparc/include/asm/setup.h +@@ -24,6 +24,10 @@ static inline int con_is_present(void) + } + #endif + ++#ifdef CONFIG_SPARC64 ++extern void __init start_early_boot(void); ++#endif ++ + extern void sun_do_break(void); + extern int stop_a_enabled; + extern int scons_pwroff; +--- a/arch/sparc/kernel/entry.h ++++ b/arch/sparc/kernel/entry.h +@@ -66,13 +66,10 @@ struct pause_patch_entry { + extern struct pause_patch_entry __pause_3insn_patch, + __pause_3insn_patch_end; + +-extern void __init per_cpu_patch(void); + extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, + struct sun4v_1insn_patch_entry *); + extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); +-extern void __init sun4v_patch(void); +-extern void __init boot_cpu_id_too_large(int cpu); + extern unsigned int dcache_parity_tl1_occurred; + extern unsigned int icache_parity_tl1_occurred; + +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -672,14 +672,12 @@ tlb_fixup_done: + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 + ldx [%g6 + TI_TASK], %g4 +- mov %sp, %l6 + + wr %g0, ASI_P, %asi + mov 1, %g1 + sllx %g1, THREAD_SHIFT, %g1 + sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 + add %g6, %g1, %sp +- mov 0, %fp + + /* Set per-cpu pointer initially to zero, this makes + * the boot-cpu use the in-kernel-image per-cpu areas +@@ -706,44 +704,14 @@ tlb_fixup_done: + nop + #endif + +- mov %l6, %o1 ! OpenPROM stack + call prom_init + mov %l7, %o0 ! OpenPROM cif handler + +- /* Initialize current_thread_info()->cpu as early as possible. +- * In order to do that accurately we have to patch up the get_cpuid() +- * assembler sequences. And that, in turn, requires that we know +- * if we are on a Starfire box or not. While we're here, patch up +- * the sun4v sequences as well. ++ /* To create a one-register-window buffer between the kernel's ++ * initial stack and the last stack frame we use from the firmware, ++ * do the rest of the boot from a C helper function. + */ +- call check_if_starfire +- nop +- call per_cpu_patch +- nop +- call sun4v_patch +- nop +- +-#ifdef CONFIG_SMP +- call hard_smp_processor_id +- nop +- cmp %o0, NR_CPUS +- blu,pt %xcc, 1f +- nop +- call boot_cpu_id_too_large +- nop +- /* Not reached... */ +- +-1: +-#else +- mov 0, %o0 +-#endif +- sth %o0, [%g6 + TI_CPU] +- +- call prom_init_report +- nop +- +- /* Off we go.... */ +- call start_kernel ++ call start_early_boot + nop + /* Not reached... 
*/ + +--- a/arch/sparc/kernel/hvtramp.S ++++ b/arch/sparc/kernel/hvtramp.S +@@ -109,7 +109,6 @@ hv_cpu_startup: + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp +- mov 0, %fp + + call init_irqwork_curcpu + nop +--- a/arch/sparc/kernel/setup_64.c ++++ b/arch/sparc/kernel/setup_64.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE]; + + static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; + +-void __init per_cpu_patch(void) ++static void __init per_cpu_patch(void) + { + struct cpuid_patch_entry *p; + unsigned long ver; +@@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4 + } + } + +-void __init sun4v_patch(void) ++static void __init sun4v_patch(void) + { + extern void sun4v_hvapi_init(void); + +@@ -335,14 +336,25 @@ static void __init pause_patch(void) + } + } + +-#ifdef CONFIG_SMP +-void __init boot_cpu_id_too_large(int cpu) ++void __init start_early_boot(void) + { +- prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", +- cpu, NR_CPUS); +- prom_halt(); ++ int cpu; ++ ++ check_if_starfire(); ++ per_cpu_patch(); ++ sun4v_patch(); ++ ++ cpu = hard_smp_processor_id(); ++ if (cpu >= NR_CPUS) { ++ prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", ++ cpu, NR_CPUS); ++ prom_halt(); ++ } ++ current_thread_info()->cpu = cpu; ++ ++ prom_init_report(); ++ start_kernel(); + } +-#endif + + /* On Ultra, we support all of the v8 capabilities. */ + unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | +--- a/arch/sparc/kernel/trampoline_64.S ++++ b/arch/sparc/kernel/trampoline_64.S +@@ -109,10 +109,13 @@ startup_continue: + brnz,pn %g1, 1b + nop + +- sethi %hi(p1275buf), %g2 +- or %g2, %lo(p1275buf), %g2 +- ldx [%g2 + 0x10], %l2 +- add %l2, -(192 + 128), %sp ++ /* Get onto temporary stack which will be in the locked ++ * kernel image. ++ */ ++ sethi %hi(tramp_stack), %g1 ++ or %g1, %lo(tramp_stack), %g1 ++ add %g1, TRAMP_STACK_SIZE, %g1 ++ sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp + flushw + + /* Setup the loop variables: +@@ -394,7 +397,6 @@ after_lock_tlb: + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp +- mov 0, %fp + + rdpr %pstate, %o1 + or %o1, PSTATE_IE, %o1 +--- a/arch/sparc/prom/cif.S ++++ b/arch/sparc/prom/cif.S +@@ -11,11 +11,10 @@ + .text + .globl prom_cif_direct + prom_cif_direct: ++ save %sp, -192, %sp + sethi %hi(p1275buf), %o1 + or %o1, %lo(p1275buf), %o1 +- ldx [%o1 + 0x0010], %o2 ! prom_cif_stack +- save %o2, -192, %sp +- ldx [%i1 + 0x0008], %l2 ! prom_cif_handler ++ ldx [%o1 + 0x0008], %l2 ! prom_cif_handler + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 +--- a/arch/sparc/prom/init_64.c ++++ b/arch/sparc/prom/init_64.c +@@ -26,13 +26,13 @@ phandle prom_chosen_node; + * It gets passed the pointer to the PROM vector. 
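++ *
++ * (Editor's note, not part of the upstream comment: prom_init() no
++ * longer receives a cif_stack argument; as the cif.S hunk above
++ * shows, prom_cif_direct now builds its own frame on the kernel
++ * stack with "save %sp, -192, %sp".)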
+ */ + +-extern void prom_cif_init(void *, void *); ++extern void prom_cif_init(void *); + +-void __init prom_init(void *cif_handler, void *cif_stack) ++void __init prom_init(void *cif_handler) + { + phandle node; + +- prom_cif_init(cif_handler, cif_stack); ++ prom_cif_init(cif_handler); + + prom_chosen_node = prom_finddevice(prom_chosen_path); + if (!prom_chosen_node || (s32)prom_chosen_node == -1) +--- a/arch/sparc/prom/p1275.c ++++ b/arch/sparc/prom/p1275.c +@@ -20,7 +20,6 @@ + struct { + long prom_callback; /* 0x00 */ + void (*prom_cif_handler)(long *); /* 0x08 */ +- unsigned long prom_cif_stack; /* 0x10 */ + } p1275buf; + + extern void prom_world(int); +@@ -52,5 +51,4 @@ void p1275_cmd_direct(unsigned long *arg + void prom_cif_init(void *cif_handler, void *cif_stack) + { + p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; +- p1275buf.prom_cif_stack = (unsigned long)cif_stack; + } diff --git a/queue-3.14/sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch b/queue-3.14/sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch new file mode 100644 index 00000000000..fadc7812126 --- /dev/null +++ b/queue-3.14/sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch @@ -0,0 +1,67 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Sat, 4 Oct 2014 21:05:14 -0700 +Subject: sparc64: Fix reversed start/end in flush_tlb_kernel_range() + +From: "David S. Miller" + +[ Upstream commit 473ad7f4fb005d1bb727e4ef27d370d28703a062 ] + +When we have to split up a flush request into multiple pieces +(in order to avoid the firmware range) we don't specify the +arguments in the right order for the second piece. + +Fix the order, or else we get hangs as the code tries to +flush "a lot" of entries and we get lockups like this: + +[ 4422.981276] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 23s! 
[expect:117032] +[ 4422.996130] Modules linked in: ipv6 loop usb_storage igb ptp sg sr_mod ehci_pci ehci_hcd pps_core n2_rng rng_core +[ 4423.016617] CPU: 12 PID: 117032 Comm: expect Not tainted 3.17.0-rc4+ #1608 +[ 4423.030331] task: fff8003cc730e220 ti: fff8003d99d54000 task.ti: fff8003d99d54000 +[ 4423.045282] TSTATE: 0000000011001602 TPC: 00000000004521e8 TNPC: 00000000004521ec Y: 00000000 Not tainted +[ 4423.064905] TPC: <__flush_tlb_kernel_range+0x28/0x40> +[ 4423.074964] g0: 000000000052fd10 g1: 00000001295a8000 g2: ffffff7176ffc000 g3: 0000000000002000 +[ 4423.092324] g4: fff8003cc730e220 g5: fff8003dfedcc000 g6: fff8003d99d54000 g7: 0000000000000006 +[ 4423.109687] o0: 0000000000000000 o1: 0000000000000000 o2: 0000000000000003 o3: 00000000f0000000 +[ 4423.127058] o4: 0000000000000080 o5: 00000001295a8000 sp: fff8003d99d56d01 ret_pc: 000000000052ff54 +[ 4423.145121] RPC: <__purge_vmap_area_lazy+0x314/0x3a0> +[ 4423.155185] l0: 0000000000000000 l1: 0000000000000000 l2: 0000000000a38040 l3: 0000000000000000 +[ 4423.172559] l4: fff8003dae8965e0 l5: ffffffffffffffff l6: 0000000000000000 l7: 00000000f7e2b138 +[ 4423.189913] i0: fff8003d99d576a0 i1: fff8003d99d576a8 i2: fff8003d99d575e8 i3: 0000000000000000 +[ 4423.207284] i4: 0000000000008008 i5: fff8003d99d575c8 i6: fff8003d99d56df1 i7: 0000000000530c24 +[ 4423.224640] I7: +[ 4423.234193] Call Trace: +[ 4423.239051] [0000000000530c24] free_vmap_area_noflush+0x64/0x80 +[ 4423.251029] [0000000000531a7c] remove_vm_area+0x5c/0x80 +[ 4423.261628] [0000000000531b80] __vunmap+0x20/0x120 +[ 4423.271352] [000000000071cf18] n_tty_close+0x18/0x40 +[ 4423.281423] [00000000007222b0] tty_ldisc_close+0x30/0x60 +[ 4423.292183] [00000000007225a4] tty_ldisc_reinit+0x24/0xa0 +[ 4423.303120] [0000000000722ab4] tty_ldisc_hangup+0xd4/0x1e0 +[ 4423.314232] [0000000000719aa0] __tty_hangup+0x280/0x3c0 +[ 4423.324835] [0000000000724cb4] pty_close+0x134/0x1a0 +[ 4423.334905] [000000000071aa24] tty_release+0x104/0x500 +[ 4423.345316] [00000000005511d0] __fput+0x90/0x1e0 +[ 4423.354701] [000000000047fa54] task_work_run+0x94/0xe0 +[ 4423.365126] [0000000000404b44] __handle_signal+0xc/0x2c + +Fixes: 4ca9a23765da ("sparc64: Guard against flushing openfirmware mappings.") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -2720,8 +2720,8 @@ void flush_tlb_kernel_range(unsigned lon + do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS); + } + if (end > HI_OBP_ADDRESS) { +- flush_tsb_kernel_range(end, HI_OBP_ADDRESS); +- do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS); ++ flush_tsb_kernel_range(HI_OBP_ADDRESS, end); ++ do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end); + } + } else { + flush_tsb_kernel_range(start, end); diff --git a/queue-3.14/sparc64-implement-__get_user_pages_fast.patch b/queue-3.14/sparc64-implement-__get_user_pages_fast.patch new file mode 100644 index 00000000000..d36c2bd3d40 --- /dev/null +++ b/queue-3.14/sparc64-implement-__get_user_pages_fast.patch @@ -0,0 +1,62 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Fri, 24 Oct 2014 09:59:02 -0700 +Subject: sparc64: Implement __get_user_pages_fast(). + +From: "David S. 
Miller" + +[ Upstream commit 06090e8ed89ea2113a236befb41f71d51f100e60 ] + +It is not sufficient to only implement get_user_pages_fast(), you +must also implement the atomic version __get_user_pages_fast() +otherwise you end up using the weak symbol fallback implementation +which simply returns zero. + +This is dangerous, because it causes the futex code to loop forever +if transparent hugepages are supported (see get_futex_key()). + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/gup.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +--- a/arch/sparc/mm/gup.c ++++ b/arch/sparc/mm/gup.c +@@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsi + return 1; + } + ++int __get_user_pages_fast(unsigned long start, int nr_pages, int write, ++ struct page **pages) ++{ ++ struct mm_struct *mm = current->mm; ++ unsigned long addr, len, end; ++ unsigned long next, flags; ++ pgd_t *pgdp; ++ int nr = 0; ++ ++ start &= PAGE_MASK; ++ addr = start; ++ len = (unsigned long) nr_pages << PAGE_SHIFT; ++ end = start + len; ++ ++ local_irq_save(flags); ++ pgdp = pgd_offset(mm, addr); ++ do { ++ pgd_t pgd = *pgdp; ++ ++ next = pgd_addr_end(addr, end); ++ if (pgd_none(pgd)) ++ break; ++ if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) ++ break; ++ } while (pgdp++, addr = next, addr != end); ++ local_irq_restore(flags); ++ ++ return nr; ++} ++ + int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) + { diff --git a/queue-3.14/sparc64-increase-max_phys_address_bits-to-53.patch b/queue-3.14/sparc64-increase-max_phys_address_bits-to-53.patch new file mode 100644 index 00000000000..6478578dd7f --- /dev/null +++ b/queue-3.14/sparc64-increase-max_phys_address_bits-to-53.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Wed, 24 Sep 2014 21:49:29 -0700 +Subject: sparc64: Increase MAX_PHYS_ADDRESS_BITS to 53. + +From: "David S. Miller" + +Make sure, at compile time, that the kernel can properly support +whatever MAX_PHYS_ADDRESS_BITS is defined to. + +On M7 chips, use a max_phys_bits value of 49. + +Based upon a patch by Bob Picco. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/page_64.h | 8 ++++---- + arch/sparc/include/asm/pgtable_64.h | 4 ++++ + arch/sparc/mm/init_64.c | 9 ++++++++- + 3 files changed, 16 insertions(+), 5 deletions(-) + +--- a/arch/sparc/include/asm/page_64.h ++++ b/arch/sparc/include/asm/page_64.h +@@ -122,11 +122,11 @@ extern unsigned long PAGE_OFFSET; + + #endif /* !(__ASSEMBLY__) */ + +-/* The maximum number of physical memory address bits we support, this +- * is used to size various tables used to manage kernel TLB misses and +- * also the sparsemem code. ++/* The maximum number of physical memory address bits we support. The ++ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" ++ * evaluates to. 
+ */ +-#define MAX_PHYS_ADDRESS_BITS 47 ++#define MAX_PHYS_ADDRESS_BITS 53 + + #define ILOG2_4MB 22 + #define ILOG2_256MB 28 +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -67,6 +67,10 @@ + #define PGDIR_MASK (~(PGDIR_SIZE-1)) + #define PGDIR_BITS (PAGE_SHIFT - 3) + ++#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS) ++#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support ++#endif ++ + #if (PGDIR_SHIFT + PGDIR_BITS) != 53 + #error Page table parameters do not cover virtual address space properly. + #endif +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1684,12 +1684,19 @@ static void __init setup_page_offset(voi + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: +- default: ++ case SUN4V_CHIP_SPARC_M6: + /* T4 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 47; + break; ++ case SUN4V_CHIP_SPARC_M7: ++ default: ++ /* M7 and later support 52-bit virtual addresses. */ ++ sparc64_va_hole_top = 0xfff8000000000000UL; ++ sparc64_va_hole_bottom = 0x0008000000000000UL; ++ max_phys_bits = 49; ++ break; + } + } + diff --git a/queue-3.14/sparc64-increase-size-of-boot-string-to-1024-bytes.patch b/queue-3.14/sparc64-increase-size-of-boot-string-to-1024-bytes.patch new file mode 100644 index 00000000000..ea64618a94e --- /dev/null +++ b/queue-3.14/sparc64-increase-size-of-boot-string-to-1024-bytes.patch @@ -0,0 +1,35 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: Dave Kleikamp +Date: Tue, 7 Oct 2014 08:12:37 -0500 +Subject: sparc64: Increase size of boot string to 1024 bytes + +From: Dave Kleikamp + +[ Upstream commit 1cef94c36bd4d79b5ae3a3df99ee0d76d6a4a6dc ] + +This is the longest boot string that silo supports. + +Signed-off-by: Dave Kleikamp +Cc: Bob Picco +Cc: David S. Miller +Cc: sparclinux@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/prom/bootstr_64.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/sparc/prom/bootstr_64.c ++++ b/arch/sparc/prom/bootstr_64.c +@@ -14,7 +14,10 @@ + * the .bss section or it will break things. + */ + +-#define BARG_LEN 256 ++/* We limit BARG_LEN to 1024 because this is the size of the ++ * 'barg_out' command line buffer in the SILO bootloader. ++ */ ++#define BARG_LEN 1024 + struct { + int bootstr_len; + int bootstr_valid; diff --git a/queue-3.14/sparc64-kill-unnecessary-tables-and-increase-max_banks.patch b/queue-3.14/sparc64-kill-unnecessary-tables-and-increase-max_banks.patch new file mode 100644 index 00000000000..0fc82eb7936 --- /dev/null +++ b/queue-3.14/sparc64-kill-unnecessary-tables-and-increase-max_banks.patch @@ -0,0 +1,113 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Sat, 27 Sep 2014 21:30:57 -0700 +Subject: sparc64: Kill unnecessary tables and increase MAX_BANKS. + +From: "David S. Miller" + +[ Upstream commit d195b71bad4347d2df51072a537f922546a904f1 ] + +swapper_low_pmd_dir and swapper_pud_dir are actually completely +useless and unnecessary. + +We just need swapper_pg_dir[]. Naturally the other page table chunks +will be allocated on an as-needed basis. Since the kernel actually +accesses these tables in the PAGE_OFFSET view, there is not even a TLB +locality advantage of placing them in the kernel image. + +Use the hard coded vmlinux.ld.S slot for swapper_pg_dir which is +naturally page aligned. 
+ +Increase MAX_BANKS to 1024 in order to handle heavily fragmented +virtual guests. + +Even with this MAX_BANKS increase, the kernel is 20K+ smaller. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/pgtable_64.h | 1 - + arch/sparc/kernel/vmlinux.lds.S | 5 +++-- + arch/sparc/mm/init_64.c | 25 ++----------------------- + 3 files changed, 5 insertions(+), 26 deletions(-) + +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -927,7 +927,6 @@ static inline void __set_pte_at(struct m + #endif + + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +-extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD]; + + extern void paging_init(void); + extern unsigned long find_ecache_flush_span(unsigned long size); +--- a/arch/sparc/kernel/vmlinux.lds.S ++++ b/arch/sparc/kernel/vmlinux.lds.S +@@ -35,8 +35,9 @@ jiffies = jiffies_64; + + SECTIONS + { +- /* swapper_low_pmd_dir is sparc64 only */ +- swapper_low_pmd_dir = 0x0000000000402000; ++#ifdef CONFIG_SPARC64 ++ swapper_pg_dir = 0x0000000000402000; ++#endif + . = INITIAL_ADDRESS; + .text TEXTSTART : + { +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -85,7 +85,7 @@ extern struct tsb swapper_tsb[KERNEL_TSB + + static unsigned long cpu_pgsz_mask; + +-#define MAX_BANKS 32 ++#define MAX_BANKS 1024 + + static struct linux_prom64_registers pavail[MAX_BANKS]; + static int pavail_ents; +@@ -1937,12 +1937,6 @@ static void __init sun4v_linear_pte_xor_ + + static unsigned long last_valid_pfn; + +-/* These must be page aligned in order to not trigger the +- * alignment tests of pgd_bad() and pud_bad(). +- */ +-pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((aligned (PAGE_SIZE))); +-static pud_t swapper_pud_dir[PTRS_PER_PUD] __attribute__ ((aligned (PAGE_SIZE))); +- + static void sun4u_pgprot_init(void); + static void sun4v_pgprot_init(void); + +@@ -1950,8 +1944,6 @@ void __init paging_init(void) + { + unsigned long end_pfn, shift, phys_base; + unsigned long real_end, i; +- pud_t *pud; +- pmd_t *pmd; + int node; + + setup_page_offset(); +@@ -2046,20 +2038,7 @@ void __init paging_init(void) + */ + init_mm.pgd += ((shift) / (sizeof(pgd_t))); + +- memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); +- +- /* The kernel page tables we publish into what the rest of the +- * world sees must be adjusted so that they see the PAGE_OFFSET +- * address of these in-kerenel data structures. However right +- * here we must access them from the kernel image side, because +- * the trap tables haven't been taken over and therefore we cannot +- * take TLB misses in the PAGE_OFFSET linear mappings yet. 
+- */ +- pud = swapper_pud_dir + (shift / sizeof(pud_t)); +- pgd_set(&swapper_pg_dir[0], pud); +- +- pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t)); +- pud_set(&swapper_pud_dir[0], pmd); ++ memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); + + inherit_prom_mappings(); + diff --git a/queue-3.14/sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch b/queue-3.14/sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch new file mode 100644 index 00000000000..f583a432479 --- /dev/null +++ b/queue-3.14/sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch @@ -0,0 +1,173 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: Sowmini Varadhan +Date: Tue, 16 Sep 2014 11:37:08 -0400 +Subject: sparc64: Move request_irq() from ldc_bind() to ldc_alloc() + +From: Sowmini Varadhan + +[ Upstream commit c21c4ab0d6921f7160a43216fa6973b5924de561 ] + +The request_irq() needs to be done from ldc_alloc() +to avoid the following (caught by lockdep) + + [00000000004a0738] __might_sleep+0xf8/0x120 + [000000000058bea4] kmem_cache_alloc_trace+0x184/0x2c0 + [00000000004faf80] request_threaded_irq+0x80/0x160 + [000000000044f71c] ldc_bind+0x7c/0x220 + [0000000000452454] vio_port_up+0x54/0xe0 + [00000000101f6778] probe_disk+0x38/0x220 [sunvdc] + [00000000101f6b8c] vdc_port_probe+0x22c/0x300 [sunvdc] + [0000000000451a88] vio_device_probe+0x48/0x60 + [000000000074c56c] really_probe+0x6c/0x300 + [000000000074c83c] driver_probe_device+0x3c/0xa0 + [000000000074c92c] __driver_attach+0x8c/0xa0 + [000000000074a6ec] bus_for_each_dev+0x6c/0xa0 + [000000000074c1dc] driver_attach+0x1c/0x40 + [000000000074b0fc] bus_add_driver+0xbc/0x280 + +Signed-off-by: Sowmini Varadhan +Acked-by: Dwight Engen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/ldc.h | 5 +++-- + arch/sparc/kernel/ds.c | 4 ++-- + arch/sparc/kernel/ldc.c | 41 +++++++++++++++++++++-------------------- + arch/sparc/kernel/viohs.c | 4 ++-- + 4 files changed, 28 insertions(+), 26 deletions(-) + +--- a/arch/sparc/include/asm/ldc.h ++++ b/arch/sparc/include/asm/ldc.h +@@ -53,13 +53,14 @@ struct ldc_channel; + /* Allocate state for a channel. */ + extern struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, +- void *event_arg); ++ void *event_arg, ++ const char *name); + + /* Shut down and free state for a channel. */ + extern void ldc_free(struct ldc_channel *lp); + + /* Register TX and RX queues of the link with the hypervisor. */ +-extern int ldc_bind(struct ldc_channel *lp, const char *name); ++extern int ldc_bind(struct ldc_channel *lp); + + /* For non-RAW protocols we need to complete a handshake before + * communication can proceed. 
ldc_connect() does that, if the +--- a/arch/sparc/kernel/ds.c ++++ b/arch/sparc/kernel/ds.c +@@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev + ds_cfg.tx_irq = vdev->tx_irq; + ds_cfg.rx_irq = vdev->rx_irq; + +- lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); ++ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out_free_ds_states; + } + dp->lp = lp; + +- err = ldc_bind(lp, "DS"); ++ err = ldc_bind(lp); + if (err) + goto out_free_ldc; + +--- a/arch/sparc/kernel/ldc.c ++++ b/arch/sparc/kernel/ldc.c +@@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc + + struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, +- void *event_arg) ++ void *event_arg, ++ const char *name) + { + struct ldc_channel *lp; + const struct ldc_mode_ops *mops; +@@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned l + err = -EINVAL; + if (!cfgp) + goto out_err; ++ if (!name) ++ goto out_err; + + switch (cfgp->mode) { + case LDC_MODE_RAW: +@@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned l + + INIT_HLIST_HEAD(&lp->mh_list); + ++ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); ++ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); ++ ++ err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, ++ lp->rx_irq_name, lp); ++ if (err) ++ goto out_free_txq; ++ ++ err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, ++ lp->tx_irq_name, lp); ++ if (err) { ++ free_irq(lp->cfg.rx_irq, lp); ++ goto out_free_txq; ++ } ++ + return lp; + + out_free_txq: +@@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free); + * state. This does not initiate a handshake, ldc_connect() does + * that. + */ +-int ldc_bind(struct ldc_channel *lp, const char *name) ++int ldc_bind(struct ldc_channel *lp) + { + unsigned long hv_err, flags; + int err = -EINVAL; + +- if (!name || +- (lp->state != LDC_STATE_INIT)) ++ if (lp->state != LDC_STATE_INIT) + return -EINVAL; + +- snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); +- snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); +- +- err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, +- lp->rx_irq_name, lp); +- if (err) +- return err; +- +- err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, +- lp->tx_irq_name, lp); +- if (err) { +- free_irq(lp->cfg.rx_irq, lp); +- return err; +- } +- +- + spin_lock_irqsave(&lp->lock, flags); + + enable_irq(lp->cfg.rx_irq); +--- a/arch/sparc/kernel/viohs.c ++++ b/arch/sparc/kernel/viohs.c +@@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_stat + cfg.tx_irq = vio->vdev->tx_irq; + cfg.rx_irq = vio->vdev->rx_irq; + +- lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); ++ lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); + if (IS_ERR(lp)) + return PTR_ERR(lp); + +@@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state + + err = 0; + if (state == LDC_STATE_INIT) { +- err = ldc_bind(vio->lp, vio->name); ++ err = ldc_bind(vio->lp); + if (err) + printk(KERN_WARNING "%s: Port %lu bind failed, " + "err=%d\n", diff --git a/queue-3.14/sparc64-sparse-irq.patch b/queue-3.14/sparc64-sparse-irq.patch new file mode 100644 index 00000000000..266844e2314 --- /dev/null +++ b/queue-3.14/sparc64-sparse-irq.patch @@ -0,0 +1,787 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: bob picco +Date: Thu, 25 Sep 2014 12:25:03 -0700 +Subject: sparc64: sparse irq + +From: bob picco + +[ Upstream commit ee6a9333fa58e11577c1b531b8e0f5ffc0fd6f50 ] + +This patch attempts to do a few things. 
The highlights are: 1) enable
+SPARSE_IRQ unconditionally, 2) kills off !SPARSE_IRQ code, 3) allocates
+ivector_table at boot time and 4) default to cookie only VIRQ mechanism
+for supported firmware. The first firmware with cookie only support for
+me appears on T5. You can optionally force the HV firmware to non-cookie-only
+mode, which is the sysino support.
+
+The sysino is a deprecated HV mechanism according to the most recent
+SPARC Virtual Machine Specification. HV_GRP_INTR is what controls the
+cookie/sysino firmware versioning.
+
+The history of this interface is:
+
+1) Major version 1.0 only supported sysino based interrupt interfaces.
+
+2) Major version 2.0 added cookie based VIRQs, however due to the fact
+ that OSs were using the VIRQs without negotiating major version
+ 2.0 (Linux and Solaris are both guilty), the VIRQ calls were
+ allowed even with major version 1.0.
+
+ To complicate things even further, the VIRQ interfaces were only
+ actually hooked up in the hypervisor for LDC interrupt sources.
+ VIRQ calls on other device types would result in HV_EINVAL errors.
+
+ So effectively, major version 2.0 is unusable.
+
+3) Major version 3.0 was created to signal use of VIRQs and the fact
+ that the hypervisor has these calls hooked up for all interrupt
+ sources, not just those for LDC devices.
+
+A new boot option is provided should cookie only HV support have issues.
+hvirq - this is the version for HV_GRP_INTR. This is related to HV API
+versioning. The code attempts major=3 first by default. The option can
+be used to override this default.
+
+I've tested with SPARSE_IRQ on T5-8, M7-4 and T4-X and Jalapeño.
+
+Signed-off-by: Bob Picco
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/Kconfig | 1
+ arch/sparc/include/asm/irq_64.h | 9
+ arch/sparc/kernel/irq_64.c | 509 ++++++++++++++++++++++++++--------------
+ 3 files changed, 343 insertions(+), 176 deletions(-)
+
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -67,6 +67,7 @@ config SPARC64
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_CONTEXT_TRACKING
+ select HAVE_DEBUG_KMEMLEAK
++ select SPARSE_IRQ
+ select RTC_DRV_CMOS
+ select RTC_DRV_BQ4802
+ select RTC_DRV_SUN4V
+--- a/arch/sparc/include/asm/irq_64.h
++++ b/arch/sparc/include/asm/irq_64.h
+@@ -37,7 +37,7 @@
+ *
+ * ino_bucket->irq allocation is made during {sun4v_,}build_irq().
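++ *
++ * (Editor's note, not part of the upstream comment: with SPARSE_IRQ
++ * now selected, irq descriptors are allocated on demand, so NR_IRQS
++ * below is only an upper bound rather than the size of a static
++ * table.)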
+ */
+-#define NR_IRQS 255
++#define NR_IRQS (2048)
+
+ extern void irq_install_pre_handler(int irq,
+ void (*func)(unsigned int, void *, void *),
+@@ -57,11 +57,8 @@ extern unsigned int sun4u_build_msi(u32
+ unsigned long iclr_base);
+ extern void sun4u_destroy_msi(unsigned int irq);
+
+-extern unsigned char irq_alloc(unsigned int dev_handle,
+- unsigned int dev_ino);
+-#ifdef CONFIG_PCI_MSI
+-extern void irq_free(unsigned int irq);
+-#endif
++unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
++void irq_free(unsigned int irq);
+
+ extern void __init init_IRQ(void);
+ extern void fixup_irqs(void);
+--- a/arch/sparc/kernel/irq_64.c
++++ b/arch/sparc/kernel/irq_64.c
+@@ -47,8 +47,6 @@
+ #include "cpumap.h"
+ #include "kstack.h"
+
+-#define NUM_IVECS (IMAP_INR + 1)
+-
+ struct ino_bucket *ivector_table;
+ unsigned long ivector_table_pa;
+
+@@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long
+
+ #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa)
+
+-static struct {
+- unsigned int dev_handle;
+- unsigned int dev_ino;
+- unsigned int in_use;
+-} irq_table[NR_IRQS];
+-static DEFINE_SPINLOCK(irq_alloc_lock);
++static unsigned long hvirq_major __initdata;
++static int __init early_hvirq_major(char *p)
++{
++ int rc = kstrtoul(p, 10, &hvirq_major);
++
++ return rc;
++}
++early_param("hvirq", early_hvirq_major);
++
++static int hv_irq_version;
++
++/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
++ * based interfaces, but:
++ *
++ * 1) Several OSs, Solaris and Linux included, use them even when only
++ * negotiating version 1.0 (or failing to negotiate at all). So the
++ * hypervisor has a workaround that provides the VIRQ interfaces even
++ * when only version 1.0 of the API is in use.
++ *
++ * 2) Second, and more importantly, with major version 2.0 these VIRQ
++ * interfaces only were actually hooked up for LDC interrupts, even
++ * though the Hypervisor specification clearly stated:
++ *
++ * The new interrupt API functions will be available to a guest
++ * when it negotiates version 2.0 in the interrupt API group 0x2. When
++ * a guest negotiates version 2.0, all interrupt sources will only
++ * support using the cookie interface, and any attempt to use the
++ * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
++ * ENOTSUPPORTED error being returned.
++ *
++ * with an emphasis on "all interrupt sources".
++ *
++ * To correct this, major version 3.0 was created which does actually
++ * support VIRQs for all interrupt sources (not just LDC devices). So
++ * if we want to move completely over to the cookie based VIRQs we must
++ * negotiate major version 3.0 or later of HV_GRP_INTR.
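++ *
++ * (Editor's note, not part of the upstream comment: the "hvirq="
++ * boot parameter parsed by early_hvirq_major() above feeds
++ * irq_init_hv() below, which tries major version 3 by default and
++ * records version 1 if registration fails.)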
++ */ ++static bool sun4v_cookie_only_virqs(void) ++{ ++ if (hv_irq_version >= 3) ++ return true; ++ return false; ++} + +-unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino) ++static void __init irq_init_hv(void) + { +- unsigned long flags; +- unsigned char ent; ++ unsigned long hv_error, major, minor = 0; ++ ++ if (tlb_type != hypervisor) ++ return; + +- BUILD_BUG_ON(NR_IRQS >= 256); ++ if (hvirq_major) ++ major = hvirq_major; ++ else ++ major = 3; + +- spin_lock_irqsave(&irq_alloc_lock, flags); ++ hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor); ++ if (!hv_error) ++ hv_irq_version = major; ++ else ++ hv_irq_version = 1; + +- for (ent = 1; ent < NR_IRQS; ent++) { +- if (!irq_table[ent].in_use) ++ pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n", ++ hv_irq_version, ++ sun4v_cookie_only_virqs() ? "enabled" : "disabled"); ++} ++ ++/* This function is for the timer interrupt.*/ ++int __init arch_probe_nr_irqs(void) ++{ ++ return 1; ++} ++ ++#define DEFAULT_NUM_IVECS (0xfffU) ++static unsigned int nr_ivec = DEFAULT_NUM_IVECS; ++#define NUM_IVECS (nr_ivec) ++ ++static unsigned int __init size_nr_ivec(void) ++{ ++ if (tlb_type == hypervisor) { ++ switch (sun4v_chip_type) { ++ /* Athena's devhandle|devino is large.*/ ++ case SUN4V_CHIP_SPARC64X: ++ nr_ivec = 0xffff; + break; ++ } + } +- if (ent >= NR_IRQS) { +- printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); +- ent = 0; +- } else { +- irq_table[ent].dev_handle = dev_handle; +- irq_table[ent].dev_ino = dev_ino; +- irq_table[ent].in_use = 1; +- } ++ return nr_ivec; ++} ++ ++struct irq_handler_data { ++ union { ++ struct { ++ unsigned int dev_handle; ++ unsigned int dev_ino; ++ }; ++ unsigned long sysino; ++ }; ++ struct ino_bucket bucket; ++ unsigned long iclr; ++ unsigned long imap; ++}; ++ ++static inline unsigned int irq_data_to_handle(struct irq_data *data) ++{ ++ struct irq_handler_data *ihd = data->handler_data; ++ ++ return ihd->dev_handle; ++} ++ ++static inline unsigned int irq_data_to_ino(struct irq_data *data) ++{ ++ struct irq_handler_data *ihd = data->handler_data; ++ ++ return ihd->dev_ino; ++} + +- spin_unlock_irqrestore(&irq_alloc_lock, flags); ++static inline unsigned long irq_data_to_sysino(struct irq_data *data) ++{ ++ struct irq_handler_data *ihd = data->handler_data; + +- return ent; ++ return ihd->sysino; + } + +-#ifdef CONFIG_PCI_MSI + void irq_free(unsigned int irq) + { +- unsigned long flags; ++ void *data = irq_get_handler_data(irq); + +- if (irq >= NR_IRQS) +- return; ++ kfree(data); ++ irq_set_handler_data(irq, NULL); ++ irq_free_descs(irq, 1); ++} + +- spin_lock_irqsave(&irq_alloc_lock, flags); ++unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) ++{ ++ int irq; + +- irq_table[irq].in_use = 0; ++ irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); ++ if (irq <= 0) ++ goto out; + +- spin_unlock_irqrestore(&irq_alloc_lock, flags); ++ return irq; ++out: ++ return 0; ++} ++ ++static unsigned int cookie_exists(u32 devhandle, unsigned int devino) ++{ ++ unsigned long hv_err, cookie; ++ struct ino_bucket *bucket; ++ unsigned int irq = 0U; ++ ++ hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie); ++ if (hv_err) { ++ pr_err("HV get cookie failed hv_err = %ld\n", hv_err); ++ goto out; ++ } ++ ++ if (cookie & ((1UL << 63UL))) { ++ cookie = ~cookie; ++ bucket = (struct ino_bucket *) __va(cookie); ++ irq = bucket->__irq; ++ } ++out: ++ return irq; ++} ++ ++static unsigned int sysino_exists(u32 devhandle, unsigned int devino) ++{ ++ unsigned long sysino = 
sun4v_devino_to_sysino(devhandle, devino); ++ struct ino_bucket *bucket; ++ unsigned int irq; ++ ++ bucket = &ivector_table[sysino]; ++ irq = bucket_get_irq(__pa(bucket)); ++ ++ return irq; ++} ++ ++void ack_bad_irq(unsigned int irq) ++{ ++ pr_crit("BAD IRQ ack %d\n", irq); ++} ++ ++void irq_install_pre_handler(int irq, ++ void (*func)(unsigned int, void *, void *), ++ void *arg1, void *arg2) ++{ ++ pr_warn("IRQ pre handler NOT supported.\n"); + } +-#endif + + /* + * /proc/interrupts printing: +@@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(un + return tid; + } + +-struct irq_handler_data { +- unsigned long iclr; +- unsigned long imap; +- +- void (*pre_handler)(unsigned int, void *, void *); +- void *arg1; +- void *arg2; +-}; +- + #ifdef CONFIG_SMP + static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) + { +@@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_dat + + static void sun4v_irq_enable(struct irq_data *data) + { +- unsigned int ino = irq_table[data->irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity); ++ unsigned int ino = irq_data_to_sysino(data); + int err; + + err = sun4v_intr_settarget(ino, cpuid); +@@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_ + static int sun4v_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) + { +- unsigned int ino = irq_table[data->irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(data->irq, mask); ++ unsigned int ino = irq_data_to_sysino(data); + int err; + + err = sun4v_intr_settarget(ino, cpuid); +@@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq + + static void sun4v_irq_disable(struct irq_data *data) + { +- unsigned int ino = irq_table[data->irq].dev_ino; ++ unsigned int ino = irq_data_to_sysino(data); + int err; + + err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); +@@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq + + static void sun4v_irq_eoi(struct irq_data *data) + { +- unsigned int ino = irq_table[data->irq].dev_ino; ++ unsigned int ino = irq_data_to_sysino(data); + int err; + + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); +@@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_dat + + static void sun4v_virq_enable(struct irq_data *data) + { +- unsigned long cpuid, dev_handle, dev_ino; ++ unsigned long dev_handle = irq_data_to_handle(data); ++ unsigned long dev_ino = irq_data_to_ino(data); ++ unsigned long cpuid; + int err; + + cpuid = irq_choose_cpu(data->irq, data->affinity); + +- dev_handle = irq_table[data->irq].dev_handle; +- dev_ino = irq_table[data->irq].dev_ino; +- + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " +@@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq + static int sun4v_virt_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) + { +- unsigned long cpuid, dev_handle, dev_ino; ++ unsigned long dev_handle = irq_data_to_handle(data); ++ unsigned long dev_ino = irq_data_to_ino(data); ++ unsigned long cpuid; + int err; + + cpuid = irq_choose_cpu(data->irq, mask); + +- dev_handle = irq_table[data->irq].dev_handle; +- dev_ino = irq_table[data->irq].dev_ino; +- + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " +@@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struc + + static void sun4v_virq_disable(struct irq_data *data) + { +- unsigned long dev_handle, dev_ino; ++ 
unsigned long dev_handle = irq_data_to_handle(data); ++ unsigned long dev_ino = irq_data_to_ino(data); + int err; + +- dev_handle = irq_table[data->irq].dev_handle; +- dev_ino = irq_table[data->irq].dev_ino; + + err = sun4v_vintr_set_valid(dev_handle, dev_ino, + HV_INTR_DISABLED); +@@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct ir + + static void sun4v_virq_eoi(struct irq_data *data) + { +- unsigned long dev_handle, dev_ino; ++ unsigned long dev_handle = irq_data_to_handle(data); ++ unsigned long dev_ino = irq_data_to_ino(data); + int err; + +- dev_handle = irq_table[data->irq].dev_handle; +- dev_ino = irq_table[data->irq].dev_ino; +- + err = sun4v_vintr_set_state(dev_handle, dev_ino, + HV_INTR_STATE_IDLE); + if (err != HV_EOK) +@@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = { + .flags = IRQCHIP_EOI_IF_HANDLED, + }; + +-static void pre_flow_handler(struct irq_data *d) +-{ +- struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d); +- unsigned int ino = irq_table[d->irq].dev_ino; +- +- handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2); +-} +- +-void irq_install_pre_handler(int irq, +- void (*func)(unsigned int, void *, void *), +- void *arg1, void *arg2) +-{ +- struct irq_handler_data *handler_data = irq_get_handler_data(irq); +- +- handler_data->pre_handler = func; +- handler_data->arg1 = arg1; +- handler_data->arg2 = arg2; +- +- __irq_set_preflow_handler(irq, pre_flow_handler); +-} +- + unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) + { +- struct ino_bucket *bucket; + struct irq_handler_data *handler_data; ++ struct ino_bucket *bucket; + unsigned int irq; + int ino; + +@@ -537,119 +641,166 @@ out: + return irq; + } + +-static unsigned int sun4v_build_common(unsigned long sysino, +- struct irq_chip *chip) ++static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino, ++ void (*handler_data_init)(struct irq_handler_data *data, ++ u32 devhandle, unsigned int devino), ++ struct irq_chip *chip) + { +- struct ino_bucket *bucket; +- struct irq_handler_data *handler_data; ++ struct irq_handler_data *data; + unsigned int irq; + +- BUG_ON(tlb_type != hypervisor); ++ irq = irq_alloc(devhandle, devino); ++ if (!irq) ++ goto out; + +- bucket = &ivector_table[sysino]; +- irq = bucket_get_irq(__pa(bucket)); +- if (!irq) { +- irq = irq_alloc(0, sysino); +- bucket_set_irq(__pa(bucket), irq); +- irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, +- "IVEC"); ++ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); ++ if (unlikely(!data)) { ++ pr_err("IRQ handler data allocation failed.\n"); ++ irq_free(irq); ++ irq = 0; ++ goto out; + } + +- handler_data = irq_get_handler_data(irq); +- if (unlikely(handler_data)) +- goto out; ++ irq_set_handler_data(irq, data); ++ handler_data_init(data, devhandle, devino); ++ irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC"); ++ data->imap = ~0UL; ++ data->iclr = ~0UL; ++out: ++ return irq; ++} + +- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); +- if (unlikely(!handler_data)) { +- prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); +- prom_halt(); +- } +- irq_set_handler_data(irq, handler_data); ++static unsigned long cookie_assign(unsigned int irq, u32 devhandle, ++ unsigned int devino) ++{ ++ struct irq_handler_data *ihd = irq_get_handler_data(irq); ++ unsigned long hv_error, cookie; + +- /* Catch accidental accesses to these things. 
IMAP/ICLR handling +- * is done by hypervisor calls on sun4v platforms, not by direct +- * register accesses. ++ /* handler_irq needs to find the irq. cookie is seen signed in ++ * sun4v_dev_mondo and treated as a non ivector_table delivery. + */ +- handler_data->imap = ~0UL; +- handler_data->iclr = ~0UL; ++ ihd->bucket.__irq = irq; ++ cookie = ~__pa(&ihd->bucket); + +-out: +- return irq; ++ hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie); ++ if (hv_error) ++ pr_err("HV vintr set cookie failed = %ld\n", hv_error); ++ ++ return hv_error; + } + +-unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) ++static void cookie_handler_data(struct irq_handler_data *data, ++ u32 devhandle, unsigned int devino) + { +- unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); ++ data->dev_handle = devhandle; ++ data->dev_ino = devino; ++} + +- return sun4v_build_common(sysino, &sun4v_irq); ++static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino, ++ struct irq_chip *chip) ++{ ++ unsigned long hv_error; ++ unsigned int irq; ++ ++ irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip); ++ ++ hv_error = cookie_assign(irq, devhandle, devino); ++ if (hv_error) { ++ irq_free(irq); ++ irq = 0; ++ } ++ ++ return irq; + } + +-unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) ++static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino) + { +- struct irq_handler_data *handler_data; +- unsigned long hv_err, cookie; +- struct ino_bucket *bucket; + unsigned int irq; + +- bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); +- if (unlikely(!bucket)) +- return 0; +- +- /* The only reference we store to the IRQ bucket is +- * by physical address which kmemleak can't see, tell +- * it that this object explicitly is not a leak and +- * should be scanned. +- */ +- kmemleak_not_leak(bucket); ++ irq = cookie_exists(devhandle, devino); ++ if (irq) ++ goto out; + +- __flush_dcache_range((unsigned long) bucket, +- ((unsigned long) bucket + +- sizeof(struct ino_bucket))); ++ irq = cookie_build_irq(devhandle, devino, &sun4v_virq); + +- irq = irq_alloc(devhandle, devino); ++out: ++ return irq; ++} ++ ++static void sysino_set_bucket(unsigned int irq) ++{ ++ struct irq_handler_data *ihd = irq_get_handler_data(irq); ++ struct ino_bucket *bucket; ++ unsigned long sysino; ++ ++ sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino); ++ BUG_ON(sysino >= nr_ivec); ++ bucket = &ivector_table[sysino]; + bucket_set_irq(__pa(bucket), irq); ++} + +- irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq, +- "IVEC"); ++static void sysino_handler_data(struct irq_handler_data *data, ++ u32 devhandle, unsigned int devino) ++{ ++ unsigned long sysino; + +- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); +- if (unlikely(!handler_data)) +- return 0; ++ sysino = sun4v_devino_to_sysino(devhandle, devino); ++ data->sysino = sysino; ++} + +- /* In order to make the LDC channel startup sequence easier, +- * especially wrt. locking, we do not let request_irq() enable +- * the interrupt. +- */ +- irq_set_status_flags(irq, IRQ_NOAUTOEN); +- irq_set_handler_data(irq, handler_data); ++static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino, ++ struct irq_chip *chip) ++{ ++ unsigned int irq; + +- /* Catch accidental accesses to these things. IMAP/ICLR handling +- * is done by hypervisor calls on sun4v platforms, not by direct +- * register accesses. 
+- */ +- handler_data->imap = ~0UL; +- handler_data->iclr = ~0UL; ++ irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip); ++ if (!irq) ++ goto out; + +- cookie = ~__pa(bucket); +- hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); +- if (hv_err) { +- prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " +- "err=%lu\n", devhandle, devino, hv_err); +- prom_halt(); +- } ++ sysino_set_bucket(irq); ++out: ++ return irq; ++} + ++static int sun4v_build_sysino(u32 devhandle, unsigned int devino) ++{ ++ int irq; ++ ++ irq = sysino_exists(devhandle, devino); ++ if (irq) ++ goto out; ++ ++ irq = sysino_build_irq(devhandle, devino, &sun4v_irq); ++out: + return irq; + } + +-void ack_bad_irq(unsigned int irq) ++unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) + { +- unsigned int ino = irq_table[irq].dev_ino; ++ unsigned int irq; + +- if (!ino) +- ino = 0xdeadbeef; ++ if (sun4v_cookie_only_virqs()) ++ irq = sun4v_build_cookie(devhandle, devino); ++ else ++ irq = sun4v_build_sysino(devhandle, devino); + +- printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n", +- ino, irq); ++ return irq; ++} ++ ++unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) ++{ ++ int irq; ++ ++ irq = cookie_build_irq(devhandle, devino, &sun4v_virq); ++ if (!irq) ++ goto out; ++ ++ /* This is borrowed from the original function. ++ */ ++ irq_set_status_flags(irq, IRQ_NOAUTOEN); ++ ++out: ++ return irq; + } + + void *hardirq_stack[NR_CPUS]; +@@ -720,9 +871,12 @@ void fixup_irqs(void) + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_to_desc(irq); +- struct irq_data *data = irq_desc_get_irq_data(desc); ++ struct irq_data *data; + unsigned long flags; + ++ if (!desc) ++ continue; ++ data = irq_desc_get_irq_data(desc); + raw_spin_lock_irqsave(&desc->lock, flags); + if (desc->action && !irqd_is_per_cpu(data)) { + if (data->chip->irq_set_affinity) +@@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action + .name = "timer", + }; + +-/* Only invoked on boot processor. */ +-void __init init_IRQ(void) ++static void __init irq_ivector_init(void) + { +- unsigned long size; ++ unsigned long size, order; ++ unsigned int ivecs; + +- map_prom_timers(); +- kill_prom_timer(); ++ /* If we are doing cookie only VIRQs then we do not need the ivector ++ * table to process interrupts. 
++ */ ++ if (sun4v_cookie_only_virqs()) ++ return; + +- size = sizeof(struct ino_bucket) * NUM_IVECS; +- ivector_table = kzalloc(size, GFP_KERNEL); ++ ivecs = size_nr_ivec(); ++ size = sizeof(struct ino_bucket) * ivecs; ++ order = get_order(size); ++ ivector_table = (struct ino_bucket *) ++ __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!ivector_table) { + prom_printf("Fatal error, cannot allocate ivector_table\n"); + prom_halt(); +@@ -940,6 +1100,15 @@ void __init init_IRQ(void) + ((unsigned long) ivector_table) + size); + + ivector_table_pa = __pa(ivector_table); ++} ++ ++/* Only invoked on boot processor. */ ++void __init init_IRQ(void) ++{ ++ irq_init_hv(); ++ irq_ivector_init(); ++ map_prom_timers(); ++ kill_prom_timer(); + + if (tlb_type == hypervisor) + sun4v_init_mondo_queues(); diff --git a/queue-3.14/sparc64-sun4v-tlb-error-power-off-events.patch b/queue-3.14/sparc64-sun4v-tlb-error-power-off-events.patch new file mode 100644 index 00000000000..fd916dcf32b --- /dev/null +++ b/queue-3.14/sparc64-sun4v-tlb-error-power-off-events.patch @@ -0,0 +1,205 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: bob picco +Date: Tue, 16 Sep 2014 09:26:47 -0400 +Subject: sparc64: sun4v TLB error power off events + +From: bob picco + +[ Upstream commit 4ccb9272892c33ef1c19a783cfa87103b30c2784 ] + +We've witnessed a few TLB events causing the machine to power off because +of prom_halt. In one case it was some nfs-related area during rmmod. Another +was an mmapper of /dev/mem. A more recent one is an ITLB issue with +a bad pagesize, which could be a hardware bug. Bugs happen, but we should +attempt not to power off or hang the machine when possible. + +This is a DTLB error from an mmapper of /dev/mem: +[root@sparcie ~]# SUN4V-DTLB: Error at TPC[fffff80100903e6c], tl 1 +SUN4V-DTLB: TPC<0xfffff80100903e6c> +SUN4V-DTLB: O7[fffff801081979d0] +SUN4V-DTLB: O7<0xfffff801081979d0> +SUN4V-DTLB: vaddr[fffff80100000000] ctx[1250] pte[98000000000f0610] error[2] +. + +This is a recent mainline ITLB error: +[ 3708.179864] SUN4V-ITLB: TPC<0xfffffc010071cefc> +[ 3708.188866] SUN4V-ITLB: O7[fffffc010071cee8] +[ 3708.197377] SUN4V-ITLB: O7<0xfffffc010071cee8> +[ 3708.206539] SUN4V-ITLB: vaddr[e0003] ctx[1a3c] pte[2900000dcc800eeb] error[4] +. + +Normally sun4v_itlb_error_report() and sun4v_dtlb_error_report() would call +prom_halt() and drop us to the OF command prompt "ok". This isn't the case for +LDOMs, where the machine powers off instead. + +For the HV-reported error HV_ENORADDR on the HV_MMU_MAP_ADDR_TRAP call, we +raise a SIGBUS error by qualifying it within do_sparc64_fault() with the fault +code mask FAULT_CODE_BAD_RA. This is done when the trap level (%tl) is less +than or equal to one. Otherwise, for %tl > 1, we proceed eventually to +die_if_kernel(). + +The logic of this patch was partially inspired by David Miller's feedback. + +Powering off large sparc64 machines is painful, and die_if_kernel() provides +more context. A reset sequence isn't brief on large sparc64, but it is better +than a power-off/power-on sequence. + +Cc: sparclinux@vger.kernel.org +Signed-off-by: Bob Picco +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/thread_info_64.h | 1 + arch/sparc/kernel/sun4v_tlb_miss.S | 35 +++++++++++++++++++------------- + arch/sparc/kernel/traps_64.c | 15 ++++++++----- + arch/sparc/mm/fault_64.c | 3 ++ + 4 files changed, 34 insertions(+), 20 deletions(-) + +--- a/arch/sparc/include/asm/thread_info_64.h ++++ b/arch/sparc/include/asm/thread_info_64.h +@@ -102,6 +102,7 @@ struct thread_info { + #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ + #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ + #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ ++#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ + + #if PAGE_SHIFT == 13 + #define THREAD_SIZE (2*PAGE_SIZE) +--- a/arch/sparc/kernel/sun4v_tlb_miss.S ++++ b/arch/sparc/kernel/sun4v_tlb_miss.S +@@ -195,6 +195,11 @@ sun4v_tsb_miss_common: + ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 + + sun4v_itlb_error: ++ rdpr %tl, %g1 ++ cmp %g1, 1 ++ ble,pt %icc, sun4v_bad_ra ++ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 ++ + sethi %hi(sun4v_err_itlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] + sethi %hi(sun4v_err_itlb_ctx), %g1 +@@ -206,15 +211,10 @@ sun4v_itlb_error: + sethi %hi(sun4v_err_itlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + ++ sethi %hi(1f), %g7 + rdpr %tl, %g4 +- cmp %g4, 1 +- ble,pt %icc, 1f +- sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 +- or %g7, %lo(2f), %g7 +- +-1: ba,pt %xcc, etrap +-2: or %g7, %lo(2b), %g7 ++1: or %g7, %lo(1f), %g7 + mov %l4, %o1 + call sun4v_itlb_error_report + add %sp, PTREGS_OFF, %o0 +@@ -222,6 +222,11 @@ sun4v_itlb_error: + /* NOTREACHED */ + + sun4v_dtlb_error: ++ rdpr %tl, %g1 ++ cmp %g1, 1 ++ ble,pt %icc, sun4v_bad_ra ++ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 ++ + sethi %hi(sun4v_err_dtlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] + sethi %hi(sun4v_err_dtlb_ctx), %g1 +@@ -233,21 +238,23 @@ sun4v_dtlb_error: + sethi %hi(sun4v_err_dtlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + ++ sethi %hi(1f), %g7 + rdpr %tl, %g4 +- cmp %g4, 1 +- ble,pt %icc, 1f +- sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 +- or %g7, %lo(2f), %g7 +- +-1: ba,pt %xcc, etrap +-2: or %g7, %lo(2b), %g7 ++1: or %g7, %lo(1f), %g7 + mov %l4, %o1 + call sun4v_dtlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + ++sun4v_bad_ra: ++ or %g0, %g4, %g5 ++ ba,pt %xcc, sparc64_realfault_common ++ or %g1, %g0, %g4 ++ ++ /* NOTREACHED */ ++ + /* Instruction Access Exception, tl0. 
*/ + sun4v_iacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 +--- a/arch/sparc/kernel/traps_64.c ++++ b/arch/sparc/kernel/traps_64.c +@@ -2102,6 +2102,11 @@ void sun4v_nonresum_overflow(struct pt_r + atomic_inc(&sun4v_nonresum_oflow_cnt); + } + ++static void sun4v_tlb_error(struct pt_regs *regs) ++{ ++ die_if_kernel("TLB/TSB error", regs); ++} ++ + unsigned long sun4v_err_itlb_vaddr; + unsigned long sun4v_err_itlb_ctx; + unsigned long sun4v_err_itlb_pte; +@@ -2109,8 +2114,7 @@ unsigned long sun4v_err_itlb_error; + + void sun4v_itlb_error_report(struct pt_regs *regs, int tl) + { +- if (tl > 1) +- dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); ++ dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); +@@ -2123,7 +2127,7 @@ void sun4v_itlb_error_report(struct pt_r + sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, + sun4v_err_itlb_pte, sun4v_err_itlb_error); + +- prom_halt(); ++ sun4v_tlb_error(regs); + } + + unsigned long sun4v_err_dtlb_vaddr; +@@ -2133,8 +2137,7 @@ unsigned long sun4v_err_dtlb_error; + + void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) + { +- if (tl > 1) +- dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); ++ dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); +@@ -2147,7 +2150,7 @@ void sun4v_dtlb_error_report(struct pt_r + sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, + sun4v_err_dtlb_pte, sun4v_err_dtlb_error); + +- prom_halt(); ++ sun4v_tlb_error(regs); + } + + void hypervisor_tlbop_error(unsigned long err, unsigned long op) +--- a/arch/sparc/mm/fault_64.c ++++ b/arch/sparc/mm/fault_64.c +@@ -348,6 +348,9 @@ retry: + down_read(&mm->mmap_sem); + } + ++ if (fault_code & FAULT_CODE_BAD_RA) ++ goto do_sigbus; ++ + vma = find_vma(mm, address); + if (!vma) + goto bad_area; diff --git a/queue-3.14/sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch b/queue-3.14/sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch new file mode 100644 index 00000000000..9280cc5d829 --- /dev/null +++ b/queue-3.14/sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch @@ -0,0 +1,27 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: Allen Pais +Date: Mon, 8 Sep 2014 11:48:54 +0530 +Subject: sparc64: support M6 and M7 for building CPU distribution map + +From: Allen Pais + +Add M6 and M7 chip type in cpumap.c to correctly build CPU distribution map that spans all online CPUs. + +Signed-off-by: Allen Pais +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/cpumap.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/sparc/kernel/cpumap.c ++++ b/arch/sparc/kernel/cpumap.c +@@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tr + case SUN4V_CHIP_NIAGARA3: + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: ++ case SUN4V_CHIP_SPARC_M6: ++ case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC64X: + rover_inc_table = niagara_iterate_method; + break; diff --git a/queue-3.14/sparc64-switch-to-4-level-page-tables.patch b/queue-3.14/sparc64-switch-to-4-level-page-tables.patch new file mode 100644 index 00000000000..c26c2c1036f --- /dev/null +++ b/queue-3.14/sparc64-switch-to-4-level-page-tables.patch @@ -0,0 +1,320 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Fri, 26 Sep 2014 21:19:46 -0700 +Subject: sparc64: Switch to 4-level page tables. + +From: "David S. 
Miller" + +[ Upstream commit ac55c768143aa34cc3789c4820cbb0809a76fd9c ] + +This has become necessary with chips that support more than 43-bits +of physical addressing. + +Based almost entirely upon a patch by Bob Picco. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/page_64.h | 6 +++++ + arch/sparc/include/asm/pgalloc_64.h | 28 ++++++++++++++++++++++++++- + arch/sparc/include/asm/pgtable_64.h | 37 +++++++++++++++++++++++++++++++----- + arch/sparc/include/asm/tsb.h | 10 +++++++++ + arch/sparc/kernel/smp_64.c | 7 ++++++ + arch/sparc/mm/init_64.c | 31 ++++++++++++++++++++++++++---- + 6 files changed, 109 insertions(+), 10 deletions(-) + +--- a/arch/sparc/include/asm/page_64.h ++++ b/arch/sparc/include/asm/page_64.h +@@ -57,18 +57,21 @@ extern void copy_user_page(void *to, voi + typedef struct { unsigned long pte; } pte_t; + typedef struct { unsigned long iopte; } iopte_t; + typedef struct { unsigned long pmd; } pmd_t; ++typedef struct { unsigned long pud; } pud_t; + typedef struct { unsigned long pgd; } pgd_t; + typedef struct { unsigned long pgprot; } pgprot_t; + + #define pte_val(x) ((x).pte) + #define iopte_val(x) ((x).iopte) + #define pmd_val(x) ((x).pmd) ++#define pud_val(x) ((x).pud) + #define pgd_val(x) ((x).pgd) + #define pgprot_val(x) ((x).pgprot) + + #define __pte(x) ((pte_t) { (x) } ) + #define __iopte(x) ((iopte_t) { (x) } ) + #define __pmd(x) ((pmd_t) { (x) } ) ++#define __pud(x) ((pud_t) { (x) } ) + #define __pgd(x) ((pgd_t) { (x) } ) + #define __pgprot(x) ((pgprot_t) { (x) } ) + +@@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } + typedef unsigned long pte_t; + typedef unsigned long iopte_t; + typedef unsigned long pmd_t; ++typedef unsigned long pud_t; + typedef unsigned long pgd_t; + typedef unsigned long pgprot_t; + + #define pte_val(x) (x) + #define iopte_val(x) (x) + #define pmd_val(x) (x) ++#define pud_val(x) (x) + #define pgd_val(x) (x) + #define pgprot_val(x) (x) + + #define __pte(x) (x) + #define __iopte(x) (x) + #define __pmd(x) (x) ++#define __pud(x) (x) + #define __pgd(x) (x) + #define __pgprot(x) (x) + +--- a/arch/sparc/include/asm/pgalloc_64.h ++++ b/arch/sparc/include/asm/pgalloc_64.h +@@ -15,6 +15,13 @@ + + extern struct kmem_cache *pgtable_cache; + ++static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) ++{ ++ pgd_set(pgd, pud); ++} ++ ++#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) ++ + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); +@@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_st + kmem_cache_free(pgtable_cache, pgd); + } + +-#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) ++static inline void __pud_populate(pud_t *pud, pmd_t *pmd) ++{ ++ pud_set(pud, pmd); ++} ++ ++#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD) ++ ++static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) ++{ ++ return kmem_cache_alloc(pgtable_cache, ++ GFP_KERNEL|__GFP_REPEAT); ++} ++ ++static inline void pud_free(struct mm_struct *mm, pud_t *pud) ++{ ++ kmem_cache_free(pgtable_cache, pud); ++} + + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) + { +@@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct + #define __pmd_free_tlb(tlb, pmd, addr) \ + pgtable_free_tlb(tlb, pmd, false) + ++#define __pud_free_tlb(tlb, pud, addr) \ ++ pgtable_free_tlb(tlb, pud, false) ++ + #endif /* _SPARC64_PGALLOC_H */ +--- a/arch/sparc/include/asm/pgtable_64.h ++++ 
b/arch/sparc/include/asm/pgtable_64.h +@@ -20,8 +20,6 @@ + #include + #include + +-#include +- + /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). + * The page copy blockops can use 0x6000000 to 0x8000000. + * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. +@@ -55,13 +53,21 @@ + #define PMD_MASK (~(PMD_SIZE-1)) + #define PMD_BITS (PAGE_SHIFT - 3) + +-/* PGDIR_SHIFT determines what a third-level page table entry can map */ +-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) ++/* PUD_SHIFT determines the size of the area a third-level page ++ * table can map ++ */ ++#define PUD_SHIFT (PMD_SHIFT + PMD_BITS) ++#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) ++#define PUD_MASK (~(PUD_SIZE-1)) ++#define PUD_BITS (PAGE_SHIFT - 3) ++ ++/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ ++#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS) + #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE-1)) + #define PGDIR_BITS (PAGE_SHIFT - 3) + +-#if (PGDIR_SHIFT + PGDIR_BITS) != 43 ++#if (PGDIR_SHIFT + PGDIR_BITS) != 53 + #error Page table parameters do not cover virtual address space properly. + #endif + +@@ -93,6 +99,7 @@ static inline bool kern_addr_valid(unsig + /* Entries per page directory level. */ + #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) + #define PTRS_PER_PMD (1UL << PMD_BITS) ++#define PTRS_PER_PUD (1UL << PUD_BITS) + #define PTRS_PER_PGD (1UL << PGDIR_BITS) + + /* Kernel has a separate 44bit address space. */ +@@ -101,6 +108,9 @@ static inline bool kern_addr_valid(unsig + #define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) ++#define pud_ERROR(e) \ ++ pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \ ++ __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0)) + #define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) +@@ -779,6 +789,11 @@ static inline int pmd_present(pmd_t pmd) + #define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \ + !__kern_addr_valid(pud_val(pud))) + ++#define pgd_none(pgd) (!pgd_val(pgd)) ++ ++#define pgd_bad(pgd) ((pgd_val(pgd) & ~PAGE_MASK) || \ ++ !__kern_addr_valid(pgd_val(pgd))) ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +@@ -815,10 +830,17 @@ static inline unsigned long __pmd_page(p + #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) + #define pud_present(pud) (pud_val(pud) != 0U) + #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) ++#define pgd_page_vaddr(pgd) \ ++ ((unsigned long) __va(pgd_val(pgd))) ++#define pgd_present(pgd) (pgd_val(pgd) != 0U) ++#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + + /* Same in both SUN4V and SUN4U. */ + #define pte_none(pte) (!pte_val(pte)) + ++#define pgd_set(pgdp, pudp) \ ++ (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) ++ + /* to find an entry in a page-table-directory. */ + #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +@@ -826,6 +848,11 @@ static inline unsigned long __pmd_page(p + /* to find an entry in a kernel page-table-directory */ + #define pgd_offset_k(address) pgd_offset(&init_mm, address) + ++/* Find an entry in the third-level page table.. 
*/ ++#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) ++#define pud_offset(pgdp, address) \ ++ ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) ++ + /* Find an entry in the second-level page table.. */ + #define pmd_offset(pudp, address) \ + ((pmd_t *) pud_page_vaddr(*(pudp)) + \ +--- a/arch/sparc/include/asm/tsb.h ++++ b/arch/sparc/include/asm/tsb.h +@@ -145,6 +145,11 @@ extern struct tsb_phys_patch_entry __tsb + andn REG2, 0x7, REG2; \ + ldx [REG1 + REG2], REG1; \ + brz,pn REG1, FAIL_LABEL; \ ++ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ ++ srlx REG2, 64 - PAGE_SHIFT, REG2; \ ++ andn REG2, 0x7, REG2; \ ++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ ++ brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ +@@ -198,6 +203,11 @@ extern struct tsb_phys_patch_entry __tsb + andn REG2, 0x7, REG2; \ + ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ ++ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ ++ srlx REG2, 64 - PAGE_SHIFT, REG2; \ ++ andn REG2, 0x7, REG2; \ ++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ ++ brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -1479,6 +1479,13 @@ static void __init pcpu_populate_pte(uns + pud_t *pud; + pmd_t *pmd; + ++ if (pgd_none(*pgd)) { ++ pud_t *new; ++ ++ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); ++ pgd_populate(&init_mm, pgd, new); ++ } ++ + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + pmd_t *new; +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1384,6 +1384,13 @@ static unsigned long __ref kernel_map_ra + pmd_t *pmd; + pte_t *pte; + ++ if (pgd_none(*pgd)) { ++ pud_t *new; ++ ++ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); ++ alloc_bytes += PAGE_SIZE; ++ pgd_populate(&init_mm, pgd, new); ++ } + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new; +@@ -1850,7 +1857,12 @@ static void __init sun4v_linear_pte_xor_ + /* paging_init() sets up the page tables */ + + static unsigned long last_valid_pfn; +-pgd_t swapper_pg_dir[PTRS_PER_PGD]; ++ ++/* These must be page aligned in order to not trigger the ++ * alignment tests of pgd_bad() and pud_bad(). ++ */ ++pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((aligned (PAGE_SIZE))); ++static pud_t swapper_pud_dir[PTRS_PER_PUD] __attribute__ ((aligned (PAGE_SIZE))); + + static void sun4u_pgprot_init(void); + static void sun4v_pgprot_init(void); +@@ -1859,6 +1871,8 @@ void __init paging_init(void) + { + unsigned long end_pfn, shift, phys_base; + unsigned long real_end, i; ++ pud_t *pud; ++ pmd_t *pmd; + int node; + + setup_page_offset(); +@@ -1955,9 +1969,18 @@ void __init paging_init(void) + + memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + +- /* Now can init the kernel/bad page tables. */ +- pud_set(pud_offset(&swapper_pg_dir[0], 0), +- swapper_low_pmd_dir + (shift / sizeof(pgd_t))); ++ /* The kernel page tables we publish into what the rest of the ++ * world sees must be adjusted so that they see the PAGE_OFFSET ++ * address of these in-kernel data structures. However right ++ * here we must access them from the kernel image side, because ++ * the trap tables haven't been taken over and therefore we cannot ++ * take TLB misses in the PAGE_OFFSET linear mappings yet. 
++ */ ++ pud = swapper_pud_dir + (shift / sizeof(pud_t)); ++ pgd_set(&swapper_pg_dir[0], pud); ++ ++ pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t)); ++ pud_set(&swapper_pud_dir[0], pmd); + + inherit_prom_mappings(); + diff --git a/queue-3.14/sparc64-t5-pmu.patch b/queue-3.14/sparc64-t5-pmu.patch new file mode 100644 index 00000000000..8e98a3c62ff --- /dev/null +++ b/queue-3.14/sparc64-t5-pmu.patch @@ -0,0 +1,179 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: bob picco +Date: Tue, 16 Sep 2014 10:09:06 -0400 +Subject: sparc64: T5 PMU + +From: bob picco + +The T5 (niagara5) has different PCR related HV fast trap values and a new +HV API Group. This patch utilizes these and shares when possible with niagara4. + +We use the same sparc_pmu niagara4_pmu. Should there be new effort to +obtain the MCU perf statistics then this would have to be changed. + +Cc: sparclinux@vger.kernel.org +Signed-off-by: Bob Picco +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/hypervisor.h | 11 ++++++++ + arch/sparc/kernel/hvapi.c | 1 + arch/sparc/kernel/hvcalls.S | 16 ++++++++++++ + arch/sparc/kernel/pcr.c | 47 ++++++++++++++++++++++++++++++++---- + arch/sparc/kernel/perf_event.c | 3 +- + 5 files changed, 73 insertions(+), 5 deletions(-) + +--- a/arch/sparc/include/asm/hypervisor.h ++++ b/arch/sparc/include/asm/hypervisor.h +@@ -2944,6 +2944,16 @@ extern unsigned long sun4v_vt_set_perfre + unsigned long reg_val); + #endif + ++#define HV_FAST_T5_GET_PERFREG 0x1a8 ++#define HV_FAST_T5_SET_PERFREG 0x1a9 ++ ++#ifndef __ASSEMBLY__ ++unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, ++ unsigned long *reg_val); ++unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, ++ unsigned long reg_val); ++#endif ++ + /* Function numbers for HV_CORE_TRAP. 
*/ + #define HV_CORE_SET_VER 0x00 + #define HV_CORE_PUTCHAR 0x01 +@@ -2975,6 +2985,7 @@ extern unsigned long sun4v_vt_set_perfre + #define HV_GRP_VF_CPU 0x0205 + #define HV_GRP_KT_CPU 0x0209 + #define HV_GRP_VT_CPU 0x020c ++#define HV_GRP_T5_CPU 0x0211 + #define HV_GRP_DIAG 0x0300 + + #ifndef __ASSEMBLY__ +--- a/arch/sparc/kernel/hvapi.c ++++ b/arch/sparc/kernel/hvapi.c +@@ -46,6 +46,7 @@ static struct api_info api_table[] = { + { .group = HV_GRP_VF_CPU, }, + { .group = HV_GRP_KT_CPU, }, + { .group = HV_GRP_VT_CPU, }, ++ { .group = HV_GRP_T5_CPU, }, + { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, + }; + +--- a/arch/sparc/kernel/hvcalls.S ++++ b/arch/sparc/kernel/hvcalls.S +@@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg) + retl + nop + ENDPROC(sun4v_vt_set_perfreg) ++ ++ENTRY(sun4v_t5_get_perfreg) ++ mov %o1, %o4 ++ mov HV_FAST_T5_GET_PERFREG, %o5 ++ ta HV_FAST_TRAP ++ stx %o1, [%o4] ++ retl ++ nop ++ENDPROC(sun4v_t5_get_perfreg) ++ ++ENTRY(sun4v_t5_set_perfreg) ++ mov HV_FAST_T5_SET_PERFREG, %o5 ++ ta HV_FAST_TRAP ++ retl ++ nop ++ENDPROC(sun4v_t5_set_perfreg) +--- a/arch/sparc/kernel/pcr.c ++++ b/arch/sparc/kernel/pcr.c +@@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = + .pcr_nmi_disable = PCR_N4_PICNPT, + }; + ++static u64 n5_pcr_read(unsigned long reg_num) ++{ ++ unsigned long val; ++ ++ (void) sun4v_t5_get_perfreg(reg_num, &val); ++ ++ return val; ++} ++ ++static void n5_pcr_write(unsigned long reg_num, u64 val) ++{ ++ (void) sun4v_t5_set_perfreg(reg_num, val); ++} ++ ++static const struct pcr_ops n5_pcr_ops = { ++ .read_pcr = n5_pcr_read, ++ .write_pcr = n5_pcr_write, ++ .read_pic = n4_pic_read, ++ .write_pic = n4_pic_write, ++ .nmi_picl_value = n4_picl_value, ++ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | ++ PCR_N4_UTRACE | PCR_N4_TOE | ++ (26 << PCR_N4_SL_SHIFT)), ++ .pcr_nmi_disable = PCR_N4_PICNPT, ++}; ++ ++ + static unsigned long perf_hsvc_group; + static unsigned long perf_hsvc_major; + static unsigned long perf_hsvc_minor; + + static int __init register_perf_hsvc(void) + { ++ unsigned long hverror; ++ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: +@@ -215,6 +244,10 @@ static int __init register_perf_hsvc(voi + perf_hsvc_group = HV_GRP_VT_CPU; + break; + ++ case SUN4V_CHIP_NIAGARA5: ++ perf_hsvc_group = HV_GRP_T5_CPU; ++ break; ++ + default: + return -ENODEV; + } +@@ -222,10 +255,12 @@ static int __init register_perf_hsvc(voi + + perf_hsvc_major = 1; + perf_hsvc_minor = 0; +- if (sun4v_hvapi_register(perf_hsvc_group, +- perf_hsvc_major, +- &perf_hsvc_minor)) { +- printk("perfmon: Could not register hvapi.\n"); ++ hverror = sun4v_hvapi_register(perf_hsvc_group, ++ perf_hsvc_major, ++ &perf_hsvc_minor); ++ if (hverror) { ++ pr_err("perfmon: Could not register hvapi(0x%lx).\n", ++ hverror); + return -ENODEV; + } + } +@@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(vo + pcr_ops = &n4_pcr_ops; + break; + ++ case SUN4V_CHIP_NIAGARA5: ++ pcr_ops = &n5_pcr_ops; ++ break; ++ + default: + ret = -ENODEV; + break; +--- a/arch/sparc/kernel/perf_event.c ++++ b/arch/sparc/kernel/perf_event.c +@@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void) + sparc_pmu = &niagara2_pmu; + return true; + } +- if (!strcmp(sparc_pmu_type, "niagara4")) { ++ if (!strcmp(sparc_pmu_type, "niagara4") || ++ !strcmp(sparc_pmu_type, "niagara5")) { + sparc_pmu = &niagara4_pmu; + return true; + } diff --git a/queue-3.14/sparc64-use-kernel-page-tables-for-vmemmap.patch b/queue-3.14/sparc64-use-kernel-page-tables-for-vmemmap.patch new 
file mode 100644 index 00000000000..9256412b062 --- /dev/null +++ b/queue-3.14/sparc64-use-kernel-page-tables-for-vmemmap.patch @@ -0,0 +1,166 @@ +From foo@baz Tue Oct 28 11:13:19 CST 2014 +From: "David S. Miller" +Date: Wed, 24 Sep 2014 21:20:14 -0700 +Subject: sparc64: Use kernel page tables for vmemmap. + +From: "David S. Miller" + +[ Upstream commit c06240c7f5c39c83dfd7849c0770775562441b96 ] + +For sparse memory configurations, the vmemmap array behaves terribly +and it takes up an inordinate amount of space in the BSS section of +the kernel image unconditionally. + +Just build huge PMDs and look them up just like we do for TLB misses +in the vmalloc area. + +Kernel BSS shrinks by about 2MB. + +Signed-off-by: David S. Miller +Acked-by: Bob Picco +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/ktlb.S | 9 +---- + arch/sparc/mm/init_64.c | 72 ++++++++++++++++++++++------------------------- + arch/sparc/mm/init_64.h | 11 ------- + 3 files changed, 36 insertions(+), 56 deletions(-) + +--- a/arch/sparc/kernel/ktlb.S ++++ b/arch/sparc/kernel/ktlb.S +@@ -186,13 +186,8 @@ kvmap_dtlb_load: + + #ifdef CONFIG_SPARSEMEM_VMEMMAP + kvmap_vmemmap: +- sub %g4, %g5, %g5 +- srlx %g5, ILOG2_4MB, %g5 +- sethi %hi(vmemmap_table), %g1 +- sllx %g5, 3, %g5 +- or %g1, %lo(vmemmap_table), %g1 +- ba,pt %xcc, kvmap_dtlb_load +- ldx [%g1 + %g5], %g5 ++ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) ++ ba,a,pt %xcc, kvmap_dtlb_load + #endif + + kvmap_dtlb_nonlinear: +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -2255,18 +2255,9 @@ unsigned long _PAGE_CACHE __read_mostly; + EXPORT_SYMBOL(_PAGE_CACHE); + + #ifdef CONFIG_SPARSEMEM_VMEMMAP +-unsigned long vmemmap_table[VMEMMAP_SIZE]; +- +-static long __meminitdata addr_start, addr_end; +-static int __meminitdata node_start; +- + int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, + int node) + { +- unsigned long phys_start = (vstart - VMEMMAP_BASE); +- unsigned long phys_end = (vend - VMEMMAP_BASE); +- unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; +- unsigned long end = VMEMMAP_ALIGN(phys_end); + unsigned long pte_base; + + pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | +@@ -2277,47 +2268,52 @@ int __meminit vmemmap_populate(unsigned + _PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + +- for (; addr < end; addr += VMEMMAP_CHUNK) { +- unsigned long *vmem_pp = +- vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); +- void *block; ++ pte_base |= _PAGE_PMD_HUGE; + +- if (!(*vmem_pp & _PAGE_VALID)) { +- block = vmemmap_alloc_block(1UL << ILOG2_4MB, node); +- if (!block) ++ vstart = vstart & PMD_MASK; ++ vend = ALIGN(vend, PMD_SIZE); ++ for (; vstart < vend; vstart += PMD_SIZE) { ++ pgd_t *pgd = pgd_offset_k(vstart); ++ unsigned long pte; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ if (pgd_none(*pgd)) { ++ pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); ++ ++ if (!new) + return -ENOMEM; ++ pgd_populate(&init_mm, pgd, new); ++ } + +- *vmem_pp = pte_base | __pa(block); ++ pud = pud_offset(pgd, vstart); ++ if (pud_none(*pud)) { ++ pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); + +- /* check to see if we have contiguous blocks */ +- if (addr_end != addr || node_start != node) { +- if (addr_start) +- printk(KERN_DEBUG " [%lx-%lx] on node %d\n", +- addr_start, addr_end-1, node_start); +- addr_start = addr; +- node_start = node; +- } +- addr_end = addr + VMEMMAP_CHUNK; ++ if (!new) ++ return -ENOMEM; ++ pud_populate(&init_mm, pud, new); + } +- } +- return 0; +-} + +-void __meminit vmemmap_populate_print_last(void) +-{ 
+- if (addr_start) { +- printk(KERN_DEBUG " [%lx-%lx] on node %d\n", +- addr_start, addr_end-1, node_start); +- addr_start = 0; +- addr_end = 0; +- node_start = 0; ++ pmd = pmd_offset(pud, vstart); ++ ++ pte = pmd_val(*pmd); ++ if (!(pte & _PAGE_VALID)) { ++ void *block = vmemmap_alloc_block(PMD_SIZE, node); ++ ++ if (!block) ++ return -ENOMEM; ++ ++ pmd_val(*pmd) = pte_base | __pa(block); ++ } + } ++ ++ return 0; + } + + void vmemmap_free(unsigned long start, unsigned long end) + { + } +- + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + + static void prot_init_common(unsigned long page_none, +--- a/arch/sparc/mm/init_64.h ++++ b/arch/sparc/mm/init_64.h +@@ -31,15 +31,4 @@ extern unsigned long kern_locked_tte_dat + + extern void prom_world(int enter); + +-#ifdef CONFIG_SPARSEMEM_VMEMMAP +-#define VMEMMAP_CHUNK_SHIFT 22 +-#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) +-#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) +-#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) +- +-#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ +- sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) +-extern unsigned long vmemmap_table[VMEMMAP_SIZE]; +-#endif +- + #endif /* _SPARC64_MM_INIT_H */
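
As a cross-check on the 4-level layout introduced by the switch-to-4-level-page-tables patch above (and relied upon by the vmemmap patch), note that with 8K pages (PAGE_SHIFT == 13) every table level indexes PAGE_SHIFT - 3 = 10 bits, so four levels plus the 13-bit page offset cover exactly the 53 bits demanded by the "#if (PGDIR_SHIFT + PGDIR_BITS) != 53" build-time assertion. The standalone sketch below mirrors those macros; the base definition of PMD_SHIFT is assumed from the unchanged pgtable_64.h context and does not appear in the hunks shown here.

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT  13                              /* 8K base pages */
#define PMD_SHIFT   (PAGE_SHIFT + (PAGE_SHIFT - 3)) /* assumed from unchanged context */
#define PMD_BITS    (PAGE_SHIFT - 3)
#define PUD_SHIFT   (PMD_SHIFT + PMD_BITS)
#define PUD_BITS    (PAGE_SHIFT - 3)
#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS)
#define PGDIR_BITS  (PAGE_SHIFT - 3)

int main(void)
{
	/* Each level resolves 10 bits: PMD at shift 23, PUD at 33, PGD at 43. */
	printf("PMD_SHIFT=%d PUD_SHIFT=%d PGDIR_SHIFT=%d\n",
	       PMD_SHIFT, PUD_SHIFT, PGDIR_SHIFT);
	/* Mirrors the patch's build-time check: 43 + 10 == 53 VA bits. */
	assert(PGDIR_SHIFT + PGDIR_BITS == 53);
	return 0;
}

Built with any C compiler, this prints PMD_SHIFT=23 PUD_SHIFT=33 PGDIR_SHIFT=43 and the assertion holds, matching the error check in the pgtable_64.h hunk.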