arm-mvebu-netgear-rn102-use-hardware-bch-ecc.patch
ecryptfs-avoid-to-access-null-pointer-when-write-metadata-in-xattr.patch
xfs-ensure-wb_sync_all-writeback-handles-partial-pages-correctly.patch
+sparc64-do-not-disable-interrupts-in-nmi_cpu_busy.patch
+sparc64-fix-pcr_ops-initialization-and-usage-bugs.patch
+sparc32-dma_alloc_coherent-must-honour-gfp-flags.patch
+sparc64-sun4v-tlb-error-power-off-events.patch
+sparc64-fix-corrupted-thread-fault-code.patch
+sparc64-find_node-adjustment.patch
+sparc64-move-request_irq-from-ldc_bind-to-ldc_alloc.patch
+sparc-let-memset-return-the-address-argument.patch
+sparc64-fix-reversed-start-end-in-flush_tlb_kernel_range.patch
+sparc64-fix-lockdep-warnings-on-reboot-on-ultra-5.patch
+sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
+sparc64-do-not-define-thread-fpregs-save-area-as-zero-length-array.patch
+sparc64-fix-hibernation-code-reference-to-page_offset.patch
+sparc64-correctly-recognise-m6-and-m7-cpu-type.patch
+sparc64-support-m6-and-m7-for-building-cpu-distribution-map.patch
+sparc64-cpu-hardware-caps-support-for-sparc-m6-and-m7.patch
+sparc64-t5-pmu.patch
+sparc64-switch-to-4-level-page-tables.patch
+sparc64-define-va-hole-at-run-time-rather-than-at-compile-time.patch
+sparc64-adjust-ktsb-assembler-to-support-larger-physical-addresses.patch
+sparc64-fix-physical-memory-management-regressions-with-large-max_phys_bits.patch
+sparc64-use-kernel-page-tables-for-vmemmap.patch
+sparc64-increase-max_phys_address_bits-to-53.patch
+sparc64-adjust-vmalloc-region-size-based-upon-available-virtual-address-bits.patch
+sparc64-sparse-irq.patch
+sparc64-kill-unnecessary-tables-and-increase-max_banks.patch
+sparc64-increase-size-of-boot-string-to-1024-bytes.patch
+sparc64-fix-register-corruption-in-top-most-kernel-stack-frame-during-boot.patch
+sparc64-implement-__get_user_pages_fast.patch
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Fri, 29 Aug 2014 17:08:21 +0200
+Subject: sparc: Let memset return the address argument
+
+From: Andreas Larsson <andreas@gaisler.com>
+
+[ Upstream commit 74cad25c076a2f5253312c2fe82d1a4daecc1323 ]
+
+This makes memset follow the standard (instead of returning 0 on success). This
+is needed because certain versions of gcc optimize around memset calls and assume
+that the address argument is preserved in %o0.
+
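+For illustration - this is a hypothetical caller, not part of the patch -
+the miscompile can bite in code like:
+
+	void *zero_out(void *p, size_t n)
+	{
+		return memset(p, 0, n);	/* gcc may assume %o0 == p here */
+	}
+
+If memset clobbers %o0 instead of returning its address argument, such
+a caller silently returns garbage.
+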
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/lib/memset.S | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/sparc/lib/memset.S
++++ b/arch/sparc/lib/memset.S
+@@ -3,8 +3,9 @@
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+- * Returns 0, if ok, and number of bytes not yet set if exception
+- * occurs and we were called as clear_user.
++ * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
++ * number of bytes not yet set if exception occurs and we were called as
++ * clear_user.
+ */
+
+ #include <asm/ptrace.h>
+@@ -65,6 +66,8 @@ __bzero_begin:
+ .globl __memset_start, __memset_end
+ __memset_start:
+ memset:
++ mov %o0, %g1
++ mov 1, %g4
+ and %o1, 0xff, %g3
+ sll %g3, 8, %g2
+ or %g3, %g2, %g3
+@@ -89,6 +92,7 @@ memset:
+ sub %o0, %o2, %o0
+
+ __bzero:
++ clr %g4
+ mov %g0, %g3
+ 1:
+ cmp %o1, 7
+@@ -151,8 +155,8 @@ __bzero:
+ bne,a 8f
+ EX(stb %g3, [%o0], and %o1, 1)
+ 8:
+- retl
+- clr %o0
++ b 0f
++ nop
+ 7:
+ be 13b
+ orcc %o1, 0, %g0
+@@ -164,6 +168,12 @@ __bzero:
+ bne 8b
+ EX(stb %g3, [%o0 - 1], add %o1, 1)
+ 0:
++ andcc %g4, 1, %g0
++ be 5f
++ nop
++ retl
++ mov %g1, %o0
++5:
+ retl
+ clr %o0
+ __memset_end:
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Daniel Hellstrom <daniel@gaisler.com>
+Date: Wed, 10 Sep 2014 14:17:52 +0200
+Subject: sparc32: dma_alloc_coherent must honour gfp flags
+
+From: Daniel Hellstrom <daniel@gaisler.com>
+
+[ Upstream commit d1105287aabe88dbb3af825140badaa05cf0442c ]
+
+dma_zalloc_coherent() calls dma_alloc_coherent(__GFP_ZERO)
+but the sparc32 implementations sbus_alloc_coherent() and
+pci32_alloc_coherent() don't take the gfp flags into
+account.
+
+Tested on the SPARC32/LEON GRETH Ethernet driver, which fails
+because dma_alloc_coherent(__GFP_ZERO) returns non-zeroed
+pages.
+
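+For reference, the generic wrapper in include/linux/dma-mapping.h of
+this era is essentially:
+
+	static inline void *dma_zalloc_coherent(struct device *dev,
+						size_t size,
+						dma_addr_t *dma_handle,
+						gfp_t flag)
+	{
+		return dma_alloc_coherent(dev, size, dma_handle,
+					  flag | __GFP_ZERO);
+	}
+
+so __GFP_ZERO is only honoured if the arch backend actually passes
+'flag' down to the page allocator.
+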
+Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/ioport.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/kernel/ioport.c
++++ b/arch/sparc/kernel/ioport.c
+@@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct
+ }
+
+ order = get_order(len_total);
+- if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0)
++ va = __get_free_pages(gfp, order);
++ if (va == 0)
+ goto err_nopages;
+
+ if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
+@@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct
+ }
+
+ order = get_order(len_total);
+- va = (void *) __get_free_pages(GFP_KERNEL, order);
++ va = (void *) __get_free_pages(gfp, order);
+ if (va == NULL) {
+ printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
+ goto err_nopages;
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 17 Sep 2014 10:14:56 -0700
+Subject: sparc64: Adjust KTSB assembler to support larger physical addresses.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 8c82dc0e883821c098c8b0b130ffebabf9aab5df ]
+
+As currently coded the KTSB accesses in the kernel only support up to
+47 bits of physical addressing.
+
+Adjust the instruction and patching sequence in order to support
+arbitrary 64-bit addresses.
+
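+(For background on the sequence being patched: sethi loads bits 31:10
+of a 32-bit constant and or fills in bits 9:0, so a full 64-bit
+constant is built as
+
+	sethi	%uhi(sym), REG1		! bits 63:42 of sym
+	sethi	%hi(sym), REG2		! bits 31:10 of sym
+	or	REG1, %ulo(sym), REG1	! bits 41:32 of sym
+	or	REG2, %lo(sym), REG2	! bits 9:0 of sym
+	sllx	REG1, 32, REG1
+	or	REG1, REG2, REG1
+
+and the run-time patching below rewrites the sethi/or immediates with
+the TSB's physical address.)
+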
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/tsb.h | 30 ++++++++++++------------------
+ arch/sparc/mm/init_64.c | 28 +++++++++++++++++++++++++---
+ 2 files changed, 37 insertions(+), 21 deletions(-)
+
+--- a/arch/sparc/include/asm/tsb.h
++++ b/arch/sparc/include/asm/tsb.h
+@@ -256,8 +256,6 @@ extern struct tsb_phys_patch_entry __tsb
+ (KERNEL_TSB_SIZE_BYTES / 16)
+ #define KERNEL_TSB4M_NENTRIES 4096
+
+-#define KTSB_PHYS_SHIFT 15
+-
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+@@ -266,17 +264,15 @@ extern struct tsb_phys_patch_entry __tsb
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+-661: sethi %hi(swapper_tsb), REG1; \
+- or REG1, %lo(swapper_tsb), REG1; \
++661: sethi %uhi(swapper_tsb), REG1; \
++ sethi %hi(swapper_tsb), REG2; \
++ or REG1, %ulo(swapper_tsb), REG1; \
++ or REG2, %lo(swapper_tsb), REG2; \
+ .section .swapper_tsb_phys_patch, "ax"; \
+ .word 661b; \
+ .previous; \
+-661: nop; \
+- .section .tsb_ldquad_phys_patch, "ax"; \
+- .word 661b; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- .previous; \
++ sllx REG1, 32, REG1; \
++ or REG1, REG2, REG1; \
+ srlx VADDR, PAGE_SHIFT, REG2; \
+ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+@@ -291,17 +287,15 @@ extern struct tsb_phys_patch_entry __tsb
+ * we can make use of that for the index computation.
+ */
+ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+-661: sethi %hi(swapper_4m_tsb), REG1; \
+- or REG1, %lo(swapper_4m_tsb), REG1; \
++661: sethi %uhi(swapper_4m_tsb), REG1; \
++ sethi %hi(swapper_4m_tsb), REG2; \
++ or REG1, %ulo(swapper_4m_tsb), REG1; \
++ or REG2, %lo(swapper_4m_tsb), REG2; \
+ .section .swapper_4m_tsb_phys_patch, "ax"; \
+ .word 661b; \
+ .previous; \
+-661: nop; \
+- .section .tsb_ldquad_phys_patch, "ax"; \
+- .word 661b; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
+- .previous; \
++ sllx REG1, 32, REG1; \
++ or REG1, REG2, REG1; \
+ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1727,19 +1727,41 @@ static void __init tsb_phys_patch(void)
+ static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
+ extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
++/* The swapper TSBs are loaded with a base sequence of:
++ *
++ * sethi %uhi(SYMBOL), REG1
++ * sethi %hi(SYMBOL), REG2
++ * or REG1, %ulo(SYMBOL), REG1
++ * or REG2, %lo(SYMBOL), REG2
++ * sllx REG1, 32, REG1
++ * or REG1, REG2, REG1
++ *
++ * When we use physical addressing for the TSB accesses, we patch the
++ * first four instructions in the above sequence.
++ */
++
+ static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
+ {
+- pa >>= KTSB_PHYS_SHIFT;
++ unsigned long high_bits, low_bits;
++
++ high_bits = (pa >> 32) & 0xffffffff;
++ low_bits = (pa >> 0) & 0xffffffff;
+
+ while (start < end) {
+ unsigned int *ia = (unsigned int *)(unsigned long)*start;
+
+- ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
++ ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
+ __asm__ __volatile__("flush %0" : : "r" (ia));
+
+- ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
++ ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
+ __asm__ __volatile__("flush %0" : : "r" (ia + 1));
+
++ ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
++ __asm__ __volatile__("flush %0" : : "r" (ia + 2));
++
++ ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
++ __asm__ __volatile__("flush %0" : : "r" (ia + 3));
++
+ start++;
+ }
+ }
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 27 Sep 2014 11:05:21 -0700
+Subject: sparc64: Adjust vmalloc region size based upon available virtual address bits.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit bb4e6e85daa52a9f6210fa06a5ec6269598a202b ]
+
+In order to accommodate embedded per-cpu allocation with large numbers
+of cpus and numa nodes, we have to use as much virtual address space
+as possible for the vmalloc region. Otherwise we can get things like:
+
+PERCPU: max_distance=0x380001c10000 too large for vmalloc space 0xff00000000
+
+So, once we select a value for PAGE_OFFSET, derive the size of the
+vmalloc region based upon that.
+
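+As a worked example, with the T4 value sparc64_va_hole_bottom ==
+0x0008000000000000 established elsewhere in this series, the code
+below computes
+
+	VMALLOC_END = (0x0008000000000000 >> 1) + (0x0008000000000000 >> 2)
+	            = 0x0004000000000000 + 0x0002000000000000
+	            = 0x0006000000000000
+
+i.e. the vmalloc region now spans three quarters of the positive half
+of the address space, instead of the old fixed ~1TB window.
+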
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/page_64.h | 1 -
+ arch/sparc/include/asm/pgtable_64.h | 9 +++++----
+ arch/sparc/kernel/ktlb.S | 8 ++++----
+ arch/sparc/mm/init_64.c | 30 +++++++++++++++++++-----------
+ 4 files changed, 28 insertions(+), 20 deletions(-)
+
+--- a/arch/sparc/include/asm/page_64.h
++++ b/arch/sparc/include/asm/page_64.h
+@@ -117,7 +117,6 @@ extern unsigned long sparc64_va_hole_bot
+
+ #include <asm-generic/memory_model.h>
+
+-#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X)))
+ extern unsigned long PAGE_OFFSET;
+
+ #endif /* !(__ASSEMBLY__) */
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -40,10 +40,7 @@
+ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL)
+ #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL)
+ #define VMALLOC_START _AC(0x0000000100000000,UL)
+-#define VMALLOC_END _AC(0x0000010000000000,UL)
+-#define VMEMMAP_BASE _AC(0x0000010000000000,UL)
+-
+-#define vmemmap ((struct page *)VMEMMAP_BASE)
++#define VMEMMAP_BASE VMALLOC_END
+
+ /* PMD_SHIFT determines the size of the area a second-level page
+ * table can map
+@@ -81,6 +78,10 @@
+
+ #ifndef __ASSEMBLY__
+
++extern unsigned long VMALLOC_END;
++
++#define vmemmap ((struct page *)VMEMMAP_BASE)
++
+ #include <linux/sched.h>
+
+ bool kern_addr_valid(unsigned long addr);
+--- a/arch/sparc/kernel/ktlb.S
++++ b/arch/sparc/kernel/ktlb.S
+@@ -199,8 +199,8 @@ kvmap_dtlb_nonlinear:
+
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* Do not use the TSB for vmemmap. */
+- mov (VMEMMAP_BASE >> 40), %g5
+- sllx %g5, 40, %g5
++ sethi %hi(VMEMMAP_BASE), %g5
++ ldx [%g5 + %lo(VMEMMAP_BASE)], %g5
+ cmp %g4,%g5
+ bgeu,pn %xcc, kvmap_vmemmap
+ nop
+@@ -212,8 +212,8 @@ kvmap_dtlb_tsbmiss:
+ sethi %hi(MODULES_VADDR), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_longpath
+- mov (VMALLOC_END >> 40), %g5
+- sllx %g5, 40, %g5
++ sethi %hi(VMALLOC_END), %g5
++ ldx [%g5 + %lo(VMALLOC_END)], %g5
+ cmp %g4, %g5
+ bgeu,pn %xcc, kvmap_dtlb_longpath
+ nop
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1363,25 +1363,24 @@ static unsigned long max_phys_bits = 40;
+
+ bool kern_addr_valid(unsigned long addr)
+ {
+- unsigned long above = ((long)addr) >> max_phys_bits;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+- if (above != 0 && above != -1UL)
+- return false;
+-
+- if (addr >= (unsigned long) KERNBASE &&
+- addr < (unsigned long)&_end)
+- return true;
+-
+- if (addr >= PAGE_OFFSET) {
++ if ((long)addr < 0L) {
+ unsigned long pa = __pa(addr);
+
++ if ((addr >> max_phys_bits) != 0UL)
++ return false;
++
+ return pfn_valid(pa >> PAGE_SHIFT);
+ }
+
++ if (addr >= (unsigned long) KERNBASE &&
++ addr < (unsigned long)&_end)
++ return true;
++
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return 0;
+@@ -1650,6 +1649,9 @@ unsigned long __init find_ecache_flush_s
+ unsigned long PAGE_OFFSET;
+ EXPORT_SYMBOL(PAGE_OFFSET);
+
++unsigned long VMALLOC_END = 0x0000010000000000UL;
++EXPORT_SYMBOL(VMALLOC_END);
++
+ unsigned long sparc64_va_hole_top = 0xfffff80000000000UL;
+ unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
+
+@@ -1706,10 +1708,16 @@ static void __init setup_page_offset(voi
+ prom_halt();
+ }
+
+- PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
++ PAGE_OFFSET = sparc64_va_hole_top;
++ VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
++ (sparc64_va_hole_bottom >> 2));
+
+- pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
++ pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+ PAGE_OFFSET, max_phys_bits);
++ pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
++ VMALLOC_START, VMALLOC_END);
++ pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
++ VMEMMAP_BASE, VMEMMAP_BASE << 1);
+ }
+
+ static void __init tsb_phys_patch(void)
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Allen Pais <allen.pais@oracle.com>
+Date: Mon, 8 Sep 2014 11:48:53 +0530
+Subject: sparc64: correctly recognise M6 and M7 cpu type
+
+From: Allen Pais <allen.pais@oracle.com>
+
+The following patch adds support for correctly
+recognising the M6 and M7 cpu types.
+
+Signed-off-by: Allen Pais <allen.pais@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/spitfire.h | 2 ++
+ arch/sparc/kernel/cpu.c | 12 ++++++++++++
+ arch/sparc/kernel/head_64.S | 12 ++++++++++++
+ 3 files changed, 26 insertions(+)
+
+--- a/arch/sparc/include/asm/spitfire.h
++++ b/arch/sparc/include/asm/spitfire.h
+@@ -45,6 +45,8 @@
+ #define SUN4V_CHIP_NIAGARA3 0x03
+ #define SUN4V_CHIP_NIAGARA4 0x04
+ #define SUN4V_CHIP_NIAGARA5 0x05
++#define SUN4V_CHIP_SPARC_M6 0x06
++#define SUN4V_CHIP_SPARC_M7 0x07
+ #define SUN4V_CHIP_SPARC64X 0x8a
+ #define SUN4V_CHIP_UNKNOWN 0xff
+
+--- a/arch/sparc/kernel/cpu.c
++++ b/arch/sparc/kernel/cpu.c
+@@ -493,6 +493,18 @@ static void __init sun4v_cpu_probe(void)
+ sparc_pmu_type = "niagara5";
+ break;
+
++ case SUN4V_CHIP_SPARC_M6:
++ sparc_cpu_type = "SPARC-M6";
++ sparc_fpu_type = "SPARC-M6 integrated FPU";
++ sparc_pmu_type = "sparc-m6";
++ break;
++
++ case SUN4V_CHIP_SPARC_M7:
++ sparc_cpu_type = "SPARC-M7";
++ sparc_fpu_type = "SPARC-M7 integrated FPU";
++ sparc_pmu_type = "sparc-m7";
++ break;
++
+ case SUN4V_CHIP_SPARC64X:
+ sparc_cpu_type = "SPARC64-X";
+ sparc_fpu_type = "SPARC64-X integrated FPU";
+--- a/arch/sparc/kernel/head_64.S
++++ b/arch/sparc/kernel/head_64.S
+@@ -427,6 +427,12 @@ sun4v_chip_type:
+ cmp %g2, '5'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA5, %g4
++ cmp %g2, '6'
++ be,pt %xcc, 5f
++ mov SUN4V_CHIP_SPARC_M6, %g4
++ cmp %g2, '7'
++ be,pt %xcc, 5f
++ mov SUN4V_CHIP_SPARC_M7, %g4
+ ba,pt %xcc, 49f
+ nop
+
+@@ -585,6 +591,12 @@ niagara_tlb_fixup:
+ cmp %g1, SUN4V_CHIP_NIAGARA5
+ be,pt %xcc, niagara4_patch
+ nop
++ cmp %g1, SUN4V_CHIP_SPARC_M6
++ be,pt %xcc, niagara4_patch
++ nop
++ cmp %g1, SUN4V_CHIP_SPARC_M7
++ be,pt %xcc, niagara4_patch
++ nop
+
+ call generic_patch_copyops
+ nop
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Allen Pais <allen.pais@oracle.com>
+Date: Mon, 8 Sep 2014 11:48:55 +0530
+Subject: sparc64: cpu hardware caps support for sparc M6 and M7
+
+From: Allen Pais <allen.pais@oracle.com>
+
+Signed-off-by: Allen Pais <allen.pais@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/setup_64.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/sparc/kernel/setup_64.c
++++ b/arch/sparc/kernel/setup_64.c
+@@ -500,12 +500,16 @@ static void __init init_sparc64_elf_hwca
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+ cap |= HWCAP_SPARC_BLKINIT;
+ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+ cap |= HWCAP_SPARC_N2;
+ }
+@@ -533,6 +537,8 @@ static void __init init_sparc64_elf_hwca
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+ cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
+ AV_SPARC_ASI_BLK_INIT |
+@@ -540,6 +546,8 @@ static void __init init_sparc64_elf_hwca
+ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
++ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+ cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
+ AV_SPARC_FMAF);
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 26 Sep 2014 21:58:33 -0700
+Subject: sparc64: Define VA hole at run time, rather than at compile time.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 4397bed080598001e88f612deb8b080bb1cc2322 ]
+
+Now that we use 4-level page tables, we can provide up to 53 bits of
+virtual address space to the user.
+
+Adjust the VA hole based upon the capabilities of the cpu type probed.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/page_64.h | 15 ++++-----------
+ arch/sparc/mm/init_64.c | 21 +++++++++++++++++++++
+ 2 files changed, 25 insertions(+), 11 deletions(-)
+
+--- a/arch/sparc/include/asm/page_64.h
++++ b/arch/sparc/include/asm/page_64.h
+@@ -102,21 +102,14 @@ typedef unsigned long pgprot_t;
+
+ typedef pte_t *pgtable_t;
+
+-/* These two values define the virtual address space range in which we
+- * must forbid 64-bit user processes from making mappings. It used to
+- * represent precisely the virtual address space hole present in most
+- * early sparc64 chips including UltraSPARC-I. But now it also is
+- * further constrained by the limits of our page tables, which is
+- * 43-bits of virtual address.
+- */
+-#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL)
+-#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL)
++extern unsigned long sparc64_va_hole_top;
++extern unsigned long sparc64_va_hole_bottom;
+
+ /* The next two defines specify the actual exclusion region we
+ * enforce, wherein we use a 4GB red zone on each side of the VA hole.
+ */
+-#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL))
+-#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL))
++#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL))
++#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL))
+
+ #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
+ _AC(0x0000000070000000,UL) : \
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1624,25 +1624,46 @@ static void __init page_offset_shift_pat
+ }
+ }
+
++unsigned long sparc64_va_hole_top = 0xfffff80000000000UL;
++unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
++
+ static void __init setup_page_offset(void)
+ {
+ unsigned long max_phys_bits = 40;
+
+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
++ /* Cheetah/Panther support a full 64-bit virtual
++ * address, so we can use all that our page tables
++ * support.
++ */
++ sparc64_va_hole_top = 0xfff0000000000000UL;
++ sparc64_va_hole_bottom = 0x0010000000000000UL;
++
+ max_phys_bits = 42;
+ } else if (tlb_type == hypervisor) {
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ case SUN4V_CHIP_NIAGARA2:
++ /* T1 and T2 support 48-bit virtual addresses. */
++ sparc64_va_hole_top = 0xffff800000000000UL;
++ sparc64_va_hole_bottom = 0x0000800000000000UL;
++
+ max_phys_bits = 39;
+ break;
+ case SUN4V_CHIP_NIAGARA3:
++ /* T3 supports 48-bit virtual addresses. */
++ sparc64_va_hole_top = 0xffff800000000000UL;
++ sparc64_va_hole_bottom = 0x0000800000000000UL;
++
+ max_phys_bits = 43;
+ break;
+ case SUN4V_CHIP_NIAGARA4:
+ case SUN4V_CHIP_NIAGARA5:
+ case SUN4V_CHIP_SPARC64X:
+ default:
++ /* T4 and later support 52-bit virtual addresses. */
++ sparc64_va_hole_top = 0xfff8000000000000UL;
++ sparc64_va_hole_bottom = 0x0008000000000000UL;
+ max_phys_bits = 47;
+ break;
+ }
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 18 Oct 2014 23:12:33 -0400
+Subject: sparc64: Do not define thread fpregs save area as zero-length array.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit e2653143d7d79a49f1a961aeae1d82612838b12c ]
+
+This breaks the stack end corruption detection facility.
+
+What that facility does is write a magic value to "end_of_stack()"
+and check to see if it gets overwritten.
+
+"end_of_stack()" is "task_thread_info(p) + 1", which for sparc64 is
+the beginning of the FPU register save area.
+
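+The generic helper, roughly as defined in include/linux/sched.h of
+this era, is:
+
+	static inline unsigned long *end_of_stack(struct task_struct *p)
+	{
+		return (unsigned long *)(task_thread_info(p) + 1);
+	}
+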
+So once the user uses the FPU, the magic value is overwritten and the
+debug checks trigger.
+
+Fix this by making the size explicit.
+
+Due to the size we use for the fpsaved[], gsr[], and xfsr[] arrays we
+are limited to 7 levels of FPU state saves. Each FPU register set
+is 256 bytes, so allocate 256 * 7 bytes for the fpregs area.
+
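+(That is, fpregs becomes fpregs[(7 * 256) / sizeof(unsigned long)] ==
+fpregs[224] on sparc64, 1792 bytes in total.)
+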
+Reported-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/thread_info_64.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/include/asm/thread_info_64.h
++++ b/arch/sparc/include/asm/thread_info_64.h
+@@ -63,7 +63,8 @@ struct thread_info {
+ struct pt_regs *kern_una_regs;
+ unsigned int kern_una_insn;
+
+- unsigned long fpregs[0] __attribute__ ((aligned(64)));
++ unsigned long fpregs[(7 * 256) / sizeof(unsigned long)]
++ __attribute__ ((aligned(64)));
+ };
+
+ #endif /* !(__ASSEMBLY__) */
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Mon, 11 Aug 2014 20:45:01 -0700
+Subject: sparc64: Do not disable interrupts in nmi_cpu_busy()
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 58556104e9cd0107a7a8d2692cf04ef31669f6e4 ]
+
+nmi_cpu_busy() is an SMP function call that just makes sure that all of the
+cpus are spinning using cpu cycles while the NMI test runs.
+
+It does not need to disable IRQs because we just care about NMIs executing,
+which they will even with 'normal' IRQs disabled.
+
+It is not legal to enable hard IRQs in an SMP cross call; in fact, this bug
+triggers the BUG check in irq_work_run_list():
+
+ BUG_ON(!irqs_disabled());
+
+Because now irq_work_run() is invoked from the tail of
+generic_smp_call_function_single_interrupt().
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/nmi.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/sparc/kernel/nmi.c
++++ b/arch/sparc/kernel/nmi.c
+@@ -141,7 +141,6 @@ static inline unsigned int get_nmi_count
+
+ static __init void nmi_cpu_busy(void *data)
+ {
+- local_irq_enable_in_hardirq();
+ while (endflag == 0)
+ mb();
+ }
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: bob picco <bpicco@meloft.net>
+Date: Tue, 16 Sep 2014 09:28:15 -0400
+Subject: sparc64: find_node adjustment
+
+From: bob picco <bpicco@meloft.net>
+
+[ Upstream commit 3dee9df54836d5f844f3d58281d3f3e6331b467f ]
+
+We have seen an issue with guest boot into LDOM that causes early boot failures
+because of no matching rules for node identity of the memory. I analyzed this
+on my T4 and concluded there might not be a solution. I saw the issue in
+mainline too when booting into the control/primary domain - with guests
+configured. Note, this could be a firmware bug on some older machines.
+
+I'll provide a full explanation of the issues below. Should we not find a
+matching BEST latency group for a real address (RA) then we will assume node 0.
+On the T4-2 here with the information provided I can't see an alternative.
+
+Technically the LDOM shown below should match the MBLOCK to the
+favorable latency group. However, other factors must be considered too:
+were the memory controllers configured for "fine"-grained or
+"coarse"-grained interleave (T4)? Also, should a "group" MD node be
+considered a NUMA node?
+
+There has to be at least one Machine Description (MD) "group" and hence one
+NUMA node. The group can have one or more latency groups (lg) - more than one
+memory controller. The current code chooses the smallest latency as the most
+favorable per group. The latency and lg information is in MLGROUP below.
+MBLOCK is the base and size of the RAs for the machine as fetched from OBP
+/memory "available" property. My machine has one MBLOCK but more would be
+possible - with holes?
+
+For a T4-2 the following information has been gathered:
+with LDOM guest
+MEMBLOCK configuration:
+ memory size = 0x27f870000
+ memory.cnt = 0x3
+ memory[0x0] [0x00000020400000-0x0000029fc67fff], 0x27f868000 bytes
+ memory[0x1] [0x0000029fd8a000-0x0000029fd8bfff], 0x2000 bytes
+ memory[0x2] [0x0000029fd92000-0x0000029fd97fff], 0x6000 bytes
+ reserved.cnt = 0x2
+ reserved[0x0] [0x00000020800000-0x000000216c15c0], 0xec15c1 bytes
+ reserved[0x1] [0x00000024800000-0x0000002c180c1e], 0x7980c1f bytes
+MBLOCK[0]: base[20000000] size[280000000] offset[0]
+(note: "base" and "size" reported in "MBLOCK" encompass the "memory[X]" values)
+(note: (RA + offset) & mask = val is the formula to detect a match for the
+memory controller. Should find_node() find no match, a return
+value of -1 resulted for the node - BAD)
+
+There is one group. It has these forward links
+MLGROUP[1]: node[545] latency[1f7e8] match[200000000] mask[200000000]
+MLGROUP[2]: node[54d] latency[2de60] match[0] mask[200000000]
+NUMA NODE[0]: node[545] mask[200000000] val[200000000] (latency[1f7e8])
+(note: "val" is the best lg's (smallest latency) "match")
+
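+(Working the formula on this data: NUMA NODE[0] only matches RAs with
+bit 33 set, since mask and val are both 0x200000000, yet MBLOCK[0]
+starts at 0x20000000 - so every RA below 0x200000000 matched no rule
+and find_node() returned -1.)
+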
+no LDOM guest - bare metal
+MEMBLOCK configuration:
+ memory size = 0xfdf2d0000
+ memory.cnt = 0x3
+ memory[0x0] [0x00000020400000-0x00000fff6adfff], 0xfdf2ae000 bytes
+ memory[0x1] [0x00000fff6d2000-0x00000fff6e7fff], 0x16000 bytes
+ memory[0x2] [0x00000fff766000-0x00000fff771fff], 0xc000 bytes
+ reserved.cnt = 0x2
+ reserved[0x0] [0x00000020800000-0x00000021a04580], 0x1204581 bytes
+ reserved[0x1] [0x00000024800000-0x0000002c7d29fc], 0x7fd29fd bytes
+MBLOCK[0]: base[20000000] size[fe0000000] offset[0]
+
+there are two groups
+group node[16d5]
+MLGROUP[0]: node[1765] latency[1f7e8] match[0] mask[200000000]
+MLGROUP[3]: node[177d] latency[2de60] match[200000000] mask[200000000]
+NUMA NODE[0]: node[1765] mask[200000000] val[0] (latency[1f7e8])
+group node[171d]
+MLGROUP[2]: node[1775] latency[2de60] match[0] mask[200000000]
+MLGROUP[1]: node[176d] latency[1f7e8] match[200000000] mask[200000000]
+NUMA NODE[1]: node[176d] mask[200000000] val[200000000] (latency[1f7e8])
+(note: for this two "group" bare metal machine, 1/2 memory is in group one's
+lg and 1/2 memory is in group two's lg).
+
+Cc: sparclinux@vger.kernel.org
+Signed-off-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -838,7 +838,10 @@ static int find_node(unsigned long addr)
+ if ((addr & p->mask) == p->val)
+ return i;
+ }
+- return -1;
++ /* The following condition has been observed on LDOM guests.*/
++ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
++ " rule. Some physical memory will be owned by node 0.");
++ return 0;
+ }
+
+ static u64 memblock_nid_range(u64 start, u64 end, int *nid)
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 18 Oct 2014 23:03:09 -0400
+Subject: sparc64: Fix corrupted thread fault code.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 84bd6d8b9c0f06b3f188efb479c77e20f05e9a8a ]
+
+Every path that ends up at do_sparc64_fault() must install a valid
+FAULT_CODE_* bitmask in the per-thread fault code byte.
+
+Two paths leading to the label winfix_trampoline (which expects the
+FAULT_CODE_* mask in register %g4) were not doing so:
+
+1) For pre-hypervisor TLB protection violation traps, if we took
+ the 'winfix_trampoline' path we wouldn't have %g4 initialized
+ with the FAULT_CODE_* value yet. Resulting in using the
+ TLB_TAG_ACCESS register address value instead.
+
+2) In the TSB miss path, when we notice that we are going to use a
+ hugepage mapping, but we haven't allocated the hugepage TSB yet, we
+ still have to take the window fixup case into consideration and
+ in that particular path we leave %g4 not setup properly.
+
+Errors of this sort were largely invisible previously, but after
+commit 4ccb9272892c33ef1c19a783cfa87103b30c2784 ("sparc64: sun4v TLB
+error power off events") we now have a fault_code mask bit
+(FAULT_CODE_BAD_RA) that triggers due to this bug.
+
+FAULT_CODE_BAD_RA triggers because this bit is set in TLB_TAG_ACCESS
+(see #1 above) and thus we get seemingly random bus errors triggered
+for user processes.
+
+Fixes: 4ccb9272892c ("sparc64: sun4v TLB error power off events")
+Reported-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/dtlb_prot.S | 6 +++---
+ arch/sparc/kernel/tsb.S | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/sparc/kernel/dtlb_prot.S
++++ b/arch/sparc/kernel/dtlb_prot.S
+@@ -24,11 +24,11 @@
+ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr
+
+ /* PROT ** ICACHE line 2: More real fault processing */
++ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
+ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
+- ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
+- ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
+ mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
+- nop
++ ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
++ nop
+ nop
+ nop
+ nop
+--- a/arch/sparc/kernel/tsb.S
++++ b/arch/sparc/kernel/tsb.S
+@@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath:
+ nop
+ .previous
+
+- rdpr %tl, %g3
+- cmp %g3, 1
++ rdpr %tl, %g7
++ cmp %g7, 1
+ bne,pn %xcc, winfix_trampoline
+- nop
++ mov %g3, %g4
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ call hugetlb_setup
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Tue, 14 Oct 2014 19:37:58 -0700
+Subject: sparc64: Fix FPU register corruption with AES crypto offload.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit f4da3628dc7c32a59d1fb7116bb042e6f436d611 ]
+
+The AES loops in arch/sparc/crypto/aes_glue.c use a scheme where the
+key material is preloaded into the FPU registers, and then we loop
+over and over doing the crypt operation, reusing those pre-cooked key
+registers.
+
+There are intervening blkcipher*() calls between the crypt operation
+calls. And those might perform memcpy() and thus also try to use the
+FPU.
+
+The sparc64 kernel FPU usage mechanism is designed to allow such
+recursive uses, but with a catch.
+
+There has to be a trap between the two FPU using threads of control.
+
+The mechanism works as follows: when the FPU is already in use by the
+kernel, a slot for FPU saving is allocated at trap time. Then if, within the
+trap handler, we try to use the FPU registers, the pre-trap FPU
+register state is saved into the slot. Then at trap return time we
+notice this and restore the pre-trap FPU state.
+
+Over the long term there are various more involved ways we can make
+this work, but for a quick fix let's take advantage of the fact that
+the situation where this happens is very limited.
+
+All sparc64 chips that support the crypto instructions are also using
+the Niagara4 memcpy routine, and that routine only uses the FPU for
+large copies where we can't get the source aligned properly to a
+multiple of 8 bytes.
+
+We look to see if the FPU is already in use in this context, and if so
+we use the non-large copy path which only uses integer registers.
+
+Furthermore, we also limit this special logic to when we are doing
+a kernel copy, rather than a user copy.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/visasm.h | 8 ++++++++
+ arch/sparc/lib/NG4memcpy.S | 14 +++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/include/asm/visasm.h
++++ b/arch/sparc/include/asm/visasm.h
+@@ -39,6 +39,14 @@
+ 297: wr %o5, FPRS_FEF, %fprs; \
+ 298:
+
++#define VISEntryHalfFast(fail_label) \
++ rd %fprs, %o5; \
++ andcc %o5, FPRS_FEF, %g0; \
++ be,pt %icc, 297f; \
++ nop; \
++ ba,a,pt %xcc, fail_label; \
++297: wr %o5, FPRS_FEF, %fprs;
++
+ #define VISExitHalf \
+ wr %o5, 0, %fprs;
+
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -41,6 +41,10 @@
+ #endif
+ #endif
+
++#if !defined(EX_LD) && !defined(EX_ST)
++#define NON_USER_COPY
++#endif
++
+ #ifndef EX_LD
+ #define EX_LD(x) x
+ #endif
+@@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len
+ mov EX_RETVAL(%o3), %o0
+
+ .Llarge_src_unaligned:
++#ifdef NON_USER_COPY
++ VISEntryHalfFast(.Lmedium_vis_entry_fail)
++#else
++ VISEntryHalf
++#endif
+ andn %o2, 0x3f, %o4
+ sub %o2, %o4, %o2
+- VISEntryHalf
+ alignaddr %o1, %g0, %g1
+ add %o1, %o4, %o1
+ EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
+@@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len
+ nop
+ ba,a,pt %icc, .Lmedium_unaligned
+
++#ifdef NON_USER_COPY
++.Lmedium_vis_entry_fail:
++ or %o0, %o1, %g2
++#endif
+ .Lmedium:
+ LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+ andcc %g2, 0x7, %g0
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 24 Sep 2014 21:05:30 -0700
+Subject: sparc64: Fix hibernation code reference to PAGE_OFFSET.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+We changed PAGE_OFFSET to be a variable rather than a constant,
+but this reference here in the hibernate assembler got missed.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/power/hibernate_asm.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/power/hibernate_asm.S
++++ b/arch/sparc/power/hibernate_asm.S
+@@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume)
+ nop
+
+ /* Write PAGE_OFFSET to %g7 */
+- sethi %uhi(PAGE_OFFSET), %g7
+- sllx %g7, 32, %g7
++ sethi %hi(PAGE_OFFSET), %g7
++ ldx [%g7 + %lo(PAGE_OFFSET)], %g7
+
+ setuw (PAGE_SIZE-8), %g3
+
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 10 Oct 2014 15:49:16 -0400
+Subject: sparc64: Fix lockdep warnings on reboot on Ultra-5
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit bdcf81b658ebc4c2640c3c2c55c8b31c601b6996 ]
+
+Inconsistently, the raw_* IRQ routines do not interact with and update
+the irqflags tracing and lockdep state, whereas the raw_* spinlock
+interfaces do.
+
+This causes problems in p1275_cmd_direct() because we disable hardirqs
+by hand using raw_local_irq_restore() and then do a raw_spin_lock()
+which triggers a lockdep trace because the CPU's hw IRQ state doesn't
+match IRQ tracing's internal software copy of that state.
+
+The CPU's irqs are disabled, yet current->hardirqs_enabled is true.
+
+====================
+reboot: Restarting system
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3536 check_flags+0x7c/0x240()
+DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)
+Modules linked in: openpromfs
+CPU: 0 PID: 1 Comm: systemd-shutdow Tainted: G W 3.17.0-dirty #145
+Call Trace:
+ [000000000045919c] warn_slowpath_common+0x5c/0xa0
+ [0000000000459210] warn_slowpath_fmt+0x30/0x40
+ [000000000048f41c] check_flags+0x7c/0x240
+ [0000000000493280] lock_acquire+0x20/0x1c0
+ [0000000000832b70] _raw_spin_lock+0x30/0x60
+ [000000000068f2fc] p1275_cmd_direct+0x1c/0x60
+ [000000000068ed28] prom_reboot+0x28/0x40
+ [000000000043610c] machine_restart+0x4c/0x80
+ [000000000047d2d4] kernel_restart+0x54/0x80
+ [000000000047d618] SyS_reboot+0x138/0x200
+ [00000000004060b4] linux_sparc_syscall32+0x34/0x60
+---[ end trace 5c439fe81c05a100 ]---
+possible reason: unannotated irqs-off.
+irq event stamp: 2010267
+hardirqs last enabled at (2010267): [<000000000049a358>] vprintk_emit+0x4b8/0x580
+hardirqs last disabled at (2010266): [<0000000000499f08>] vprintk_emit+0x68/0x580
+softirqs last enabled at (2010046): [<000000000045d278>] __do_softirq+0x378/0x4a0
+softirqs last disabled at (2010039): [<000000000042bf08>] do_softirq_own_stack+0x28/0x40
+Resetting ...
+====================
+
+Use local_* variants of the hw IRQ interfaces so that IRQ tracing sees
+all of our changes.
+
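+For reference, with CONFIG_TRACE_IRQFLAGS the local_* variant is
+approximately:
+
+	#define local_irq_restore(flags)			\
+		do {						\
+			if (raw_irqs_disabled_flags(flags)) {	\
+				raw_local_irq_restore(flags);	\
+				trace_hardirqs_off();		\
+			} else {				\
+				trace_hardirqs_on();		\
+				raw_local_irq_restore(flags);	\
+			}					\
+		} while (0)
+
+whereas raw_local_irq_restore() skips the trace_hardirqs_*() calls,
+leaving lockdep's software copy of the IRQ state stale.
+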
+Reported-by: Meelis Roos <mroos@linux.ee>
+Tested-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/prom/p1275.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/prom/p1275.c
++++ b/arch/sparc/prom/p1275.c
+@@ -9,6 +9,7 @@
+ #include <linux/smp.h>
+ #include <linux/string.h>
+ #include <linux/spinlock.h>
++#include <linux/irqflags.h>
+
+ #include <asm/openprom.h>
+ #include <asm/oplib.h>
+@@ -36,8 +37,8 @@ void p1275_cmd_direct(unsigned long *arg
+ {
+ unsigned long flags;
+
+- raw_local_save_flags(flags);
+- raw_local_irq_restore((unsigned long)PIL_NMI);
++ local_save_flags(flags);
++ local_irq_restore((unsigned long)PIL_NMI);
+ raw_spin_lock(&prom_entry_lock);
+
+ prom_world(1);
+@@ -45,7 +46,7 @@ void p1275_cmd_direct(unsigned long *arg
+ prom_world(0);
+
+ raw_spin_unlock(&prom_entry_lock);
+- raw_local_irq_restore(flags);
++ local_irq_restore(flags);
+ }
+
+ void prom_cif_init(void *cif_handler, void *cif_stack)
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Mon, 11 Aug 2014 15:38:46 -0700
+Subject: sparc64: Fix pcr_ops initialization and usage bugs.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 8bccf5b313180faefce38e0d1140f76e0f327d28 ]
+
+Christopher reports that perf_event_print_debug() can crash in uniprocessor
+builds. The crash is due to pcr_ops being NULL.
+
+This happens because pcr_arch_init() is only invoked by smp_cpus_done() which
+only executes in SMP builds.
+
+init_hw_perf_events() is closely intertwined with pcr_ops being setup properly,
+therefore:
+
+1) Call pcr_arch_init() early on from init_hw_perf_events(), instead of
+ from smp_cpus_done().
+
+2) Do not hook up a PMU type if pcr_ops is NULL after pcr_arch_init().
+
+3) Move init_hw_perf_events to a later initcall so that we will be
+ sure to invoke pcr_arch_init() after all cpus are brought up.
+
+Finally, guard the one naked sequence of pcr_ops dereferences in
+__global_pmu_self() with an appropriate NULL check.
+
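+(The init ordering that makes point 3 work, roughly:
+
+	kernel_init_freeable()
+		do_pre_smp_initcalls()	/* early_initcall() - cpus not up yet */
+		smp_init()		/* secondary cpus brought up */
+		do_basic_setup()
+			do_initcalls()	/* pure_initcall(), level 0, runs here */
+
+so a pure_initcall() is guaranteed to see all cpus online.)
+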
+Reported-by: Christopher Alexander Tobias Schulze <cat.schulze@alice-dsl.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/perf_event.c | 7 +++++--
+ arch/sparc/kernel/process_64.c | 3 +++
+ arch/sparc/kernel/smp_64.c | 1 -
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/kernel/perf_event.c
++++ b/arch/sparc/kernel/perf_event.c
+@@ -1671,9 +1671,12 @@ static bool __init supported_pmu(void)
+
+ int __init init_hw_perf_events(void)
+ {
++ int err;
++
+ pr_info("Performance events: ");
+
+- if (!supported_pmu()) {
++ err = pcr_arch_init();
++ if (err || !supported_pmu()) {
+ pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
+ return 0;
+ }
+@@ -1685,7 +1688,7 @@ int __init init_hw_perf_events(void)
+
+ return 0;
+ }
+-early_initcall(init_hw_perf_events);
++pure_initcall(init_hw_perf_events);
+
+ void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+--- a/arch/sparc/kernel/process_64.c
++++ b/arch/sparc/kernel/process_64.c
+@@ -306,6 +306,9 @@ static void __global_pmu_self(int this_c
+ struct global_pmu_snapshot *pp;
+ int i, num;
+
++ if (!pcr_ops)
++ return;
++
+ pp = &global_cpu_snapshot[this_cpu].pmu;
+
+ num = 1;
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -1395,7 +1395,6 @@ void __cpu_die(unsigned int cpu)
+
+ void __init smp_cpus_done(unsigned int max_cpus)
+ {
+- pcr_arch_init();
+ }
+
+ void smp_send_reschedule(int cpu)
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 24 Sep 2014 20:56:11 -0700
+Subject: sparc64: Fix physical memory management regressions with large max_phys_bits.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 0dd5b7b09e13dae32869371e08e1048349fd040c ]
+
+If max_phys_bits needs to be > 43 (e.g. for T4 chips), things like
+DEBUG_PAGEALLOC stop working because the 3-level page tables only
+can cover up to 43 bits.
+
+Another problem is that when we increased MAX_PHYS_ADDRESS_BITS up to
+47, several statically allocated tables became enormous.
+
+Compounding this is that we will need to support up to 49 bits of
+physical addressing for M7 chips.
+
+The two tables in question are sparc64_valid_addr_bitmap and
+kpte_linear_bitmap.
+
+The first holds a bitmap, with 1 bit for each 4MB chunk of physical
+memory, indicating whether that chunk actually exists in the machine
+and is valid.
+
+The second table is a set of 2-bit values which tell how large of a
+mapping (4MB, 256MB, 2GB, 16GB, respectively) we can use at each 256MB
+chunk of ram in the system.
+
+These tables are huge and take up an enormous amount of the BSS
+section of the sparc64 kernel image. Specifically, the
+sparc64_valid_addr_bitmap is 4MB, and the kpte_linear_bitmap is 128K.
+
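+(The arithmetic: with MAX_PHYS_ADDRESS_BITS = 47 there are 2^47 / 2^22
+= 2^25 possible 4MB chunks, so one bit per chunk gives 2^25 / 8 bytes
+= 4MB; and 2^47 / 2^28 = 2^19 possible 256MB chunks at 2 bits each
+gives 2^20 bits = 128K bytes.)
+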
+So let's solve the space wastage and the DEBUG_PAGEALLOC problem
+at the same time, by using the kernel page tables (as designed) to
+manage this information.
+
+We have to keep using large mappings when DEBUG_PAGEALLOC is disabled,
+and we do this by encoding huge PMDs and PUDs.
+
+On a T4-2 with 256GB of ram the kernel page table takes up 16K with
+DEBUG_PAGEALLOC disabled and 256MB with it enabled. Furthermore, this
+memory is dynamically allocated at run time rather than coded
+statically into the kernel image.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/page_64.h | 3
+ arch/sparc/include/asm/pgtable_64.h | 55 ++---
+ arch/sparc/include/asm/tsb.h | 47 +++-
+ arch/sparc/kernel/ktlb.S | 108 ---------
+ arch/sparc/kernel/vmlinux.lds.S | 5
+ arch/sparc/mm/init_64.c | 393 +++++++++++++++---------------------
+ arch/sparc/mm/init_64.h | 7
+ 7 files changed, 244 insertions(+), 374 deletions(-)
+
+--- a/arch/sparc/include/asm/page_64.h
++++ b/arch/sparc/include/asm/page_64.h
+@@ -128,9 +128,6 @@ extern unsigned long PAGE_OFFSET;
+ */
+ #define MAX_PHYS_ADDRESS_BITS 47
+
+-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
+- * and kpte_linear_bitmap.
+- */
+ #define ILOG2_4MB 22
+ #define ILOG2_256MB 28
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -79,22 +79,7 @@
+
+ #include <linux/sched.h>
+
+-extern unsigned long sparc64_valid_addr_bitmap[];
+-
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-static inline bool __kern_addr_valid(unsigned long paddr)
+-{
+- if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
+- return false;
+- return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
+-}
+-
+-static inline bool kern_addr_valid(unsigned long addr)
+-{
+- unsigned long paddr = __pa(addr);
+-
+- return __kern_addr_valid(paddr);
+-}
++bool kern_addr_valid(unsigned long addr);
+
+ /* Entries per page directory level. */
+ #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
+@@ -122,6 +107,7 @@ static inline bool kern_addr_valid(unsig
+ #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
+ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */
+ #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */
++#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE
+
+ /* Advertise support for _PAGE_SPECIAL */
+ #define __HAVE_ARCH_PTE_SPECIAL
+@@ -668,26 +654,26 @@ static inline unsigned long pmd_large(pm
+ return pte_val(pte) & _PAGE_PMD_HUGE;
+ }
+
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-static inline unsigned long pmd_young(pmd_t pmd)
++static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+ pte_t pte = __pte(pmd_val(pmd));
+
+- return pte_young(pte);
++ return pte_pfn(pte);
+ }
+
+-static inline unsigned long pmd_write(pmd_t pmd)
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++static inline unsigned long pmd_young(pmd_t pmd)
+ {
+ pte_t pte = __pte(pmd_val(pmd));
+
+- return pte_write(pte);
++ return pte_young(pte);
+ }
+
+-static inline unsigned long pmd_pfn(pmd_t pmd)
++static inline unsigned long pmd_write(pmd_t pmd)
+ {
+ pte_t pte = __pte(pmd_val(pmd));
+
+- return pte_pfn(pte);
++ return pte_write(pte);
+ }
+
+ static inline unsigned long pmd_trans_huge(pmd_t pmd)
+@@ -781,18 +767,15 @@ static inline int pmd_present(pmd_t pmd)
+ * the top bits outside of the range of any physical address size we
+ * support are clear as well. We also validate the physical itself.
+ */
+-#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \
+- !__kern_addr_valid(pmd_val(pmd)))
++#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK)
+
+ #define pud_none(pud) (!pud_val(pud))
+
+-#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \
+- !__kern_addr_valid(pud_val(pud)))
++#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK)
+
+ #define pgd_none(pgd) (!pgd_val(pgd))
+
+-#define pgd_bad(pgd) ((pgd_val(pgd) & ~PAGE_MASK) || \
+- !__kern_addr_valid(pgd_val(pgd)))
++#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK)
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+@@ -835,6 +818,20 @@ static inline unsigned long __pmd_page(p
+ #define pgd_present(pgd) (pgd_val(pgd) != 0U)
+ #define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL)
+
++static inline unsigned long pud_large(pud_t pud)
++{
++ pte_t pte = __pte(pud_val(pud));
++
++ return pte_val(pte) & _PAGE_PMD_HUGE;
++}
++
++static inline unsigned long pud_pfn(pud_t pud)
++{
++ pte_t pte = __pte(pud_val(pud));
++
++ return pte_pfn(pte);
++}
++
+ /* Same in both SUN4V and SUN4U. */
+ #define pte_none(pte) (!pte_val(pte))
+
+--- a/arch/sparc/include/asm/tsb.h
++++ b/arch/sparc/include/asm/tsb.h
+@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb
+ sub TSB, 0x8, TSB; \
+ TSB_STORE(TSB, TAG);
+
+- /* Do a kernel page table walk. Leaves physical PTE pointer in
+- * REG1. Jumps to FAIL_LABEL on early page table walk termination.
+- * VADDR will not be clobbered, but REG2 will.
++ /* Do a kernel page table walk. Leaves valid PTE value in
++ * REG1. Jumps to FAIL_LABEL on early page table walk
++ * termination. VADDR will not be clobbered, but REG2 will.
++ *
++ * There are two masks we must apply to propagate bits from
++ * the virtual address into the PTE physical address field
++ * when dealing with huge pages. This is because the page
++ * table boundaries do not match the huge page size(s) the
++ * hardware supports.
++ *
++ * In these cases we propagate the bits that are below the
++ * page table level where we saw the huge page mapping, but
++ * are still within the relevant physical bits for the huge
++ * page size in question. So for PMD mappings (which fall on
++ * bit 23, for 8MB per PMD) we must propagate bit 22 for a
++ * 4MB huge page. For huge PUDs (which fall on bit 33, for
++ * 8GB per PUD), we have to accomodate 256MB and 2GB huge
++ * pages. So for those we propagate bits 32 to 28.
+ */
+ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
+ sethi %hi(swapper_pg_dir), REG1; \
+@@ -150,15 +165,35 @@ extern struct tsb_phys_patch_entry __tsb
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+- sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
++ sethi %uhi(_PAGE_PUD_HUGE), REG2; \
++ brz,pn REG1, FAIL_LABEL; \
++ sllx REG2, 32, REG2; \
++ andcc REG1, REG2, %g0; \
++ sethi %hi(0xf8000000), REG2; \
++ bne,pt %xcc, 697f; \
++ sllx REG2, 1, REG2; \
++ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
++ sethi %uhi(_PAGE_PMD_HUGE), REG2; \
+ brz,pn REG1, FAIL_LABEL; \
+- sllx VADDR, 64 - PMD_SHIFT, REG2; \
++ sllx REG2, 32, REG2; \
++ andcc REG1, REG2, %g0; \
++ be,pn %xcc, 698f; \
++ sethi %hi(0x400000), REG2; \
++697: brgez,pn REG1, FAIL_LABEL; \
++ andn REG1, REG2, REG1; \
++ and VADDR, REG2, REG2; \
++ ba,pt %xcc, 699f; \
++ or REG1, REG2, REG1; \
++698: sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+- add REG1, REG2, REG1;
++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
++ brgez,pn REG1, FAIL_LABEL; \
++ nop; \
++699:
+
+ /* PMD has been loaded into REG1, interpret the value, seeing
+ * if it is a HUGE PMD or a normal one. If it is not valid
+--- a/arch/sparc/kernel/ktlb.S
++++ b/arch/sparc/kernel/ktlb.S
+@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g7)
+-
+- /* Load and check PTE. */
+- ldxa [%g5] ASI_PHYS_USE_EC, %g5
+- mov 1, %g7
+- sllx %g7, TSB_TAG_INVALID_BIT, %g7
+- brgez,a,pn %g5, kvmap_itlb_longpath
+- TSB_STORE(%g1, %g7)
+-
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+@@ -118,6 +110,12 @@ kvmap_dtlb_obp:
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
+
++kvmap_linear_early:
++ sethi %hi(kern_linear_pte_xor), %g7
++ ldx [%g7 + %lo(kern_linear_pte_xor)], %g2
++ ba,pt %xcc, kvmap_dtlb_tsb4m_load
++ xor %g2, %g4, %g5
++
+ .align 32
+ kvmap_dtlb_tsb4m_load:
+ TSB_LOCK_TAG(%g1, %g2, %g7)
+@@ -146,105 +144,17 @@ kvmap_dtlb_4v:
+ /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */
+ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+ #endif
+- /* TSB entry address left in %g1, lookup linear PTE.
+- * Must preserve %g1 and %g6 (TAG).
+- */
+-kvmap_dtlb_tsb4m_miss:
+- /* Clear the PAGE_OFFSET top virtual bits, shift
+- * down to get PFN, and make sure PFN is in range.
+- */
+-661: sllx %g4, 0, %g5
+- .section .page_offset_shift_patch, "ax"
+- .word 661b
+- .previous
+-
+- /* Check to see if we know about valid memory at the 4MB
+- * chunk this physical address will reside within.
++ /* Linear mapping TSB lookup failed. Fallthrough to kernel
++ * page table based lookup.
+ */
+-661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2
+- .section .page_offset_shift_patch, "ax"
+- .word 661b
+- .previous
+-
+- brnz,pn %g2, kvmap_dtlb_longpath
+- nop
+-
+- /* This unconditional branch and delay-slot nop gets patched
+- * by the sethi sequence once the bitmap is properly setup.
+- */
+- .globl valid_addr_bitmap_insn
+-valid_addr_bitmap_insn:
+- ba,pt %xcc, 2f
+- nop
+- .subsection 2
+- .globl valid_addr_bitmap_patch
+-valid_addr_bitmap_patch:
+- sethi %hi(sparc64_valid_addr_bitmap), %g7
+- or %g7, %lo(sparc64_valid_addr_bitmap), %g7
+- .previous
+-
+-661: srlx %g5, ILOG2_4MB, %g2
+- .section .page_offset_shift_patch, "ax"
+- .word 661b
+- .previous
+-
+- srlx %g2, 6, %g5
+- and %g2, 63, %g2
+- sllx %g5, 3, %g5
+- ldx [%g7 + %g5], %g5
+- mov 1, %g7
+- sllx %g7, %g2, %g7
+- andcc %g5, %g7, %g0
+- be,pn %xcc, kvmap_dtlb_longpath
+-
+-2: sethi %hi(kpte_linear_bitmap), %g2
+-
+- /* Get the 256MB physical address index. */
+-661: sllx %g4, 0, %g5
+- .section .page_offset_shift_patch, "ax"
+- .word 661b
+- .previous
+-
+- or %g2, %lo(kpte_linear_bitmap), %g2
+-
+-661: srlx %g5, ILOG2_256MB, %g5
+- .section .page_offset_shift_patch, "ax"
+- .word 661b
+- .previous
+-
+- and %g5, (32 - 1), %g7
+-
+- /* Divide by 32 to get the offset into the bitmask. */
+- srlx %g5, 5, %g5
+- add %g7, %g7, %g7
+- sllx %g5, 3, %g5
+-
+- /* kern_linear_pte_xor[(mask >> shift) & 3)] */
+- ldx [%g2 + %g5], %g2
+- srlx %g2, %g7, %g7
+- sethi %hi(kern_linear_pte_xor), %g5
+- and %g7, 3, %g7
+- or %g5, %lo(kern_linear_pte_xor), %g5
+- sllx %g7, 3, %g7
+- ldx [%g5 + %g7], %g2
+-
+ .globl kvmap_linear_patch
+ kvmap_linear_patch:
+- ba,pt %xcc, kvmap_dtlb_tsb4m_load
+- xor %g2, %g4, %g5
++ ba,a,pt %xcc, kvmap_linear_early
+
+ kvmap_dtlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g7)
+-
+- /* Load and check PTE. */
+- ldxa [%g5] ASI_PHYS_USE_EC, %g5
+- mov 1, %g7
+- sllx %g7, TSB_TAG_INVALID_BIT, %g7
+- brgez,a,pn %g5, kvmap_dtlb_longpath
+- TSB_STORE(%g1, %g7)
+-
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+--- a/arch/sparc/kernel/vmlinux.lds.S
++++ b/arch/sparc/kernel/vmlinux.lds.S
+@@ -122,11 +122,6 @@ SECTIONS
+ *(.swapper_4m_tsb_phys_patch)
+ __swapper_4m_tsb_phys_patch_end = .;
+ }
+- .page_offset_shift_patch : {
+- __page_offset_shift_patch = .;
+- *(.page_offset_shift_patch)
+- __page_offset_shift_patch_end = .;
+- }
+ .popc_3insn_patch : {
+ __popc_3insn_patch = .;
+ *(.popc_3insn_patch)
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -73,7 +73,6 @@ unsigned long kern_linear_pte_xor[4] __r
+ * 'cpu' properties, but we need to have this table setup before the
+ * MDESC is initialized.
+ */
+-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
+
+ #ifndef CONFIG_DEBUG_PAGEALLOC
+ /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
+@@ -82,6 +81,7 @@ unsigned long kpte_linear_bitmap[KPTE_BI
+ */
+ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
+ #endif
++extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+ static unsigned long cpu_pgsz_mask;
+
+@@ -163,10 +163,6 @@ static void __init read_obp_memory(const
+ cmp_p64, NULL);
+ }
+
+-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
+- sizeof(unsigned long)];
+-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
+-
+ /* Kernel physical address base and size in bytes. */
+ unsigned long kern_base __read_mostly;
+ unsigned long kern_size __read_mostly;
+@@ -1363,9 +1359,145 @@ static unsigned long __init bootmem_init
+ static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
+ static int pall_ents __initdata;
+
+-#ifdef CONFIG_DEBUG_PAGEALLOC
++static unsigned long max_phys_bits = 40;
++
++bool kern_addr_valid(unsigned long addr)
++{
++ unsigned long above = ((long)addr) >> max_phys_bits;
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++
++ if (above != 0 && above != -1UL)
++ return false;
++
++ if (addr >= (unsigned long) KERNBASE &&
++ addr < (unsigned long)&_end)
++ return true;
++
++ if (addr >= PAGE_OFFSET) {
++ unsigned long pa = __pa(addr);
++
++ return pfn_valid(pa >> PAGE_SHIFT);
++ }
++
++ pgd = pgd_offset_k(addr);
++ if (pgd_none(*pgd))
++ return 0;
++
++ pud = pud_offset(pgd, addr);
++ if (pud_none(*pud))
++ return 0;
++
++ if (pud_large(*pud))
++ return pfn_valid(pud_pfn(*pud));
++
++ pmd = pmd_offset(pud, addr);
++ if (pmd_none(*pmd))
++ return 0;
++
++ if (pmd_large(*pmd))
++ return pfn_valid(pmd_pfn(*pmd));
++
++ pte = pte_offset_kernel(pmd, addr);
++ if (pte_none(*pte))
++ return 0;
++
++ return pfn_valid(pte_pfn(*pte));
++}
++EXPORT_SYMBOL(kern_addr_valid);
++
++static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
++ unsigned long vend,
++ pud_t *pud)
++{
++ const unsigned long mask16gb = (1UL << 34) - 1UL;
++ u64 pte_val = vstart;
++
++ /* Each PUD is 8GB */
++ if ((vstart & mask16gb) ||
++ (vend - vstart <= mask16gb)) {
++ pte_val ^= kern_linear_pte_xor[2];
++ pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
++
++ return vstart + PUD_SIZE;
++ }
++
++ pte_val ^= kern_linear_pte_xor[3];
++ pte_val |= _PAGE_PUD_HUGE;
++
++ vend = vstart + mask16gb + 1UL;
++ while (vstart < vend) {
++ pud_val(*pud) = pte_val;
++
++ pte_val += PUD_SIZE;
++ vstart += PUD_SIZE;
++ pud++;
++ }
++ return vstart;
++}
++
++static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
++ bool guard)
++{
++ if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
++ return true;
++
++ return false;
++}
++
++static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
++ unsigned long vend,
++ pmd_t *pmd)
++{
++ const unsigned long mask256mb = (1UL << 28) - 1UL;
++ const unsigned long mask2gb = (1UL << 31) - 1UL;
++ u64 pte_val = vstart;
++
++ /* Each PMD is 8MB */
++ if ((vstart & mask256mb) ||
++ (vend - vstart <= mask256mb)) {
++ pte_val ^= kern_linear_pte_xor[0];
++ pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
++
++ return vstart + PMD_SIZE;
++ }
++
++ if ((vstart & mask2gb) ||
++ (vend - vstart <= mask2gb)) {
++ pte_val ^= kern_linear_pte_xor[1];
++ pte_val |= _PAGE_PMD_HUGE;
++ vend = vstart + mask256mb + 1UL;
++ } else {
++ pte_val ^= kern_linear_pte_xor[2];
++ pte_val |= _PAGE_PMD_HUGE;
++ vend = vstart + mask2gb + 1UL;
++ }
++
++ while (vstart < vend) {
++ pmd_val(*pmd) = pte_val;
++
++ pte_val += PMD_SIZE;
++ vstart += PMD_SIZE;
++ pmd++;
++ }
++
++ return vstart;
++}
++
++static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
++ bool guard)
++{
++ if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
++ return true;
++
++ return false;
++}
++
+ static unsigned long __ref kernel_map_range(unsigned long pstart,
+- unsigned long pend, pgprot_t prot)
++ unsigned long pend, pgprot_t prot,
++ bool use_huge)
+ {
+ unsigned long vstart = PAGE_OFFSET + pstart;
+ unsigned long vend = PAGE_OFFSET + pend;
+@@ -1395,15 +1527,23 @@ static unsigned long __ref kernel_map_ra
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
++ if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
++ vstart = kernel_map_hugepud(vstart, vend, pud);
++ continue;
++ }
+ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ alloc_bytes += PAGE_SIZE;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, vstart);
+- if (!pmd_present(*pmd)) {
++ if (pmd_none(*pmd)) {
+ pte_t *new;
+
++ if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
++ vstart = kernel_map_hugepmd(vstart, vend, pmd);
++ continue;
++ }
+ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ alloc_bytes += PAGE_SIZE;
+ pmd_populate_kernel(&init_mm, pmd, new);
+@@ -1426,100 +1566,34 @@ static unsigned long __ref kernel_map_ra
+ return alloc_bytes;
+ }
+
+-extern unsigned int kvmap_linear_patch[1];
+-#endif /* CONFIG_DEBUG_PAGEALLOC */
+-
+-static void __init kpte_set_val(unsigned long index, unsigned long val)
+-{
+- unsigned long *ptr = kpte_linear_bitmap;
+-
+- val <<= ((index % (BITS_PER_LONG / 2)) * 2);
+- ptr += (index / (BITS_PER_LONG / 2));
+-
+- *ptr |= val;
+-}
+-
+-static const unsigned long kpte_shift_min = 28; /* 256MB */
+-static const unsigned long kpte_shift_max = 34; /* 16GB */
+-static const unsigned long kpte_shift_incr = 3;
+-
+-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
+- unsigned long shift)
++static void __init flush_all_kernel_tsbs(void)
+ {
+- unsigned long size = (1UL << shift);
+- unsigned long mask = (size - 1UL);
+- unsigned long remains = end - start;
+- unsigned long val;
+-
+- if (remains < size || (start & mask))
+- return start;
+-
+- /* VAL maps:
+- *
+- * shift 28 --> kern_linear_pte_xor index 1
+- * shift 31 --> kern_linear_pte_xor index 2
+- * shift 34 --> kern_linear_pte_xor index 3
+- */
+- val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
+-
+- remains &= ~mask;
+- if (shift != kpte_shift_max)
+- remains = size;
+-
+- while (remains) {
+- unsigned long index = start >> kpte_shift_min;
++ int i;
+
+- kpte_set_val(index, val);
++ for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
++ struct tsb *ent = &swapper_tsb[i];
+
+- start += 1UL << kpte_shift_min;
+- remains -= 1UL << kpte_shift_min;
++ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+ }
++#ifndef CONFIG_DEBUG_PAGEALLOC
++ for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
++ struct tsb *ent = &swapper_4m_tsb[i];
+
+- return start;
+-}
+-
+-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+-{
+- unsigned long smallest_size, smallest_mask;
+- unsigned long s;
+-
+- smallest_size = (1UL << kpte_shift_min);
+- smallest_mask = (smallest_size - 1UL);
+-
+- while (start < end) {
+- unsigned long orig_start = start;
+-
+- for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
+- start = kpte_mark_using_shift(start, end, s);
+-
+- if (start != orig_start)
+- break;
+- }
+-
+- if (start == orig_start)
+- start = (start + smallest_size) & ~smallest_mask;
++ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+ }
++#endif
+ }
+
+-static void __init init_kpte_bitmap(void)
+-{
+- unsigned long i;
+-
+- for (i = 0; i < pall_ents; i++) {
+- unsigned long phys_start, phys_end;
+-
+- phys_start = pall[i].phys_addr;
+- phys_end = phys_start + pall[i].reg_size;
+-
+- mark_kpte_bitmap(phys_start, phys_end);
+- }
+-}
++extern unsigned int kvmap_linear_patch[1];
+
+ static void __init kernel_physical_mapping_init(void)
+ {
+-#ifdef CONFIG_DEBUG_PAGEALLOC
+ unsigned long i, mem_alloced = 0UL;
++ bool use_huge = true;
+
++#ifdef CONFIG_DEBUG_PAGEALLOC
++ use_huge = false;
++#endif
+ for (i = 0; i < pall_ents; i++) {
+ unsigned long phys_start, phys_end;
+
+@@ -1527,7 +1601,7 @@ static void __init kernel_physical_mappi
+ phys_end = phys_start + pall[i].reg_size;
+
+ mem_alloced += kernel_map_range(phys_start, phys_end,
+- PAGE_KERNEL);
++ PAGE_KERNEL, use_huge);
+ }
+
+ printk("Allocated %ld bytes for kernel page tables.\n",
+@@ -1536,8 +1610,9 @@ static void __init kernel_physical_mappi
+ kvmap_linear_patch[0] = 0x01000000; /* nop */
+ flushi(&kvmap_linear_patch[0]);
+
++ flush_all_kernel_tsbs();
++
+ __flush_tlb_all();
+-#endif
+ }
+
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+@@ -1547,7 +1622,7 @@ void kernel_map_pages(struct page *page,
+ unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
+
+ kernel_map_range(phys_start, phys_end,
+- (enable ? PAGE_KERNEL : __pgprot(0)));
++ (enable ? PAGE_KERNEL : __pgprot(0)), false);
+
+ flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+ PAGE_OFFSET + phys_end);
+@@ -1575,62 +1650,11 @@ unsigned long __init find_ecache_flush_s
+ unsigned long PAGE_OFFSET;
+ EXPORT_SYMBOL(PAGE_OFFSET);
+
+-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
+-{
+- unsigned long final_shift;
+- unsigned int val = *insn;
+- unsigned int cnt;
+-
+- /* We are patching in ilog2(max_supported_phys_address), and
+- * we are doing so in a manner similar to a relocation addend.
+- * That is, we are adding the shift value to whatever value
+- * is in the shift instruction count field already.
+- */
+- cnt = (val & 0x3f);
+- val &= ~0x3f;
+-
+- /* If we are trying to shift >= 64 bits, clear the destination
+- * register. This can happen when phys_bits ends up being equal
+- * to MAX_PHYS_ADDRESS_BITS.
+- */
+- final_shift = (cnt + (64 - phys_bits));
+- if (final_shift >= 64) {
+- unsigned int rd = (val >> 25) & 0x1f;
+-
+- val = 0x80100000 | (rd << 25);
+- } else {
+- val |= final_shift;
+- }
+- *insn = val;
+-
+- __asm__ __volatile__("flush %0"
+- : /* no outputs */
+- : "r" (insn));
+-}
+-
+-static void __init page_offset_shift_patch(unsigned long phys_bits)
+-{
+- extern unsigned int __page_offset_shift_patch;
+- extern unsigned int __page_offset_shift_patch_end;
+- unsigned int *p;
+-
+- p = &__page_offset_shift_patch;
+- while (p < &__page_offset_shift_patch_end) {
+- unsigned int *insn = (unsigned int *)(unsigned long)*p;
+-
+- page_offset_shift_patch_one(insn, phys_bits);
+-
+- p++;
+- }
+-}
+-
+ unsigned long sparc64_va_hole_top = 0xfffff80000000000UL;
+ unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
+
+ static void __init setup_page_offset(void)
+ {
+- unsigned long max_phys_bits = 40;
+-
+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ /* Cheetah/Panther support a full 64-bit virtual
+ * address, so we can use all that our page tables
+@@ -1679,8 +1703,6 @@ static void __init setup_page_offset(voi
+
+ pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+ PAGE_OFFSET, max_phys_bits);
+-
+- page_offset_shift_patch(max_phys_bits);
+ }
+
+ static void __init tsb_phys_patch(void)
+@@ -1725,7 +1747,6 @@ static void __init tsb_phys_patch(void)
+ #define NUM_KTSB_DESCR 1
+ #endif
+ static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
+-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+ /* The swapper TSBs are loaded with a base sequence of:
+ *
+@@ -2024,11 +2045,9 @@ void __init paging_init(void)
+
+ pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t));
+ pud_set(&swapper_pud_dir[0], pmd);
+-
++
+ inherit_prom_mappings();
+
+- init_kpte_bitmap();
+-
+ /* Ok, we can use our TLB miss and window trap handlers safely. */
+ setup_tba();
+
+@@ -2135,70 +2154,6 @@ int page_in_phys_avail(unsigned long pad
+ return 0;
+ }
+
+-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
+-static int pavail_rescan_ents __initdata;
+-
+-/* Certain OBP calls, such as fetching "available" properties, can
+- * claim physical memory. So, along with initializing the valid
+- * address bitmap, what we do here is refetch the physical available
+- * memory list again, and make sure it provides at least as much
+- * memory as 'pavail' does.
+- */
+-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
+-{
+- int i;
+-
+- read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
+-
+- for (i = 0; i < pavail_ents; i++) {
+- unsigned long old_start, old_end;
+-
+- old_start = pavail[i].phys_addr;
+- old_end = old_start + pavail[i].reg_size;
+- while (old_start < old_end) {
+- int n;
+-
+- for (n = 0; n < pavail_rescan_ents; n++) {
+- unsigned long new_start, new_end;
+-
+- new_start = pavail_rescan[n].phys_addr;
+- new_end = new_start +
+- pavail_rescan[n].reg_size;
+-
+- if (new_start <= old_start &&
+- new_end >= (old_start + PAGE_SIZE)) {
+- set_bit(old_start >> ILOG2_4MB, bitmap);
+- goto do_next_page;
+- }
+- }
+-
+- prom_printf("mem_init: Lost memory in pavail\n");
+- prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
+- pavail[i].phys_addr,
+- pavail[i].reg_size);
+- prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
+- pavail_rescan[i].phys_addr,
+- pavail_rescan[i].reg_size);
+- prom_printf("mem_init: Cannot continue, aborting.\n");
+- prom_halt();
+-
+- do_next_page:
+- old_start += PAGE_SIZE;
+- }
+- }
+-}
+-
+-static void __init patch_tlb_miss_handler_bitmap(void)
+-{
+- extern unsigned int valid_addr_bitmap_insn[];
+- extern unsigned int valid_addr_bitmap_patch[];
+-
+- valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
+- mb();
+- valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
+- flushi(&valid_addr_bitmap_insn[0]);
+-}
+-
+ static void __init register_page_bootmem_info(void)
+ {
+ #ifdef CONFIG_NEED_MULTIPLE_NODES
+@@ -2211,18 +2166,6 @@ static void __init register_page_bootmem
+ }
+ void __init mem_init(void)
+ {
+- unsigned long addr, last;
+-
+- addr = PAGE_OFFSET + kern_base;
+- last = PAGE_ALIGN(kern_size) + addr;
+- while (addr < last) {
+- set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
+- addr += PAGE_SIZE;
+- }
+-
+- setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
+- patch_tlb_miss_handler_bitmap();
+-
+ high_memory = __va(last_valid_pfn << PAGE_SHIFT);
+
+ register_page_bootmem_info();
+--- a/arch/sparc/mm/init_64.h
++++ b/arch/sparc/mm/init_64.h
+@@ -8,15 +8,8 @@
+ */
+
+ #define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS)
+-#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
+-#define KPTE_BITMAP_BYTES \
+- ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
+-#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL)
+-#define VALID_ADDR_BITMAP_BYTES \
+- ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
+
+ extern unsigned long kern_linear_pte_xor[4];
+-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
+ extern unsigned int sparc64_highest_unlocked_tlb_ent;
+ extern unsigned long sparc64_kern_pri_context;
+ extern unsigned long sparc64_kern_pri_nuc_bits;
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Thu, 23 Oct 2014 12:58:13 -0700
+Subject: sparc64: Fix register corruption in top-most kernel stack frame during boot.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit ef3e035c3a9b81da8a778bc333d10637acf6c199 ]
+
+Meelis Roos reported that kernels built with gcc-4.9 do not boot; we
+eventually narrowed this down to only impacting machines using
+UltraSPARC-III and derivative cpus.
+
+The crash happens right when the first user process is spawned:
+
+[ 54.451346] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
+[ 54.451346]
+[ 54.571516] CPU: 1 PID: 1 Comm: init Not tainted 3.16.0-rc2-00211-gd7933ab #96
+[ 54.666431] Call Trace:
+[ 54.698453] [0000000000762f8c] panic+0xb0/0x224
+[ 54.759071] [000000000045cf68] do_exit+0x948/0x960
+[ 54.823123] [000000000042cbc0] fault_in_user_windows+0xe0/0x100
+[ 54.902036] [0000000000404ad0] __handle_user_windows+0x0/0x10
+[ 54.978662] Press Stop-A (L1-A) to return to the boot prom
+[ 55.050713] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
+
+Further investigation showed that compiling only per_cpu_patch() with
+an older compiler fixes the boot.
+
+Detailed analysis showed that the function is not being miscompiled by
+gcc-4.9, but it is using a different register allocation ordering.
+
+With the gcc-4.9 compiled function, something during the code patching
+causes some of the %i* input registers to get corrupted. Perhaps
+we have a TLB miss path into the firmware that is deep enough to
+cause a register window spill and subsequent restore when we get
+back from the TLB miss trap.
+
+Let's plug this up by doing two things:
+
+1) Stop using the firmware stack for client interface calls into
+ the firmware. Just use the kernel's stack.
+
+2) As soon as we can, call into a new function "start_early_boot()"
+ to put a one-register-window buffer between the firmware's
+ deepest stack frame and the top-most initial kernel one.
+
+Reported-by: Meelis Roos <mroos@linux.ee>
+Tested-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/oplib_64.h | 3 +-
+ arch/sparc/include/asm/setup.h | 4 +++
+ arch/sparc/kernel/entry.h | 3 --
+ arch/sparc/kernel/head_64.S | 40 +++-----------------------------------
+ arch/sparc/kernel/hvtramp.S | 1
+ arch/sparc/kernel/setup_64.c | 28 +++++++++++++++++++-------
+ arch/sparc/kernel/trampoline_64.S | 12 ++++++-----
+ arch/sparc/prom/cif.S | 5 +---
+ arch/sparc/prom/init_64.c | 6 ++---
+ arch/sparc/prom/p1275.c | 2 -
+ 10 files changed, 42 insertions(+), 62 deletions(-)
+
+--- a/arch/sparc/include/asm/oplib_64.h
++++ b/arch/sparc/include/asm/oplib_64.h
+@@ -62,7 +62,8 @@ struct linux_mem_p1275 {
+ /* You must call prom_init() before using any of the library services,
+ * preferably as early as possible. Pass it the romvec pointer.
+ */
+-extern void prom_init(void *cif_handler, void *cif_stack);
++extern void prom_init(void *cif_handler);
++extern void prom_init_report(void);
+
+ /* Boot argument acquisition, returns the boot command line string. */
+ extern char *prom_getbootargs(void);
+--- a/arch/sparc/include/asm/setup.h
++++ b/arch/sparc/include/asm/setup.h
+@@ -24,6 +24,10 @@ static inline int con_is_present(void)
+ }
+ #endif
+
++#ifdef CONFIG_SPARC64
++extern void __init start_early_boot(void);
++#endif
++
+ extern void sun_do_break(void);
+ extern int stop_a_enabled;
+ extern int scons_pwroff;
+--- a/arch/sparc/kernel/entry.h
++++ b/arch/sparc/kernel/entry.h
+@@ -66,13 +66,10 @@ struct pause_patch_entry {
+ extern struct pause_patch_entry __pause_3insn_patch,
+ __pause_3insn_patch_end;
+
+-extern void __init per_cpu_patch(void);
+ extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
+ struct sun4v_1insn_patch_entry *);
+ extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+ struct sun4v_2insn_patch_entry *);
+-extern void __init sun4v_patch(void);
+-extern void __init boot_cpu_id_too_large(int cpu);
+ extern unsigned int dcache_parity_tl1_occurred;
+ extern unsigned int icache_parity_tl1_occurred;
+
+--- a/arch/sparc/kernel/head_64.S
++++ b/arch/sparc/kernel/head_64.S
+@@ -672,14 +672,12 @@ tlb_fixup_done:
+ sethi %hi(init_thread_union), %g6
+ or %g6, %lo(init_thread_union), %g6
+ ldx [%g6 + TI_TASK], %g4
+- mov %sp, %l6
+
+ wr %g0, ASI_P, %asi
+ mov 1, %g1
+ sllx %g1, THREAD_SHIFT, %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ add %g6, %g1, %sp
+- mov 0, %fp
+
+ /* Set per-cpu pointer initially to zero, this makes
+ * the boot-cpu use the in-kernel-image per-cpu areas
+@@ -706,44 +704,14 @@ tlb_fixup_done:
+ nop
+ #endif
+
+- mov %l6, %o1 ! OpenPROM stack
+ call prom_init
+ mov %l7, %o0 ! OpenPROM cif handler
+
+- /* Initialize current_thread_info()->cpu as early as possible.
+- * In order to do that accurately we have to patch up the get_cpuid()
+- * assembler sequences. And that, in turn, requires that we know
+- * if we are on a Starfire box or not. While we're here, patch up
+- * the sun4v sequences as well.
++ /* To create a one-register-window buffer between the kernel's
++ * initial stack and the last stack frame we use from the firmware,
++ * do the rest of the boot from a C helper function.
+ */
+- call check_if_starfire
+- nop
+- call per_cpu_patch
+- nop
+- call sun4v_patch
+- nop
+-
+-#ifdef CONFIG_SMP
+- call hard_smp_processor_id
+- nop
+- cmp %o0, NR_CPUS
+- blu,pt %xcc, 1f
+- nop
+- call boot_cpu_id_too_large
+- nop
+- /* Not reached... */
+-
+-1:
+-#else
+- mov 0, %o0
+-#endif
+- sth %o0, [%g6 + TI_CPU]
+-
+- call prom_init_report
+- nop
+-
+- /* Off we go.... */
+- call start_kernel
++ call start_early_boot
+ nop
+ /* Not reached... */
+
+--- a/arch/sparc/kernel/hvtramp.S
++++ b/arch/sparc/kernel/hvtramp.S
+@@ -109,7 +109,6 @@ hv_cpu_startup:
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+- mov 0, %fp
+
+ call init_irqwork_curcpu
+ nop
+--- a/arch/sparc/kernel/setup_64.c
++++ b/arch/sparc/kernel/setup_64.c
+@@ -30,6 +30,7 @@
+ #include <linux/cpu.h>
+ #include <linux/initrd.h>
+ #include <linux/module.h>
++#include <linux/start_kernel.h>
+
+ #include <asm/io.h>
+ #include <asm/processor.h>
+@@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE];
+
+ static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
+
+-void __init per_cpu_patch(void)
++static void __init per_cpu_patch(void)
+ {
+ struct cpuid_patch_entry *p;
+ unsigned long ver;
+@@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4
+ }
+ }
+
+-void __init sun4v_patch(void)
++static void __init sun4v_patch(void)
+ {
+ extern void sun4v_hvapi_init(void);
+
+@@ -335,14 +336,25 @@ static void __init pause_patch(void)
+ }
+ }
+
+-#ifdef CONFIG_SMP
+-void __init boot_cpu_id_too_large(int cpu)
++void __init start_early_boot(void)
+ {
+- prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+- cpu, NR_CPUS);
+- prom_halt();
++ int cpu;
++
++ check_if_starfire();
++ per_cpu_patch();
++ sun4v_patch();
++
++ cpu = hard_smp_processor_id();
++ if (cpu >= NR_CPUS) {
++ prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
++ cpu, NR_CPUS);
++ prom_halt();
++ }
++ current_thread_info()->cpu = cpu;
++
++ prom_init_report();
++ start_kernel();
+ }
+-#endif
+
+ /* On Ultra, we support all of the v8 capabilities. */
+ unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+--- a/arch/sparc/kernel/trampoline_64.S
++++ b/arch/sparc/kernel/trampoline_64.S
+@@ -109,10 +109,13 @@ startup_continue:
+ brnz,pn %g1, 1b
+ nop
+
+- sethi %hi(p1275buf), %g2
+- or %g2, %lo(p1275buf), %g2
+- ldx [%g2 + 0x10], %l2
+- add %l2, -(192 + 128), %sp
++ /* Get onto temporary stack which will be in the locked
++ * kernel image.
++ */
++ sethi %hi(tramp_stack), %g1
++ or %g1, %lo(tramp_stack), %g1
++ add %g1, TRAMP_STACK_SIZE, %g1
++ sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
+ flushw
+
+ /* Setup the loop variables:
+@@ -394,7 +397,6 @@ after_lock_tlb:
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+- mov 0, %fp
+
+ rdpr %pstate, %o1
+ or %o1, PSTATE_IE, %o1
+--- a/arch/sparc/prom/cif.S
++++ b/arch/sparc/prom/cif.S
+@@ -11,11 +11,10 @@
+ .text
+ .globl prom_cif_direct
+ prom_cif_direct:
++ save %sp, -192, %sp
+ sethi %hi(p1275buf), %o1
+ or %o1, %lo(p1275buf), %o1
+- ldx [%o1 + 0x0010], %o2 ! prom_cif_stack
+- save %o2, -192, %sp
+- ldx [%i1 + 0x0008], %l2 ! prom_cif_handler
++ ldx [%o1 + 0x0008], %l2 ! prom_cif_handler
+ mov %g4, %l0
+ mov %g5, %l1
+ mov %g6, %l3
+--- a/arch/sparc/prom/init_64.c
++++ b/arch/sparc/prom/init_64.c
+@@ -26,13 +26,13 @@ phandle prom_chosen_node;
+ * It gets passed the pointer to the PROM vector.
+ */
+
+-extern void prom_cif_init(void *, void *);
++extern void prom_cif_init(void *);
+
+-void __init prom_init(void *cif_handler, void *cif_stack)
++void __init prom_init(void *cif_handler)
+ {
+ phandle node;
+
+- prom_cif_init(cif_handler, cif_stack);
++ prom_cif_init(cif_handler);
+
+ prom_chosen_node = prom_finddevice(prom_chosen_path);
+ if (!prom_chosen_node || (s32)prom_chosen_node == -1)
+--- a/arch/sparc/prom/p1275.c
++++ b/arch/sparc/prom/p1275.c
+@@ -20,7 +20,6 @@
+ struct {
+ long prom_callback; /* 0x00 */
+ void (*prom_cif_handler)(long *); /* 0x08 */
+- unsigned long prom_cif_stack; /* 0x10 */
+ } p1275buf;
+
+ extern void prom_world(int);
+@@ -52,5 +51,4 @@ void p1275_cmd_direct(unsigned long *arg
+ void prom_cif_init(void *cif_handler, void *cif_stack)
+ {
+ p1275buf.prom_cif_handler = (void (*)(long *))cif_handler;
+- p1275buf.prom_cif_stack = (unsigned long)cif_stack;
+ }
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 4 Oct 2014 21:05:14 -0700
+Subject: sparc64: Fix reversed start/end in flush_tlb_kernel_range()
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 473ad7f4fb005d1bb727e4ef27d370d28703a062 ]
+
+When we have to split up a flush request into multiple pieces
+(in order to avoid the firmware range), we don't specify the
+arguments in the right order for the second piece.
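+
+With the fix, the two pieces around the OBP firmware hole are flushed as
+follows (a sketch of the resulting flush_tlb_kernel_range() logic; the
+full hunk is below):
+
+	if (start < LOW_OBP_ADDRESS) {
+		flush_tsb_kernel_range(start, LOW_OBP_ADDRESS);
+		do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
+	}
+	if (end > HI_OBP_ADDRESS) {
+		/* Second piece: from the top of the firmware hole to
+		 * 'end', with start/end in the conventional order.
+		 */
+		flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+		do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
+	}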
+
+Fix the order, or else we get hangs as the code tries to
+flush "a lot" of entries and we get lockups like this:
+
+[ 4422.981276] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [expect:117032]
+[ 4422.996130] Modules linked in: ipv6 loop usb_storage igb ptp sg sr_mod ehci_pci ehci_hcd pps_core n2_rng rng_core
+[ 4423.016617] CPU: 12 PID: 117032 Comm: expect Not tainted 3.17.0-rc4+ #1608
+[ 4423.030331] task: fff8003cc730e220 ti: fff8003d99d54000 task.ti: fff8003d99d54000
+[ 4423.045282] TSTATE: 0000000011001602 TPC: 00000000004521e8 TNPC: 00000000004521ec Y: 00000000 Not tainted
+[ 4423.064905] TPC: <__flush_tlb_kernel_range+0x28/0x40>
+[ 4423.074964] g0: 000000000052fd10 g1: 00000001295a8000 g2: ffffff7176ffc000 g3: 0000000000002000
+[ 4423.092324] g4: fff8003cc730e220 g5: fff8003dfedcc000 g6: fff8003d99d54000 g7: 0000000000000006
+[ 4423.109687] o0: 0000000000000000 o1: 0000000000000000 o2: 0000000000000003 o3: 00000000f0000000
+[ 4423.127058] o4: 0000000000000080 o5: 00000001295a8000 sp: fff8003d99d56d01 ret_pc: 000000000052ff54
+[ 4423.145121] RPC: <__purge_vmap_area_lazy+0x314/0x3a0>
+[ 4423.155185] l0: 0000000000000000 l1: 0000000000000000 l2: 0000000000a38040 l3: 0000000000000000
+[ 4423.172559] l4: fff8003dae8965e0 l5: ffffffffffffffff l6: 0000000000000000 l7: 00000000f7e2b138
+[ 4423.189913] i0: fff8003d99d576a0 i1: fff8003d99d576a8 i2: fff8003d99d575e8 i3: 0000000000000000
+[ 4423.207284] i4: 0000000000008008 i5: fff8003d99d575c8 i6: fff8003d99d56df1 i7: 0000000000530c24
+[ 4423.224640] I7: <free_vmap_area_noflush+0x64/0x80>
+[ 4423.234193] Call Trace:
+[ 4423.239051] [0000000000530c24] free_vmap_area_noflush+0x64/0x80
+[ 4423.251029] [0000000000531a7c] remove_vm_area+0x5c/0x80
+[ 4423.261628] [0000000000531b80] __vunmap+0x20/0x120
+[ 4423.271352] [000000000071cf18] n_tty_close+0x18/0x40
+[ 4423.281423] [00000000007222b0] tty_ldisc_close+0x30/0x60
+[ 4423.292183] [00000000007225a4] tty_ldisc_reinit+0x24/0xa0
+[ 4423.303120] [0000000000722ab4] tty_ldisc_hangup+0xd4/0x1e0
+[ 4423.314232] [0000000000719aa0] __tty_hangup+0x280/0x3c0
+[ 4423.324835] [0000000000724cb4] pty_close+0x134/0x1a0
+[ 4423.334905] [000000000071aa24] tty_release+0x104/0x500
+[ 4423.345316] [00000000005511d0] __fput+0x90/0x1e0
+[ 4423.354701] [000000000047fa54] task_work_run+0x94/0xe0
+[ 4423.365126] [0000000000404b44] __handle_signal+0xc/0x2c
+
+Fixes: 4ca9a23765da ("sparc64: Guard against flushing openfirmware mappings.")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -2720,8 +2720,8 @@ void flush_tlb_kernel_range(unsigned lon
+ do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
+ }
+ if (end > HI_OBP_ADDRESS) {
+- flush_tsb_kernel_range(end, HI_OBP_ADDRESS);
+- do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS);
++ flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
++ do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
+ }
+ } else {
+ flush_tsb_kernel_range(start, end);
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 24 Oct 2014 09:59:02 -0700
+Subject: sparc64: Implement __get_user_pages_fast().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 06090e8ed89ea2113a236befb41f71d51f100e60 ]
+
+It is not sufficient to only implement get_user_pages_fast(); you
+must also implement the atomic version, __get_user_pages_fast(),
+otherwise you end up using the weak symbol fallback implementation,
+which simply returns zero.
+
+This is dangerous, because it causes the futex code to loop forever
+if transparent hugepages are supported (see get_futex_key()).
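+
+For reference, the weak fallback being picked up is roughly the
+following (a sketch of the generic mm/util.c definition of this era):
+
+	/* Reports zero pages pinned, so callers such as get_futex_key()
+	 * retry forever when a THP-backed page never gets pinned.
+	 */
+	int __weak __get_user_pages_fast(unsigned long start, int nr_pages,
+					 int write, struct page **pages)
+	{
+		return 0;
+	}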
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/gup.c | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+--- a/arch/sparc/mm/gup.c
++++ b/arch/sparc/mm/gup.c
+@@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsi
+ return 1;
+ }
+
++int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
++ struct page **pages)
++{
++ struct mm_struct *mm = current->mm;
++ unsigned long addr, len, end;
++ unsigned long next, flags;
++ pgd_t *pgdp;
++ int nr = 0;
++
++ start &= PAGE_MASK;
++ addr = start;
++ len = (unsigned long) nr_pages << PAGE_SHIFT;
++ end = start + len;
++
++ local_irq_save(flags);
++ pgdp = pgd_offset(mm, addr);
++ do {
++ pgd_t pgd = *pgdp;
++
++ next = pgd_addr_end(addr, end);
++ if (pgd_none(pgd))
++ break;
++ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
++ break;
++ } while (pgdp++, addr = next, addr != end);
++ local_irq_restore(flags);
++
++ return nr;
++}
++
+ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+ {
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 24 Sep 2014 21:49:29 -0700
+Subject: sparc64: Increase MAX_PHYS_ADDRESS_BITS to 53.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+Make sure, at compile time, that the kernel can properly support
+whatever MAX_PHYS_ADDRESS_BITS is defined to.
+
+On M7 chips, use a max_phys_bits value of 49.
+
+Based upon a patch by Bob Picco.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/page_64.h | 8 ++++----
+ arch/sparc/include/asm/pgtable_64.h | 4 ++++
+ arch/sparc/mm/init_64.c | 9 ++++++++-
+ 3 files changed, 16 insertions(+), 5 deletions(-)
+
+--- a/arch/sparc/include/asm/page_64.h
++++ b/arch/sparc/include/asm/page_64.h
+@@ -122,11 +122,11 @@ extern unsigned long PAGE_OFFSET;
+
+ #endif /* !(__ASSEMBLY__) */
+
+-/* The maximum number of physical memory address bits we support, this
+- * is used to size various tables used to manage kernel TLB misses and
+- * also the sparsemem code.
++/* The maximum number of physical memory address bits we support. The
++ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS"
++ * evaluates to.
+ */
+-#define MAX_PHYS_ADDRESS_BITS 47
++#define MAX_PHYS_ADDRESS_BITS 53
+
+ #define ILOG2_4MB 22
+ #define ILOG2_256MB 28
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -67,6 +67,10 @@
+ #define PGDIR_MASK (~(PGDIR_SIZE-1))
+ #define PGDIR_BITS (PAGE_SHIFT - 3)
+
++#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS)
++#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support
++#endif
++
+ #if (PGDIR_SHIFT + PGDIR_BITS) != 53
+ #error Page table parameters do not cover virtual address space properly.
+ #endif
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1684,12 +1684,19 @@ static void __init setup_page_offset(voi
+ case SUN4V_CHIP_NIAGARA4:
+ case SUN4V_CHIP_NIAGARA5:
+ case SUN4V_CHIP_SPARC64X:
+- default:
++ case SUN4V_CHIP_SPARC_M6:
+ /* T4 and later support 52-bit virtual addresses. */
+ sparc64_va_hole_top = 0xfff8000000000000UL;
+ sparc64_va_hole_bottom = 0x0008000000000000UL;
+ max_phys_bits = 47;
+ break;
++ case SUN4V_CHIP_SPARC_M7:
++ default:
++ /* M7 and later support 52-bit virtual addresses. */
++ sparc64_va_hole_top = 0xfff8000000000000UL;
++ sparc64_va_hole_bottom = 0x0008000000000000UL;
++ max_phys_bits = 49;
++ break;
+ }
+ }
+
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Dave Kleikamp <dave.kleikamp@oracle.com>
+Date: Tue, 7 Oct 2014 08:12:37 -0500
+Subject: sparc64: Increase size of boot string to 1024 bytes
+
+From: Dave Kleikamp <dave.kleikamp@oracle.com>
+
+[ Upstream commit 1cef94c36bd4d79b5ae3a3df99ee0d76d6a4a6dc ]
+
+This is the longest boot string that silo supports.
+
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Cc: Bob Picco <bob.picco@oracle.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: sparclinux@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/prom/bootstr_64.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/prom/bootstr_64.c
++++ b/arch/sparc/prom/bootstr_64.c
+@@ -14,7 +14,10 @@
+ * the .bss section or it will break things.
+ */
+
+-#define BARG_LEN 256
++/* We limit BARG_LEN to 1024 because this is the size of the
++ * 'barg_out' command line buffer in the SILO bootloader.
++ */
++#define BARG_LEN 1024
+ struct {
+ int bootstr_len;
+ int bootstr_valid;
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sat, 27 Sep 2014 21:30:57 -0700
+Subject: sparc64: Kill unnecessary tables and increase MAX_BANKS.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit d195b71bad4347d2df51072a537f922546a904f1 ]
+
+swapper_low_pmd_dir and swapper_pud_dir are actually completely
+useless and unnecessary.
+
+We just need swapper_pg_dir[]. Naturally the other page table chunks
+will be allocated on an as-needed basis. Since the kernel actually
+accesses these tables in the PAGE_OFFSET view, there is not even a TLB
+locality advantage of placing them in the kernel image.
+
+Use the hard-coded vmlinux.lds.S slot for swapper_pg_dir, which is
+naturally page aligned.
+
+Increase MAX_BANKS to 1024 in order to handle heavily fragmented
+virtual guests.
+
+Even with this MAX_BANKS increase, the kernel is 20K+ smaller.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/pgtable_64.h | 1 -
+ arch/sparc/kernel/vmlinux.lds.S | 5 +++--
+ arch/sparc/mm/init_64.c | 25 ++-----------------------
+ 3 files changed, 5 insertions(+), 26 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -927,7 +927,6 @@ static inline void __set_pte_at(struct m
+ #endif
+
+ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+-extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD];
+
+ extern void paging_init(void);
+ extern unsigned long find_ecache_flush_span(unsigned long size);
+--- a/arch/sparc/kernel/vmlinux.lds.S
++++ b/arch/sparc/kernel/vmlinux.lds.S
+@@ -35,8 +35,9 @@ jiffies = jiffies_64;
+
+ SECTIONS
+ {
+- /* swapper_low_pmd_dir is sparc64 only */
+- swapper_low_pmd_dir = 0x0000000000402000;
++#ifdef CONFIG_SPARC64
++ swapper_pg_dir = 0x0000000000402000;
++#endif
+ . = INITIAL_ADDRESS;
+ .text TEXTSTART :
+ {
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -85,7 +85,7 @@ extern struct tsb swapper_tsb[KERNEL_TSB
+
+ static unsigned long cpu_pgsz_mask;
+
+-#define MAX_BANKS 32
++#define MAX_BANKS 1024
+
+ static struct linux_prom64_registers pavail[MAX_BANKS];
+ static int pavail_ents;
+@@ -1937,12 +1937,6 @@ static void __init sun4v_linear_pte_xor_
+
+ static unsigned long last_valid_pfn;
+
+-/* These must be page aligned in order to not trigger the
+- * alignment tests of pgd_bad() and pud_bad().
+- */
+-pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((aligned (PAGE_SIZE)));
+-static pud_t swapper_pud_dir[PTRS_PER_PUD] __attribute__ ((aligned (PAGE_SIZE)));
+-
+ static void sun4u_pgprot_init(void);
+ static void sun4v_pgprot_init(void);
+
+@@ -1950,8 +1944,6 @@ void __init paging_init(void)
+ {
+ unsigned long end_pfn, shift, phys_base;
+ unsigned long real_end, i;
+- pud_t *pud;
+- pmd_t *pmd;
+ int node;
+
+ setup_page_offset();
+@@ -2046,20 +2038,7 @@ void __init paging_init(void)
+ */
+ init_mm.pgd += ((shift) / (sizeof(pgd_t)));
+
+- memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
+-
+- /* The kernel page tables we publish into what the rest of the
+- * world sees must be adjusted so that they see the PAGE_OFFSET
+- * address of these in-kerenel data structures. However right
+- * here we must access them from the kernel image side, because
+- * the trap tables haven't been taken over and therefore we cannot
+- * take TLB misses in the PAGE_OFFSET linear mappings yet.
+- */
+- pud = swapper_pud_dir + (shift / sizeof(pud_t));
+- pgd_set(&swapper_pg_dir[0], pud);
+-
+- pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t));
+- pud_set(&swapper_pud_dir[0], pmd);
++ memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+
+ inherit_prom_mappings();
+
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
+Date: Tue, 16 Sep 2014 11:37:08 -0400
+Subject: sparc64: Move request_irq() from ldc_bind() to ldc_alloc()
+
+From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
+
+[ Upstream commit c21c4ab0d6921f7160a43216fa6973b5924de561 ]
+
+The request_irq() needs to be done from ldc_alloc(), where sleeping is
+safe, rather than from ldc_bind(), which is reached in atomic context
+(via vio_port_up() in the trace below); otherwise the sleeping
+allocation inside request_threaded_irq() triggers the following
+(caught by lockdep):
+
+ [00000000004a0738] __might_sleep+0xf8/0x120
+ [000000000058bea4] kmem_cache_alloc_trace+0x184/0x2c0
+ [00000000004faf80] request_threaded_irq+0x80/0x160
+ [000000000044f71c] ldc_bind+0x7c/0x220
+ [0000000000452454] vio_port_up+0x54/0xe0
+ [00000000101f6778] probe_disk+0x38/0x220 [sunvdc]
+ [00000000101f6b8c] vdc_port_probe+0x22c/0x300 [sunvdc]
+ [0000000000451a88] vio_device_probe+0x48/0x60
+ [000000000074c56c] really_probe+0x6c/0x300
+ [000000000074c83c] driver_probe_device+0x3c/0xa0
+ [000000000074c92c] __driver_attach+0x8c/0xa0
+ [000000000074a6ec] bus_for_each_dev+0x6c/0xa0
+ [000000000074c1dc] driver_attach+0x1c/0x40
+ [000000000074b0fc] bus_add_driver+0xbc/0x280
+
+Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
+Acked-by: Dwight Engen <dwight.engen@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/ldc.h | 5 +++--
+ arch/sparc/kernel/ds.c | 4 ++--
+ arch/sparc/kernel/ldc.c | 41 +++++++++++++++++++++--------------------
+ arch/sparc/kernel/viohs.c | 4 ++--
+ 4 files changed, 28 insertions(+), 26 deletions(-)
+
+--- a/arch/sparc/include/asm/ldc.h
++++ b/arch/sparc/include/asm/ldc.h
+@@ -53,13 +53,14 @@ struct ldc_channel;
+ /* Allocate state for a channel. */
+ extern struct ldc_channel *ldc_alloc(unsigned long id,
+ const struct ldc_channel_config *cfgp,
+- void *event_arg);
++ void *event_arg,
++ const char *name);
+
+ /* Shut down and free state for a channel. */
+ extern void ldc_free(struct ldc_channel *lp);
+
+ /* Register TX and RX queues of the link with the hypervisor. */
+-extern int ldc_bind(struct ldc_channel *lp, const char *name);
++extern int ldc_bind(struct ldc_channel *lp);
+
+ /* For non-RAW protocols we need to complete a handshake before
+ * communication can proceed. ldc_connect() does that, if the
+--- a/arch/sparc/kernel/ds.c
++++ b/arch/sparc/kernel/ds.c
+@@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev
+ ds_cfg.tx_irq = vdev->tx_irq;
+ ds_cfg.rx_irq = vdev->rx_irq;
+
+- lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
++ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS");
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out_free_ds_states;
+ }
+ dp->lp = lp;
+
+- err = ldc_bind(lp, "DS");
++ err = ldc_bind(lp);
+ if (err)
+ goto out_free_ldc;
+
+--- a/arch/sparc/kernel/ldc.c
++++ b/arch/sparc/kernel/ldc.c
+@@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc
+
+ struct ldc_channel *ldc_alloc(unsigned long id,
+ const struct ldc_channel_config *cfgp,
+- void *event_arg)
++ void *event_arg,
++ const char *name)
+ {
+ struct ldc_channel *lp;
+ const struct ldc_mode_ops *mops;
+@@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned l
+ err = -EINVAL;
+ if (!cfgp)
+ goto out_err;
++ if (!name)
++ goto out_err;
+
+ switch (cfgp->mode) {
+ case LDC_MODE_RAW:
+@@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned l
+
+ INIT_HLIST_HEAD(&lp->mh_list);
+
++ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
++ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
++
++ err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
++ lp->rx_irq_name, lp);
++ if (err)
++ goto out_free_txq;
++
++ err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
++ lp->tx_irq_name, lp);
++ if (err) {
++ free_irq(lp->cfg.rx_irq, lp);
++ goto out_free_txq;
++ }
++
+ return lp;
+
+ out_free_txq:
+@@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free);
+ * state. This does not initiate a handshake, ldc_connect() does
+ * that.
+ */
+-int ldc_bind(struct ldc_channel *lp, const char *name)
++int ldc_bind(struct ldc_channel *lp)
+ {
+ unsigned long hv_err, flags;
+ int err = -EINVAL;
+
+- if (!name ||
+- (lp->state != LDC_STATE_INIT))
++ if (lp->state != LDC_STATE_INIT)
+ return -EINVAL;
+
+- snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+- snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+-
+- err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
+- lp->rx_irq_name, lp);
+- if (err)
+- return err;
+-
+- err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
+- lp->tx_irq_name, lp);
+- if (err) {
+- free_irq(lp->cfg.rx_irq, lp);
+- return err;
+- }
+-
+-
+ spin_lock_irqsave(&lp->lock, flags);
+
+ enable_irq(lp->cfg.rx_irq);
+--- a/arch/sparc/kernel/viohs.c
++++ b/arch/sparc/kernel/viohs.c
+@@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_stat
+ cfg.tx_irq = vio->vdev->tx_irq;
+ cfg.rx_irq = vio->vdev->rx_irq;
+
+- lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
++ lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
+
+@@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state
+
+ err = 0;
+ if (state == LDC_STATE_INIT) {
+- err = ldc_bind(vio->lp, vio->name);
++ err = ldc_bind(vio->lp);
+ if (err)
+ printk(KERN_WARNING "%s: Port %lu bind failed, "
+ "err=%d\n",
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: bob picco <bpicco@meloft.net>
+Date: Thu, 25 Sep 2014 12:25:03 -0700
+Subject: sparc64: sparse irq
+
+From: bob picco <bpicco@meloft.net>
+
+[ Upstream commit ee6a9333fa58e11577c1b531b8e0f5ffc0fd6f50 ]
+
+This patch attempts to do a few things. The highlights are: 1) enable
+SPARSE_IRQ unconditionally, 2) kill off the !SPARSE_IRQ code, 3) allocate
+ivector_table at boot time, and 4) default to the cookie-only VIRQ
+mechanism for supported firmware. The first firmware with cookie-only
+support, in my experience, appears on T5. You can optionally force the
+HV firmware into non-cookie-only mode, i.e. the sysino support.
+
+The sysino is a deprecated HV mechanism according to the most recent
+SPARC Virtual Machine Specification. HV_GRP_INTR is what controls the
+cookie/sysino firmware versioning.
+
+The history of this interface is:
+
+1) Major version 1.0 only supported sysino based interrupt interfaces.
+
+2) Major version 2.0 added cookie based VIRQs; however, because OSs
+ were using the VIRQs without negotiating major version 2.0 (Linux
+ and Solaris are both guilty), the VIRQ calls were allowed even with
+ major version 1.0
+
+ To complicate things even further, the VIRQ interfaces were only
+ actually hooked up in the hypervisor for LDC interrupt sources.
+ VIRQ calls on other device types would result in HV_EINVAL errors.
+
+ So effectively, major version 2.0 is unusable.
+
+3) Major version 3.0 was created to signal use of VIRQs and the fact
+ that the hypervisor has these calls hooked up for all interrupt
+ sources, not just those for LDC devices.
+
+A new boot option, hvirq, is provided should cookie-only HV support have
+issues. It selects the major version requested for HV_GRP_INTR, i.e. it
+is tied to HV API versioning. The code attempts major=3 first by default;
+the option can be used to override this default.
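+
+For example, booting with:
+
+	hvirq=1
+
+on the kernel command line requests major version 1 of HV_GRP_INTR,
+i.e. the deprecated sysino interfaces, instead of the default attempt
+at major version 3 (cookie-only VIRQs).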
+
+I've tested with SPARSE_IRQ on T5-8, M7-4, T4-X and Jalapeño.
+
+Signed-off-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/Kconfig | 1
+ arch/sparc/include/asm/irq_64.h | 9
+ arch/sparc/kernel/irq_64.c | 509 ++++++++++++++++++++++++++--------------
+ 3 files changed, 343 insertions(+), 176 deletions(-)
+
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -67,6 +67,7 @@ config SPARC64
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_CONTEXT_TRACKING
+ select HAVE_DEBUG_KMEMLEAK
++ select SPARSE_IRQ
+ select RTC_DRV_CMOS
+ select RTC_DRV_BQ4802
+ select RTC_DRV_SUN4V
+--- a/arch/sparc/include/asm/irq_64.h
++++ b/arch/sparc/include/asm/irq_64.h
+@@ -37,7 +37,7 @@
+ *
+ * ino_bucket->irq allocation is made during {sun4v_,}build_irq().
+ */
+-#define NR_IRQS 255
++#define NR_IRQS (2048)
+
+ extern void irq_install_pre_handler(int irq,
+ void (*func)(unsigned int, void *, void *),
+@@ -57,11 +57,8 @@ extern unsigned int sun4u_build_msi(u32
+ unsigned long iclr_base);
+ extern void sun4u_destroy_msi(unsigned int irq);
+
+-extern unsigned char irq_alloc(unsigned int dev_handle,
+- unsigned int dev_ino);
+-#ifdef CONFIG_PCI_MSI
+-extern void irq_free(unsigned int irq);
+-#endif
++unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
++void irq_free(unsigned int irq);
+
+ extern void __init init_IRQ(void);
+ extern void fixup_irqs(void);
+--- a/arch/sparc/kernel/irq_64.c
++++ b/arch/sparc/kernel/irq_64.c
+@@ -47,8 +47,6 @@
+ #include "cpumap.h"
+ #include "kstack.h"
+
+-#define NUM_IVECS (IMAP_INR + 1)
+-
+ struct ino_bucket *ivector_table;
+ unsigned long ivector_table_pa;
+
+@@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long
+
+ #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa)
+
+-static struct {
+- unsigned int dev_handle;
+- unsigned int dev_ino;
+- unsigned int in_use;
+-} irq_table[NR_IRQS];
+-static DEFINE_SPINLOCK(irq_alloc_lock);
++static unsigned long hvirq_major __initdata;
++static int __init early_hvirq_major(char *p)
++{
++ int rc = kstrtoul(p, 10, &hvirq_major);
++
++ return rc;
++}
++early_param("hvirq", early_hvirq_major);
++
++static int hv_irq_version;
++
++/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
++ * based interfaces, but:
++ *
++ * 1) Several OSs, Solaris and Linux included, use them even when only
++ * negotiating version 1.0 (or failing to negotiate at all). So the
++ * hypervisor has a workaround that provides the VIRQ interfaces even
++ * when only version 1.0 of the API is in use.
++ *
++ * 2) Second, and more importantly, with major version 2.0 these VIRQ
++ * interfaces only were actually hooked up for LDC interrupts, even
++ * though the Hypervisor specification clearly stated:
++ *
++ * The new interrupt API functions will be available to a guest
++ * when it negotiates version 2.0 in the interrupt API group 0x2. When
++ * a guest negotiates version 2.0, all interrupt sources will only
++ * support using the cookie interface, and any attempt to use the
++ * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
++ * ENOTSUPPORTED error being returned.
++ *
++ * with an emphasis on "all interrupt sources".
++ *
++ * To correct this, major version 3.0 was created which does actually
++ * support VIRQs for all interrupt sources (not just LDC devices). So
++ * if we want to move completely over the cookie based VIRQs we must
++ * negotiate major version 3.0 or later of HV_GRP_INTR.
++ */
++static bool sun4v_cookie_only_virqs(void)
++{
++ if (hv_irq_version >= 3)
++ return true;
++ return false;
++}
+
+-unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
++static void __init irq_init_hv(void)
+ {
+- unsigned long flags;
+- unsigned char ent;
++ unsigned long hv_error, major, minor = 0;
++
++ if (tlb_type != hypervisor)
++ return;
+
+- BUILD_BUG_ON(NR_IRQS >= 256);
++ if (hvirq_major)
++ major = hvirq_major;
++ else
++ major = 3;
+
+- spin_lock_irqsave(&irq_alloc_lock, flags);
++ hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor);
++ if (!hv_error)
++ hv_irq_version = major;
++ else
++ hv_irq_version = 1;
+
+- for (ent = 1; ent < NR_IRQS; ent++) {
+- if (!irq_table[ent].in_use)
++ pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n",
++ hv_irq_version,
++ sun4v_cookie_only_virqs() ? "enabled" : "disabled");
++}
++
++/* This function is for the timer interrupt.*/
++int __init arch_probe_nr_irqs(void)
++{
++ return 1;
++}
++
++#define DEFAULT_NUM_IVECS (0xfffU)
++static unsigned int nr_ivec = DEFAULT_NUM_IVECS;
++#define NUM_IVECS (nr_ivec)
++
++static unsigned int __init size_nr_ivec(void)
++{
++ if (tlb_type == hypervisor) {
++ switch (sun4v_chip_type) {
++ /* Athena's devhandle|devino is large.*/
++ case SUN4V_CHIP_SPARC64X:
++ nr_ivec = 0xffff;
+ break;
++ }
+ }
+- if (ent >= NR_IRQS) {
+- printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+- ent = 0;
+- } else {
+- irq_table[ent].dev_handle = dev_handle;
+- irq_table[ent].dev_ino = dev_ino;
+- irq_table[ent].in_use = 1;
+- }
++ return nr_ivec;
++}
++
++struct irq_handler_data {
++ union {
++ struct {
++ unsigned int dev_handle;
++ unsigned int dev_ino;
++ };
++ unsigned long sysino;
++ };
++ struct ino_bucket bucket;
++ unsigned long iclr;
++ unsigned long imap;
++};
++
++static inline unsigned int irq_data_to_handle(struct irq_data *data)
++{
++ struct irq_handler_data *ihd = data->handler_data;
++
++ return ihd->dev_handle;
++}
++
++static inline unsigned int irq_data_to_ino(struct irq_data *data)
++{
++ struct irq_handler_data *ihd = data->handler_data;
++
++ return ihd->dev_ino;
++}
+
+- spin_unlock_irqrestore(&irq_alloc_lock, flags);
++static inline unsigned long irq_data_to_sysino(struct irq_data *data)
++{
++ struct irq_handler_data *ihd = data->handler_data;
+
+- return ent;
++ return ihd->sysino;
+ }
+
+-#ifdef CONFIG_PCI_MSI
+ void irq_free(unsigned int irq)
+ {
+- unsigned long flags;
++ void *data = irq_get_handler_data(irq);
+
+- if (irq >= NR_IRQS)
+- return;
++ kfree(data);
++ irq_set_handler_data(irq, NULL);
++ irq_free_descs(irq, 1);
++}
+
+- spin_lock_irqsave(&irq_alloc_lock, flags);
++unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
++{
++ int irq;
+
+- irq_table[irq].in_use = 0;
++ irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL);
++ if (irq <= 0)
++ goto out;
+
+- spin_unlock_irqrestore(&irq_alloc_lock, flags);
++ return irq;
++out:
++ return 0;
++}
++
++static unsigned int cookie_exists(u32 devhandle, unsigned int devino)
++{
++ unsigned long hv_err, cookie;
++ struct ino_bucket *bucket;
++ unsigned int irq = 0U;
++
++ hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie);
++ if (hv_err) {
++ pr_err("HV get cookie failed hv_err = %ld\n", hv_err);
++ goto out;
++ }
++
++ if (cookie & ((1UL << 63UL))) {
++ cookie = ~cookie;
++ bucket = (struct ino_bucket *) __va(cookie);
++ irq = bucket->__irq;
++ }
++out:
++ return irq;
++}
++
++static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
++{
++ unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
++ struct ino_bucket *bucket;
++ unsigned int irq;
++
++ bucket = &ivector_table[sysino];
++ irq = bucket_get_irq(__pa(bucket));
++
++ return irq;
++}
++
++void ack_bad_irq(unsigned int irq)
++{
++ pr_crit("BAD IRQ ack %d\n", irq);
++}
++
++void irq_install_pre_handler(int irq,
++ void (*func)(unsigned int, void *, void *),
++ void *arg1, void *arg2)
++{
++ pr_warn("IRQ pre handler NOT supported.\n");
+ }
+-#endif
+
+ /*
+ * /proc/interrupts printing:
+@@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(un
+ return tid;
+ }
+
+-struct irq_handler_data {
+- unsigned long iclr;
+- unsigned long imap;
+-
+- void (*pre_handler)(unsigned int, void *, void *);
+- void *arg1;
+- void *arg2;
+-};
+-
+ #ifdef CONFIG_SMP
+ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
+ {
+@@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_dat
+
+ static void sun4v_irq_enable(struct irq_data *data)
+ {
+- unsigned int ino = irq_table[data->irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity);
++ unsigned int ino = irq_data_to_sysino(data);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+@@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_
+ static int sun4v_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+ {
+- unsigned int ino = irq_table[data->irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(data->irq, mask);
++ unsigned int ino = irq_data_to_sysino(data);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+@@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq
+
+ static void sun4v_irq_disable(struct irq_data *data)
+ {
+- unsigned int ino = irq_table[data->irq].dev_ino;
++ unsigned int ino = irq_data_to_sysino(data);
+ int err;
+
+ err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+@@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq
+
+ static void sun4v_irq_eoi(struct irq_data *data)
+ {
+- unsigned int ino = irq_table[data->irq].dev_ino;
++ unsigned int ino = irq_data_to_sysino(data);
+ int err;
+
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+@@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_dat
+
+ static void sun4v_virq_enable(struct irq_data *data)
+ {
+- unsigned long cpuid, dev_handle, dev_ino;
++ unsigned long dev_handle = irq_data_to_handle(data);
++ unsigned long dev_ino = irq_data_to_ino(data);
++ unsigned long cpuid;
+ int err;
+
+ cpuid = irq_choose_cpu(data->irq, data->affinity);
+
+- dev_handle = irq_table[data->irq].dev_handle;
+- dev_ino = irq_table[data->irq].dev_ino;
+-
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+@@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq
+ static int sun4v_virt_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+ {
+- unsigned long cpuid, dev_handle, dev_ino;
++ unsigned long dev_handle = irq_data_to_handle(data);
++ unsigned long dev_ino = irq_data_to_ino(data);
++ unsigned long cpuid;
+ int err;
+
+ cpuid = irq_choose_cpu(data->irq, mask);
+
+- dev_handle = irq_table[data->irq].dev_handle;
+- dev_ino = irq_table[data->irq].dev_ino;
+-
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+@@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struc
+
+ static void sun4v_virq_disable(struct irq_data *data)
+ {
+- unsigned long dev_handle, dev_ino;
++ unsigned long dev_handle = irq_data_to_handle(data);
++ unsigned long dev_ino = irq_data_to_ino(data);
+ int err;
+
+- dev_handle = irq_table[data->irq].dev_handle;
+- dev_ino = irq_table[data->irq].dev_ino;
+
+ err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+ HV_INTR_DISABLED);
+@@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct ir
+
+ static void sun4v_virq_eoi(struct irq_data *data)
+ {
+- unsigned long dev_handle, dev_ino;
++ unsigned long dev_handle = irq_data_to_handle(data);
++ unsigned long dev_ino = irq_data_to_ino(data);
+ int err;
+
+- dev_handle = irq_table[data->irq].dev_handle;
+- dev_ino = irq_table[data->irq].dev_ino;
+-
+ err = sun4v_vintr_set_state(dev_handle, dev_ino,
+ HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+@@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = {
+ .flags = IRQCHIP_EOI_IF_HANDLED,
+ };
+
+-static void pre_flow_handler(struct irq_data *d)
+-{
+- struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d);
+- unsigned int ino = irq_table[d->irq].dev_ino;
+-
+- handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2);
+-}
+-
+-void irq_install_pre_handler(int irq,
+- void (*func)(unsigned int, void *, void *),
+- void *arg1, void *arg2)
+-{
+- struct irq_handler_data *handler_data = irq_get_handler_data(irq);
+-
+- handler_data->pre_handler = func;
+- handler_data->arg1 = arg1;
+- handler_data->arg2 = arg2;
+-
+- __irq_set_preflow_handler(irq, pre_flow_handler);
+-}
+-
+ unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
+ {
+- struct ino_bucket *bucket;
+ struct irq_handler_data *handler_data;
++ struct ino_bucket *bucket;
+ unsigned int irq;
+ int ino;
+
+@@ -537,119 +641,166 @@ out:
+ return irq;
+ }
+
+-static unsigned int sun4v_build_common(unsigned long sysino,
+- struct irq_chip *chip)
++static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino,
++ void (*handler_data_init)(struct irq_handler_data *data,
++ u32 devhandle, unsigned int devino),
++ struct irq_chip *chip)
+ {
+- struct ino_bucket *bucket;
+- struct irq_handler_data *handler_data;
++ struct irq_handler_data *data;
+ unsigned int irq;
+
+- BUG_ON(tlb_type != hypervisor);
++ irq = irq_alloc(devhandle, devino);
++ if (!irq)
++ goto out;
+
+- bucket = &ivector_table[sysino];
+- irq = bucket_get_irq(__pa(bucket));
+- if (!irq) {
+- irq = irq_alloc(0, sysino);
+- bucket_set_irq(__pa(bucket), irq);
+- irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq,
+- "IVEC");
++ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
++ if (unlikely(!data)) {
++ pr_err("IRQ handler data allocation failed.\n");
++ irq_free(irq);
++ irq = 0;
++ goto out;
+ }
+
+- handler_data = irq_get_handler_data(irq);
+- if (unlikely(handler_data))
+- goto out;
++ irq_set_handler_data(irq, data);
++ handler_data_init(data, devhandle, devino);
++ irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC");
++ data->imap = ~0UL;
++ data->iclr = ~0UL;
++out:
++ return irq;
++}
+
+- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+- if (unlikely(!handler_data)) {
+- prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+- prom_halt();
+- }
+- irq_set_handler_data(irq, handler_data);
++static unsigned long cookie_assign(unsigned int irq, u32 devhandle,
++ unsigned int devino)
++{
++ struct irq_handler_data *ihd = irq_get_handler_data(irq);
++ unsigned long hv_error, cookie;
+
+- /* Catch accidental accesses to these things. IMAP/ICLR handling
+- * is done by hypervisor calls on sun4v platforms, not by direct
+- * register accesses.
+	/* handler_irq needs to find the irq. cookie is seen as signed in
+	 * sun4v_dev_mondo and treated as a non-ivector_table delivery.
+ */
+- handler_data->imap = ~0UL;
+- handler_data->iclr = ~0UL;
++ ihd->bucket.__irq = irq;
++ cookie = ~__pa(&ihd->bucket);
+
+-out:
+- return irq;
++ hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie);
++ if (hv_error)
++ pr_err("HV vintr set cookie failed = %ld\n", hv_error);
++
++ return hv_error;
+ }
+
+-unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
++static void cookie_handler_data(struct irq_handler_data *data,
++ u32 devhandle, unsigned int devino)
+ {
+- unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
++ data->dev_handle = devhandle;
++ data->dev_ino = devino;
++}
+
+- return sun4v_build_common(sysino, &sun4v_irq);
++static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino,
++ struct irq_chip *chip)
++{
++ unsigned long hv_error;
++ unsigned int irq;
++
++ irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip);
++
++ hv_error = cookie_assign(irq, devhandle, devino);
++ if (hv_error) {
++ irq_free(irq);
++ irq = 0;
++ }
++
++ return irq;
+ }
+
+-unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
++static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino)
+ {
+- struct irq_handler_data *handler_data;
+- unsigned long hv_err, cookie;
+- struct ino_bucket *bucket;
+ unsigned int irq;
+
+- bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
+- if (unlikely(!bucket))
+- return 0;
+-
+- /* The only reference we store to the IRQ bucket is
+- * by physical address which kmemleak can't see, tell
+- * it that this object explicitly is not a leak and
+- * should be scanned.
+- */
+- kmemleak_not_leak(bucket);
++ irq = cookie_exists(devhandle, devino);
++ if (irq)
++ goto out;
+
+- __flush_dcache_range((unsigned long) bucket,
+- ((unsigned long) bucket +
+- sizeof(struct ino_bucket)));
++ irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+
+- irq = irq_alloc(devhandle, devino);
++out:
++ return irq;
++}
++
++static void sysino_set_bucket(unsigned int irq)
++{
++ struct irq_handler_data *ihd = irq_get_handler_data(irq);
++ struct ino_bucket *bucket;
++ unsigned long sysino;
++
++ sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino);
++ BUG_ON(sysino >= nr_ivec);
++ bucket = &ivector_table[sysino];
+ bucket_set_irq(__pa(bucket), irq);
++}
+
+- irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq,
+- "IVEC");
++static void sysino_handler_data(struct irq_handler_data *data,
++ u32 devhandle, unsigned int devino)
++{
++ unsigned long sysino;
+
+- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+- if (unlikely(!handler_data))
+- return 0;
++ sysino = sun4v_devino_to_sysino(devhandle, devino);
++ data->sysino = sysino;
++}
+
+- /* In order to make the LDC channel startup sequence easier,
+- * especially wrt. locking, we do not let request_irq() enable
+- * the interrupt.
+- */
+- irq_set_status_flags(irq, IRQ_NOAUTOEN);
+- irq_set_handler_data(irq, handler_data);
++static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino,
++ struct irq_chip *chip)
++{
++ unsigned int irq;
+
+- /* Catch accidental accesses to these things. IMAP/ICLR handling
+- * is done by hypervisor calls on sun4v platforms, not by direct
+- * register accesses.
+- */
+- handler_data->imap = ~0UL;
+- handler_data->iclr = ~0UL;
++ irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip);
++ if (!irq)
++ goto out;
+
+- cookie = ~__pa(bucket);
+- hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+- if (hv_err) {
+- prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
+- "err=%lu\n", devhandle, devino, hv_err);
+- prom_halt();
+- }
++ sysino_set_bucket(irq);
++out:
++ return irq;
++}
+
++static int sun4v_build_sysino(u32 devhandle, unsigned int devino)
++{
++ int irq;
++
++ irq = sysino_exists(devhandle, devino);
++ if (irq)
++ goto out;
++
++ irq = sysino_build_irq(devhandle, devino, &sun4v_irq);
++out:
+ return irq;
+ }
+
+-void ack_bad_irq(unsigned int irq)
++unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+ {
+- unsigned int ino = irq_table[irq].dev_ino;
++ unsigned int irq;
+
+- if (!ino)
+- ino = 0xdeadbeef;
++ if (sun4v_cookie_only_virqs())
++ irq = sun4v_build_cookie(devhandle, devino);
++ else
++ irq = sun4v_build_sysino(devhandle, devino);
+
+- printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n",
+- ino, irq);
++ return irq;
++}
++
++unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
++{
++ int irq;
++
++ irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
++ if (!irq)
++ goto out;
++
++ /* This is borrowed from the original function.
++ */
++ irq_set_status_flags(irq, IRQ_NOAUTOEN);
++
++out:
++ return irq;
+ }
+
+ void *hardirq_stack[NR_CPUS];
+@@ -720,9 +871,12 @@ void fixup_irqs(void)
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ struct irq_desc *desc = irq_to_desc(irq);
+- struct irq_data *data = irq_desc_get_irq_data(desc);
++ struct irq_data *data;
+ unsigned long flags;
+
++ if (!desc)
++ continue;
++ data = irq_desc_get_irq_data(desc);
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ if (desc->action && !irqd_is_per_cpu(data)) {
+ if (data->chip->irq_set_affinity)
+@@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action
+ .name = "timer",
+ };
+
+-/* Only invoked on boot processor. */
+-void __init init_IRQ(void)
++static void __init irq_ivector_init(void)
+ {
+- unsigned long size;
++ unsigned long size, order;
++ unsigned int ivecs;
+
+- map_prom_timers();
+- kill_prom_timer();
++ /* If we are doing cookie only VIRQs then we do not need the ivector
++ * table to process interrupts.
++ */
++ if (sun4v_cookie_only_virqs())
++ return;
+
+- size = sizeof(struct ino_bucket) * NUM_IVECS;
+- ivector_table = kzalloc(size, GFP_KERNEL);
++ ivecs = size_nr_ivec();
++ size = sizeof(struct ino_bucket) * ivecs;
++ order = get_order(size);
++ ivector_table = (struct ino_bucket *)
++ __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!ivector_table) {
+ prom_printf("Fatal error, cannot allocate ivector_table\n");
+ prom_halt();
+@@ -940,6 +1100,15 @@ void __init init_IRQ(void)
+ ((unsigned long) ivector_table) + size);
+
+ ivector_table_pa = __pa(ivector_table);
++}
++
++/* Only invoked on boot processor. */
++void __init init_IRQ(void)
++{
++ irq_init_hv();
++ irq_ivector_init();
++ map_prom_timers();
++ kill_prom_timer();
+
+ if (tlb_type == hypervisor)
+ sun4v_init_mondo_queues();
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: bob picco <bpicco@meloft.net>
+Date: Tue, 16 Sep 2014 09:26:47 -0400
+Subject: sparc64: sun4v TLB error power off events
+
+From: bob picco <bpicco@meloft.net>
+
+[ Upstream commit 4ccb9272892c33ef1c19a783cfa87103b30c2784 ]
+
+We've witnessed a few TLB events that cause the machine to power off via
+prom_halt. One came from an NFS-related area during rmmod, another from an
+mmapper of /dev/mem, and a more recent one is an ITLB issue with a bad page
+size, which could be a hardware bug. Bugs happen, but we should avoid powering
+off and/or hanging the machine when possible.
+
+This is a DTLB error from an mmapper of /dev/mem:
+[root@sparcie ~]# SUN4V-DTLB: Error at TPC[fffff80100903e6c], tl 1
+SUN4V-DTLB: TPC<0xfffff80100903e6c>
+SUN4V-DTLB: O7[fffff801081979d0]
+SUN4V-DTLB: O7<0xfffff801081979d0>
+SUN4V-DTLB: vaddr[fffff80100000000] ctx[1250] pte[98000000000f0610] error[2]
+
+This is from recent mainline, for an ITLB error:
+[ 3708.179864] SUN4V-ITLB: TPC<0xfffffc010071cefc>
+[ 3708.188866] SUN4V-ITLB: O7[fffffc010071cee8]
+[ 3708.197377] SUN4V-ITLB: O7<0xfffffc010071cee8>
+[ 3708.206539] SUN4V-ITLB: vaddr[e0003] ctx[1a3c] pte[2900000dcc800eeb] error[4]
+
+Normally sun4v_itlb_error_report() and sun4v_dtlb_error_report() would call
+prom_halt() and drop us to the OF command prompt "ok". This isn't the case for
+LDOMs, where the machine instead powers off.
+
+For the HV-reported error HV_ENORADDR on HV_MMU_MAP_ADDR_TRAP, we now raise a
+SIGBUS error by qualifying the fault within do_sparc64_fault() with the fault
+code mask FAULT_CODE_BAD_RA. This is done when the trap level (%tl) is less
+than or equal to one. Otherwise, for %tl > 1, we eventually proceed to
+die_if_kernel().
+
+The logic of this patch was partially inspired by David Miller's feedback.
+
+Powering off a large sparc64 machine is painful, and die_if_kernel() provides
+more context. A reset sequence isn't brief on a large sparc64 machine either,
+but it is better than a power-off/power-on cycle.
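+
+The control flow this introduces, roughly (a sketch for context only, not the
+literal hunks; all names are from the diff below):
+
+	/* sun4v_{i,d}tlb_error now branch to sun4v_bad_ra when %tl <= 1,
+	 * recording FAULT_CODE_BAD_RA | FAULT_CODE_{I,D}TLB as the fault
+	 * code before entering sparc64_realfault_common.  Later,
+	 * do_sparc64_fault() short-circuits such faults:
+	 */
+	if (fault_code & FAULT_CODE_BAD_RA)
+		goto do_sigbus;		/* SIGBUS instead of prom_halt() */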
+
+Cc: sparclinux@vger.kernel.org
+Signed-off-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/thread_info_64.h | 1
+ arch/sparc/kernel/sun4v_tlb_miss.S | 35 +++++++++++++++++++-------------
+ arch/sparc/kernel/traps_64.c | 15 ++++++++-----
+ arch/sparc/mm/fault_64.c | 3 ++
+ 4 files changed, 34 insertions(+), 20 deletions(-)
+
+--- a/arch/sparc/include/asm/thread_info_64.h
++++ b/arch/sparc/include/asm/thread_info_64.h
+@@ -102,6 +102,7 @@ struct thread_info {
+ #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */
+ #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */
+ #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */
++#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */
+
+ #if PAGE_SHIFT == 13
+ #define THREAD_SIZE (2*PAGE_SIZE)
+--- a/arch/sparc/kernel/sun4v_tlb_miss.S
++++ b/arch/sparc/kernel/sun4v_tlb_miss.S
+@@ -195,6 +195,11 @@ sun4v_tsb_miss_common:
+ ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
+
+ sun4v_itlb_error:
++ rdpr %tl, %g1
++ cmp %g1, 1
++ ble,pt %icc, sun4v_bad_ra
++ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
++
+ sethi %hi(sun4v_err_itlb_vaddr), %g1
+ stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
+ sethi %hi(sun4v_err_itlb_ctx), %g1
+@@ -206,15 +211,10 @@ sun4v_itlb_error:
+ sethi %hi(sun4v_err_itlb_error), %g1
+ stx %o0, [%g1 + %lo(sun4v_err_itlb_error)]
+
++ sethi %hi(1f), %g7
+ rdpr %tl, %g4
+- cmp %g4, 1
+- ble,pt %icc, 1f
+- sethi %hi(2f), %g7
+ ba,pt %xcc, etraptl1
+- or %g7, %lo(2f), %g7
+-
+-1: ba,pt %xcc, etrap
+-2: or %g7, %lo(2b), %g7
++1: or %g7, %lo(1f), %g7
+ mov %l4, %o1
+ call sun4v_itlb_error_report
+ add %sp, PTREGS_OFF, %o0
+@@ -222,6 +222,11 @@ sun4v_itlb_error:
+ /* NOTREACHED */
+
+ sun4v_dtlb_error:
++ rdpr %tl, %g1
++ cmp %g1, 1
++ ble,pt %icc, sun4v_bad_ra
++ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
++
+ sethi %hi(sun4v_err_dtlb_vaddr), %g1
+ stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
+ sethi %hi(sun4v_err_dtlb_ctx), %g1
+@@ -233,21 +238,23 @@ sun4v_dtlb_error:
+ sethi %hi(sun4v_err_dtlb_error), %g1
+ stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)]
+
++ sethi %hi(1f), %g7
+ rdpr %tl, %g4
+- cmp %g4, 1
+- ble,pt %icc, 1f
+- sethi %hi(2f), %g7
+ ba,pt %xcc, etraptl1
+- or %g7, %lo(2f), %g7
+-
+-1: ba,pt %xcc, etrap
+-2: or %g7, %lo(2b), %g7
++1: or %g7, %lo(1f), %g7
+ mov %l4, %o1
+ call sun4v_dtlb_error_report
+ add %sp, PTREGS_OFF, %o0
+
+ /* NOTREACHED */
+
++sun4v_bad_ra:
++ or %g0, %g4, %g5
++ ba,pt %xcc, sparc64_realfault_common
++ or %g1, %g0, %g4
++
++ /* NOTREACHED */
++
+ /* Instruction Access Exception, tl0. */
+ sun4v_iacc:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -2102,6 +2102,11 @@ void sun4v_nonresum_overflow(struct pt_r
+ atomic_inc(&sun4v_nonresum_oflow_cnt);
+ }
+
++static void sun4v_tlb_error(struct pt_regs *regs)
++{
++ die_if_kernel("TLB/TSB error", regs);
++}
++
+ unsigned long sun4v_err_itlb_vaddr;
+ unsigned long sun4v_err_itlb_ctx;
+ unsigned long sun4v_err_itlb_pte;
+@@ -2109,8 +2114,7 @@ unsigned long sun4v_err_itlb_error;
+
+ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
+ {
+- if (tl > 1)
+- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
++ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+@@ -2123,7 +2127,7 @@ void sun4v_itlb_error_report(struct pt_r
+ sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
+ sun4v_err_itlb_pte, sun4v_err_itlb_error);
+
+- prom_halt();
++ sun4v_tlb_error(regs);
+ }
+
+ unsigned long sun4v_err_dtlb_vaddr;
+@@ -2133,8 +2137,7 @@ unsigned long sun4v_err_dtlb_error;
+
+ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
+ {
+- if (tl > 1)
+- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
++ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+@@ -2147,7 +2150,7 @@ void sun4v_dtlb_error_report(struct pt_r
+ sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
+ sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
+
+- prom_halt();
++ sun4v_tlb_error(regs);
+ }
+
+ void hypervisor_tlbop_error(unsigned long err, unsigned long op)
+--- a/arch/sparc/mm/fault_64.c
++++ b/arch/sparc/mm/fault_64.c
+@@ -348,6 +348,9 @@ retry:
+ down_read(&mm->mmap_sem);
+ }
+
++ if (fault_code & FAULT_CODE_BAD_RA)
++ goto do_sigbus;
++
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: Allen Pais <allen.pais@oracle.com>
+Date: Mon, 8 Sep 2014 11:48:54 +0530
+Subject: sparc64: support M6 and M7 for building CPU distribution map
+
+From: Allen Pais <allen.pais@oracle.com>
+
+Add the M6 and M7 chip types in cpumap.c so that the CPU distribution map
+spanning all online CPUs is built correctly.
+
+Signed-off-by: Allen Pais <allen.pais@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/cpumap.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/sparc/kernel/cpumap.c
++++ b/arch/sparc/kernel/cpumap.c
+@@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tr
+ case SUN4V_CHIP_NIAGARA3:
+ case SUN4V_CHIP_NIAGARA4:
+ case SUN4V_CHIP_NIAGARA5:
++ case SUN4V_CHIP_SPARC_M6:
++ case SUN4V_CHIP_SPARC_M7:
+ case SUN4V_CHIP_SPARC64X:
+ rover_inc_table = niagara_iterate_method;
+ break;
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 26 Sep 2014 21:19:46 -0700
+Subject: sparc64: Switch to 4-level page tables.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit ac55c768143aa34cc3789c4820cbb0809a76fd9c ]
+
+This has become necessary with chips that support more than 43 bits of
+physical addressing.
+
+Based almost entirely upon a patch by Bob Picco.
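+
+As a back-of-the-envelope check (assuming the default 8K pages, so
+PAGE_SHIFT = 13 and each table level resolves PAGE_SHIFT - 3 = 10 bits):
+
+	PMD_SHIFT   = PAGE_SHIFT + (PAGE_SHIFT - 3)  = 23
+	PUD_SHIFT   = PMD_SHIFT + PMD_BITS           = 23 + 10 = 33
+	PGDIR_SHIFT = PUD_SHIFT + PUD_BITS           = 33 + 10 = 43
+	PGDIR_SHIFT + PGDIR_BITS                     = 43 + 10 = 53
+
+which is why the sanity check in pgtable_64.h below moves from 43 to 53
+covered virtual address bits.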
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/page_64.h | 6 +++++
+ arch/sparc/include/asm/pgalloc_64.h | 28 ++++++++++++++++++++++++++-
+ arch/sparc/include/asm/pgtable_64.h | 37 +++++++++++++++++++++++++++++++-----
+ arch/sparc/include/asm/tsb.h | 10 +++++++++
+ arch/sparc/kernel/smp_64.c | 7 ++++++
+ arch/sparc/mm/init_64.c | 31 ++++++++++++++++++++++++++----
+ 6 files changed, 109 insertions(+), 10 deletions(-)
+
+--- a/arch/sparc/include/asm/page_64.h
++++ b/arch/sparc/include/asm/page_64.h
+@@ -57,18 +57,21 @@ extern void copy_user_page(void *to, voi
+ typedef struct { unsigned long pte; } pte_t;
+ typedef struct { unsigned long iopte; } iopte_t;
+ typedef struct { unsigned long pmd; } pmd_t;
++typedef struct { unsigned long pud; } pud_t;
+ typedef struct { unsigned long pgd; } pgd_t;
+ typedef struct { unsigned long pgprot; } pgprot_t;
+
+ #define pte_val(x) ((x).pte)
+ #define iopte_val(x) ((x).iopte)
+ #define pmd_val(x) ((x).pmd)
++#define pud_val(x) ((x).pud)
+ #define pgd_val(x) ((x).pgd)
+ #define pgprot_val(x) ((x).pgprot)
+
+ #define __pte(x) ((pte_t) { (x) } )
+ #define __iopte(x) ((iopte_t) { (x) } )
+ #define __pmd(x) ((pmd_t) { (x) } )
++#define __pud(x) ((pud_t) { (x) } )
+ #define __pgd(x) ((pgd_t) { (x) } )
+ #define __pgprot(x) ((pgprot_t) { (x) } )
+
+@@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; }
+ typedef unsigned long pte_t;
+ typedef unsigned long iopte_t;
+ typedef unsigned long pmd_t;
++typedef unsigned long pud_t;
+ typedef unsigned long pgd_t;
+ typedef unsigned long pgprot_t;
+
+ #define pte_val(x) (x)
+ #define iopte_val(x) (x)
+ #define pmd_val(x) (x)
++#define pud_val(x) (x)
+ #define pgd_val(x) (x)
+ #define pgprot_val(x) (x)
+
+ #define __pte(x) (x)
+ #define __iopte(x) (x)
+ #define __pmd(x) (x)
++#define __pud(x) (x)
+ #define __pgd(x) (x)
+ #define __pgprot(x) (x)
+
+--- a/arch/sparc/include/asm/pgalloc_64.h
++++ b/arch/sparc/include/asm/pgalloc_64.h
+@@ -15,6 +15,13 @@
+
+ extern struct kmem_cache *pgtable_cache;
+
++static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
++{
++ pgd_set(pgd, pud);
++}
++
++#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD)
++
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+@@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_st
+ kmem_cache_free(pgtable_cache, pgd);
+ }
+
+-#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
++static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
++{
++ pud_set(pud, pmd);
++}
++
++#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD)
++
++static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
++{
++ return kmem_cache_alloc(pgtable_cache,
++ GFP_KERNEL|__GFP_REPEAT);
++}
++
++static inline void pud_free(struct mm_struct *mm, pud_t *pud)
++{
++ kmem_cache_free(pgtable_cache, pud);
++}
+
+ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+@@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct
+ #define __pmd_free_tlb(tlb, pmd, addr) \
+ pgtable_free_tlb(tlb, pmd, false)
+
++#define __pud_free_tlb(tlb, pud, addr) \
++ pgtable_free_tlb(tlb, pud, false)
++
+ #endif /* _SPARC64_PGALLOC_H */
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -20,8 +20,6 @@
+ #include <asm/page.h>
+ #include <asm/processor.h>
+
+-#include <asm-generic/pgtable-nopud.h>
+-
+ /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB).
+ * The page copy blockops can use 0x6000000 to 0x8000000.
+ * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
+@@ -55,13 +53,21 @@
+ #define PMD_MASK (~(PMD_SIZE-1))
+ #define PMD_BITS (PAGE_SHIFT - 3)
+
+-/* PGDIR_SHIFT determines what a third-level page table entry can map */
+-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
++/* PUD_SHIFT determines the size of the area a third-level page
++ * table can map
++ */
++#define PUD_SHIFT (PMD_SHIFT + PMD_BITS)
++#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
++#define PUD_MASK (~(PUD_SIZE-1))
++#define PUD_BITS (PAGE_SHIFT - 3)
++
++/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
++#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS)
+ #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
+ #define PGDIR_MASK (~(PGDIR_SIZE-1))
+ #define PGDIR_BITS (PAGE_SHIFT - 3)
+
+-#if (PGDIR_SHIFT + PGDIR_BITS) != 43
++#if (PGDIR_SHIFT + PGDIR_BITS) != 53
+ #error Page table parameters do not cover virtual address space properly.
+ #endif
+
+@@ -93,6 +99,7 @@ static inline bool kern_addr_valid(unsig
+ /* Entries per page directory level. */
+ #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
+ #define PTRS_PER_PMD (1UL << PMD_BITS)
++#define PTRS_PER_PUD (1UL << PUD_BITS)
+ #define PTRS_PER_PGD (1UL << PGDIR_BITS)
+
+ /* Kernel has a separate 44bit address space. */
+@@ -101,6 +108,9 @@ static inline bool kern_addr_valid(unsig
+ #define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \
+ __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
++#define pud_ERROR(e) \
++ pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \
++ __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0))
+ #define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \
+ __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
+@@ -779,6 +789,11 @@ static inline int pmd_present(pmd_t pmd)
+ #define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \
+ !__kern_addr_valid(pud_val(pud)))
+
++#define pgd_none(pgd) (!pgd_val(pgd))
++
++#define pgd_bad(pgd) ((pgd_val(pgd) & ~PAGE_MASK) || \
++ !__kern_addr_valid(pgd_val(pgd)))
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+@@ -815,10 +830,17 @@ static inline unsigned long __pmd_page(p
+ #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
+ #define pud_present(pud) (pud_val(pud) != 0U)
+ #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
++#define pgd_page_vaddr(pgd) \
++ ((unsigned long) __va(pgd_val(pgd)))
++#define pgd_present(pgd) (pgd_val(pgd) != 0U)
++#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL)
+
+ /* Same in both SUN4V and SUN4U. */
+ #define pte_none(pte) (!pte_val(pte))
+
++#define pgd_set(pgdp, pudp) \
++ (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
++
+ /* to find an entry in a page-table-directory. */
+ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+ #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+@@ -826,6 +848,11 @@ static inline unsigned long __pmd_page(p
+ /* to find an entry in a kernel page-table-directory */
+ #define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
++/* Find an entry in the third-level page table.. */
++#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
++#define pud_offset(pgdp, address) \
++ ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
++
+ /* Find an entry in the second-level page table.. */
+ #define pmd_offset(pudp, address) \
+ ((pmd_t *) pud_page_vaddr(*(pudp)) + \
+--- a/arch/sparc/include/asm/tsb.h
++++ b/arch/sparc/include/asm/tsb.h
+@@ -145,6 +145,11 @@ extern struct tsb_phys_patch_entry __tsb
+ andn REG2, 0x7, REG2; \
+ ldx [REG1 + REG2], REG1; \
+ brz,pn REG1, FAIL_LABEL; \
++ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
++ srlx REG2, 64 - PAGE_SHIFT, REG2; \
++ andn REG2, 0x7, REG2; \
++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
++ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+@@ -198,6 +203,11 @@ extern struct tsb_phys_patch_entry __tsb
+ andn REG2, 0x7, REG2; \
+ ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
++ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
++ srlx REG2, 64 - PAGE_SHIFT, REG2; \
++ andn REG2, 0x7, REG2; \
++ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
++ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -1479,6 +1479,13 @@ static void __init pcpu_populate_pte(uns
+ pud_t *pud;
+ pmd_t *pmd;
+
++ if (pgd_none(*pgd)) {
++ pud_t *new;
++
++ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
++ pgd_populate(&init_mm, pgd, new);
++ }
++
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1384,6 +1384,13 @@ static unsigned long __ref kernel_map_ra
+ pmd_t *pmd;
+ pte_t *pte;
+
++ if (pgd_none(*pgd)) {
++ pud_t *new;
++
++ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
++ alloc_bytes += PAGE_SIZE;
++ pgd_populate(&init_mm, pgd, new);
++ }
+ pud = pud_offset(pgd, vstart);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+@@ -1850,7 +1857,12 @@ static void __init sun4v_linear_pte_xor_
+ /* paging_init() sets up the page tables */
+
+ static unsigned long last_valid_pfn;
+-pgd_t swapper_pg_dir[PTRS_PER_PGD];
++
++/* These must be page aligned in order to not trigger the
++ * alignment tests of pgd_bad() and pud_bad().
++ */
++pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((aligned (PAGE_SIZE)));
++static pud_t swapper_pud_dir[PTRS_PER_PUD] __attribute__ ((aligned (PAGE_SIZE)));
+
+ static void sun4u_pgprot_init(void);
+ static void sun4v_pgprot_init(void);
+@@ -1859,6 +1871,8 @@ void __init paging_init(void)
+ {
+ unsigned long end_pfn, shift, phys_base;
+ unsigned long real_end, i;
++ pud_t *pud;
++ pmd_t *pmd;
+ int node;
+
+ setup_page_offset();
+@@ -1955,9 +1969,18 @@ void __init paging_init(void)
+
+ memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
+
+- /* Now can init the kernel/bad page tables. */
+- pud_set(pud_offset(&swapper_pg_dir[0], 0),
+- swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
++ /* The kernel page tables we publish into what the rest of the
++ * world sees must be adjusted so that they see the PAGE_OFFSET
++	 * address of these in-kernel data structures. However right
++ * here we must access them from the kernel image side, because
++ * the trap tables haven't been taken over and therefore we cannot
++ * take TLB misses in the PAGE_OFFSET linear mappings yet.
++ */
++ pud = swapper_pud_dir + (shift / sizeof(pud_t));
++ pgd_set(&swapper_pg_dir[0], pud);
++
++ pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t));
++ pud_set(&swapper_pud_dir[0], pmd);
+
+ inherit_prom_mappings();
+
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: bob picco <bpicco@meloft.net>
+Date: Tue, 16 Sep 2014 10:09:06 -0400
+Subject: sparc64: T5 PMU
+
+From: bob picco <bpicco@meloft.net>
+
+The T5 (niagara5) has different PCR-related HV fast trap values and a new HV
+API group. This patch utilizes these and shares code with niagara4 where
+possible.
+
+We reuse the same sparc_pmu, niagara4_pmu. Should there be a new effort to
+obtain the MCU performance statistics, this would have to change.
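+
+For context, a minimal sketch of how the new ops end up being used (pcr_ops
+and its hooks are real; the wrapper function here is illustrative only):
+
+	static u64 read_pcr0(void)
+	{
+		/* On T5 this dispatches to n5_pcr_read(), which wraps the
+		 * HV_FAST_T5_GET_PERFREG hypervisor fast trap added below.
+		 */
+		return pcr_ops->read_pcr(0);
+	}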
+
+Cc: sparclinux@vger.kernel.org
+Signed-off-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/hypervisor.h | 11 ++++++++
+ arch/sparc/kernel/hvapi.c | 1
+ arch/sparc/kernel/hvcalls.S | 16 ++++++++++++
+ arch/sparc/kernel/pcr.c | 47 ++++++++++++++++++++++++++++++++----
+ arch/sparc/kernel/perf_event.c | 3 +-
+ 5 files changed, 73 insertions(+), 5 deletions(-)
+
+--- a/arch/sparc/include/asm/hypervisor.h
++++ b/arch/sparc/include/asm/hypervisor.h
+@@ -2944,6 +2944,16 @@ extern unsigned long sun4v_vt_set_perfre
+ unsigned long reg_val);
+ #endif
+
++#define HV_FAST_T5_GET_PERFREG 0x1a8
++#define HV_FAST_T5_SET_PERFREG 0x1a9
++
++#ifndef __ASSEMBLY__
++unsigned long sun4v_t5_get_perfreg(unsigned long reg_num,
++ unsigned long *reg_val);
++unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
++ unsigned long reg_val);
++#endif
++
+ /* Function numbers for HV_CORE_TRAP. */
+ #define HV_CORE_SET_VER 0x00
+ #define HV_CORE_PUTCHAR 0x01
+@@ -2975,6 +2985,7 @@ extern unsigned long sun4v_vt_set_perfre
+ #define HV_GRP_VF_CPU 0x0205
+ #define HV_GRP_KT_CPU 0x0209
+ #define HV_GRP_VT_CPU 0x020c
++#define HV_GRP_T5_CPU 0x0211
+ #define HV_GRP_DIAG 0x0300
+
+ #ifndef __ASSEMBLY__
+--- a/arch/sparc/kernel/hvapi.c
++++ b/arch/sparc/kernel/hvapi.c
+@@ -46,6 +46,7 @@ static struct api_info api_table[] = {
+ { .group = HV_GRP_VF_CPU, },
+ { .group = HV_GRP_KT_CPU, },
+ { .group = HV_GRP_VT_CPU, },
++ { .group = HV_GRP_T5_CPU, },
+ { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+ };
+
+--- a/arch/sparc/kernel/hvcalls.S
++++ b/arch/sparc/kernel/hvcalls.S
+@@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg)
+ retl
+ nop
+ ENDPROC(sun4v_vt_set_perfreg)
++
++ENTRY(sun4v_t5_get_perfreg)
++ mov %o1, %o4
++ mov HV_FAST_T5_GET_PERFREG, %o5
++ ta HV_FAST_TRAP
++ stx %o1, [%o4]
++ retl
++ nop
++ENDPROC(sun4v_t5_get_perfreg)
++
++ENTRY(sun4v_t5_set_perfreg)
++ mov HV_FAST_T5_SET_PERFREG, %o5
++ ta HV_FAST_TRAP
++ retl
++ nop
++ENDPROC(sun4v_t5_set_perfreg)
+--- a/arch/sparc/kernel/pcr.c
++++ b/arch/sparc/kernel/pcr.c
+@@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops =
+ .pcr_nmi_disable = PCR_N4_PICNPT,
+ };
+
++static u64 n5_pcr_read(unsigned long reg_num)
++{
++ unsigned long val;
++
++ (void) sun4v_t5_get_perfreg(reg_num, &val);
++
++ return val;
++}
++
++static void n5_pcr_write(unsigned long reg_num, u64 val)
++{
++ (void) sun4v_t5_set_perfreg(reg_num, val);
++}
++
++static const struct pcr_ops n5_pcr_ops = {
++ .read_pcr = n5_pcr_read,
++ .write_pcr = n5_pcr_write,
++ .read_pic = n4_pic_read,
++ .write_pic = n4_pic_write,
++ .nmi_picl_value = n4_picl_value,
++ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
++ PCR_N4_UTRACE | PCR_N4_TOE |
++ (26 << PCR_N4_SL_SHIFT)),
++ .pcr_nmi_disable = PCR_N4_PICNPT,
++};
++
++
+ static unsigned long perf_hsvc_group;
+ static unsigned long perf_hsvc_major;
+ static unsigned long perf_hsvc_minor;
+
+ static int __init register_perf_hsvc(void)
+ {
++ unsigned long hverror;
++
+ if (tlb_type == hypervisor) {
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+@@ -215,6 +244,10 @@ static int __init register_perf_hsvc(voi
+ perf_hsvc_group = HV_GRP_VT_CPU;
+ break;
+
++ case SUN4V_CHIP_NIAGARA5:
++ perf_hsvc_group = HV_GRP_T5_CPU;
++ break;
++
+ default:
+ return -ENODEV;
+ }
+@@ -222,10 +255,12 @@ static int __init register_perf_hsvc(voi
+
+ perf_hsvc_major = 1;
+ perf_hsvc_minor = 0;
+- if (sun4v_hvapi_register(perf_hsvc_group,
+- perf_hsvc_major,
+- &perf_hsvc_minor)) {
+- printk("perfmon: Could not register hvapi.\n");
++ hverror = sun4v_hvapi_register(perf_hsvc_group,
++ perf_hsvc_major,
++ &perf_hsvc_minor);
++ if (hverror) {
++ pr_err("perfmon: Could not register hvapi(0x%lx).\n",
++ hverror);
+ return -ENODEV;
+ }
+ }
+@@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(vo
+ pcr_ops = &n4_pcr_ops;
+ break;
+
++ case SUN4V_CHIP_NIAGARA5:
++ pcr_ops = &n5_pcr_ops;
++ break;
++
+ default:
+ ret = -ENODEV;
+ break;
+--- a/arch/sparc/kernel/perf_event.c
++++ b/arch/sparc/kernel/perf_event.c
+@@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void)
+ sparc_pmu = &niagara2_pmu;
+ return true;
+ }
+- if (!strcmp(sparc_pmu_type, "niagara4")) {
++ if (!strcmp(sparc_pmu_type, "niagara4") ||
++ !strcmp(sparc_pmu_type, "niagara5")) {
+ sparc_pmu = &niagara4_pmu;
+ return true;
+ }
--- /dev/null
+From foo@baz Tue Oct 28 11:13:19 CST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 24 Sep 2014 21:20:14 -0700
+Subject: sparc64: Use kernel page tables for vmemmap.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit c06240c7f5c39c83dfd7849c0770775562441b96 ]
+
+For sparse memory configurations, the vmemmap array behaves terribly, and it
+unconditionally takes up an inordinate amount of space in the BSS section of
+the kernel image.
+
+Just build huge PMDs and look them up just like we do for TLB misses
+in the vmalloc area.
+
+Kernel BSS shrinks by about 2MB.
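+
+(The 2MB figure is consistent with, for example, MAX_PHYSADDR_BITS = 47,
+PAGE_SHIFT = 13 and a 64-byte struct page:
+
+	((1UL << 47) >> 13) pages * 64 bytes  = 2^40 bytes of struct pages
+	2^40 >> VMEMMAP_CHUNK_SHIFT (22)      = 2^18 table entries
+	2^18 entries * 8 bytes                = 2MB of vmemmap_table
+
+the exact figure depends on the tree's MAX_PHYSADDR_BITS.)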
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/ktlb.S | 9 +----
+ arch/sparc/mm/init_64.c | 72 ++++++++++++++++++++++-------------------------
+ arch/sparc/mm/init_64.h | 11 -------
+ 3 files changed, 36 insertions(+), 56 deletions(-)
+
+--- a/arch/sparc/kernel/ktlb.S
++++ b/arch/sparc/kernel/ktlb.S
+@@ -186,13 +186,8 @@ kvmap_dtlb_load:
+
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ kvmap_vmemmap:
+- sub %g4, %g5, %g5
+- srlx %g5, ILOG2_4MB, %g5
+- sethi %hi(vmemmap_table), %g1
+- sllx %g5, 3, %g5
+- or %g1, %lo(vmemmap_table), %g1
+- ba,pt %xcc, kvmap_dtlb_load
+- ldx [%g1 + %g5], %g5
++ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
++ ba,a,pt %xcc, kvmap_dtlb_load
+ #endif
+
+ kvmap_dtlb_nonlinear:
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -2255,18 +2255,9 @@ unsigned long _PAGE_CACHE __read_mostly;
+ EXPORT_SYMBOL(_PAGE_CACHE);
+
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+-unsigned long vmemmap_table[VMEMMAP_SIZE];
+-
+-static long __meminitdata addr_start, addr_end;
+-static int __meminitdata node_start;
+-
+ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
+ int node)
+ {
+- unsigned long phys_start = (vstart - VMEMMAP_BASE);
+- unsigned long phys_end = (vend - VMEMMAP_BASE);
+- unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
+- unsigned long end = VMEMMAP_ALIGN(phys_end);
+ unsigned long pte_base;
+
+ pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+@@ -2277,47 +2268,52 @@ int __meminit vmemmap_populate(unsigned
+ _PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+
+- for (; addr < end; addr += VMEMMAP_CHUNK) {
+- unsigned long *vmem_pp =
+- vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
+- void *block;
++ pte_base |= _PAGE_PMD_HUGE;
+
+- if (!(*vmem_pp & _PAGE_VALID)) {
+- block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
+- if (!block)
++ vstart = vstart & PMD_MASK;
++ vend = ALIGN(vend, PMD_SIZE);
++ for (; vstart < vend; vstart += PMD_SIZE) {
++ pgd_t *pgd = pgd_offset_k(vstart);
++ unsigned long pte;
++ pud_t *pud;
++ pmd_t *pmd;
++
++ if (pgd_none(*pgd)) {
++ pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
++
++ if (!new)
+ return -ENOMEM;
++ pgd_populate(&init_mm, pgd, new);
++ }
+
+- *vmem_pp = pte_base | __pa(block);
++ pud = pud_offset(pgd, vstart);
++ if (pud_none(*pud)) {
++ pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+
+- /* check to see if we have contiguous blocks */
+- if (addr_end != addr || node_start != node) {
+- if (addr_start)
+- printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
+- addr_start, addr_end-1, node_start);
+- addr_start = addr;
+- node_start = node;
+- }
+- addr_end = addr + VMEMMAP_CHUNK;
++ if (!new)
++ return -ENOMEM;
++ pud_populate(&init_mm, pud, new);
+ }
+- }
+- return 0;
+-}
+
+-void __meminit vmemmap_populate_print_last(void)
+-{
+- if (addr_start) {
+- printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
+- addr_start, addr_end-1, node_start);
+- addr_start = 0;
+- addr_end = 0;
+- node_start = 0;
++ pmd = pmd_offset(pud, vstart);
++
++ pte = pmd_val(*pmd);
++ if (!(pte & _PAGE_VALID)) {
++ void *block = vmemmap_alloc_block(PMD_SIZE, node);
++
++ if (!block)
++ return -ENOMEM;
++
++ pmd_val(*pmd) = pte_base | __pa(block);
++ }
+ }
++
++ return 0;
+ }
+
+ void vmemmap_free(unsigned long start, unsigned long end)
+ {
+ }
+-
+ #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+ static void prot_init_common(unsigned long page_none,
+--- a/arch/sparc/mm/init_64.h
++++ b/arch/sparc/mm/init_64.h
+@@ -31,15 +31,4 @@ extern unsigned long kern_locked_tte_dat
+
+ extern void prom_world(int enter);
+
+-#ifdef CONFIG_SPARSEMEM_VMEMMAP
+-#define VMEMMAP_CHUNK_SHIFT 22
+-#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT)
+-#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL)
+-#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
+-
+-#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
+- sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
+-extern unsigned long vmemmap_table[VMEMMAP_SIZE];
+-#endif
+-
+ #endif /* _SPARC64_MM_INIT_H */