--- /dev/null
+From 84d414c702d79553f420aa9f342bc71ba3f37b8e Mon Sep 17 00:00:00 2001
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 19 Apr 2013 17:26:26 -0400
+Subject: sparc64: Fix race in TLB batch processing.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Commits f36391d2790d04993f48da6a45810033a2cdf847 and
+ f0af97070acbad5d6a361f485828223a4faaa0ee upstream. ]
+
+As reported by Dave Kleikamp, when we emit cross calls to do batched
+TLB flush processing we have a race because we do not synchronize on
+the sibling cpus completing the cross call.
+
+Meanwhile, the TLB batch can be reset (tb->tlb_nr set to zero, etc.),
+so flushes are either missed or performed on the wrong addresses.
+
+Fix this by using generic infrastructure to synchronize on the
+completion of the cross call.
+
+This first required getting the flush_tlb_pending() call out of
+switch_to(), which runs with locks held and interrupts disabled.
+The problem is that smp_call_function_many() cannot be invoked with
+IRQs disabled, and this is explicitly checked for with WARN_ON_ONCE().
+
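+Concretely (condensed from the smp_64.c change below), the batch is
+now handed to the sibling cpus via the generic helper with wait=1, so
+the caller blocks until every sibling cpu has run the flush:
+
+    struct tlb_pending_info { unsigned long ctx, nr, *vaddrs; };
+
+    static void tlb_pending_func(void *info)
+    {
+            struct tlb_pending_info *t = info;
+
+            __flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
+    }
+
+    /* in smp_flush_tlb_pending(), replacing smp_cross_call_masked(): */
+    smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+                           &info, 1 /* wait for completion */);
+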
+We get the batch processing outside of locked, IRQ-disabled sections
+by using some ideas from the powerpc port. Namely, we only batch
+inside of arch_{enter,leave}_lazy_mmu_mode() calls. If we're not in
+such a region, we flush TLBs synchronously.
+
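+Condensed from the tlb.c change below, the lazy-mmu gating looks
+roughly like this:
+
+    void arch_enter_lazy_mmu_mode(void)
+    {
+            struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
+
+            tb->active = 1;
+    }
+
+    void arch_leave_lazy_mmu_mode(void)
+    {
+            struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
+
+            if (tb->tlb_nr)
+                    flush_tlb_pending();
+            tb->active = 0;
+    }
+
+    /* tlb_batch_add_one() falls back to a synchronous flush: */
+    if (!tb->active) {
+            global_flush_tlb_page(mm, vaddr);
+            flush_tsb_user_page(mm, vaddr);
+            goto out;
+    }
+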
+1) Get rid of xcall_flush_tlb_pending and the per-CPU-type
+ implementations.
+
+2) Do TLB batch cross calls instead via:
+
+ smp_call_function_many()
+ tlb_pending_func()
+ __flush_tlb_pending()
+
+3) Batch only in lazy mmu sequences:
+
+ a) Add 'active' member to struct tlb_batch
+ b) Define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+ c) Set 'active' in arch_enter_lazy_mmu_mode()
+ d) Run batch and clear 'active' in arch_leave_lazy_mmu_mode()
+ e) Check 'active' in tlb_batch_add_one() and do a synchronous
+ flush if it's clear.
+
+4) Add infrastructure for synchronous TLB page flushes.
+
+ a) Implement __flush_tlb_page and per-CPU-type variants, patched
+ in as needed.
+ b) Likewise for xcall_flush_tlb_page.
+ c) Implement smp_flush_tlb_page() to invoke the cross-call.
+ d) Wire up global_flush_tlb_page() to the right routine based
+ upon CONFIG_SMP (see the first sketch after this list).
+
+5) It turns out that singleton batches are very common: 2 out of
+ every 3 batch flushes have only a single entry in them.
+
+ The batch flush waiting is very expensive, both because of the
+ poll on sibling cpu completion and because passing the tlb batch
+ pointer to the sibling cpus involves a shared memory dereference.
+
+ Therefore, in flush_tlb_pending(), if there is only one entry in
+ the batch, perform a completely asynchronous global_flush_tlb_page()
+ instead (see the second sketch after this list).
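+
+For 4d), the wiring is (condensed from the tlbflush_64.h change
+below):
+
+    #ifndef CONFIG_SMP
+    static inline void global_flush_tlb_page(struct mm_struct *mm,
+                                             unsigned long vaddr)
+    {
+            __flush_tlb_page(CTX_HWBITS(mm->context), vaddr);
+    }
+    #else
+    #define global_flush_tlb_page(mm, vaddr) \
+            smp_flush_tlb_page(mm, vaddr)
+    #endif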
+
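+The singleton fast path in 5) then reduces to (condensed from the
+flush_tlb_pending() change below):
+
+    if (CTX_VALID(mm->context)) {
+            if (tb->tlb_nr == 1) {
+                    /* no wait, no shared batch pointer */
+                    global_flush_tlb_page(mm, tb->vaddrs[0]);
+            } else {
+                    /* multi-entry case (CONFIG_SMP): synchronous cross call */
+                    smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
+                                          &tb->vaddrs[0]);
+            }
+    }
+    tb->tlb_nr = 0;
+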
+Reported-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/pgtable_64.h | 1
+ arch/sparc/include/asm/switch_to_64.h | 3
+ arch/sparc/include/asm/tlbflush_64.h | 37 ++++++++--
+ arch/sparc/kernel/smp_64.c | 41 ++++++++++-
+ arch/sparc/mm/tlb.c | 39 ++++++++++-
+ arch/sparc/mm/tsb.c | 57 ++++++++++++----
+ arch/sparc/mm/ultra.S | 119 +++++++++++++++++++++++++++-------
+ 7 files changed, 242 insertions(+), 55 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -915,6 +915,7 @@ static inline int io_remap_pfn_range(str
+ return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+ }
+
++#include <asm/tlbflush.h>
+ #include <asm-generic/pgtable.h>
+
+ /* We provide our own get_unmapped_area to cope with VA holes and
+--- a/arch/sparc/include/asm/switch_to_64.h
++++ b/arch/sparc/include/asm/switch_to_64.h
+@@ -18,8 +18,7 @@ do { \
+ * and 2 stores in this critical code path. -DaveM
+ */
+ #define switch_to(prev, next, last) \
+-do { flush_tlb_pending(); \
+- save_and_clear_fpu(); \
++do { save_and_clear_fpu(); \
+ /* If you are tempted to conditionalize the following */ \
+ /* so that ASI is only written if it changes, think again. */ \
+ __asm__ __volatile__("wr %%g0, %0, %%asi" \
+--- a/arch/sparc/include/asm/tlbflush_64.h
++++ b/arch/sparc/include/asm/tlbflush_64.h
+@@ -11,24 +11,40 @@
+ struct tlb_batch {
+ struct mm_struct *mm;
+ unsigned long tlb_nr;
++ unsigned long active;
+ unsigned long vaddrs[TLB_BATCH_NR];
+ };
+
+ extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+ extern void flush_tsb_user(struct tlb_batch *tb);
++extern void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr);
+
+ /* TLB flush operations. */
+
+-extern void flush_tlb_pending(void);
++static inline void flush_tlb_mm(struct mm_struct *mm)
++{
++}
++
++static inline void flush_tlb_page(struct vm_area_struct *vma,
++ unsigned long vmaddr)
++{
++}
++
++static inline void flush_tlb_range(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end)
++{
++}
++
++#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+-#define flush_tlb_range(vma,start,end) \
+- do { (void)(start); flush_tlb_pending(); } while (0)
+-#define flush_tlb_page(vma,addr) flush_tlb_pending()
+-#define flush_tlb_mm(mm) flush_tlb_pending()
++extern void flush_tlb_pending(void);
++extern void arch_enter_lazy_mmu_mode(void);
++extern void arch_leave_lazy_mmu_mode(void);
++#define arch_flush_lazy_mmu_mode() do {} while (0)
+
+ /* Local cpu only. */
+ extern void __flush_tlb_all(void);
+-
++extern void __flush_tlb_page(unsigned long context, unsigned long vaddr);
+ extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+ #ifndef CONFIG_SMP
+@@ -38,15 +54,24 @@ do { flush_tsb_kernel_range(start,end);
+ __flush_tlb_kernel_range(start,end); \
+ } while (0)
+
++static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
++{
++ __flush_tlb_page(CTX_HWBITS(mm->context), vaddr);
++}
++
+ #else /* CONFIG_SMP */
+
+ extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
++extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr);
+
+ #define flush_tlb_kernel_range(start, end) \
+ do { flush_tsb_kernel_range(start,end); \
+ smp_flush_tlb_kernel_range(start, end); \
+ } while (0)
+
++#define global_flush_tlb_page(mm, vaddr) \
++ smp_flush_tlb_page(mm, vaddr)
++
+ #endif /* ! CONFIG_SMP */
+
+ #endif /* _SPARC64_TLBFLUSH_H */
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -849,7 +849,7 @@ void smp_tsb_sync(struct mm_struct *mm)
+ }
+
+ extern unsigned long xcall_flush_tlb_mm;
+-extern unsigned long xcall_flush_tlb_pending;
++extern unsigned long xcall_flush_tlb_page;
+ extern unsigned long xcall_flush_tlb_kernel_range;
+ extern unsigned long xcall_fetch_glob_regs;
+ extern unsigned long xcall_fetch_glob_pmu;
+@@ -1074,22 +1074,55 @@ local_flush_and_out:
+ put_cpu();
+ }
+
++struct tlb_pending_info {
++ unsigned long ctx;
++ unsigned long nr;
++ unsigned long *vaddrs;
++};
++
++static void tlb_pending_func(void *info)
++{
++ struct tlb_pending_info *t = info;
++
++ __flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
++}
++
+ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
+ {
+ u32 ctx = CTX_HWBITS(mm->context);
++ struct tlb_pending_info info;
+ int cpu = get_cpu();
+
++ info.ctx = ctx;
++ info.nr = nr;
++ info.vaddrs = vaddrs;
++
+ if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
+ cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
+ else
+- smp_cross_call_masked(&xcall_flush_tlb_pending,
+- ctx, nr, (unsigned long) vaddrs,
+- mm_cpumask(mm));
++ smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
++ &info, 1);
+
+ __flush_tlb_pending(ctx, nr, vaddrs);
+
+ put_cpu();
+ }
++
++void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
++{
++ unsigned long context = CTX_HWBITS(mm->context);
++ int cpu = get_cpu();
++
++ if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
++ cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
++ else
++ smp_cross_call_masked(&xcall_flush_tlb_page,
++ context, vaddr, 0,
++ mm_cpumask(mm));
++ __flush_tlb_page(context, vaddr);
++
++ put_cpu();
++}
+
+ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+--- a/arch/sparc/mm/tlb.c
++++ b/arch/sparc/mm/tlb.c
+@@ -24,11 +24,17 @@ static DEFINE_PER_CPU(struct tlb_batch,
+ void flush_tlb_pending(void)
+ {
+ struct tlb_batch *tb = &get_cpu_var(tlb_batch);
++ struct mm_struct *mm = tb->mm;
+
+- if (tb->tlb_nr) {
+- flush_tsb_user(tb);
++ if (!tb->tlb_nr)
++ goto out;
+
+- if (CTX_VALID(tb->mm->context)) {
++ flush_tsb_user(tb);
++
++ if (CTX_VALID(mm->context)) {
++ if (tb->tlb_nr == 1) {
++ global_flush_tlb_page(mm, tb->vaddrs[0]);
++ } else {
+ #ifdef CONFIG_SMP
+ smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
+ &tb->vaddrs[0]);
+@@ -37,12 +43,30 @@ void flush_tlb_pending(void)
+ tb->tlb_nr, &tb->vaddrs[0]);
+ #endif
+ }
+- tb->tlb_nr = 0;
+ }
+
++ tb->tlb_nr = 0;
++
++out:
+ put_cpu_var(tlb_batch);
+ }
+
++void arch_enter_lazy_mmu_mode(void)
++{
++ struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
++
++ tb->active = 1;
++}
++
++void arch_leave_lazy_mmu_mode(void)
++{
++ struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
++
++ if (tb->tlb_nr)
++ flush_tlb_pending();
++ tb->active = 0;
++}
++
+ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+ bool exec)
+ {
+@@ -60,6 +84,12 @@ static void tlb_batch_add_one(struct mm_
+ nr = 0;
+ }
+
++ if (!tb->active) {
++ global_flush_tlb_page(mm, vaddr);
++ flush_tsb_user_page(mm, vaddr);
++ goto out;
++ }
++
+ if (nr == 0)
+ tb->mm = mm;
+
+@@ -68,6 +98,7 @@ static void tlb_batch_add_one(struct mm_
+ if (nr >= TLB_BATCH_NR)
+ flush_tlb_pending();
+
++out:
+ put_cpu_var(tlb_batch);
+ }
+
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -7,11 +7,10 @@
+ #include <linux/preempt.h>
+ #include <linux/slab.h>
+ #include <asm/page.h>
+-#include <asm/tlbflush.h>
+-#include <asm/tlb.h>
+-#include <asm/mmu_context.h>
+ #include <asm/pgtable.h>
++#include <asm/mmu_context.h>
+ #include <asm/tsb.h>
++#include <asm/tlb.h>
+ #include <asm/oplib.h>
+
+ extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+@@ -46,23 +45,27 @@ void flush_tsb_kernel_range(unsigned lon
+ }
+ }
+
+-static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+- unsigned long tsb, unsigned long nentries)
++static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
++ unsigned long hash_shift,
++ unsigned long nentries)
+ {
+- unsigned long i;
++ unsigned long tag, ent, hash;
+
+- for (i = 0; i < tb->tlb_nr; i++) {
+- unsigned long v = tb->vaddrs[i];
+- unsigned long tag, ent, hash;
++ v &= ~0x1UL;
++ hash = tsb_hash(v, hash_shift, nentries);
++ ent = tsb + (hash * sizeof(struct tsb));
++ tag = (v >> 22UL);
+
+- v &= ~0x1UL;
++ tsb_flush(ent, tag);
++}
+
+- hash = tsb_hash(v, hash_shift, nentries);
+- ent = tsb + (hash * sizeof(struct tsb));
+- tag = (v >> 22UL);
++static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
++ unsigned long tsb, unsigned long nentries)
++{
++ unsigned long i;
+
+- tsb_flush(ent, tag);
+- }
++ for (i = 0; i < tb->tlb_nr; i++)
++ __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
+ }
+
+ void flush_tsb_user(struct tlb_batch *tb)
+@@ -88,6 +91,30 @@ void flush_tsb_user(struct tlb_batch *tb
+ }
+ #endif
+ spin_unlock_irqrestore(&mm->context.lock, flags);
++}
++
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
++{
++ unsigned long nentries, base, flags;
++
++ spin_lock_irqsave(&mm->context.lock, flags);
++
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
++
++#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
++ if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries);
++ }
++#endif
++ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+
+ #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
+--- a/arch/sparc/mm/ultra.S
++++ b/arch/sparc/mm/ultra.S
+@@ -53,6 +53,33 @@ __flush_tlb_mm: /* 18 insns */
+ nop
+
+ .align 32
++ .globl __flush_tlb_page
++__flush_tlb_page: /* 22 insns */
++ /* %o0 = context, %o1 = vaddr */
++ rdpr %pstate, %g7
++ andn %g7, PSTATE_IE, %g2
++ wrpr %g2, %pstate
++ mov SECONDARY_CONTEXT, %o4
++ ldxa [%o4] ASI_DMMU, %g2
++ stxa %o0, [%o4] ASI_DMMU
++ andcc %o1, 1, %g0
++ andn %o1, 1, %o3
++ be,pn %icc, 1f
++ or %o3, 0x10, %o3
++ stxa %g0, [%o3] ASI_IMMU_DEMAP
++1: stxa %g0, [%o3] ASI_DMMU_DEMAP
++ membar #Sync
++ stxa %g2, [%o4] ASI_DMMU
++ sethi %hi(KERNBASE), %o4
++ flush %o4
++ retl
++ wrpr %g7, 0x0, %pstate
++ nop
++ nop
++ nop
++ nop
++
++ .align 32
+ .globl __flush_tlb_pending
+ __flush_tlb_pending: /* 26 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+@@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */
+ retl
+ wrpr %g7, 0x0, %pstate
+
++__cheetah_flush_tlb_page: /* 22 insns */
++ /* %o0 = context, %o1 = vaddr */
++ rdpr %pstate, %g7
++ andn %g7, PSTATE_IE, %g2
++ wrpr %g2, 0x0, %pstate
++ wrpr %g0, 1, %tl
++ mov PRIMARY_CONTEXT, %o4
++ ldxa [%o4] ASI_DMMU, %g2
++ srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3
++ sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3
++ or %o0, %o3, %o0 /* Preserve nucleus page size fields */
++ stxa %o0, [%o4] ASI_DMMU
++ andcc %o1, 1, %g0
++ be,pn %icc, 1f
++ andn %o1, 1, %o3
++ stxa %g0, [%o3] ASI_IMMU_DEMAP
++1: stxa %g0, [%o3] ASI_DMMU_DEMAP
++ membar #Sync
++ stxa %g2, [%o4] ASI_DMMU
++ sethi %hi(KERNBASE), %o4
++ flush %o4
++ wrpr %g0, 0, %tl
++ retl
++ wrpr %g7, 0x0, %pstate
++
+ __cheetah_flush_tlb_pending: /* 27 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g7
+@@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns *
+ retl
+ nop
+
++__hypervisor_flush_tlb_page: /* 11 insns */
++ /* %o0 = context, %o1 = vaddr */
++ mov %o0, %g2
++ mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
++ mov %g2, %o1 /* ARG1: mmu context */
++ mov HV_MMU_ALL, %o2 /* ARG2: flags */
++ srlx %o0, PAGE_SHIFT, %o0
++ sllx %o0, PAGE_SHIFT, %o0
++ ta HV_MMU_UNMAP_ADDR_TRAP
++ brnz,pn %o0, __hypervisor_tlb_tl0_error
++ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
++ retl
++ nop
++
+ __hypervisor_flush_tlb_pending: /* 16 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ sllx %o1, 3, %g1
+@@ -339,6 +405,13 @@ cheetah_patch_cachetlbops:
+ call tlb_patch_one
+ mov 19, %o2
+
++ sethi %hi(__flush_tlb_page), %o0
++ or %o0, %lo(__flush_tlb_page), %o0
++ sethi %hi(__cheetah_flush_tlb_page), %o1
++ or %o1, %lo(__cheetah_flush_tlb_page), %o1
++ call tlb_patch_one
++ mov 22, %o2
++
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__cheetah_flush_tlb_pending), %o1
+@@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */
+ nop
+ nop
+
+- .globl xcall_flush_tlb_pending
+-xcall_flush_tlb_pending: /* 21 insns */
+- /* %g5=context, %g1=nr, %g7=vaddrs[] */
+- sllx %g1, 3, %g1
++ .globl xcall_flush_tlb_page
++xcall_flush_tlb_page: /* 17 insns */
++ /* %g5=context, %g1=vaddr */
+ mov PRIMARY_CONTEXT, %g4
+ ldxa [%g4] ASI_DMMU, %g2
+ srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4
+@@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */
+ or %g5, %g4, %g5
+ mov PRIMARY_CONTEXT, %g4
+ stxa %g5, [%g4] ASI_DMMU
+-1: sub %g1, (1 << 3), %g1
+- ldx [%g7 + %g1], %g5
+- andcc %g5, 0x1, %g0
++ andcc %g1, 0x1, %g0
+ be,pn %icc, 2f
+-
+- andn %g5, 0x1, %g5
++ andn %g1, 0x1, %g5
+ stxa %g0, [%g5] ASI_IMMU_DEMAP
+ 2: stxa %g0, [%g5] ASI_DMMU_DEMAP
+ membar #Sync
+- brnz,pt %g1, 1b
+- nop
+ stxa %g2, [%g4] ASI_DMMU
+ retry
+ nop
++ nop
+
+ .globl xcall_flush_tlb_kernel_range
+ xcall_flush_tlb_kernel_range: /* 25 insns */
+@@ -656,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 i
+ membar #Sync
+ retry
+
+- .globl __hypervisor_xcall_flush_tlb_pending
+-__hypervisor_xcall_flush_tlb_pending: /* 21 insns */
+- /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */
+- sllx %g1, 3, %g1
++ .globl __hypervisor_xcall_flush_tlb_page
++__hypervisor_xcall_flush_tlb_page: /* 17 insns */
++ /* %g5=ctx, %g1=vaddr */
+ mov %o0, %g2
+ mov %o1, %g3
+ mov %o2, %g4
+-1: sub %g1, (1 << 3), %g1
+- ldx [%g7 + %g1], %o0 /* ARG0: virtual address */
++ mov %g1, %o0 /* ARG0: virtual address */
+ mov %g5, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ srlx %o0, PAGE_SHIFT, %o0
+@@ -673,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /*
+ mov HV_MMU_UNMAP_ADDR_TRAP, %g6
+ brnz,a,pn %o0, __hypervisor_tlb_xcall_error
+ mov %o0, %g5
+- brnz,pt %g1, 1b
+- nop
+ mov %g2, %o0
+ mov %g3, %o1
+ mov %g4, %o2
+@@ -757,6 +821,13 @@ hypervisor_patch_cachetlbops:
+ call tlb_patch_one
+ mov 10, %o2
+
++ sethi %hi(__flush_tlb_page), %o0
++ or %o0, %lo(__flush_tlb_page), %o0
++ sethi %hi(__hypervisor_flush_tlb_page), %o1
++ or %o1, %lo(__hypervisor_flush_tlb_page), %o1
++ call tlb_patch_one
++ mov 11, %o2
++
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__hypervisor_flush_tlb_pending), %o1
+@@ -788,12 +859,12 @@ hypervisor_patch_cachetlbops:
+ call tlb_patch_one
+ mov 21, %o2
+
+- sethi %hi(xcall_flush_tlb_pending), %o0
+- or %o0, %lo(xcall_flush_tlb_pending), %o0
+- sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1
+- or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1
++ sethi %hi(xcall_flush_tlb_page), %o0
++ or %o0, %lo(xcall_flush_tlb_page), %o0
++ sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
++ or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
+ call tlb_patch_one
+- mov 21, %o2
++ mov 17, %o2
+
+ sethi %hi(xcall_flush_tlb_kernel_range), %o0
+ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0