android_kernel_xiaomi_sm8350/arch/sparc64/mm/ultra.S
David S. Miller b445e26cbf [SPARC64]: Avoid membar instructions in delay slots.
In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51

The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-27 15:42:04 -07:00

585 lines
14 KiB
ArmAsm

/* $Id: ultra.S,v 1.72 2002/02/09 19:49:31 davem Exp $
* ultra.S: Don't expand these all over the place...
*
* Copyright (C) 1997, 2000 David S. Miller (davem@redhat.com)
*/
#include <linux/config.h>
#include <asm/asi.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/spitfire.h>
#include <asm/mmu_context.h>
#include <asm/pil.h>
#include <asm/head.h>
#include <asm/thread_info.h>
#include <asm/cacheflush.h>
/* Basically, most of the Spitfire vs. Cheetah madness
* has to do with the fact that Cheetah does not support
* IMMU flushes out of the secondary context. Someone needs
* to throw a south lake birthday party for the folks
* in Microelectronics who refused to fix this shit.
*/
/* This file is meant to be read efficiently by the CPU, not humans.
* Staraj sie tego nikomu nie pierdolnac...
*/
.text
.align 32
.globl __flush_tlb_mm
__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
bne,pn %icc, __spitfire_flush_tlb_mm_slow
mov 0x50, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
retl
flush %g6
nop
nop
nop
nop
nop
nop
nop
nop
.align 32
.globl __flush_tlb_pending
__flush_tlb_pending:
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
andn %g7, PSTATE_IE, %g2
wrpr %g2, %pstate
mov SECONDARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
andn %o3, 1, %o3
be,pn %icc, 2f
or %o3, 0x10, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
flush %g6
retl
wrpr %g7, 0x0, %pstate
.align 32
.globl __flush_tlb_kernel_range
__flush_tlb_kernel_range: /* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sethi %hi(PAGE_SIZE), %o4
sub %o1, %o0, %o3
sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %o3, 1b
sub %o3, %o4, %o3
2: retl
flush %g6
__spitfire_flush_tlb_mm_slow:
rdpr %pstate, %g1
wrpr %g1, PSTATE_IE, %pstate
stxa %o0, [%o1] ASI_DMMU
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
flush %g6
stxa %g2, [%o1] ASI_DMMU
flush %g6
retl
wrpr %g1, 0, %pstate
/*
* The following code flushes one page_size worth.
*/
#if (PAGE_SHIFT == 13)
#define ITAG_MASK 0xfe
#elif (PAGE_SHIFT == 16)
#define ITAG_MASK 0x7fe
#else
#error unsupported PAGE_SIZE
#endif
.align 32
.globl __flush_icache_page
__flush_icache_page: /* %o0 = phys_page */
membar #StoreStore
srlx %o0, PAGE_SHIFT, %o0
sethi %uhi(PAGE_OFFSET), %g1
sllx %o0, PAGE_SHIFT, %o0
sethi %hi(PAGE_SIZE), %g2
sllx %g1, 32, %g1
add %o0, %g1, %o0
1: subcc %g2, 32, %g2
bne,pt %icc, 1b
flush %o0 + %g2
retl
nop
#ifdef DCACHE_ALIASING_POSSIBLE
#if (PAGE_SHIFT != 13)
#error only page shift of 13 is supported by dcache flush
#endif
#define DTAG_MASK 0x3
.align 64
.globl __flush_dcache_page
__flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
sethi %uhi(PAGE_OFFSET), %g1
sllx %g1, 32, %g1
sub %o0, %g1, %o0
clr %o4
srlx %o0, 11, %o0
sethi %hi(1 << 14), %o2
1: ldxa [%o4] ASI_DCACHE_TAG, %o3 ! LSU Group
add %o4, (1 << 5), %o4 ! IEU0
ldxa [%o4] ASI_DCACHE_TAG, %g1 ! LSU Group
add %o4, (1 << 5), %o4 ! IEU0
ldxa [%o4] ASI_DCACHE_TAG, %g2 ! LSU Group o3 available
add %o4, (1 << 5), %o4 ! IEU0
andn %o3, DTAG_MASK, %o3 ! IEU1
ldxa [%o4] ASI_DCACHE_TAG, %g3 ! LSU Group
add %o4, (1 << 5), %o4 ! IEU0
andn %g1, DTAG_MASK, %g1 ! IEU1
cmp %o0, %o3 ! IEU1 Group
be,a,pn %xcc, dflush1 ! CTI
sub %o4, (4 << 5), %o4 ! IEU0 (Group)
cmp %o0, %g1 ! IEU1 Group
andn %g2, DTAG_MASK, %g2 ! IEU0
be,a,pn %xcc, dflush2 ! CTI
sub %o4, (3 << 5), %o4 ! IEU0 (Group)
cmp %o0, %g2 ! IEU1 Group
andn %g3, DTAG_MASK, %g3 ! IEU0
be,a,pn %xcc, dflush3 ! CTI
sub %o4, (2 << 5), %o4 ! IEU0 (Group)
cmp %o0, %g3 ! IEU1 Group
be,a,pn %xcc, dflush4 ! CTI
sub %o4, (1 << 5), %o4 ! IEU0
2: cmp %o4, %o2 ! IEU1 Group
bne,pt %xcc, 1b ! CTI
nop ! IEU0
/* The I-cache does not snoop local stores so we
* better flush that too when necessary.
*/
brnz,pt %o1, __flush_icache_page
sllx %o0, 11, %o0
retl
nop
dflush1:stxa %g0, [%o4] ASI_DCACHE_TAG
add %o4, (1 << 5), %o4
dflush2:stxa %g0, [%o4] ASI_DCACHE_TAG
add %o4, (1 << 5), %o4
dflush3:stxa %g0, [%o4] ASI_DCACHE_TAG
add %o4, (1 << 5), %o4
dflush4:stxa %g0, [%o4] ASI_DCACHE_TAG
add %o4, (1 << 5), %o4
membar #Sync
ba,pt %xcc, 2b
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
.align 32
__prefill_dtlb:
rdpr %pstate, %g7
wrpr %g7, PSTATE_IE, %pstate
mov TLB_TAG_ACCESS, %g1
stxa %o5, [%g1] ASI_DMMU
stxa %o2, [%g0] ASI_DTLB_DATA_IN
flush %g6
retl
wrpr %g7, %pstate
__prefill_itlb:
rdpr %pstate, %g7
wrpr %g7, PSTATE_IE, %pstate
mov TLB_TAG_ACCESS, %g1
stxa %o5, [%g1] ASI_IMMU
stxa %o2, [%g0] ASI_ITLB_DATA_IN
flush %g6
retl
wrpr %g7, %pstate
.globl __update_mmu_cache
__update_mmu_cache: /* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
srlx %o1, PAGE_SHIFT, %o1
andcc %o3, FAULT_CODE_DTLB, %g0
sllx %o1, PAGE_SHIFT, %o5
bne,pt %xcc, __prefill_dtlb
or %o5, %o0, %o5
ba,a,pt %xcc, __prefill_itlb
/* Cheetah specific versions, patched at boot time.
*
* This writes of the PRIMARY_CONTEXT register in this file are
* safe even on Cheetah+ and later wrt. the page size fields.
* The nucleus page size fields do not matter because we make
* no data references, and these instructions execute out of a
* locked I-TLB entry sitting in the fully assosciative I-TLB.
* This sequence should also never trap.
*/
__cheetah_flush_tlb_mm: /* 15 insns */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o2
mov 0x40, %g3
ldxa [%o2] ASI_DMMU, %g2
stxa %o0, [%o2] ASI_DMMU
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
stxa %g2, [%o2] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
__cheetah_flush_tlb_pending: /* 22 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
be,pn %icc, 2f
andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
#ifdef DCACHE_ALIASING_POSSIBLE
flush_dcpage_cheetah: /* 11 insns */
sethi %uhi(PAGE_OFFSET), %g1
sllx %g1, 32, %g1
sub %o0, %g1, %o0
sethi %hi(PAGE_SIZE), %o4
1: subcc %o4, (1 << 5), %o4
stxa %g0, [%o0 + %o4] ASI_DCACHE_INVALIDATE
membar #Sync
bne,pt %icc, 1b
nop
retl /* I-cache flush never needed on Cheetah, see callers. */
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
cheetah_patch_one:
1: lduw [%o1], %g1
stw %g1, [%o0]
flush %o0
subcc %o2, 1, %o2
add %o1, 4, %o1
bne,pt %icc, 1b
add %o0, 4, %o0
retl
nop
.globl cheetah_patch_cachetlbops
cheetah_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
or %o1, %lo(__cheetah_flush_tlb_mm), %o1
call cheetah_patch_one
mov 15, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call cheetah_patch_one
mov 22, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(flush_dcpage_cheetah), %o1
or %o1, %lo(flush_dcpage_cheetah), %o1
call cheetah_patch_one
mov 11, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
ret
restore
#ifdef CONFIG_SMP
/* These are all called by the slaves of a cross call, at
* trap level 1, with interrupts fully disabled.
*
* Register usage:
* %g5 mm->context (all tlb flushes)
* %g1 address arg 1 (tlb page and range flushes)
* %g7 address arg 2 (tlb range flush only)
*
* %g6 ivector table, don't touch
* %g2 scratch 1
* %g3 scratch 2
* %g4 scratch 3
*
* TODO: Make xcall TLB range flushes use the tricks above... -DaveM
*/
.align 32
.globl xcall_flush_tlb_mm
xcall_flush_tlb_mm:
mov PRIMARY_CONTEXT, %g2
mov 0x40, %g4
ldxa [%g2] ASI_DMMU, %g3
stxa %g5, [%g2] ASI_DMMU
stxa %g0, [%g4] ASI_DMMU_DEMAP
stxa %g0, [%g4] ASI_IMMU_DEMAP
stxa %g3, [%g2] ASI_DMMU
retry
.globl xcall_flush_tlb_pending
xcall_flush_tlb_pending:
/* %g5=context, %g1=nr, %g7=vaddrs[] */
sllx %g1, 3, %g1
mov PRIMARY_CONTEXT, %g4
ldxa [%g4] ASI_DMMU, %g2
stxa %g5, [%g4] ASI_DMMU
1: sub %g1, (1 << 3), %g1
ldx [%g7 + %g1], %g5
andcc %g5, 0x1, %g0
be,pn %icc, 2f
andn %g5, 0x1, %g5
stxa %g0, [%g5] ASI_IMMU_DEMAP
2: stxa %g0, [%g5] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %g1, 1b
nop
stxa %g2, [%g4] ASI_DMMU
retry
.globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range:
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
add %g2, 1, %g2
sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %g3, 1b
sub %g3, %g2, %g3
retry
nop
nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.
*/
.globl xcall_sync_tick
xcall_sync_tick:
rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
rdpr %pil, %g2
wrpr %g0, 15, %pil
sethi %hi(109f), %g7
b,pt %xcc, etrap_irq
109: or %g7, %lo(109b), %g7
call smp_synchronize_tick_client
nop
clr %l6
b rtrap_xcall
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
/* NOTE: This is SPECIAL!! We do etrap/rtrap however
* we choose to deal with the "BH's run with
* %pil==15" problem (described in asm/pil.h)
* by just invoking rtrap directly past where
* BH's are checked for.
*
* We do it like this because we do not want %pil==15
* lockups to prevent regs being reported.
*/
.globl xcall_report_regs
xcall_report_regs:
rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
rdpr %pil, %g2
wrpr %g0, 15, %pil
sethi %hi(109f), %g7
b,pt %xcc, etrap_irq
109: or %g7, %lo(109b), %g7
call __show_regs
add %sp, PTREGS_OFF, %o0
clr %l6
/* Has to be a non-v9 branch due to the large distance. */
b rtrap_xcall
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
#ifdef DCACHE_ALIASING_POSSIBLE
.align 32
.globl xcall_flush_dcache_page_cheetah
xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */
sethi %hi(PAGE_SIZE), %g3
1: subcc %g3, (1 << 5), %g3
stxa %g0, [%g1 + %g3] ASI_DCACHE_INVALIDATE
membar #Sync
bne,pt %icc, 1b
nop
retry
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
.globl xcall_flush_dcache_page_spitfire
xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
%g7 == kernel page virtual address
%g5 == (page->mapping != NULL) */
#ifdef DCACHE_ALIASING_POSSIBLE
srlx %g1, (13 - 2), %g1 ! Form tag comparitor
sethi %hi(L1DCACHE_SIZE), %g3 ! D$ size == 16K
sub %g3, (1 << 5), %g3 ! D$ linesize == 32
1: ldxa [%g3] ASI_DCACHE_TAG, %g2
andcc %g2, 0x3, %g0
be,pn %xcc, 2f
andn %g2, 0x3, %g2
cmp %g2, %g1
bne,pt %xcc, 2f
nop
stxa %g0, [%g3] ASI_DCACHE_TAG
membar #Sync
2: cmp %g3, 0
bne,pt %xcc, 1b
sub %g3, (1 << 5), %g3
brz,pn %g5, 2f
#endif /* DCACHE_ALIASING_POSSIBLE */
sethi %hi(PAGE_SIZE), %g3
1: flush %g7
subcc %g3, (1 << 5), %g3
bne,pt %icc, 1b
add %g7, (1 << 5), %g7
2: retry
nop
nop
.globl xcall_promstop
xcall_promstop:
rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
rdpr %pil, %g2
wrpr %g0, 15, %pil
sethi %hi(109f), %g7
b,pt %xcc, etrap_irq
109: or %g7, %lo(109b), %g7
flushw
call prom_stopself
nop
/* We should not return, just spin if we do... */
1: b,a,pt %xcc, 1b
nop
.data
errata32_hwbug:
.xword 0
.text
/* These two are not performance critical... */
.globl xcall_flush_tlb_all_spitfire
xcall_flush_tlb_all_spitfire:
/* Spitfire Errata #32 workaround. */
sethi %hi(errata32_hwbug), %g4
stx %g0, [%g4 + %lo(errata32_hwbug)]
clr %g2
clr %g3
1: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4
and %g4, _PAGE_L, %g5
brnz,pn %g5, 2f
mov TLB_TAG_ACCESS, %g7
stxa %g0, [%g7] ASI_DMMU
membar #Sync
stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS
membar #Sync
/* Spitfire Errata #32 workaround. */
sethi %hi(errata32_hwbug), %g4
stx %g0, [%g4 + %lo(errata32_hwbug)]
2: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4
and %g4, _PAGE_L, %g5
brnz,pn %g5, 2f
mov TLB_TAG_ACCESS, %g7
stxa %g0, [%g7] ASI_IMMU
membar #Sync
stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS
membar #Sync
/* Spitfire Errata #32 workaround. */
sethi %hi(errata32_hwbug), %g4
stx %g0, [%g4 + %lo(errata32_hwbug)]
2: add %g2, 1, %g2
cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
ble,pt %icc, 1b
sll %g2, 3, %g3
flush %g6
retry
.globl xcall_flush_tlb_all_cheetah
xcall_flush_tlb_all_cheetah:
mov 0x80, %g2
stxa %g0, [%g2] ASI_DMMU_DEMAP
stxa %g0, [%g2] ASI_IMMU_DEMAP
retry
/* These just get rescheduled to PIL vectors. */
.globl xcall_call_function
xcall_call_function:
wr %g0, (1 << PIL_SMP_CALL_FUNC), %set_softint
retry
.globl xcall_receive_signal
xcall_receive_signal:
wr %g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint
retry
.globl xcall_capture
xcall_capture:
wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
retry
#endif /* CONFIG_SMP */