#include <common.h>
#include <elf.h>
#include <asm/encoding.h>
+#include <generated/asm-offsets.h>
#ifdef CONFIG_32BIT
-#define LREG lw
-#define SREG sw
-#define REGBYTES 4
+#define LREG lw
+#define SREG sw
+#define REGBYTES 4
#define RELOC_TYPE R_RISCV_32
#define SYM_INDEX 0x8
#define SYM_SIZE 0x10
#else
-#define LREG ld
-#define SREG sd
-#define REGBYTES 8
+#define LREG ld
+#define SREG sd
+#define REGBYTES 8
#define RELOC_TYPE R_RISCV_64
#define SYM_INDEX 0x20
#define SYM_SIZE 0x18
#endif
-.section .text
+.section .data
+secondary_harts_relocation_error:
+ .ascii "Relocation of secondary harts has failed, error %d\n"
+
+.section .text
.globl _start
_start:
- j handle_reset
+#if CONFIG_IS_ENABLED(RISCV_MMODE)
+ csrr a0, CSR_MHARTID
+#endif
-nmi_vector:
- j nmi_vector
+ /* save hart id and dtb pointer */
+ mv tp, a0
+ mv s1, a1
-trap_vector:
- j trap_entry
+ la t0, trap_entry
+ csrw MODE_PREFIX(tvec), t0
-.global trap_entry
-handle_reset:
- li t0, CONFIG_SYS_SDRAM_BASE
- SREG a2, 0(t0)
- la t0, trap_entry
- csrw mtvec, t0
- csrwi mstatus, 0
- csrwi mie, 0
+ /* mask all interrupts */
+ csrw MODE_PREFIX(ie), zero
-/*
- * Do CPU critical regs init only at reboot,
- * not when booting from ram
- */
-#ifdef CONFIG_INIT_CRITICAL
- jal cpu_init_crit /* Do CPU critical regs init */
+#ifdef CONFIG_SMP
+ /* check if hart is within range */
+ /* tp: hart id */
+ li t0, CONFIG_NR_CPUS
+ bge tp, t0, hart_out_of_bounds_loop
+#endif
+
+#ifdef CONFIG_SMP
+ /* set xSIE bit to receive IPIs */
+#if CONFIG_IS_ENABLED(RISCV_MMODE)
+ li t0, MIE_MSIE
+#else
+ li t0, SIE_SSIE
+#endif
+ csrs MODE_PREFIX(ie), t0
#endif
/*
* Set stackpointer in internal/ex RAM to call board_init_f
*/
call_board_init_f:
- li t0, -16
- li t1, CONFIG_SYS_INIT_SP_ADDR
- and sp, t1, t0 /* force 16 byte alignment */
-
-#ifdef CONFIG_DEBUG_UART
- jal debug_uart_init
+ li t0, -16
+#if defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
+ li t1, CONFIG_SPL_STACK
+#else
+ li t1, CONFIG_SYS_INIT_SP_ADDR
#endif
+ and sp, t1, t0 /* force 16 byte alignment */
call_board_init_f_0:
mv a0, sp
jal board_init_f_alloc_reserve
+
+ /*
+ * Set global data pointer here for all harts, uninitialized at this
+ * point.
+ */
+ mv gp, a0
+
+ /* setup stack */
+#ifdef CONFIG_SMP
+ /* tp: hart id */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, a0, t0
+#else
mv sp, a0
+#endif
+
+#ifndef CONFIG_XIP
+ /*
+ * Pick hart to initialize global data and run U-Boot. The other harts
+ * wait for initialization to complete.
+ */
+ la t0, hart_lottery
+ li s2, 1
+ amoswap.w s2, t1, 0(t0)
+ bnez s2, wait_for_gd_init
+#else
+ bnez tp, secondary_hart_loop
+#endif
+
+#ifdef CONFIG_OF_PRIOR_STAGE
+ la t0, prior_stage_fdt_address
+ SREG s1, 0(t0)
+#endif
+
jal board_init_f_init_reserve
- mv a0, zero /* a0 <-- boot_flags = 0 */
- la t5, board_init_f
- jr t5 /* jump to board_init_f() */
+ /* save the boot hart id to global_data */
+ SREG tp, GD_BOOT_HART(gp)
+
+#ifndef CONFIG_XIP
+ la t0, available_harts_lock
+ fence rw, w
+ amoswap.w zero, zero, 0(t0)
+
+wait_for_gd_init:
+ la t0, available_harts_lock
+ li t1, 1
+1: amoswap.w t1, t1, 0(t0)
+ fence r, rw
+ bnez t1, 1b
+
+ /* register available harts in the available_harts mask */
+ li t1, 1
+ sll t1, t1, tp
+ LREG t2, GD_AVAILABLE_HARTS(gp)
+ or t2, t2, t1
+ SREG t2, GD_AVAILABLE_HARTS(gp)
+
+ fence rw, w
+ amoswap.w zero, zero, 0(t0)
+
+ /*
+ * Continue on hart lottery winner, others branch to
+ * secondary_hart_loop.
+ */
+ bnez s2, secondary_hart_loop
+#endif
+
+ /* Enable cache */
+ jal icache_enable
+ jal dcache_enable
+
+#ifdef CONFIG_DEBUG_UART
+ jal debug_uart_init
+#endif
+
+ mv a0, zero /* a0 <-- boot_flags = 0 */
+ la t5, board_init_f
+ jalr t5 /* jump to board_init_f() */
+
+#ifdef CONFIG_SPL_BUILD
+spl_clear_bss:
+ la t0, __bss_start
+ la t1, __bss_end
+ beq t0, t1, spl_stack_gd_setup
+
+spl_clear_bss_loop:
+ SREG zero, 0(t0)
+ addi t0, t0, REGBYTES
+ bne t0, t1, spl_clear_bss_loop
+
+spl_stack_gd_setup:
+ jal spl_relocate_stack_gd
+
+ /* skip setup if we did not relocate */
+ beqz a0, spl_call_board_init_r
+ mv s0, a0
+
+ /* setup stack on main hart */
+#ifdef CONFIG_SMP
+ /* tp: hart id */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, s0, t0
+#else
+ mv sp, s0
+#endif
+
+ /* set new stack and global data pointer on secondary harts */
+spl_secondary_hart_stack_gd_setup:
+ la a0, secondary_hart_relocate
+ mv a1, s0
+ mv a2, s0
+ jal smp_call_function
+
+ /* hang if relocation of secondary harts has failed */
+ beqz a0, 1f
+ mv a1, a0
+ la a0, secondary_harts_relocation_error
+ jal printf
+ jal hang
+
+ /* set new global data pointer on main hart */
+1: mv gp, s0
+
+spl_call_board_init_r:
+ mv a0, zero
+ mv a1, zero
+ jal board_init_r
+#endif
/*
* void relocate_code (addr_sp, gd, addr_moni)
*/
.globl relocate_code
relocate_code:
- mv s2, a0 /* save addr_sp */
- mv s3, a1 /* save addr of gd */
- mv s4, a2 /* save addr of destination */
+ mv s2, a0 /* save addr_sp */
+ mv s3, a1 /* save addr of gd */
+ mv s4, a2 /* save addr of destination */
/*
*Set up the stack
*/
stack_setup:
- mv sp, s2
- la t0, _start
- sub t6, s4, t0 /* t6 <- relocation offset */
- beq t0, s4, clear_bss /* skip relocation */
+#ifdef CONFIG_SMP
+ /* tp: hart id */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, s2, t0
+#else
+ mv sp, s2
+#endif
+
+ la t0, _start
+ sub t6, s4, t0 /* t6 <- relocation offset */
+ beq t0, s4, clear_bss /* skip relocation */
- mv t1, s4 /* t1 <- scratch for copy_loop */
- la t3, __bss_start
- sub t3, t3, t0 /* t3 <- __bss_start_ofs */
- add t2, t0, t3 /* t2 <- source end address */
+ mv t1, s4 /* t1 <- scratch for copy_loop */
+ la t3, __bss_start
+ sub t3, t3, t0 /* t3 <- __bss_start_ofs */
+ add t2, t0, t3 /* t2 <- source end address */
copy_loop:
- LREG t5, 0(t0)
- addi t0, t0, REGBYTES
- SREG t5, 0(t1)
- addi t1, t1, REGBYTES
- blt t0, t2, copy_loop
+ LREG t5, 0(t0)
+ addi t0, t0, REGBYTES
+ SREG t5, 0(t1)
+ addi t1, t1, REGBYTES
+ blt t0, t2, copy_loop
/*
* Update dynamic relocations after board_init_f
*/
fix_rela_dyn:
- la t1, __rel_dyn_start
- la t2, __rel_dyn_end
- beq t1, t2, clear_bss
- add t1, t1, t6 /* t1 <- rela_dyn_start in RAM */
- add t2, t2, t6 /* t2 <- rela_dyn_end in RAM */
+ la t1, __rel_dyn_start
+ la t2, __rel_dyn_end
+ beq t1, t2, clear_bss
+ add t1, t1, t6 /* t1 <- rela_dyn_start in RAM */
+ add t2, t2, t6 /* t2 <- rela_dyn_end in RAM */
/*
* skip first reserved entry: address, type, addend
*/
- bne t1, t2, 7f
+ j 10f
6:
- LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */
- li t3, R_RISCV_RELATIVE /* reloc type R_RISCV_RELATIVE */
- bne t5, t3, 8f /* skip non-RISCV_RELOC entries */
- LREG t3, -(REGBYTES*3)(t1)
- LREG t5, -(REGBYTES)(t1) /* t5 <-- addend */
- add t5, t5, t6 /* t5 <-- location to fix up in RAM */
- add t3, t3, t6 /* t3 <-- location to fix up in RAM */
- SREG t5, 0(t3)
-7:
- addi t1, t1, (REGBYTES*3)
- ble t1, t2, 6b
+ LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */
+ li t3, R_RISCV_RELATIVE /* reloc type R_RISCV_RELATIVE */
+ bne t5, t3, 8f /* skip non-RISCV_RELOC entries */
+ LREG t3, -(REGBYTES*3)(t1)
+ LREG t5, -(REGBYTES)(t1) /* t5 <-- addend */
+ add t5, t5, t6 /* t5 <-- location to fix up in RAM */
+ add t3, t3, t6 /* t3 <-- location to fix up in RAM */
+ SREG t5, 0(t3)
+ j 10f
8:
- la t4, __dyn_sym_start
- add t4, t4, t6
+ la t4, __dyn_sym_start
+ add t4, t4, t6
9:
- LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */
- srli t0, t5, SYM_INDEX /* t0 <--- sym table index */
- andi t5, t5, 0xFF /* t5 <--- relocation type */
- li t3, RELOC_TYPE
- bne t5, t3, 10f /* skip non-addned entries */
-
- LREG t3, -(REGBYTES*3)(t1)
- li t5, SYM_SIZE
- mul t0, t0, t5
- add s1, t4, t0
- LREG t5, REGBYTES(s1)
- add t5, t5, t6 /* t5 <-- location to fix up in RAM */
- add t3, t3, t6 /* t3 <-- location to fix up in RAM */
- SREG t5, 0(t3)
+ LREG t5, -(REGBYTES*2)(t1) /* t5 <-- relocation info:type */
+ srli t0, t5, SYM_INDEX /* t0 <--- sym table index */
+ andi t5, t5, 0xFF /* t5 <--- relocation type */
+ li t3, RELOC_TYPE
+ bne t5, t3, 10f /* skip non-addned entries */
+
+ LREG t3, -(REGBYTES*3)(t1)
+ li t5, SYM_SIZE
+ mul t0, t0, t5
+ add s5, t4, t0
+ LREG t0, -(REGBYTES)(t1) /* t0 <-- addend */
+ LREG t5, REGBYTES(s5)
+ add t5, t5, t0
+ add t5, t5, t6 /* t5 <-- location to fix up in RAM */
+ add t3, t3, t6 /* t3 <-- location to fix up in RAM */
+ SREG t5, 0(t3)
10:
- addi t1, t1, (REGBYTES*3)
- ble t1, t2, 9b
+ addi t1, t1, (REGBYTES*3)
+ ble t1, t2, 6b
/*
* trap update
*/
- la t0, trap_entry
- add t0, t0, t6
- csrw mtvec, t0
+ la t0, trap_entry
+ add t0, t0, t6
+ csrw MODE_PREFIX(tvec), t0
clear_bss:
- la t0, __bss_start /* t0 <- rel __bss_start in FLASH */
- add t0, t0, t6 /* t0 <- rel __bss_start in RAM */
- la t1, __bss_end /* t1 <- rel __bss_end in FLASH */
- add t1, t1, t6 /* t1 <- rel __bss_end in RAM */
- li t2, 0x00000000 /* clear */
- beq t0, t1, call_board_init_r
+ la t0, __bss_start /* t0 <- rel __bss_start in FLASH */
+ add t0, t0, t6 /* t0 <- rel __bss_start in RAM */
+ la t1, __bss_end /* t1 <- rel __bss_end in FLASH */
+ add t1, t1, t6 /* t1 <- rel __bss_end in RAM */
+ beq t0, t1, relocate_secondary_harts
clbss_l:
- SREG t2, 0(t0) /* clear loop... */
- addi t0, t0, REGBYTES
- bne t0, t1, clbss_l
+ SREG zero, 0(t0) /* clear loop... */
+ addi t0, t0, REGBYTES
+ bne t0, t1, clbss_l
+
+relocate_secondary_harts:
+#ifdef CONFIG_SMP
+ /* send relocation IPI */
+ la t0, secondary_hart_relocate
+ add a0, t0, t6
+
+ /* store relocation offset */
+ mv s5, t6
+
+ mv a1, s2
+ mv a2, s3
+ jal smp_call_function
+
+ /* hang if relocation of secondary harts has failed */
+ beqz a0, 1f
+ mv a1, a0
+ la a0, secondary_harts_relocation_error
+ jal printf
+ jal hang
+
+ /* restore relocation offset */
+1: mv t6, s5
+#endif
/*
* We are done. Do not return, instead branch to second part of board
* initialization, now running from RAM.
*/
call_board_init_r:
- la t0, board_init_r
- mv t4, t0 /* offset of board_init_r() */
- add t4, t4, t6 /* real address of board_init_r() */
+ jal invalidate_icache_all
+ jal flush_dcache_all
+ la t0, board_init_r
+ mv t4, t0 /* offset of board_init_r() */
+ add t4, t4, t6 /* real address of board_init_r() */
/*
* setup parameters for board_init_r
*/
- mv a0, s3 /* gd_t */
- mv a1, s4 /* dest_addr */
+ mv a0, s3 /* gd_t */
+ mv a1, s4 /* dest_addr */
/*
* jump to it ...
*/
- jr t4 /* jump to board_init_r() */
+ jr t4 /* jump to board_init_r() */
-/*
- * trap entry
- */
-trap_entry:
- addi sp, sp, -32*REGBYTES
- SREG x1, 1*REGBYTES(sp)
- SREG x2, 2*REGBYTES(sp)
- SREG x3, 3*REGBYTES(sp)
- SREG x4, 4*REGBYTES(sp)
- SREG x5, 5*REGBYTES(sp)
- SREG x6, 6*REGBYTES(sp)
- SREG x7, 7*REGBYTES(sp)
- SREG x8, 8*REGBYTES(sp)
- SREG x9, 9*REGBYTES(sp)
- SREG x10, 10*REGBYTES(sp)
- SREG x11, 11*REGBYTES(sp)
- SREG x12, 12*REGBYTES(sp)
- SREG x13, 13*REGBYTES(sp)
- SREG x14, 14*REGBYTES(sp)
- SREG x15, 15*REGBYTES(sp)
- SREG x16, 16*REGBYTES(sp)
- SREG x17, 17*REGBYTES(sp)
- SREG x18, 18*REGBYTES(sp)
- SREG x19, 19*REGBYTES(sp)
- SREG x20, 20*REGBYTES(sp)
- SREG x21, 21*REGBYTES(sp)
- SREG x22, 22*REGBYTES(sp)
- SREG x23, 23*REGBYTES(sp)
- SREG x24, 24*REGBYTES(sp)
- SREG x25, 25*REGBYTES(sp)
- SREG x26, 26*REGBYTES(sp)
- SREG x27, 27*REGBYTES(sp)
- SREG x28, 28*REGBYTES(sp)
- SREG x29, 29*REGBYTES(sp)
- SREG x30, 30*REGBYTES(sp)
- SREG x31, 31*REGBYTES(sp)
- csrr a0, mcause
- csrr a1, mepc
- mv a2, sp
- jal handle_trap
- csrw mepc, a0
+#ifdef CONFIG_SMP
+hart_out_of_bounds_loop:
+ /* Harts in this loop are out of bounds, increase CONFIG_NR_CPUS. */
+ wfi
+ j hart_out_of_bounds_loop
+#endif
-/*
- * Remain in M-mode after mret
- */
- li t0, MSTATUS_MPP
- csrs mstatus, t0
- LREG x1, 1*REGBYTES(sp)
- LREG x2, 2*REGBYTES(sp)
- LREG x3, 3*REGBYTES(sp)
- LREG x4, 4*REGBYTES(sp)
- LREG x5, 5*REGBYTES(sp)
- LREG x6, 6*REGBYTES(sp)
- LREG x7, 7*REGBYTES(sp)
- LREG x8, 8*REGBYTES(sp)
- LREG x9, 9*REGBYTES(sp)
- LREG x10, 10*REGBYTES(sp)
- LREG x11, 11*REGBYTES(sp)
- LREG x12, 12*REGBYTES(sp)
- LREG x13, 13*REGBYTES(sp)
- LREG x14, 14*REGBYTES(sp)
- LREG x15, 15*REGBYTES(sp)
- LREG x16, 16*REGBYTES(sp)
- LREG x17, 17*REGBYTES(sp)
- LREG x18, 18*REGBYTES(sp)
- LREG x19, 19*REGBYTES(sp)
- LREG x20, 20*REGBYTES(sp)
- LREG x21, 21*REGBYTES(sp)
- LREG x22, 22*REGBYTES(sp)
- LREG x23, 23*REGBYTES(sp)
- LREG x24, 24*REGBYTES(sp)
- LREG x25, 25*REGBYTES(sp)
- LREG x26, 26*REGBYTES(sp)
- LREG x27, 27*REGBYTES(sp)
- LREG x28, 28*REGBYTES(sp)
- LREG x29, 29*REGBYTES(sp)
- LREG x30, 30*REGBYTES(sp)
- LREG x31, 31*REGBYTES(sp)
- addi sp, sp, 32*REGBYTES
- mret
-
-#ifdef CONFIG_INIT_CRITICAL
-cpu_init_crit:
- ret
+#ifdef CONFIG_SMP
+/* SMP relocation entry */
+secondary_hart_relocate:
+ /* a1: new sp */
+ /* a2: new gd */
+ /* tp: hart id */
+
+ /* setup stack */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, a1, t0
+
+ /* update global data pointer */
+ mv gp, a2
+#endif
+
+secondary_hart_loop:
+ wfi
+
+#ifdef CONFIG_SMP
+ csrr t0, MODE_PREFIX(ip)
+#if CONFIG_IS_ENABLED(RISCV_MMODE)
+ andi t0, t0, MIE_MSIE
+#else
+ andi t0, t0, SIE_SSIE
+#endif
+ beqz t0, secondary_hart_loop
+
+ mv a0, tp
+ jal handle_ipi
#endif
+
+ j secondary_hart_loop