| /* |
| * (C) Copyright 2004, Psyent Corporation <www.psyent.com> |
| * Scott McNutt <smcnutt@psyent.com> |
| * |
| * SPDX-License-Identifier: GPL-2.0+ |
| */ |
| |
| #include <asm-offsets.h> |
| #include <config.h> |
| #include <version.h> |
| |
| /************************************************************************* |
| * RESTART |
| ************************************************************************/ |
| |
| .text |
| .global _start |
| |
| _start: |
| wrctl status, r0 /* Disable interrupts */ |
| /* ICACHE INIT -- only the icache line at the reset address |
| * is invalidated at reset. So the init must stay within |
| * the cache line size (8 words). If GERMS is used, we'll |
| * just be invalidating the cache a second time. If cache |
| * is not implemented initi behaves as nop. |
| * |
| * The eight instructions from wrctl through br are that 8-word |
| * budget; adding or removing one also moves the trampoline |
| * that follows. The loop issues initi with r5 starting at the |
| * icache size and stepping down one line size per pass while |
| * r5 is still greater than zero. |
| */ |
| ori r4, r0, %lo(CONFIG_SYS_ICACHELINE_SIZE) /* r4 <- line size (low 16 bits only) */ |
| movhi r5, %hi(CONFIG_SYS_ICACHE_SIZE) |
| ori r5, r5, %lo(CONFIG_SYS_ICACHE_SIZE) /* r5 <- icache size */ |
| 0: initi r5 |
| sub r5, r5, r4 /* step down one line */ |
| bgt r5, r0, 0b /* signed compare: loop while r5 > 0 */ |
| br _except_end /* Skip the tramp */ |
| |
| /* EXCEPTION TRAMPOLINE -- the following gets copied |
| * to the exception address (below), but is otherwise at the |
| * default exception vector offset (0x0020). |
| */ |
| _except_start: /* first word of the trampoline */ |
| movhi et, %hi(_exception) |
| ori et, et, %lo(_exception) /* et <- absolute address of _exception */ |
| jmp et /* register-indirect, so it works from wherever these words are copied */ |
| _except_end: |
| |
| /* INTERRUPTS -- for now, all interrupts masked and globally |
| * disabled. |
| */ |
| wrctl ienable, r0 /* All disabled */ |
| |
| /* DCACHE INIT -- if dcache not implemented, initd behaves as |
| * nop. |
| * |
| * r6 walks upward from 0 in line-size steps; initd is issued |
| * at least once and the loop repeats while r6 is below the |
| * dcache size. |
| */ |
| movhi r4, %hi(CONFIG_SYS_DCACHELINE_SIZE) |
| ori r4, r4, %lo(CONFIG_SYS_DCACHELINE_SIZE) /* r4 <- line size */ |
| movhi r5, %hi(CONFIG_SYS_DCACHE_SIZE) |
| ori r5, r5, %lo(CONFIG_SYS_DCACHE_SIZE) /* r5 <- dcache size */ |
| mov r6, r0 /* r6 <- 0 (start address) */ |
| 1: initd 0(r6) |
| add r6, r6, r4 /* advance one line */ |
| bltu r6, r5, 1b /* unsigned compare: loop while r6 < size */ |
| |
| /* RELOCATE CODE, DATA & COMMAND TABLE -- the following code |
| * assumes code, data and the command table are all |
| * contiguous. This lets us relocate everything as a single |
| * block. Make sure the linker script matches this ;-) |
| * |
| * The run-time address of _start is derived from nextpc and |
| * compared with the linked address; when they match the copy |
| * is skipped. Otherwise words are copied from the run-time |
| * image to the linked address until the destination pointer |
| * equals _edata. The exit test is equality only, so _edata |
| * must be reachable from _start in exact 4-byte steps. |
| */ |
| nextpc r4 /* r4 <- run-time address of _cur */ |
| _cur: movhi r5, %hi(_cur - _start) |
| ori r5, r5, %lo(_cur - _start) /* r5 <- link-time offset of _cur within the image */ |
| sub r4, r4, r5 /* r4 <- cur _start */ |
| mov r8, r4 /* r8 <- copy of cur _start; not read again in the code visible here (NOTE(review): confirm whether later code relies on it) */ |
| movhi r5, %hi(_start) |
| ori r5, r5, %lo(_start) /* r5 <- linked _start */ |
| beq r4, r5, 3f /* already running at the linked address: nothing to copy */ |
| |
| movhi r6, %hi(_edata) |
| ori r6, r6, %lo(_edata) /* r6 <- end of the block to copy (linked address) */ |
| 2: ldwio r7, 0(r4) /* I/O-form load; presumably bypasses the data cache -- confirm in the Nios II reference */ |
| addi r4, r4, 4 |
| stwio r7, 0(r5) /* I/O-form store to the linked location */ |
| addi r5, r5, 4 |
| bne r5, r6, 2b /* until destination reaches _edata */ |
| 3: |
| |
| /* ZERO BSS/SBSS -- bss and sbss are assumed to be adjacent |
| * and between __bss_start and __bss_end. |
| * |
| * An empty region is skipped up front. The store loop exits |
| * only when r5 equals __bss_end, so the region length must be |
| * a multiple of 4 bytes. |
| */ |
| movhi r5, %hi(__bss_start) |
| ori r5, r5, %lo(__bss_start) /* r5 <- write pointer */ |
| movhi r6, %hi(__bss_end) |
| ori r6, r6, %lo(__bss_end) /* r6 <- end pointer */ |
| beq r5, r6, 5f /* empty bss: skip */ |
| |
| 4: stwio r0, 0(r5) /* store a zero word */ |
| addi r5, r5, 4 |
| bne r5, r6, 4b |
| 5: |
| |
| /* JUMP TO RELOC ADDR -- _reloc is loaded as an absolute (linked) address, so execution continues in the linked copy */ |
| movhi r4, %hi(_reloc) |
| ori r4, r4, %lo(_reloc) |
| jmp r4 |
| _reloc: |
| |
| /* COPY EXCEPTION TRAMPOLINE -- copy the tramp to the |
| * exception address. Define CONFIG_ROM_STUBS to prevent |
| * the copy (e.g. exception in flash or in other |
| * software/firmware component). |
| * |
| * Copies the words in [_except_start, _except_end) to |
| * CONFIG_SYS_EXCEPTION_ADDR. The copy is skipped when the |
| * trampoline already sits at that address. |
| */ |
| #if !defined(CONFIG_ROM_STUBS) |
| movhi r4, %hi(_except_start) |
| ori r4, r4, %lo(_except_start) /* r4 <- source pointer */ |
| movhi r5, %hi(_except_end) |
| ori r5, r5, %lo(_except_end) /* r5 <- source end */ |
| movhi r6, %hi(CONFIG_SYS_EXCEPTION_ADDR) |
| ori r6, r6, %lo(CONFIG_SYS_EXCEPTION_ADDR) /* r6 <- destination pointer */ |
| beq r4, r6, 7f /* Skip if at proper addr */ |
| |
| 6: ldwio r7, 0(r4) |
| stwio r7, 0(r6) |
| addi r4, r4, 4 |
| addi r6, r6, 4 |
| bne r4, r5, 6b /* until the source pointer reaches _except_end */ |
| 7: |
| #endif |
| |
| /* STACK INIT -- zero top two words for call back chain. |
| * sp ends up 8 bytes below CONFIG_SYS_INIT_SP and fp is set |
| * to the same value. |
| */ |
| movhi sp, %hi(CONFIG_SYS_INIT_SP) |
| ori sp, sp, %lo(CONFIG_SYS_INIT_SP) |
| addi sp, sp, -8 /* reserve the two words */ |
| stw r0, 0(sp) |
| stw r0, 4(sp) |
| mov fp, sp |
| |
| /* |
| * Call board_init -- never returns |
| * (the address is loaded into r4 and called through the register) |
| */ |
| movhi r4, %hi(board_init@h) |
| ori r4, r4, %lo(board_init@h) |
| callr r4 |
| |
| /* NEVER RETURNS -- but branch to the _start just |
| * in case ;-) |
| */ |
| br _start |
| |
| |
| /* |
| * dly_clks -- Nios2 (like Nios1) doesn't have a timebase in |
| * the core. For simple delay loops, we do our best by counting |
| * instruction cycles. |
| * |
| * Instruction performance varies based on the core. For cores |
| * with icache and static/dynamic branch prediction (II/f, II/s): |
| * |
| * Normal ALU (e.g. add, cmp, etc): 1 cycle |
| * Branch (correctly predicted, taken): 2 cycles |
| * Negative offset is predicted (II/s). |
| * |
| * For cores without icache and no branch prediction (II/e): |
| * |
| * Normal ALU (e.g. add, cmp, etc): 6 cycles |
| * Branch (no prediction): 6 cycles |
| * |
| * For simplicity, if an instruction cache is implemented we |
| * assume II/f or II/s. Otherwise, we use the II/e. |
| * |
| */ |
| .globl dly_clks |
| |
| /* |
| * Clocks charged for one pass of the delay loop: 3 when an |
| * instruction cache is configured, 12 when it is not. |
| */ |
| #if (CONFIG_SYS_ICACHE_SIZE > 0) |
| #define DLY_CLKS_PER_LOOP 3 |
| #else |
| #define DLY_CLKS_PER_LOOP 12 |
| #endif |
| |
| /* |
| * In: r4 = number of clocks to burn. |
| * Out: nothing; r4 is clobbered. |
| * Each pass subtracts DLY_CLKS_PER_LOOP from r4 and loops again |
| * while the result, compared as a signed value, is still >= 0. |
| */ |
| dly_clks: |
| 8: subi r4, r4, DLY_CLKS_PER_LOOP |
| bge r4, r0, 8b |
| ret |
| |
| .data |
| .globl version_string |
| |
| /* NUL-terminated U-Boot version banner, exported for use outside this file. */ |
| version_string: |
| .ascii U_BOOT_VERSION_STRING |
| .byte 0 /* string terminator */ |