OMAP3: Add common cpu and start code

Add common cpu and start code.

Signed-off-by: Dirk Behme <dirk.behme@googlemail.com>
diff --git a/cpu/arm_cortexa8/Makefile b/cpu/arm_cortexa8/Makefile
new file mode 100644
index 0000000..ae20299
--- /dev/null
+++ b/cpu/arm_cortexa8/Makefile
@@ -0,0 +1,47 @@
+#
+# (C) Copyright 2000-2003
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(CPU).a
+
+START	:= start.o
+COBJS	:= cpu.o
+
+SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS))
+START	:= $(addprefix $(obj),$(START))
+
+all:	$(obj).depend $(START) $(LIB)
+
+$(LIB):	$(OBJS)
+	$(AR) $(ARFLAGS) $@ $(OBJS)
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
\ No newline at end of file
diff --git a/cpu/arm_cortexa8/config.mk b/cpu/arm_cortexa8/config.mk
new file mode 100644
index 0000000..b021762
--- /dev/null
+++ b/cpu/arm_cortexa8/config.mk
@@ -0,0 +1,36 @@
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+PLATFORM_RELFLAGS += -fno-strict-aliasing -fno-common -ffixed-r8 \
+		     -msoft-float
+
+# Make ARMv5 to allow more compilers to work, even though its v7a.
+PLATFORM_CPPFLAGS += -march=armv5
+# =========================================================================
+#
+# Supply options according to compiler version
+#
+# =========================================================================
+PLATFORM_CPPFLAGS +=$(call cc-option)
+PLATFORM_CPPFLAGS +=$(call cc-option,-mno-thumb-interwork,)
+PLATFORM_RELFLAGS +=$(call cc-option,-mshort-load-bytes,\
+		    $(call cc-option,-malignment-traps,))
\ No newline at end of file
diff --git a/cpu/arm_cortexa8/cpu.c b/cpu/arm_cortexa8/cpu.c
new file mode 100644
index 0000000..ebc5ea2
--- /dev/null
+++ b/cpu/arm_cortexa8/cpu.c
@@ -0,0 +1,241 @@
+/*
+ * (C) Copyright 2008 Texas Insturments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * CPU specific code
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/arch/sys_proto.h>
+
+#ifdef CONFIG_USE_IRQ
+DECLARE_GLOBAL_DATA_PTR;
+#endif
+
+#ifndef CONFIG_L2_OFF
+void l2cache_disable(void);
+#endif
+
+static void cache_flush(void);
+
+/* read co-processor 15, register #1 (control register) */
+static unsigned long read_p15_c1(void)
+{
+	unsigned long value;
+
+	__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 0\
+			     @ read control reg\n":"=r"(value)
+			     ::"memory");
+	return value;
+}
+
+/* write to co-processor 15, register #1 (control register) */
+static void write_p15_c1(unsigned long value)
+{
+	__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 0\
+			     @ write it back\n"::"r"(value)
+			     : "memory");
+
+	read_p15_c1();
+}
+
+static void cp_delay(void)
+{
+	/* Many OMAP regs need at least 2 nops */
+	asm("nop");
+	asm("nop");
+}
+
+/* See also ARM Ref. Man. */
+#define C1_MMU		(1<<0)	/* mmu off/on */
+#define C1_ALIGN	(1<<1)	/* alignment faults off/on */
+#define C1_DC		(1<<2)	/* dcache off/on */
+#define C1_WB		(1<<3)	/* merging write buffer on/off */
+#define C1_BIG_ENDIAN	(1<<7)	/* big endian off/on */
+#define C1_SYS_PROT	(1<<8)	/* system protection */
+#define C1_ROM_PROT	(1<<9)	/* ROM protection */
+#define C1_IC		(1<<12)	/* icache off/on */
+#define C1_HIGH_VECTORS	(1<<13)	/* location of vectors: low/high addresses */
+#define RESERVED_1	(0xf << 3)	/* must be 111b for R/W */
+
+int cpu_init(void)
+{
+	/*
+	 * setup up stacks if necessary
+	 */
+#ifdef CONFIG_USE_IRQ
+	IRQ_STACK_START =
+	    _armboot_start - CONFIG_SYS_MALLOC_LEN - CONFIG_SYS_GBL_DATA_SIZE - 4;
+	FIQ_STACK_START = IRQ_STACK_START - CONFIG_STACKSIZE_IRQ;
+#endif
+	return 0;
+}
+
+int cleanup_before_linux(void)
+{
+	unsigned int i;
+
+	/*
+	 * this function is called just before we call linux
+	 * it prepares the processor for linux
+	 *
+	 * we turn off caches etc ...
+	 */
+	disable_interrupts();
+
+	/* turn off I/D-cache */
+	icache_disable();
+	dcache_disable();
+
+	/* invalidate I-cache */
+	cache_flush();
+
+#ifndef CONFIG_L2_OFF
+	/* turn off L2 cache */
+	l2cache_disable();
+	/* invalidate L2 cache also */
+	v7_flush_dcache_all(get_device_type());
+#endif
+	i = 0;
+	/* mem barrier to sync up things */
+	asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i));
+
+#ifndef CONFIG_L2_OFF
+	l2cache_enable();
+#endif
+
+	return 0;
+}
+
+int do_reset(cmd_tbl_t *cmdtp, int flag, int argc, char *argv[])
+{
+	disable_interrupts();
+	reset_cpu(0);
+
+	/* NOTREACHED */
+	return 0;
+}
+
+void icache_enable(void)
+{
+	ulong reg;
+
+	reg = read_p15_c1();	/* get control reg. */
+	cp_delay();
+	write_p15_c1(reg | C1_IC);
+}
+
+void icache_disable(void)
+{
+	ulong reg;
+
+	reg = read_p15_c1();
+	cp_delay();
+	write_p15_c1(reg & ~C1_IC);
+}
+
+void dcache_disable (void)
+{
+	ulong reg;
+
+	reg = read_p15_c1 ();
+	cp_delay ();
+	write_p15_c1 (reg & ~C1_DC);
+}
+
+void l2cache_enable()
+{
+	unsigned long i;
+	volatile unsigned int j;
+
+	/* ES2 onwards we can disable/enable L2 ourselves */
+	if (get_cpu_rev() == CPU_3430_ES2) {
+		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
+		__asm__ __volatile__("orr %0, %0, #0x2":"=r"(i));
+		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
+	} else {
+		/* Save r0, r12 and restore them after usage */
+		__asm__ __volatile__("mov %0, r12":"=r"(j));
+		__asm__ __volatile__("mov %0, r0":"=r"(i));
+
+		/*
+		 * GP Device ROM code API usage here
+		 * r12 = AUXCR Write function and r0 value
+		 */
+		__asm__ __volatile__("mov r12, #0x3");
+		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
+		__asm__ __volatile__("orr r0, r0, #0x2");
+		/* SMI instruction to call ROM Code API */
+		__asm__ __volatile__(".word 0xE1600070");
+		__asm__ __volatile__("mov r0, %0":"=r"(i));
+		__asm__ __volatile__("mov r12, %0":"=r"(j));
+	}
+
+}
+
+void l2cache_disable()
+{
+	unsigned long i;
+	volatile unsigned int j;
+
+	/* ES2 onwards we can disable/enable L2 ourselves */
+	if (get_cpu_rev() == CPU_3430_ES2) {
+		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
+		__asm__ __volatile__("bic %0, %0, #0x2":"=r"(i));
+		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
+	} else {
+		/* Save r0, r12 and restore them after usage */
+		__asm__ __volatile__("mov %0, r12":"=r"(j));
+		__asm__ __volatile__("mov %0, r0":"=r"(i));
+
+		/*
+		 * GP Device ROM code API usage here
+		 * r12 = AUXCR Write function and r0 value
+		 */
+		__asm__ __volatile__("mov r12, #0x3");
+		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
+		__asm__ __volatile__("bic r0, r0, #0x2");
+		/* SMI instruction to call ROM Code API */
+		__asm__ __volatile__(".word 0xE1600070");
+		__asm__ __volatile__("mov r0, %0":"=r"(i));
+		__asm__ __volatile__("mov r12, %0":"=r"(j));
+	}
+}
+
+int icache_status(void)
+{
+	return (read_p15_c1() & C1_IC) != 0;
+}
+
+static void cache_flush(void)
+{
+	asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0));
+}
+
diff --git a/cpu/arm_cortexa8/omap3/Makefile b/cpu/arm_cortexa8/omap3/Makefile
new file mode 100644
index 0000000..b96b3dd
--- /dev/null
+++ b/cpu/arm_cortexa8/omap3/Makefile
@@ -0,0 +1,46 @@
+#
+# (C) Copyright 2000-2003
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	=  $(obj)lib$(SOC).a
+
+SOBJS	:= lowlevel_init.o
+COBJS	:= sys_info.o board.o clock.o interrupts.o mem.o syslib.o
+
+SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
+
+all:	 $(obj).depend $(LIB)
+
+$(LIB):	$(OBJS)
+	$(AR) $(ARFLAGS) $@ $(OBJS)
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/cpu/arm_cortexa8/omap3/config.mk b/cpu/arm_cortexa8/omap3/config.mk
new file mode 100644
index 0000000..fbb753e
--- /dev/null
+++ b/cpu/arm_cortexa8/omap3/config.mk
@@ -0,0 +1,36 @@
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+PLATFORM_RELFLAGS += -fno-strict-aliasing -fno-common -ffixed-r8 \
+		     -msoft-float
+
+# Make ARMv5 to allow more compilers to work, even though its v7a.
+PLATFORM_CPPFLAGS += -march=armv5
+# =========================================================================
+#
+# Supply options according to compiler version
+#
+# =========================================================================
+PLATFORM_CPPFLAGS +=$(call cc-option)
+PLATFORM_CPPFLAGS +=$(call cc-option,-mno-thumb-interwork,)
+PLATFORM_RELFLAGS +=$(call cc-option,-mshort-load-bytes,\
+		    $(call cc-option,-malignment-traps,))
diff --git a/cpu/arm_cortexa8/start.S b/cpu/arm_cortexa8/start.S
new file mode 100644
index 0000000..07acdbd
--- /dev/null
+++ b/cpu/arm_cortexa8/start.S
@@ -0,0 +1,516 @@
+/*
+ * armboot - Startup Code for OMAP3530/ARM Cortex CPU-core
+ *
+ * Copyright (c) 2004	Texas Instruments <r-woodruff2@ti.com>
+ *
+ * Copyright (c) 2001	Marius Gröger <mag@sysgo.de>
+ * Copyright (c) 2002	Alex Züpke <azu@sysgo.de>
+ * Copyright (c) 2002	Gary Jennejohn <gj@denx.de>
+ * Copyright (c) 2003	Richard Woodruff <r-woodruff2@ti.com>
+ * Copyright (c) 2003	Kshitij <kshitij@ti.com>
+ * Copyright (c) 2006-2008 Syed Mohammed Khasim <x0khasim@ti.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <version.h>
+
+.globl _start
+_start: b	reset
+	ldr	pc, _undefined_instruction
+	ldr	pc, _software_interrupt
+	ldr	pc, _prefetch_abort
+	ldr	pc, _data_abort
+	ldr	pc, _not_used
+	ldr	pc, _irq
+	ldr	pc, _fiq
+
+_undefined_instruction: .word undefined_instruction
+_software_interrupt:	.word software_interrupt
+_prefetch_abort:	.word prefetch_abort
+_data_abort:		.word data_abort
+_not_used:		.word not_used
+_irq:			.word irq
+_fiq:			.word fiq
+_pad:			.word 0x12345678 /* now 16*4=64 */
+.global _end_vect
+_end_vect:
+
+	.balignl 16,0xdeadbeef
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ * do important init only if we don't start from memory!
+ * setup Memory and board specific bits prior to relocation.
+ * relocate armboot to ram
+ * setup stack
+ *
+ *************************************************************************/
+
+_TEXT_BASE:
+	.word	TEXT_BASE
+
+.globl _armboot_start
+_armboot_start:
+	.word _start
+
+/*
+ * These are defined in the board-specific linker script.
+ */
+.globl _bss_start
+_bss_start:
+	.word __bss_start
+
+.globl _bss_end
+_bss_end:
+	.word _end
+
+#ifdef CONFIG_USE_IRQ
+/* IRQ stack memory (calculated at run-time) */
+.globl IRQ_STACK_START
+IRQ_STACK_START:
+	.word	0x0badc0de
+
+/* IRQ stack memory (calculated at run-time) */
+.globl FIQ_STACK_START
+FIQ_STACK_START:
+	.word 0x0badc0de
+#endif
+
+/*
+ * the actual reset code
+ */
+
+reset:
+	/*
+	 * set the cpu to SVC32 mode
+	 */
+	mrs	r0, cpsr
+	bic	r0, r0, #0x1f
+	orr	r0, r0, #0xd3
+	msr	cpsr,r0
+
+#if (CONFIG_OMAP34XX)
+	/* Copy vectors to mask ROM indirect addr */
+	adr	r0, _start		@ r0 <- current position of code
+	add	r0, r0, #4		@ skip reset vector
+	mov	r2, #64			@ r2 <- size to copy
+	add	r2, r0, r2		@ r2 <- source end address
+	mov	r1, #SRAM_OFFSET0	@ build vect addr
+	mov	r3, #SRAM_OFFSET1
+	add	r1, r1, r3
+	mov	r3, #SRAM_OFFSET2
+	add	r1, r1, r3
+next:
+	ldmia	r0!, {r3 - r10}		@ copy from source address [r0]
+	stmia	r1!, {r3 - r10}		@ copy to   target address [r1]
+	cmp	r0, r2			@ until source end address [r2]
+	bne	next			@ loop until equal */
+#if !defined(CONFIG_SYS_NAND_BOOT) && !defined(CONFIG_SYS_ONENAND_BOOT)
+	/* No need to copy/exec the clock code - DPLL adjust already done
+	 * in NAND/oneNAND Boot.
+	 */
+	bl	cpy_clk_code		@ put dpll adjust code behind vectors
+#endif /* NAND Boot */
+#endif
+	/* the mask ROM code should have PLL and others stable */
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	bl	cpu_init_crit
+#endif
+
+#ifndef CONFIG_SKIP_RELOCATE_UBOOT
+relocate:				@ relocate U-Boot to RAM
+	adr	r0, _start		@ r0 <- current position of code
+	ldr	r1, _TEXT_BASE		@ test if we run from flash or RAM
+	cmp	r0, r1			@ don't reloc during debug
+	beq	stack_setup
+
+	ldr	r2, _armboot_start
+	ldr	r3, _bss_start
+	sub	r2, r3, r2		@ r2 <- size of armboot
+	add	r2, r0, r2		@ r2 <- source end address
+
+copy_loop:				@ copy 32 bytes at a time
+	ldmia	r0!, {r3 - r10}		@ copy from source address [r0]
+	stmia	r1!, {r3 - r10}		@ copy to   target address [r1]
+	cmp	r0, r2			@ until source end addreee [r2]
+	ble	copy_loop
+#endif	/* CONFIG_SKIP_RELOCATE_UBOOT */
+
+	/* Set up the stack */
+stack_setup:
+	ldr	r0, _TEXT_BASE		@ upper 128 KiB: relocated uboot
+	sub	r0, r0, #CONFIG_SYS_MALLOC_LEN @ malloc area
+	sub	r0, r0, #CONFIG_SYS_GBL_DATA_SIZE @ bdinfo
+#ifdef CONFIG_USE_IRQ
+	sub	r0, r0, #(CONFIG_STACKSIZE_IRQ + CONFIG_STACKSIZE_FIQ)
+#endif
+	sub	sp, r0, #12		@ leave 3 words for abort-stack
+	and	sp, sp, #~7		@ 8 byte alinged for (ldr/str)d
+
+	/* Clear BSS (if any). Is below tx (watch load addr - need space) */
+clear_bss:
+	ldr	r0, _bss_start		@ find start of bss segment
+	ldr	r1, _bss_end		@ stop here
+	mov	r2, #0x00000000		@ clear value
+clbss_l:
+	str	r2, [r0]		@ clear BSS location
+	cmp	r0, r1			@ are we at the end yet
+	add	r0, r0, #4		@ increment clear index pointer
+	bne	clbss_l			@ keep clearing till at end
+
+	ldr	pc, _start_armboot	@ jump to C code
+
+_start_armboot: .word start_armboot
+
+
+/*************************************************************************
+ *
+ * CPU_init_critical registers
+ *
+ * setup important registers
+ * setup memory timing
+ *
+ *************************************************************************/
+cpu_init_crit:
+	/*
+	 * Invalidate L1 I/D
+	 */
+	mov	r0, #0			@ set up for MCR
+	mcr	p15, 0, r0, c8, c7, 0	@ invalidate TLBs
+	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
+
+	/*
+	 * disable MMU stuff and caches
+	 */
+	mrc	p15, 0, r0, c1, c0, 0
+	bic	r0, r0, #0x00002000	@ clear bits 13 (--V-)
+	bic	r0, r0, #0x00000007	@ clear bits 2:0 (-CAM)
+	orr	r0, r0, #0x00000002	@ set bit 1 (--A-) Align
+	orr	r0, r0, #0x00000800	@ set bit 12 (Z---) BTB
+	mcr	p15, 0, r0, c1, c0, 0
+
+	/*
+	 * Jump to board specific initialization...
+	 * The Mask ROM will have already initialized
+	 * basic memory. Go here to bump up clock rate and handle
+	 * wake up conditions.
+	 */
+	mov	ip, lr			@ persevere link reg across call
+	bl	lowlevel_init		@ go setup pll,mux,memory
+	mov	lr, ip			@ restore link
+	mov	pc, lr			@ back to my caller
+/*
+ *************************************************************************
+ *
+ * Interrupt handling
+ *
+ *************************************************************************
+ */
+@
+@ IRQ stack frame.
+@
+#define S_FRAME_SIZE	72
+
+#define S_OLD_R0	68
+#define S_PSR		64
+#define S_PC		60
+#define S_LR		56
+#define S_SP		52
+
+#define S_IP		48
+#define S_FP		44
+#define S_R10		40
+#define S_R9		36
+#define S_R8		32
+#define S_R7		28
+#define S_R6		24
+#define S_R5		20
+#define S_R4		16
+#define S_R3		12
+#define S_R2		8
+#define S_R1		4
+#define S_R0		0
+
+#define MODE_SVC 0x13
+#define I_BIT	 0x80
+
+/*
+ * use bad_save_user_regs for abort/prefetch/undef/swi ...
+ * use irq_save_user_regs / irq_restore_user_regs for IRQ/FIQ handling
+ */
+
+	.macro	bad_save_user_regs
+	sub	sp, sp, #S_FRAME_SIZE		@ carve out a frame on current
+						@ user stack
+	stmia	sp, {r0 - r12}			@ Save user registers (now in
+						@ svc mode) r0-r12
+
+	ldr	r2, _armboot_start
+	sub	r2, r2, #(CONFIG_SYS_MALLOC_LEN)
+	sub	r2, r2, #(CONFIG_SYS_GBL_DATA_SIZE + 8)	@ set base 2 words into abort
+						@ stack
+	ldmia	r2, {r2 - r3}			@ get values for "aborted" pc
+						@ and cpsr (into parm regs)
+	add	r0, sp, #S_FRAME_SIZE		@ grab pointer to old stack
+
+	add	r5, sp, #S_SP
+	mov	r1, lr
+	stmia	r5, {r0 - r3}			@ save sp_SVC, lr_SVC, pc, cpsr
+	mov	r0, sp				@ save current stack into r0
+						@ (param register)
+	.endm
+
+	.macro	irq_save_user_regs
+	sub	sp, sp, #S_FRAME_SIZE
+	stmia	sp, {r0 - r12}			@ Calling r0-r12
+	add	r8, sp, #S_PC			@ !! R8 NEEDS to be saved !!
+						@ a reserved stack spot would
+						@ be good.
+	stmdb	r8, {sp, lr}^			@ Calling SP, LR
+	str	lr, [r8, #0]			@ Save calling PC
+	mrs	r6, spsr
+	str	r6, [r8, #4]			@ Save CPSR
+	str	r0, [r8, #8]			@ Save OLD_R0
+	mov	r0, sp
+	.endm
+
+	.macro	irq_restore_user_regs
+	ldmia	sp, {r0 - lr}^			@ Calling r0 - lr
+	mov	r0, r0
+	ldr	lr, [sp, #S_PC]			@ Get PC
+	add	sp, sp, #S_FRAME_SIZE
+	subs	pc, lr, #4			@ return & move spsr_svc into
+						@ cpsr
+	.endm
+
+	.macro get_bad_stack
+	ldr	r13, _armboot_start		@ setup our mode stack (enter
+						@ in banked mode)
+	sub	r13, r13, #(CONFIG_SYS_MALLOC_LEN)	@ move past malloc pool
+	sub	r13, r13, #(CONFIG_SYS_GBL_DATA_SIZE + 8) @ move to reserved a couple
+						@ spots for abort stack
+
+	str	lr, [r13]			@ save caller lr in position 0
+						@ of saved stack
+	mrs	lr, spsr			@ get the spsr
+	str	lr, [r13, #4]			@ save spsr in position 1 of
+						@ saved stack
+
+	mov	r13, #MODE_SVC			@ prepare SVC-Mode
+	@ msr	spsr_c, r13
+	msr	spsr, r13			@ switch modes, make sure
+						@ moves will execute
+	mov	lr, pc				@ capture return pc
+	movs	pc, lr				@ jump to next instruction &
+						@ switch modes.
+	.endm
+
+	.macro get_bad_stack_swi
+	sub	r13, r13, #4			@ space on current stack for
+						@ scratch reg.
+	str	r0, [r13]			@ save R0's value.
+	ldr	r0, _armboot_start		@ get data regions start
+	sub	r0, r0, #(CONFIG_SYS_MALLOC_LEN)	@ move past malloc pool
+	sub	r0, r0, #(CONFIG_SYS_GBL_DATA_SIZE + 8)	@ move past gbl and a couple
+						@ spots for abort stack
+	str	lr, [r0]			@ save caller lr in position 0
+						@ of saved stack
+	mrs	r0, spsr			@ get the spsr
+	str	lr, [r0, #4]			@ save spsr in position 1 of
+						@ saved stack
+	ldr	r0, [r13]			@ restore r0
+	add	r13, r13, #4			@ pop stack entry
+	.endm
+
+	.macro get_irq_stack			@ setup IRQ stack
+	ldr	sp, IRQ_STACK_START
+	.endm
+
+	.macro get_fiq_stack			@ setup FIQ stack
+	ldr	sp, FIQ_STACK_START
+	.endm
+
+/*
+ * exception handlers
+ */
+	.align	5
+undefined_instruction:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_undefined_instruction
+
+	.align	5
+software_interrupt:
+	get_bad_stack_swi
+	bad_save_user_regs
+	bl	do_software_interrupt
+
+	.align	5
+prefetch_abort:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_prefetch_abort
+
+	.align	5
+data_abort:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_data_abort
+
+	.align	5
+not_used:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_not_used
+
+#ifdef CONFIG_USE_IRQ
+
+	.align	5
+irq:
+	get_irq_stack
+	irq_save_user_regs
+	bl	do_irq
+	irq_restore_user_regs
+
+	.align	5
+fiq:
+	get_fiq_stack
+	/* someone ought to write a more effective fiq_save_user_regs */
+	irq_save_user_regs
+	bl	do_fiq
+	irq_restore_user_regs
+
+#else
+
+	.align	5
+irq:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_irq
+
+	.align	5
+fiq:
+	get_bad_stack
+	bad_save_user_regs
+	bl	do_fiq
+
+#endif
+
+/*
+ *	v7_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r5, r7, r9-r11
+ *
+ *	- mm	- mm_struct describing address space
+ */
+	.align 5
+.global v7_flush_dcache_all
+v7_flush_dcache_all:
+	stmfd	r13!, {r0 - r5, r7, r9 - r12, r14}
+
+	mov	r7, r0				@ take a backup of device type
+	cmp	r0, #0x3			@ check if the device type is
+						@ GP
+	moveq r12, #0x1				@ set up to invalide L2
+smi:	.word 0x01600070			@ Call SMI monitor (smieq)
+	cmp	r7, #0x3			@ compare again in case its
+						@ lost
+	beq	finished_inval			@ if GP device, inval done
+						@ above
+
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	ands	r3, r0, #0x7000000		@ extract loc from clidr
+	mov	r3, r3, lsr #23			@ left align loc bit field
+	beq	finished_inval			@ if loc is 0, then no need to
+						@ clean
+	mov	r10, #0				@ start clean at cache level 0
+inval_loop1:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache
+						@ level
+	mov	r1, r0, lsr r2			@ extract cache type bits from
+						@ clidr
+	and	r1, r1, #7			@ mask of the bits for current
+						@ cache only
+	cmp	r1, #2				@ see what cache we have at
+						@ this level
+	blt	skip_inval			@ skip if no cache, or just
+						@ i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mov	r2, #0				@ operand for mcr SBZ
+	mcr	p15, 0, r2, c7, c5, 4		@ flush prefetch buffer to
+						@ sych the new cssr&csidr,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the
+						@ cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	ldr	r4, =0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the
+						@ way size
+	clz	r5, r4				@ find bit position of way
+						@ size increment
+	ldr	r7, =0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the
+						@ index size
+inval_loop2:
+	mov	r9, r4				@ create working copy of max
+						@ way size
+inval_loop3:
+	orr	r11, r10, r9, lsl r5		@ factor way and cache number
+						@ into r11
+	orr	r11, r11, r7, lsl r2		@ factor index number into r11
+	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
+	subs	r9, r9, #1			@ decrement the way
+	bge	inval_loop3
+	subs	r7, r7, #1			@ decrement the index
+	bge	inval_loop2
+skip_inval:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	inval_loop1
+finished_inval:
+	mov	r10, #0				@ swith back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mcr	p15, 0, r10, c7, c5, 4		@ flush prefetch buffer,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+
+	ldmfd	r13!, {r0 - r5, r7, r9 - r12, pc}
+
+
+	.align	5
+.global reset_cpu
+reset_cpu:
+	ldr	r1, rstctl			@ get addr for global reset
+						@ reg
+	mov	r3, #0x2			@ full reset pll + mpu
+	str	r3, [r1]			@ force reset
+	mov	r0, r0
+_loop_forever:
+	b	_loop_forever
+rstctl:
+	.word	PRM_RSTCTRL