OMAP3 Move cache routine to cache.S

v7_flush_dcache_all, because it depends on omap ROM code is not
generic.  Rename the function to 'invalidate_dcache' and move it
to the omap cpu directory.

Collect the other omap cache routines l2_cache_enable and
l2_cache_disable with invalide_dcache into cache.S.  This
means removing the old cache.c file that contained l2_cache_enable
and l2_cache_disable.

The conversion from cache.c to cache.S was done most through
disassembling the uboot binary.  The only significant change was
to change the comparision for the return of get_cpu_rev from

   cmp	r0, #0
   beq	earlier_than_label

Which was lost information to

   cmp	r0, #CPU_3XX_ES20
   blt	earlier_than_label

The paths through the enable routine were verified by
adding an infinite loop and seeing the hang.  Then
removing the infinite loop and seeing it continue.

The disable routine is similar enough that it was not
tested with this method.

Run tested by cold booting from nand on beagle and zoom1.
Compile tested on MAKEALL arm.

Signed-off-by: Tom Rix <Tom.Rix@windriver.com>
diff --git a/cpu/arm_cortexa8/cpu.c b/cpu/arm_cortexa8/cpu.c
index 5a5981e..a01e0d6 100644
--- a/cpu/arm_cortexa8/cpu.c
+++ b/cpu/arm_cortexa8/cpu.c
@@ -64,7 +64,7 @@
 	/* turn off L2 cache */
 	l2_cache_disable();
 	/* invalidate L2 cache also */
-	v7_flush_dcache_all(get_device_type());
+	invalidate_dcache(get_device_type());
 #endif
 	i = 0;
 	/* mem barrier to sync up things */
diff --git a/cpu/arm_cortexa8/omap3/Makefile b/cpu/arm_cortexa8/omap3/Makefile
index eef165c..136b163 100644
--- a/cpu/arm_cortexa8/omap3/Makefile
+++ b/cpu/arm_cortexa8/omap3/Makefile
@@ -26,10 +26,10 @@
 LIB	=  $(obj)lib$(SOC).a
 
 SOBJS	:= lowlevel_init.o
+SOBJS	+= cache.o
 SOBJS	+= reset.o
 
 COBJS	+= board.o
-COBJS	+= cache.o
 COBJS	+= clock.o
 COBJS	+= gpio.o
 COBJS	+= mem.o
diff --git a/cpu/arm_cortexa8/omap3/board.c b/cpu/arm_cortexa8/omap3/board.c
index b8bd052..dd2c940 100644
--- a/cpu/arm_cortexa8/omap3/board.c
+++ b/cpu/arm_cortexa8/omap3/board.c
@@ -201,7 +201,7 @@
 	 * Right now flushing at low MPU speed.
 	 * Need to move after clock init
 	 */
-	v7_flush_dcache_all(get_device_type());
+	invalidate_dcache(get_device_type());
 #ifndef CONFIG_ICACHE_OFF
 	icache_enable();
 #endif
diff --git a/cpu/arm_cortexa8/omap3/cache.S b/cpu/arm_cortexa8/omap3/cache.S
new file mode 100644
index 0000000..0f63815
--- /dev/null
+++ b/cpu/arm_cortexa8/omap3/cache.S
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2009 Wind River Systems, Inc.
+ * Tom Rix <Tom.Rix@windriver.com>
+ *
+ * This file is based on and replaces the existing cache.c file
+ * The copyrights for the cache.c file are:
+ *
+ * (C) Copyright 2008 Texas Insturments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <asm/arch/omap3.h>
+
+/*
+ * omap3 cache code
+ */
+
+.align 5
+.global invalidate_dcache
+.global l2_cache_enable
+.global l2_cache_disable
+
+/*
+ *	invalidate_dcache()
+ *
+ *	Invalidate the whole D-cache.
+ *
+ *	Corrupted registers: r0-r5, r7, r9-r11
+ *
+ *	- mm	- mm_struct describing address space
+ */
+invalidate_dcache:
+	stmfd	r13!, {r0 - r5, r7, r9 - r12, r14}
+
+	mov	r7, r0				@ take a backup of device type
+	cmp	r0, #0x3			@ check if the device type is
+						@ GP
+	moveq r12, #0x1				@ set up to invalide L2
+smi:	.word 0x01600070			@ Call SMI monitor (smieq)
+	cmp	r7, #0x3			@ compare again in case its
+						@ lost
+	beq	finished_inval			@ if GP device, inval done
+						@ above
+
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	ands	r3, r0, #0x7000000		@ extract loc from clidr
+	mov	r3, r3, lsr #23			@ left align loc bit field
+	beq	finished_inval			@ if loc is 0, then no need to
+						@ clean
+	mov	r10, #0				@ start clean at cache level 0
+inval_loop1:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache
+						@ level
+	mov	r1, r0, lsr r2			@ extract cache type bits from
+						@ clidr
+	and	r1, r1, #7			@ mask of the bits for current
+						@ cache only
+	cmp	r1, #2				@ see what cache we have at
+						@ this level
+	blt	skip_inval			@ skip if no cache, or just
+						@ i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mov	r2, #0				@ operand for mcr SBZ
+	mcr	p15, 0, r2, c7, c5, 4		@ flush prefetch buffer to
+						@ sych the new cssr&csidr,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the
+						@ cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	ldr	r4, =0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the
+						@ way size
+	clz	r5, r4				@ find bit position of way
+						@ size increment
+	ldr	r7, =0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the
+						@ index size
+inval_loop2:
+	mov	r9, r4				@ create working copy of max
+						@ way size
+inval_loop3:
+	orr	r11, r10, r9, lsl r5		@ factor way and cache number
+						@ into r11
+	orr	r11, r11, r7, lsl r2		@ factor index number into r11
+	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
+	subs	r9, r9, #1			@ decrement the way
+	bge	inval_loop3
+	subs	r7, r7, #1			@ decrement the index
+	bge	inval_loop2
+skip_inval:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	inval_loop1
+finished_inval:
+	mov	r10, #0				@ swith back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
+						@ in cssr
+	mcr	p15, 0, r10, c7, c5, 4		@ flush prefetch buffer,
+						@ with armv7 this is 'isb',
+						@ but we compile with armv5
+
+	ldmfd	r13!, {r0 - r5, r7, r9 - r12, pc}
+
+
+l2_cache_enable:
+	push	{r0, r1, r2, lr}
+	@ ES2 onwards we can disable/enable L2 ourselves
+	bl	get_cpu_rev
+	cmp	r0, #CPU_3XX_ES20
+	blt	l2_cache_disable_EARLIER_THAN_ES2
+	mrc	15, 0, r3, cr1, cr0, 1
+	orr	r3, r3, #2
+	mcr	15, 0, r3, cr1, cr0, 1
+	b	l2_cache_enable_END
+l2_cache_enable_EARLIER_THAN_ES2:
+	@ Save r0, r12 and restore them after usage
+	mov	r3, ip
+	str	r3, [sp, #4]
+	mov	r3, r0
+	@
+	@ GP Device ROM code API usage here
+	@ r12 = AUXCR Write function and r0 value
+	@
+	mov	ip, #3
+	mrc	15, 0, r0, cr1, cr0, 1
+	orr	r0, r0, #2
+	@ SMI instruction to call ROM Code API
+	.word	0xe1600070
+	mov	r0, r3
+	mov	ip, r3
+	str	r3, [sp, #4]
+l2_cache_enable_END:
+	pop	{r1, r2, r3, pc}
+
+
+l2_cache_disable:
+	push	{r0, r1, r2, lr}
+	@ ES2 onwards we can disable/enable L2 ourselves
+	bl	get_cpu_rev
+	cmp	r0, #CPU_3XX_ES20
+	blt	l2_cache_disable_EARLIER_THAN_ES2
+	mrc	15, 0, r3, cr1, cr0, 1
+	bic	r3, r3, #2
+	mcr	15, 0, r3, cr1, cr0, 1
+	b	l2_cache_disable_END
+l2_cache_disable_EARLIER_THAN_ES2:
+	@ Save r0, r12 and restore them after usage
+	mov	r3, ip
+	str	r3, [sp, #4]
+	mov	r3, r0
+	@
+	@ GP Device ROM code API usage here
+	@ r12 = AUXCR Write function and r0 value
+	@
+	mov	ip, #3
+	mrc	15, 0, r0, cr1, cr0, 1
+	bic	r0, r0, #2
+	@ SMI instruction to call ROM Code API
+	.word	0xe1600070
+	mov	r0, r3
+	mov	ip, r3
+	str	r3, [sp, #4]
+l2_cache_disable_END:
+	pop	{r1, r2, r3, pc}
diff --git a/cpu/arm_cortexa8/omap3/cache.c b/cpu/arm_cortexa8/omap3/cache.c
deleted file mode 100644
index 0d5b444..0000000
--- a/cpu/arm_cortexa8/omap3/cache.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * (C) Copyright 2008 Texas Insturments
- *
- * (C) Copyright 2002
- * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
- * Marius Groeger <mgroeger@sysgo.de>
- *
- * (C) Copyright 2002
- * Gary Jennejohn, DENX Software Engineering, <gj@denx.de>
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- */
-
-/*
- * omap3 L2 cache code
- */
-
-#include <common.h>
-#include <asm/arch/sys_proto.h>
-#include <asm/cache.h>
-
-void l2_cache_enable(void)
-{
-	unsigned long i;
-	volatile unsigned int j;
-
-	/* ES2 onwards we can disable/enable L2 ourselves */
-	if (get_cpu_rev() >= CPU_3XX_ES20) {
-		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
-		__asm__ __volatile__("orr %0, %0, #0x2":"=r"(i));
-		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
-	} else {
-		/* Save r0, r12 and restore them after usage */
-		__asm__ __volatile__("mov %0, r12":"=r"(j));
-		__asm__ __volatile__("mov %0, r0":"=r"(i));
-
-		/*
-		 * GP Device ROM code API usage here
-		 * r12 = AUXCR Write function and r0 value
-		 */
-		__asm__ __volatile__("mov r12, #0x3");
-		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
-		__asm__ __volatile__("orr r0, r0, #0x2");
-		/* SMI instruction to call ROM Code API */
-		__asm__ __volatile__(".word 0xE1600070");
-		__asm__ __volatile__("mov r0, %0":"=r"(i));
-		__asm__ __volatile__("mov r12, %0":"=r"(j));
-	}
-
-}
-
-void l2_cache_disable(void)
-{
-	unsigned long i;
-	volatile unsigned int j;
-
-	/* ES2 onwards we can disable/enable L2 ourselves */
-	if (get_cpu_rev() >= CPU_3XX_ES20) {
-		__asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i));
-		__asm__ __volatile__("bic %0, %0, #0x2":"=r"(i));
-		__asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i));
-	} else {
-		/* Save r0, r12 and restore them after usage */
-		__asm__ __volatile__("mov %0, r12":"=r"(j));
-		__asm__ __volatile__("mov %0, r0":"=r"(i));
-
-		/*
-		 * GP Device ROM code API usage here
-		 * r12 = AUXCR Write function and r0 value
-		 */
-		__asm__ __volatile__("mov r12, #0x3");
-		__asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1");
-		__asm__ __volatile__("bic r0, r0, #0x2");
-		/* SMI instruction to call ROM Code API */
-		__asm__ __volatile__(".word 0xE1600070");
-		__asm__ __volatile__("mov r0, %0":"=r"(i));
-		__asm__ __volatile__("mov r12, %0":"=r"(j));
-	}
-}
diff --git a/cpu/arm_cortexa8/start.S b/cpu/arm_cortexa8/start.S
index 6bd6552..14a9bd3 100644
--- a/cpu/arm_cortexa8/start.S
+++ b/cpu/arm_cortexa8/start.S
@@ -415,88 +415,3 @@
 
 #endif
 
-/*
- *	v7_flush_dcache_all()
- *
- *	Flush the whole D-cache.
- *
- *	Corrupted registers: r0-r5, r7, r9-r11
- *
- *	- mm	- mm_struct describing address space
- */
-	.align 5
-.global v7_flush_dcache_all
-v7_flush_dcache_all:
-	stmfd	r13!, {r0 - r5, r7, r9 - r12, r14}
-
-	mov	r7, r0				@ take a backup of device type
-	cmp	r0, #0x3			@ check if the device type is
-						@ GP
-	moveq r12, #0x1				@ set up to invalide L2
-smi:	.word 0x01600070			@ Call SMI monitor (smieq)
-	cmp	r7, #0x3			@ compare again in case its
-						@ lost
-	beq	finished_inval			@ if GP device, inval done
-						@ above
-
-	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
-	beq	finished_inval			@ if loc is 0, then no need to
-						@ clean
-	mov	r10, #0				@ start clean at cache level 0
-inval_loop1:
-	add	r2, r10, r10, lsr #1		@ work out 3x current cache
-						@ level
-	mov	r1, r0, lsr r2			@ extract cache type bits from
-						@ clidr
-	and	r1, r1, #7			@ mask of the bits for current
-						@ cache only
-	cmp	r1, #2				@ see what cache we have at
-						@ this level
-	blt	skip_inval			@ skip if no cache, or just
-						@ i-cache
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
-						@ in cssr
-	mov	r2, #0				@ operand for mcr SBZ
-	mcr	p15, 0, r2, c7, c5, 4		@ flush prefetch buffer to
-						@ sych the new cssr&csidr,
-						@ with armv7 this is 'isb',
-						@ but we compile with armv5
-	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
-	and	r2, r1, #7			@ extract the length of the
-						@ cache lines
-	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
-	ands	r4, r4, r1, lsr #3		@ find maximum number on the
-						@ way size
-	clz	r5, r4				@ find bit position of way
-						@ size increment
-	ldr	r7, =0x7fff
-	ands	r7, r7, r1, lsr #13		@ extract max number of the
-						@ index size
-inval_loop2:
-	mov	r9, r4				@ create working copy of max
-						@ way size
-inval_loop3:
-	orr	r11, r10, r9, lsl r5		@ factor way and cache number
-						@ into r11
-	orr	r11, r11, r7, lsl r2		@ factor index number into r11
-	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
-	subs	r9, r9, #1			@ decrement the way
-	bge	inval_loop3
-	subs	r7, r7, #1			@ decrement the index
-	bge	inval_loop2
-skip_inval:
-	add	r10, r10, #2			@ increment cache number
-	cmp	r3, r10
-	bgt	inval_loop1
-finished_inval:
-	mov	r10, #0				@ swith back to cache level 0
-	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level
-						@ in cssr
-	mcr	p15, 0, r10, c7, c5, 4		@ flush prefetch buffer,
-						@ with armv7 this is 'isb',
-						@ but we compile with armv5
-
-	ldmfd	r13!, {r0 - r5, r7, r9 - r12, pc}
diff --git a/include/asm-arm/arch-omap3/omap3.h b/include/asm-arm/arch-omap3/omap3.h
index 6459d99..12815f6 100644
--- a/include/asm-arm/arch-omap3/omap3.h
+++ b/include/asm-arm/arch-omap3/omap3.h
@@ -168,6 +168,8 @@
  *  ES1     = rev 0
  *
  *  ES2 onwards, the value maps to contents of IDCODE register [31:28].
+ *
+ * Note : CPU_3XX_ES20 is used in cache.S.  Please review before changing.
  */
 #define CPU_3XX_ES10		0
 #define CPU_3XX_ES20		1
diff --git a/include/asm-arm/arch-omap3/sys_proto.h b/include/asm-arm/arch-omap3/sys_proto.h
index 7361d08..2246f80 100644
--- a/include/asm-arm/arch-omap3/sys_proto.h
+++ b/include/asm-arm/arch-omap3/sys_proto.h
@@ -55,7 +55,7 @@
 void setup_auxcr(void);
 void try_unlock_memory(void);
 u32 get_boot_type(void);
-void v7_flush_dcache_all(u32);
+void invalidate_dcache(u32);
 void sr32(void *, u32, u32, u32);
 u32 wait_on_value(u32, u32, void *, u32);
 void sdelay(unsigned long);