arc: clean-up init procedure

The intention behind this work was to eliminate as much assembly-written
code as possible.

In the case of ARC we already have relocation fix-up implemented in C, so
why not also use C for copying U-Boot, zeroing .bss, etc.?

It turned out x86 uses a pretty similar approach, so we re-used parts of
the code in "board_f.c" initially implemented for x86.

Now assembly usage during init is limited to stack- and frame-pointer
setup before and after relocation.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Simon Glass <sjg@chromium.org>
diff --git a/arch/arc/include/asm/init_helpers.h b/arch/arc/include/asm/init_helpers.h
new file mode 100644
index 0000000..7607e19
--- /dev/null
+++ b/arch/arc/include/asm/init_helpers.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2013-2015 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARC_INIT_HELPERS_H
+#define _ASM_ARC_INIT_HELPERS_H
+
+int init_cache_f_r(void);
+
+#endif	/* _ASM_ARC_INIT_HELPERS_H */
diff --git a/arch/arc/include/asm/relocate.h b/arch/arc/include/asm/relocate.h
new file mode 100644
index 0000000..4c5f923
--- /dev/null
+++ b/arch/arc/include/asm/relocate.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2013-2015 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARC_RELOCATE_H
+#define _ASM_ARC_RELOCATE_H
+
+#include <common.h>
+
+int copy_uboot_to_ram(void);
+int clear_bss(void);
+int do_elf_reloc_fixups(void);
+
+#endif	/* _ASM_ARC_RELOCATE_H */
diff --git a/arch/arc/include/asm/u-boot-arc.h b/arch/arc/include/asm/u-boot-arc.h
index 0c0e8e6..a56ccf1 100644
--- a/arch/arc/include/asm/u-boot-arc.h
+++ b/arch/arc/include/asm/u-boot-arc.h
@@ -9,4 +9,7 @@
 
 int arch_early_init_r(void);
 
+void	board_init_f_r_trampoline(ulong) __attribute__ ((noreturn));
+void	board_init_f_r(void) __attribute__ ((noreturn));
+
 #endif	/* __ASM_ARC_U_BOOT_ARC_H__ */
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index b1f1fbe..b887904 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -20,6 +20,7 @@
 obj-y += timer.o
 obj-y += start.o
 obj-y += ints_low.o
+obj-y += init_helpers.o
 
 obj-$(CONFIG_CMD_BOOTM) += bootm.o
 
diff --git a/arch/arc/lib/cpu.c b/arch/arc/lib/cpu.c
index 50634b8..3c930bc 100644
--- a/arch/arc/lib/cpu.c
+++ b/arch/arc/lib/cpu.c
@@ -12,19 +12,6 @@
 
 int arch_cpu_init(void)
 {
-#ifdef CONFIG_SYS_ICACHE_OFF
-	icache_disable();
-#else
-	icache_enable();
-	invalidate_icache_all();
-#endif
-
-	flush_dcache_all();
-#ifdef CONFIG_SYS_DCACHE_OFF
-	dcache_disable();
-#else
-	dcache_enable();
-#endif
 	timer_init();
 
 /* In simulation (ISS) "CHIPID" and "ARCNUM" are all "ff" */
diff --git a/arch/arc/lib/init_helpers.c b/arch/arc/lib/init_helpers.c
new file mode 100644
index 0000000..25690ee
--- /dev/null
+++ b/arch/arc/lib/init_helpers.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013-2015 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+int init_cache_f_r(void)
+{
+#ifndef CONFIG_SYS_ICACHE_OFF
+	icache_enable();
+	/* Drop I-cache lines that may be stale from before it was disabled */
+	invalidate_icache_all();
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+	dcache_enable();
+	/* Drop D-cache lines that may be stale from before it was disabled */
+	invalidate_dcache_all();
+#endif
+	return 0;
+}
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
index 7797782..5c2c2d1 100644
--- a/arch/arc/lib/relocate.c
+++ b/arch/arc/lib/relocate.c
@@ -10,6 +10,25 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+int copy_uboot_to_ram(void)
+{
+	size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start;
+
+	/* memmove(): image may already sit at, or overlap, gd->relocaddr */
+	memmove((void *)gd->relocaddr, (void *)&__image_copy_start, len);
+	return 0;
+}
+
+int clear_bss(void)
+{
+	size_t bss_size = (size_t)&__bss_end - (size_t)&__bss_start;
+	void *bss_reloc = (void *)((ulong)&__bss_start + gd->reloc_off);
+
+	/* Zero .bss at its relocated address, not at the link-time one */
+	memset(bss_reloc, 0, bss_size);
+	return 0;
+}
+
 /*
  * Base functionality is taken from x86 version with added ARC-specifics
  */
diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S
index 3408f45..82045ae 100644
--- a/arch/arc/lib/start.S
+++ b/arch/arc/lib/start.S
@@ -13,17 +13,14 @@
 	/* Setup interrupt vector base that matches "__text_start" */
 	sr	__ivt_start, [ARC_AUX_INTR_VEC_BASE]
 
-	/* Setup stack pointer */
+	/* Setup stack- and frame-pointers */
 	mov	%sp, CONFIG_SYS_INIT_SP_ADDR
 	mov	%fp, %sp
 
-	/* Clear bss */
-	mov	%r0, __bss_start
-	mov	%r1, __bss_end
-
-clear_bss:
-	st.ab	0, [%r0, 4]
-	brlt	%r0, %r1, clear_bss
+	/* Unconditionally disable caches */
+	bl	flush_dcache_all
+	bl	dcache_disable
+	bl	icache_disable
 
 	/* Zero the one and only argument of "board_init_f" */
 	mov_s	%r0, 0
@@ -31,67 +28,24 @@
 ENDPROC(_start)
 
 /*
- * void relocate_code (addr_sp, gd, addr_moni)
+ * void board_init_f_r_trampoline(stack-pointer address)
  *
  * This "function" does not return, instead it continues in RAM
  * after relocating the monitor code.
  *
- * r0 = start_addr_sp
- * r1 = new__gd
- * r2 = relocaddr
+ * r0 = new stack-pointer
  */
-ENTRY(relocate_code)
-	/*
-	 * r0-r12 might be clobbered by C functions
-	 * so we use r13-r16 for storage here
-	 */
-	mov	%r13, %r0		/* save addr_sp */
-	mov	%r14, %r1		/* save addr of gd */
-	mov	%r15, %r2		/* save addr of destination */
-
-	mov	%r16, %r2		/* %r9 - relocation offset */
-	sub	%r16, %r16, __image_copy_start
-
-/* Set up the stack */
-stack_setup:
-	mov	%sp, %r13
+ENTRY(board_init_f_r_trampoline)
+	/* Set up the stack- and frame-pointers */
+	mov	%sp, %r0
 	mov	%fp, %sp
 
-/* Check if monitor is loaded right in place for relocation */
-	mov	%r0, __image_copy_start
-	cmp	%r0, %r15		/* skip relocation if code loaded */
-	bz	do_board_init_r		/* in target location already */
+	/* Update position of interrupt vector table */
+	lr	%r0, [ARC_AUX_INTR_VEC_BASE]
+	ld	%r1, [%r25, GD_RELOC_OFF]
+	add	%r0, %r0, %r1
+	sr	%r0, [ARC_AUX_INTR_VEC_BASE]
 
-/* Copy data (__image_copy_start - __image_copy_end) to new location */
-	mov	%r1, %r15
-	mov	%r2, __image_copy_end
-	sub	%r2, %r2, %r0		/* r3 <- amount of bytes to copy */
-	asr	%r2, %r2, 2		/* r3 <- amount of words to copy */
-	mov	%lp_count, %r2
-	lp	copy_end
-	ld.ab	%r2,[%r0,4]
-	st.ab	%r2,[%r1,4]
-copy_end:
-
-/* Fix relocations related issues */
-	bl	do_elf_reloc_fixups
-#ifndef CONFIG_SYS_ICACHE_OFF
-	bl	invalidate_icache_all
-#endif
-#ifndef CONFIG_SYS_DCACHE_OFF
-	bl	flush_dcache_all
-#endif
-
-/* Update position of intterupt vector table */
-	lr	%r0, [ARC_AUX_INTR_VEC_BASE]	/* Read current position */
-	add	%r0, %r0, %r16			/* Update address */
-	sr	%r0, [ARC_AUX_INTR_VEC_BASE]	/* Write new position */
-
-do_board_init_r:
-/* Prepare for exection of "board_init_r" in relocated monitor */
-	mov	%r2, board_init_r	/* old address of "board_init_r()" */
-	add	%r2, %r2, %r16		/* new address of "board_init_r()" */
-	mov	%r0, %r14		/* 1-st parameter: gd_t */
-	mov	%r1, %r15		/* 2-nd parameter: dest_addr */
-	j	[%r2]
-ENDPROC(relocate_code)
+	/* Re-enter U-Boot by calling board_init_f_r */
+	j	board_init_f_r
+ENDPROC(board_init_f_r_trampoline)