Blackfin: put memory into self-refresh before/after programming clocks

When initializing the core clocks, stick external memory into self-refresh.
This gains us a few cool things:
 - support suspend-to-RAM with Linux
 - reprogram clocks automatically when doing "go" on u-boot.bin in RAM
 - make sure settings are stable before flashing new version
 - finally fully unify the initialization/startup code path between LDR and non-LDR boots

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
diff --git a/cpu/blackfin/initcode.c b/cpu/blackfin/initcode.c
index bbc2fa5..7bd4b22 100644
--- a/cpu/blackfin/initcode.c
+++ b/cpu/blackfin/initcode.c
@@ -12,6 +12,7 @@
 #include <config.h>
 #include <asm/blackfin.h>
 #include <asm/mach-common/bits/bootrom.h>
+#include <asm/mach-common/bits/core.h>
 #include <asm/mach-common/bits/ebiu.h>
 #include <asm/mach-common/bits/pll.h>
 #include <asm/mach-common/bits/uart.h>
@@ -257,6 +258,8 @@
 		divB = serial_early_get_div();
 	}
 
+	serial_putc('A');
+
 #ifdef CONFIG_HW_WATCHDOG
 # ifndef CONFIG_HW_WATCHDOG_TIMEOUT_INITCODE
 #  define CONFIG_HW_WATCHDOG_TIMEOUT_INITCODE 20000
@@ -273,7 +276,23 @@
 	}
 #endif
 
-	serial_putc('S');
+	serial_putc('B');
+
+	/* If external memory is enabled, put it into self refresh first. */
+	bool put_into_srfs = false;
+#ifdef EBIU_RSTCTL
+	if (bfin_read_EBIU_RSTCTL() & DDR_SRESET) {
+		bfin_write_EBIU_RSTCTL(bfin_read_EBIU_RSTCTL() | SRREQ);
+		put_into_srfs = true;
+	}
+#else
+	if (bfin_read_EBIU_SDBCTL() & EBE) {
+		bfin_write_EBIU_SDGCTL(bfin_read_EBIU_SDGCTL() | SRFS);
+		put_into_srfs = true;
+	}
+#endif
+
+	serial_putc('C');
 
 	/* Blackfin bootroms use the SPI slow read opcode instead of the SPI
 	 * fast read, so we need to slow down the SPI clock a lot more during
@@ -286,29 +305,26 @@
 		bfin_write_SPI_BAUD(CONFIG_SPI_BAUD_INITBLOCK);
 	}
 
-	serial_putc('B');
+	serial_putc('D');
 
-	/* Disable all peripheral wakeups except for the PLL event. */
-#ifdef SIC_IWR0
-	bfin_write_SIC_IWR0(1);
-	bfin_write_SIC_IWR1(0);
-# ifdef SIC_IWR2
-	bfin_write_SIC_IWR2(0);
-# endif
-#elif defined(SICA_IWR0)
-	bfin_write_SICA_IWR0(1);
-	bfin_write_SICA_IWR1(0);
+	/* If we're entering self refresh, make sure it has happened. */
+	if (put_into_srfs)
+#ifdef EBIU_RSTCTL
+		while (!(bfin_read_EBIU_RSTCTL() & SRACK))
 #else
-	bfin_write_SIC_IWR(1);
+		while (!(bfin_read_EBIU_SDSTAT() & SDSRA))
 #endif
+			continue;
+
+	serial_putc('E');
 
 	/* With newer bootroms, we use the helper function to set up
 	 * the memory controller.  Older bootroms lacks such helpers
 	 * so we do it ourselves.
 	 */
-#define BOOTROM_CAPS_SYSCONTROL 0
-	if (BOOTROM_CAPS_SYSCONTROL) {
-		serial_putc('S');
+	uint16_t vr_ctl = bfin_read_VR_CTL();
+	if (!ANOMALY_05000386) {
+		serial_putc('F');
 
 		ADI_SYSCTRL_VALUES memory_settings;
 		uint32_t actions = SYSCTRL_WRITE | SYSCTRL_PLLCTL | SYSCTRL_PLLDIV | SYSCTRL_LOCKCNT;
@@ -332,22 +348,38 @@
 		bfin_write_SIC_IWR1(-1);
 #endif
 	} else {
-		serial_putc('L');
+		serial_putc('G');
+
+		/* Disable all peripheral wakeups except for the PLL event. */
+#ifdef SIC_IWR0
+		bfin_write_SIC_IWR0(1);
+		bfin_write_SIC_IWR1(0);
+# ifdef SIC_IWR2
+		bfin_write_SIC_IWR2(0);
+# endif
+#elif defined(SICA_IWR0)
+		bfin_write_SICA_IWR0(1);
+		bfin_write_SICA_IWR1(0);
+#else
+		bfin_write_SIC_IWR(1);
+#endif
+
+		serial_putc('H');
 
 		bfin_write_PLL_LOCKCNT(CONFIG_PLL_LOCKCNT_VAL);
 
-		serial_putc('A');
+		serial_putc('I');
 
 		/* Only reprogram when needed to avoid triggering unnecessary
 		 * PLL relock sequences.
 		 */
-		if (bfin_read_VR_CTL() != CONFIG_VR_CTL_VAL) {
+		if (vr_ctl != CONFIG_VR_CTL_VAL) {
 			serial_putc('!');
 			bfin_write_VR_CTL(CONFIG_VR_CTL_VAL);
 			asm("idle;");
 		}
 
-		serial_putc('C');
+		serial_putc('J');
 
 		bfin_write_PLL_DIV(CONFIG_PLL_DIV_VAL);
 
@@ -361,8 +393,26 @@
 			bfin_write_PLL_CTL(CONFIG_PLL_CTL_VAL);
 			asm("idle;");
 		}
+
+		serial_putc('L');
+
+		/* Restore all peripheral wakeups. */
+#ifdef SIC_IWR0
+		bfin_write_SIC_IWR0(-1);
+		bfin_write_SIC_IWR1(-1);
+# ifdef SIC_IWR2
+		bfin_write_SIC_IWR2(-1);
+# endif
+#elif defined(SICA_IWR0)
+		bfin_write_SICA_IWR0(-1);
+		bfin_write_SICA_IWR1(-1);
+#else
+		bfin_write_SIC_IWR(-1);
+#endif
 	}
 
+	serial_putc('M');
+
 	/* Since we've changed the SCLK above, we may need to update
 	 * the UART divisors (UART baud rates are based on SCLK).
 	 * Do the division by hand as there are no native instructions
@@ -380,7 +430,80 @@
 		serial_early_put_div(quotient - ANOMALY_05000230);
 	}
 
-	serial_putc('F');
+	serial_putc('N');
+
+	/* Program the external memory controller before we come out of
+	 * self-refresh.  This only works with our SDRAM controller.
+	 */
+#ifndef EBIU_RSTCTL
+	bfin_write_EBIU_SDRRC(CONFIG_EBIU_SDRRC_VAL);
+	bfin_write_EBIU_SDBCTL(CONFIG_EBIU_SDBCTL_VAL);
+	bfin_write_EBIU_SDGCTL(CONFIG_EBIU_SDGCTL_VAL);
+#endif
+
+	serial_putc('O');
+
+	/* Now that we've reprogrammed, take things out of self refresh. */
+	if (put_into_srfs)
+#ifdef EBIU_RSTCTL
+		bfin_write_EBIU_RSTCTL(bfin_read_EBIU_RSTCTL() & ~(SRREQ));
+#else
+		bfin_write_EBIU_SDGCTL(bfin_read_EBIU_SDGCTL() & ~(SRFS));
+#endif
+
+	serial_putc('P');
+
+	/* Our DDR controller sucks and cannot be programmed while in
+	 * self-refresh.  So we have to pull it out before programming.
+	 */
+#ifdef EBIU_RSTCTL
+	bfin_write_EBIU_RSTCTL(bfin_read_EBIU_RSTCTL() | 0x1 /*DDRSRESET*/ | CONFIG_EBIU_RSTCTL_VAL);
+	bfin_write_EBIU_DDRCTL0(CONFIG_EBIU_DDRCTL0_VAL);
+	bfin_write_EBIU_DDRCTL1(CONFIG_EBIU_DDRCTL1_VAL);
+	bfin_write_EBIU_DDRCTL2(CONFIG_EBIU_DDRCTL2_VAL);
+# ifdef CONFIG_EBIU_DDRCTL3_VAL
+	/* default is disable, so don't need to force this */
+	bfin_write_EBIU_DDRCTL3(CONFIG_EBIU_DDRCTL3_VAL);
+# endif
+# ifdef CONFIG_EBIU_DDRQUE_VAL
+	bfin_write_EBIU_DDRQUE(bfin_read_EBIU_DDRQUE() | CONFIG_EBIU_DDRQUE_VAL);
+# endif
+#endif
+
+	serial_putc('Q');
+
+	/* Are we coming out of hibernate (suspend to memory) ?
+	 * The memory layout is:
+	 * 0x0: hibernate magic for anomaly 307 (0xDEADBEEF)
+	 * 0x4: return address
+	 * 0x8: stack pointer
+	 *
+	 * SCKELOW is unreliable on older parts (anomaly 307)
+	 */
+	if (ANOMALY_05000307 || vr_ctl & 0x8000) {
+		uint32_t *hibernate_magic = 0;
+		__builtin_bfin_ssync(); /* make sure memory controller is done */
+		if (hibernate_magic[0] == 0xDEADBEEF) {
+			serial_putc('R');
+			bfin_write_EVT15(hibernate_magic[1]);
+			bfin_write_IMASK(EVT_IVG15);
+			__asm__ __volatile__ (
+				/* load reti early to avoid anomaly 281 */
+				"reti = %0;"
+				/* clear hibernate magic */
+				"[%0] = %1;"
+				/* load stack pointer */
+				"SP = [%0 + 8];"
+				/* lower ourselves from reset ivg to ivg15 */
+				"raise 15;"
+				"rti;"
+				:
+				: "p"(hibernate_magic), "d"(0x2000 /* jump.s 0 */)
+			);
+		}
+	}
+
+	serial_putc('S');
 
 	/* Program the async banks controller. */
 	bfin_write_EBIU_AMBCTL0(CONFIG_EBIU_AMBCTL0_VAL);
@@ -394,39 +517,7 @@
 	bfin_write_EBIU_FCTL(CONFIG_EBIU_FCTL_VAL);
 #endif
 
-	serial_putc('I');
-
-	/* Program the external memory controller. */
-#ifdef EBIU_RSTCTL
-	bfin_write_EBIU_RSTCTL(bfin_read_EBIU_RSTCTL() | 0x1 /*DDRSRESET*/ | CONFIG_EBIU_RSTCTL_VAL);
-	bfin_write_EBIU_DDRCTL0(CONFIG_EBIU_DDRCTL0_VAL);
-	bfin_write_EBIU_DDRCTL1(CONFIG_EBIU_DDRCTL1_VAL);
-	bfin_write_EBIU_DDRCTL2(CONFIG_EBIU_DDRCTL2_VAL);
-# ifdef CONFIG_EBIU_DDRCTL3_VAL
-	/* default is disable, so don't need to force this */
-	bfin_write_EBIU_DDRCTL3(CONFIG_EBIU_DDRCTL3_VAL);
-# endif
-#else
-	bfin_write_EBIU_SDRRC(CONFIG_EBIU_SDRRC_VAL);
-	bfin_write_EBIU_SDBCTL(CONFIG_EBIU_SDBCTL_VAL);
-	bfin_write_EBIU_SDGCTL(CONFIG_EBIU_SDGCTL_VAL);
-#endif
-
-	serial_putc('N');
-
-	/* Restore all peripheral wakeups. */
-#ifdef SIC_IWR0
-	bfin_write_SIC_IWR0(-1);
-	bfin_write_SIC_IWR1(-1);
-# ifdef SIC_IWR2
-	bfin_write_SIC_IWR2(-1);
-# endif
-#elif defined(SICA_IWR0)
-	bfin_write_SICA_IWR0(-1);
-	bfin_write_SICA_IWR1(-1);
-#else
-	bfin_write_SIC_IWR(-1);
-#endif
+	serial_putc('T');
 
 	/* tell the bootrom where our entry point is */
 	if (CONFIG_BFIN_BOOT_MODE != BFIN_BOOT_BYPASS)
diff --git a/cpu/blackfin/start.S b/cpu/blackfin/start.S
index 6c8def4..506fea5 100644
--- a/cpu/blackfin/start.S
+++ b/cpu/blackfin/start.S
@@ -95,35 +95,63 @@
 	/* Save RETX so we can pass it while booting Linux */
 	r7 = RETX;
 
-#if (CONFIG_BFIN_BOOT_MODE == BFIN_BOOT_BYPASS)
-	/* In bypass mode, we don't have an LDR with an init block
-	 * so we need to explicitly call it ourselves.  This will
-	 * reprogram our clocks and setup our async banks.
+	/* Figure out where we are currently executing so that we can decide
+	 * how to best reprogram and relocate things.  We'll pass below:
+	 *  R4: load address of _start
+	 *  R5: current (not load) address of _start
 	 */
-	/* XXX: we should DMA this into L1, put external memory into
-	 *      self refresh, and then jump there ...
-	 */
+	serial_early_puts("Find ourselves");
+
 	call _get_pc;
-	r3 = 0x0;
-	r3.h = 0x2000;
-	cc = r0 < r3 (iu);
-	if cc jump .Lproc_initialized;
-
-	serial_early_puts("Program Clocks");
-
-	call _initcode;
-
-	/* Since we reprogrammed SCLK, we need to update the serial divisor */
-	serial_early_set_baud
-
-.Lproc_initialized:
-#endif
+.Loffset:
+	r1.l = .Loffset;
+	r1.h = .Loffset;
+	r4.l = _start;
+	r4.h = _start;
+	r3 = r1 - r4;
+	r5 = r0 - r3;
 
 	/* Inform upper layers if we had to do the relocation ourselves.
 	 * This allows us to detect whether we were loaded by 'go 0x1000'
-	 * or by the bootrom from an LDR.  "r6" is "loaded_from_ldr".
+	 * or by the bootrom from an LDR.  "R6" is "loaded_from_ldr".
 	 */
 	r6 = 1 (x);
+	cc = r4 == r5;
+	if cc jump .Lnorelocate;
+	r6 = 0 (x);
+
+	/* In bypass mode, we don't have an LDR with an init block
+	 * so we need to explicitly call it ourselves.  This will
+	 * reprogram our clocks, memory, and setup our async banks.
+	 */
+	serial_early_puts("Program Clocks");
+
+	/* if we're executing >=0x20000000, then we don't need to dma */
+	r3 = 0x0;
+	r3.h = 0x2000;
+	cc = r5 < r3 (iu);
+	if cc jump .Ldma_and_reprogram;
+	call _initcode;
+	jump .Lprogrammed;
+
+	/* we're sitting in external memory, so dma into L1 and reprogram */
+.Ldma_and_reprogram:
+	r0.l = LO(L1_INST_SRAM);
+	r0.h = HI(L1_INST_SRAM);
+	r1.l = __initcode_start;
+	r1.h = __initcode_start;
+	r2.l = __initcode_end;
+	r2.h = __initcode_end;
+	r2 = r2 - r1;	/* convert r2 into length of initcode */
+	r1 = r1 - r4;	/* convert r1 from load address of initcode ... */
+	r1 = r1 + r5;	/* ... to current (not load) address of initcode */
+	p3 = r0;
+	call _dma_memcpy_nocache;
+	call (p3);
+
+	/* Since we reprogrammed SCLK, we need to update the serial divisor */
+.Lprogrammed:
+	serial_early_set_baud
 
 	/* Relocate from wherever we are (FLASH/RAM/etc...) to the hardcoded
 	 * monitor location in the end of RAM.  We know that memcpy() only
@@ -132,19 +160,8 @@
 	 * it yet (see "lower to 15" below).
 	 */
 	serial_early_puts("Relocate");
-	call _get_pc;
-.Loffset:
-	r2.l = .Loffset;
-	r2.h = .Loffset;
-	r3.l = _start;
-	r3.h = _start;
-	r2 = r2 - r3;
-	r1 = r0 - r2;
-	cc = r1 == r3;
-	if cc jump .Lnorelocate;
-	r6 = 0 (x);
-
-	r0 = r3;
+	r0 = r4;
+	r1 = r5;
 	r2.l = LO(CONFIG_SYS_MONITOR_LEN);
 	r2.h = HI(CONFIG_SYS_MONITOR_LEN);
 	call _memcpy_ASM;