mpc83xx: Fix the incorrect dcbz operation

The 834x rev1.x silicon has one CPU5 errata.

The issue is when the data cache locked with
HID0[DLOCK], the dcbz instruction looks like no-op inst.

The right behavior of the data cache is when the data cache
Locked with HID0[DLOCK], the dcbz instruction allocates
new tags in cache.

The 834x rev3.0 and later and 8360 have not this bug inside.

So, when 834x rev3.0/8360 are working with ECC, the dcbz
instruction will corrupt the stack in cache, the processor will
checkstop reset.

However, the 834x rev1.x can work with ECC with these code,
because the sillicon has this cache bug. The dcbz will not
corrupt the stack in cache.
Really, it is the fault code running on fault sillicon.

This patch fix the incorrect dcbz operation. Instead of
CPU FP writing to initialise the ECC.

CHANGELOG:
* Fix the incorrect dcbz operation instead of CPU FP
writing to initialise the ECC memory. Otherwise, it
will corrupt the stack in cache, The processor will checkstop
reset.

Signed-off-by: Dave Liu <daveliu@freescale.com>
diff --git a/cpu/mpc83xx/spd_sdram.c b/cpu/mpc83xx/spd_sdram.c
index acc8c9e..b91c613 100644
--- a/cpu/mpc83xx/spd_sdram.c
+++ b/cpu/mpc83xx/spd_sdram.c
@@ -562,54 +562,38 @@
 /* #define CONFIG_DDR_ECC_INIT_VIA_DMA */
 void ddr_enable_ecc(unsigned int dram_size)
 {
-	uint *p;
 	volatile immap_t *immap = (immap_t *)CFG_IMMRBAR;
 	volatile ddr83xx_t *ddr= &immap->ddr;
 	unsigned long t_start, t_end;
+	register u64 *p;
+	register uint size;
+	unsigned int pattern[2];
 #if defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
 	uint i;
 #endif
-
-	debug("Initialize a Cachline in DRAM\n");
 	icache_enable();
-
-#if defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
-	/* Initialise DMA for direct Transfers */
-	dma_init();
-#endif
-
 	t_start = get_tbms();
+	pattern[0] = 0xdeadbeef;
+	pattern[1] = 0xdeadbeef;
 
 #if !defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
-	debug("DDR init: Cache flush method\n");
-	for (p = 0; p < (uint *)(dram_size); p++) {
-		if (((unsigned int)p & 0x1f) == 0) {
-			ppcDcbz((unsigned long) p);
-		}
-
-		/* write pattern to cache and flush */
-		*p = (unsigned int)0xdeadbeef;
-
-		if (((unsigned int)p & 0x1c) == 0x1c) {
-			ppcDcbf((unsigned long) p);
-		}
+	debug("ddr init: CPU FP write method\n");
+	size = dram_size;
+	for (p = 0; p < (u64*)(size); p++) {
+		ppcDWstore((u32*)p, pattern);
 	}
+	__asm__ __volatile__ ("sync");
 #else
-	printf("DDR init: DMA method\n");
-	for (p = 0; p < (uint *)(8 * 1024); p++) {
-		/* zero one data cache line */
-		if (((unsigned int)p & 0x1f) == 0) {
-			ppcDcbz((unsigned long)p);
-		}
-
-		/* write pattern to it and flush */
-		*p = (unsigned int)0xdeadbeef;
-
-		if (((unsigned int)p & 0x1c) == 0x1c) {
-			ppcDcbf((unsigned long)p);
-		}
+	debug("ddr init: DMA method\n");
+	size = 0x2000;
+	for (p = 0; p < (u64*)(size); p++) {
+		ppcDWstore((u32*)p, pattern);
 	}
+	__asm__ __volatile__ ("sync");
 
+	/* Initialise DMA for direct transfer */
+	dma_init();
+	/* Start DMA to transfer */
 	dma_xfer((uint *)0x2000, 0x2000, (uint *)0); /* 8K */
 	dma_xfer((uint *)0x4000, 0x4000, (uint *)0); /* 16K */
 	dma_xfer((uint *)0x8000, 0x8000, (uint *)0); /* 32K */
diff --git a/cpu/mpc83xx/start.S b/cpu/mpc83xx/start.S
index a7ed3c5..c43835c 100644
--- a/cpu/mpc83xx/start.S
+++ b/cpu/mpc83xx/start.S
@@ -870,6 +870,18 @@
 	dcbz	r0,r3
 	blr
 
+	.globl	ppcDWstore
+ppcDWstore:
+	lfd	1, 0(r4)
+	stfd	1, 0(r3)
+	blr
+
+	.globl	ppcDWload
+ppcDWload:
+	lfd	1, 0(r3)
+	stfd	1, 0(r4)
+	blr
+
 /*-------------------------------------------------------------------*/
 
 /*