Merge branch 'u-boot/master' into 'u-boot-arm/master'
diff --git a/arch/arm/cpu/armv7/at91/cpu.c b/arch/arm/cpu/armv7/at91/cpu.c
index 2fbf60d..8d86f97 100644
--- a/arch/arm/cpu/armv7/at91/cpu.c
+++ b/arch/arm/cpu/armv7/at91/cpu.c
@@ -61,6 +61,8 @@
 
 void enable_caches(void)
 {
+	icache_enable();
+	dcache_enable();
 }
 
 unsigned int get_chip_id(void)
diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
index e0a5946..38dea5c 100644
--- a/arch/arm/cpu/armv8/transition.S
+++ b/arch/arm/cpu/armv8/transition.S
@@ -43,7 +43,7 @@
 	mrs	x0, cnthctl_el2
 	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
 	msr	cnthctl_el2, x0
-	msr	cntvoff_el2, x0
+	msr	cntvoff_el2, xzr
 	mrs	x0, cntkctl_el1
 	orr	x0, x0, #0x3		/* Enable EL0 access to timers */
 	msr	cntkctl_el1, x0
diff --git a/arch/arm/include/asm/arch-vf610/crm_regs.h b/arch/arm/include/asm/arch-vf610/crm_regs.h
index e17c7d1..5256624 100644
--- a/arch/arm/include/asm/arch-vf610/crm_regs.h
+++ b/arch/arm/include/asm/arch-vf610/crm_regs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013 Freescale Semiconductor, Inc.
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -150,6 +150,9 @@
 #define CCM_CACRR_ARM_CLK_DIV_MASK		0x7
 #define CCM_CACRR_ARM_CLK_DIV(v)		((v) & 0x7)
 
+#define CCM_CSCMR1_QSPI0_CLK_SEL_OFFSET		22
+#define CCM_CSCMR1_QSPI0_CLK_SEL_MASK		(0x3 << 22)
+#define CCM_CSCMR1_QSPI0_CLK_SEL(v)		(((v) & 0x3) << 22)
 #define CCM_CSCMR1_ESDHC1_CLK_SEL_OFFSET	18
 #define CCM_CSCMR1_ESDHC1_CLK_SEL_MASK		(0x3 << 18)
 #define CCM_CSCMR1_ESDHC1_CLK_SEL(v)		(((v) & 0x3) << 18)
@@ -161,6 +164,11 @@
 #define CCM_CSCDR2_ESDHC1_CLK_DIV_MASK		(0xf << 20)
 #define CCM_CSCDR2_ESDHC1_CLK_DIV(v)		(((v) & 0xf) << 20)
 
+#define CCM_CSCDR3_QSPI0_EN			(1 << 4)
+#define CCM_CSCDR3_QSPI0_DIV(v)			((v) << 3)
+#define CCM_CSCDR3_QSPI0_X2_DIV(v)		((v) << 2)
+#define CCM_CSCDR3_QSPI0_X4_DIV(v)		((v) & 0x3)
+
 #define CCM_CSCMR2_RMII_CLK_SEL_OFFSET		4
 #define CCM_CSCMR2_RMII_CLK_SEL_MASK		(0x3 << 4)
 #define CCM_CSCMR2_RMII_CLK_SEL(v)		(((v) & 0x3) << 4)
@@ -170,6 +178,7 @@
 #define CCM_CCGR0_UART1_CTRL_MASK		(0x3 << 16)
 #define CCM_CCGR1_PIT_CTRL_MASK			(0x3 << 14)
 #define CCM_CCGR1_WDOGA5_CTRL_MASK		(0x3 << 28)
+#define CCM_CCGR2_QSPI0_CTRL_MASK		(0x3 << 8)
 #define CCM_CCGR2_IOMUXC_CTRL_MASK		(0x3 << 16)
 #define CCM_CCGR2_PORTA_CTRL_MASK		(0x3 << 18)
 #define CCM_CCGR2_PORTB_CTRL_MASK		(0x3 << 20)
diff --git a/arch/arm/include/asm/arch-vf610/imx-regs.h b/arch/arm/include/asm/arch-vf610/imx-regs.h
index 0c28e1b..bd6f680 100644
--- a/arch/arm/include/asm/arch-vf610/imx-regs.h
+++ b/arch/arm/include/asm/arch-vf610/imx-regs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013 Freescale Semiconductor, Inc.
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -87,6 +87,8 @@
 #define ENET_BASE_ADDR		(AIPS1_BASE_ADDR + 0x00050000)
 #define ENET1_BASE_ADDR		(AIPS1_BASE_ADDR + 0x00051000)
 
+#define QSPI0_AMBA_BASE		0x20000000
+
 /* MUX mode and PAD ctrl are in one register */
 #define CONFIG_IOMUX_SHARE_CONF_REG
 
diff --git a/arch/arm/include/asm/arch-vf610/iomux-vf610.h b/arch/arm/include/asm/arch-vf610/iomux-vf610.h
index 88807d8..a965641 100644
--- a/arch/arm/include/asm/arch-vf610/iomux-vf610.h
+++ b/arch/arm/include/asm/arch-vf610/iomux-vf610.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013 Freescale Semiconductor, Inc.
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -20,6 +20,9 @@
 #define VF610_I2C_PAD_CTRL	(PAD_CTL_PUS_47K_UP | PAD_CTL_DSE_50ohm | \
 				PAD_CTL_SPEED_HIGH | PAD_CTL_OBE_IBE_ENABLE)
 
+#define VF610_QSPI_PAD_CTRL	(PAD_CTL_SPEED_HIGH | PAD_CTL_DSE_150ohm | \
+				PAD_CTL_PUS_22K_UP | PAD_CTL_OBE_IBE_ENABLE)
+
 enum {
 	VF610_PAD_PTA6__RMII0_CLKIN		= IOMUX_PAD(0x0000, 0x0000, 2, __NA_, 0, VF610_ENET_PAD_CTRL),
 	VF610_PAD_PTA6__RMII0_CLKOUT		= IOMUX_PAD(0x0000, 0x0000, 1, __NA_, 0, VF610_ENET_PAD_CTRL),
@@ -53,6 +56,18 @@
 	VF610_PAD_PTA29__ESDHC1_DAT3		= IOMUX_PAD(0x004c, 0x004c, 5, __NA_, 0, VF610_SDHC_PAD_CTRL),
 	VF610_PAD_PTB14__I2C0_SCL		= IOMUX_PAD(0x0090, 0x0090, 2, 0x033c, 1, VF610_I2C_PAD_CTRL),
 	VF610_PAD_PTB15__I2C0_SDA		= IOMUX_PAD(0x0094, 0x0094, 2, 0x0340, 1, VF610_I2C_PAD_CTRL),
+	VF610_PAD_PTD0__QSPI0_A_QSCK		= IOMUX_PAD(0x013c, 0x013c, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD1__QSPI0_A_CS0		= IOMUX_PAD(0x0140, 0x0140, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD2__QSPI0_A_DATA3		= IOMUX_PAD(0x0144, 0x0144, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD3__QSPI0_A_DATA2		= IOMUX_PAD(0x0148, 0x0148, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD4__QSPI0_A_DATA1		= IOMUX_PAD(0x014c, 0x014c, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD5__QSPI0_A_DATA0		= IOMUX_PAD(0x0150, 0x0150, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD7__QSPI0_B_QSCK		= IOMUX_PAD(0x0158, 0x0158, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD8__QSPI0_B_CS0		= IOMUX_PAD(0x015c, 0x015c, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD9__QSPI0_B_DATA3		= IOMUX_PAD(0x0160, 0x0160, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD10__QSPI0_B_DATA2		= IOMUX_PAD(0x0164, 0x0164, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD11__QSPI0_B_DATA1		= IOMUX_PAD(0x0168, 0x0168, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
+	VF610_PAD_PTD12__QSPI0_B_DATA0		= IOMUX_PAD(0x016c, 0x016c, 1, __NA_, 0, VF610_QSPI_PAD_CTRL),
 	VF610_PAD_DDR_A15__DDR_A_15		= IOMUX_PAD(0x0220, 0x0220, 0, __NA_, 0, VF610_DDR_PAD_CTRL),
 	VF610_PAD_DDR_A14__DDR_A_14		= IOMUX_PAD(0x0224, 0x0224, 0, __NA_, 0, VF610_DDR_PAD_CTRL),
 	VF610_PAD_DDR_A13__DDR_A_13		= IOMUX_PAD(0x0228, 0x0228, 0, __NA_, 0, VF610_DDR_PAD_CTRL),
diff --git a/arch/arm/include/asm/imx-common/iomux-v3.h b/arch/arm/include/asm/imx-common/iomux-v3.h
index cca920b..ff45618 100644
--- a/arch/arm/include/asm/imx-common/iomux-v3.h
+++ b/arch/arm/include/asm/imx-common/iomux-v3.h
@@ -123,12 +123,14 @@
 #define PAD_CTL_SPEED_MED	(1 << 12)
 #define PAD_CTL_SPEED_HIGH	(3 << 12)
 
+#define PAD_CTL_DSE_150ohm	(1 << 6)
 #define PAD_CTL_DSE_50ohm	(3 << 6)
 #define PAD_CTL_DSE_25ohm	(6 << 6)
 #define PAD_CTL_DSE_20ohm	(7 << 6)
 
 #define PAD_CTL_PUS_47K_UP	(1 << 4 | PAD_CTL_PUE)
 #define PAD_CTL_PUS_100K_UP	(2 << 4 | PAD_CTL_PUE)
+#define PAD_CTL_PUS_22K_UP	(3 << 4 | PAD_CTL_PUE)
 #define PAD_CTL_PKE		(1 << 3)
 #define PAD_CTL_PUE		(1 << 2 | PAD_CTL_PKE)
 
diff --git a/arch/arm/lib/vectors.S b/arch/arm/lib/vectors.S
index d68cc47..e6538ef 100644
--- a/arch/arm/lib/vectors.S
+++ b/arch/arm/lib/vectors.S
@@ -43,8 +43,6 @@
  *************************************************************************
  */
 
-_start:
-
 #ifdef CONFIG_SYS_DV_NOR_BOOT_CFG
 	.word	CONFIG_SYS_DV_NOR_BOOT_CFG
 #endif
diff --git a/arch/avr32/cpu/cache.c b/arch/avr32/cpu/cache.c
index ab0374e..b3ffc33 100644
--- a/arch/avr32/cpu/cache.c
+++ b/arch/avr32/cpu/cache.c
@@ -24,31 +24,31 @@
 	sync_write_buffer();
 }
 
-void dcache_invalidate_range(volatile void *start, size_t size)
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long v, begin, end, linesz;
+	unsigned long v, linesz;
 
 	linesz = CONFIG_SYS_DCACHE_LINESZ;
 
 	/* You asked for it, you got it */
-	begin = (unsigned long)start & ~(linesz - 1);
-	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+	start = start & ~(linesz - 1);
+	stop = (stop + linesz - 1) & ~(linesz - 1);
 
-	for (v = begin; v < end; v += linesz)
+	for (v = start; v < stop; v += linesz)
 		dcache_invalidate_line((void *)v);
 }
 
-void dcache_flush_range(volatile void *start, size_t size)
+void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long v, begin, end, linesz;
+	unsigned long v, linesz;
 
 	linesz = CONFIG_SYS_DCACHE_LINESZ;
 
 	/* You asked for it, you got it */
-	begin = (unsigned long)start & ~(linesz - 1);
-	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+	start = start & ~(linesz - 1);
+	stop = (stop + linesz - 1) & ~(linesz - 1);
 
-	for (v = begin; v < end; v += linesz)
+	for (v = start; v < stop; v += linesz)
 		dcache_flush_line((void *)v);
 
 	sync_write_buffer();
diff --git a/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h b/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
index 13d6d3a..e08cd9d 100644
--- a/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
+++ b/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
@@ -49,9 +49,7 @@
  * Applies the above functions on all lines that are touched by the
  * specified virtual address range.
  */
-void dcache_invalidate_range(volatile void *start, size_t len);
 void dcache_clean_range(volatile void *start, size_t len);
-void dcache_flush_range(volatile void *start, size_t len);
 void icache_invalidate_range(volatile void *start, size_t len);
 
 static inline void dcache_flush_unlocked(void)
diff --git a/arch/avr32/include/asm/dma-mapping.h b/arch/avr32/include/asm/dma-mapping.h
index 95ea81f..dbdd2fe 100644
--- a/arch/avr32/include/asm/dma-mapping.h
+++ b/arch/avr32/include/asm/dma-mapping.h
@@ -23,13 +23,15 @@
 
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
-		dcache_flush_range(vaddr, len);
+		flush_dcache_range((unsigned long)vaddr,
+				   (unsigned long)vaddr + len);
 		break;
 	case DMA_TO_DEVICE:
 		dcache_clean_range(vaddr, len);
 		break;
 	case DMA_FROM_DEVICE:
-		dcache_invalidate_range(vaddr, len);
+		invalidate_dcache_range((unsigned long)vaddr,
+					(unsigned long)vaddr + len);
 		break;
 	default:
 		/* This will cause a linker error */
diff --git a/arch/avr32/lib/board.c b/arch/avr32/lib/board.c
index 7680102..bf0997f 100644
--- a/arch/avr32/lib/board.c
+++ b/arch/avr32/lib/board.c
@@ -65,8 +65,8 @@
 	printf("DMA: Using memory from 0x%08lx to 0x%08lx\n",
 	       dma_alloc_start, dma_alloc_end);
 
-	dcache_invalidate_range(cached(dma_alloc_start),
-				dma_alloc_end - dma_alloc_start);
+	invalidate_dcache_range((unsigned long)cached(dma_alloc_start),
+				dma_alloc_end);
 }
 
 void *dma_alloc_coherent(size_t len, unsigned long *handle)
diff --git a/board/freescale/vf610twr/vf610twr.c b/board/freescale/vf610twr/vf610twr.c
index d64d3aa..54a9f2c 100644
--- a/board/freescale/vf610twr/vf610twr.c
+++ b/board/freescale/vf610twr/vf610twr.c
@@ -278,6 +278,26 @@
 	imx_iomux_v3_setup_multiple_pads(i2c0_pads, ARRAY_SIZE(i2c0_pads));
 }
 
+static void setup_iomux_qspi(void)
+{
+	static const iomux_v3_cfg_t qspi0_pads[] = {
+		VF610_PAD_PTD0__QSPI0_A_QSCK,
+		VF610_PAD_PTD1__QSPI0_A_CS0,
+		VF610_PAD_PTD2__QSPI0_A_DATA3,
+		VF610_PAD_PTD3__QSPI0_A_DATA2,
+		VF610_PAD_PTD4__QSPI0_A_DATA1,
+		VF610_PAD_PTD5__QSPI0_A_DATA0,
+		VF610_PAD_PTD7__QSPI0_B_QSCK,
+		VF610_PAD_PTD8__QSPI0_B_CS0,
+		VF610_PAD_PTD9__QSPI0_B_DATA3,
+		VF610_PAD_PTD10__QSPI0_B_DATA2,
+		VF610_PAD_PTD11__QSPI0_B_DATA1,
+		VF610_PAD_PTD12__QSPI0_B_DATA0,
+	};
+
+	imx_iomux_v3_setup_multiple_pads(qspi0_pads, ARRAY_SIZE(qspi0_pads));
+}
+
 #ifdef CONFIG_FSL_ESDHC
 struct fsl_esdhc_cfg esdhc_cfg[1] = {
 	{ESDHC1_BASE_ADDR},
@@ -321,7 +341,8 @@
 	clrsetbits_le32(&ccm->ccgr2, CCM_REG_CTRL_MASK,
 		CCM_CCGR2_IOMUXC_CTRL_MASK | CCM_CCGR2_PORTA_CTRL_MASK |
 		CCM_CCGR2_PORTB_CTRL_MASK | CCM_CCGR2_PORTC_CTRL_MASK |
-		CCM_CCGR2_PORTD_CTRL_MASK | CCM_CCGR2_PORTE_CTRL_MASK);
+		CCM_CCGR2_PORTD_CTRL_MASK | CCM_CCGR2_PORTE_CTRL_MASK |
+		CCM_CCGR2_QSPI0_CTRL_MASK);
 	clrsetbits_le32(&ccm->ccgr3, CCM_REG_CTRL_MASK,
 		CCM_CCGR3_ANADIG_CTRL_MASK);
 	clrsetbits_le32(&ccm->ccgr4, CCM_REG_CTRL_MASK,
@@ -352,11 +373,14 @@
 		CCM_CACRR_IPG_CLK_DIV(1) | CCM_CACRR_BUS_CLK_DIV(2) |
 		CCM_CACRR_ARM_CLK_DIV(0));
 	clrsetbits_le32(&ccm->cscmr1, CCM_REG_CTRL_MASK,
-		CCM_CSCMR1_ESDHC1_CLK_SEL(3));
+		CCM_CSCMR1_ESDHC1_CLK_SEL(3) | CCM_CSCMR1_QSPI0_CLK_SEL(3));
 	clrsetbits_le32(&ccm->cscdr1, CCM_REG_CTRL_MASK,
 		CCM_CSCDR1_RMII_CLK_EN);
 	clrsetbits_le32(&ccm->cscdr2, CCM_REG_CTRL_MASK,
 		CCM_CSCDR2_ESDHC1_EN | CCM_CSCDR2_ESDHC1_CLK_DIV(0));
+	clrsetbits_le32(&ccm->cscdr3, CCM_REG_CTRL_MASK,
+		CCM_CSCDR3_QSPI0_EN | CCM_CSCDR3_QSPI0_DIV(1) |
+		CCM_CSCDR3_QSPI0_X2_DIV(1) | CCM_CSCDR3_QSPI0_X4_DIV(3));
 	clrsetbits_le32(&ccm->cscmr2, CCM_REG_CTRL_MASK,
 		CCM_CSCMR2_RMII_CLK_SEL(0));
 }
@@ -386,6 +410,7 @@
 	setup_iomux_uart();
 	setup_iomux_enet();
 	setup_iomux_i2c();
+	setup_iomux_qspi();
 
 	return 0;
 }
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 781a272..01a94a4 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -40,17 +40,21 @@
 
 #include "macb.h"
 
-#define CONFIG_SYS_MACB_RX_BUFFER_SIZE		4096
-#define CONFIG_SYS_MACB_RX_RING_SIZE		(CONFIG_SYS_MACB_RX_BUFFER_SIZE / 128)
-#define CONFIG_SYS_MACB_TX_RING_SIZE		16
-#define CONFIG_SYS_MACB_TX_TIMEOUT		1000
-#define CONFIG_SYS_MACB_AUTONEG_TIMEOUT	5000000
+#define MACB_RX_BUFFER_SIZE		4096
+#define MACB_RX_RING_SIZE		(MACB_RX_BUFFER_SIZE / 128)
+#define MACB_TX_RING_SIZE		16
+#define MACB_TX_TIMEOUT		1000
+#define MACB_AUTONEG_TIMEOUT	5000000
 
 struct macb_dma_desc {
 	u32	addr;
 	u32	ctrl;
 };
 
+#define DMA_DESC_BYTES(n)	(n * sizeof(struct macb_dma_desc))
+#define MACB_TX_DMA_DESC_SIZE	(DMA_DESC_BYTES(MACB_TX_RING_SIZE))
+#define MACB_RX_DMA_DESC_SIZE	(DMA_DESC_BYTES(MACB_RX_RING_SIZE))
+
 #define RXADDR_USED		0x00000001
 #define RXADDR_WRAP		0x00000002
 
@@ -170,7 +174,7 @@
 	struct eth_device *dev = eth_get_dev_by_name(devname);
 	struct macb_device *macb = to_macb(dev);
 
-	if ( macb->phy_addr != phy_adr )
+	if (macb->phy_addr != phy_adr)
 		return -1;
 
 	arch_get_mdio_control(devname);
@@ -184,7 +188,7 @@
 	struct eth_device *dev = eth_get_dev_by_name(devname);
 	struct macb_device *macb = to_macb(dev);
 
-	if ( macb->phy_addr != phy_adr )
+	if (macb->phy_addr != phy_adr)
 		return -1;
 
 	arch_get_mdio_control(devname);
@@ -194,6 +198,39 @@
 }
 #endif
 
+#define RX	1
+#define TX	0
+static inline void macb_invalidate_ring_desc(struct macb_device *macb, bool rx)
+{
+	if (rx)
+		invalidate_dcache_range(macb->rx_ring_dma, macb->rx_ring_dma +
+			MACB_RX_DMA_DESC_SIZE);
+	else
+		invalidate_dcache_range(macb->tx_ring_dma, macb->tx_ring_dma +
+			MACB_TX_DMA_DESC_SIZE);
+}
+
+static inline void macb_flush_ring_desc(struct macb_device *macb, bool rx)
+{
+	if (rx)
+		flush_dcache_range(macb->rx_ring_dma, macb->rx_ring_dma +
+			MACB_RX_DMA_DESC_SIZE);
+	else
+		flush_dcache_range(macb->tx_ring_dma, macb->tx_ring_dma +
+			MACB_TX_DMA_DESC_SIZE);
+}
+
+static inline void macb_flush_rx_buffer(struct macb_device *macb)
+{
+	flush_dcache_range(macb->rx_buffer_dma, macb->rx_buffer_dma +
+				MACB_RX_BUFFER_SIZE);
+}
+
+static inline void macb_invalidate_rx_buffer(struct macb_device *macb)
+{
+	invalidate_dcache_range(macb->rx_buffer_dma, macb->rx_buffer_dma +
+				MACB_RX_BUFFER_SIZE);
+}
 
 #if defined(CONFIG_CMD_NET)
 
@@ -208,23 +245,28 @@
 
 	ctrl = length & TXBUF_FRMLEN_MASK;
 	ctrl |= TXBUF_FRAME_END;
-	if (tx_head == (CONFIG_SYS_MACB_TX_RING_SIZE - 1)) {
+	if (tx_head == (MACB_TX_RING_SIZE - 1)) {
 		ctrl |= TXBUF_WRAP;
 		macb->tx_head = 0;
-	} else
+	} else {
 		macb->tx_head++;
+	}
 
 	macb->tx_ring[tx_head].ctrl = ctrl;
 	macb->tx_ring[tx_head].addr = paddr;
 	barrier();
+	macb_flush_ring_desc(macb, TX);
+	/* Do we need check paddr and length is dcache line aligned? */
+	flush_dcache_range(paddr, paddr + length);
 	macb_writel(macb, NCR, MACB_BIT(TE) | MACB_BIT(RE) | MACB_BIT(TSTART));
 
 	/*
 	 * I guess this is necessary because the networking core may
 	 * re-use the transmit buffer as soon as we return...
 	 */
-	for (i = 0; i <= CONFIG_SYS_MACB_TX_TIMEOUT; i++) {
+	for (i = 0; i <= MACB_TX_TIMEOUT; i++) {
 		barrier();
+		macb_invalidate_ring_desc(macb, TX);
 		ctrl = macb->tx_ring[tx_head].ctrl;
 		if (ctrl & TXBUF_USED)
 			break;
@@ -233,7 +275,7 @@
 
 	dma_unmap_single(packet, length, paddr);
 
-	if (i <= CONFIG_SYS_MACB_TX_TIMEOUT) {
+	if (i <= MACB_TX_TIMEOUT) {
 		if (ctrl & TXBUF_UNDERRUN)
 			printf("%s: TX underrun\n", netdev->name);
 		if (ctrl & TXBUF_EXHAUSTED)
@@ -253,10 +295,12 @@
 	unsigned int i;
 
 	i = macb->rx_tail;
+
+	macb_invalidate_ring_desc(macb, RX);
 	while (i > new_tail) {
 		macb->rx_ring[i].addr &= ~RXADDR_USED;
 		i++;
-		if (i > CONFIG_SYS_MACB_RX_RING_SIZE)
+		if (i > MACB_RX_RING_SIZE)
 			i = 0;
 	}
 
@@ -266,6 +310,7 @@
 	}
 
 	barrier();
+	macb_flush_ring_desc(macb, RX);
 	macb->rx_tail = new_tail;
 }
 
@@ -279,6 +324,8 @@
 	u32 status;
 
 	for (;;) {
+		macb_invalidate_ring_desc(macb, RX);
+
 		if (!(macb->rx_ring[rx_tail].addr & RXADDR_USED))
 			return -1;
 
@@ -292,10 +339,12 @@
 		if (status & RXBUF_FRAME_END) {
 			buffer = macb->rx_buffer + 128 * macb->rx_tail;
 			length = status & RXBUF_FRMLEN_MASK;
+
+			macb_invalidate_rx_buffer(macb);
 			if (wrapped) {
 				unsigned int headlen, taillen;
 
-				headlen = 128 * (CONFIG_SYS_MACB_RX_RING_SIZE
+				headlen = 128 * (MACB_RX_RING_SIZE
 						 - macb->rx_tail);
 				taillen = length - headlen;
 				memcpy((void *)NetRxPackets[0],
@@ -306,11 +355,11 @@
 			}
 
 			NetReceive(buffer, length);
-			if (++rx_tail >= CONFIG_SYS_MACB_RX_RING_SIZE)
+			if (++rx_tail >= MACB_RX_RING_SIZE)
 				rx_tail = 0;
 			reclaim_rx_buffers(macb, rx_tail);
 		} else {
-			if (++rx_tail >= CONFIG_SYS_MACB_RX_RING_SIZE) {
+			if (++rx_tail >= MACB_RX_RING_SIZE) {
 				wrapped = 1;
 				rx_tail = 0;
 			}
@@ -333,7 +382,7 @@
 	macb_mdio_write(macb, MII_BMCR, (BMCR_ANENABLE
 					 | BMCR_ANRESTART));
 
-	for (i = 0; i < CONFIG_SYS_MACB_AUTONEG_TIMEOUT / 100; i++) {
+	for (i = 0; i < MACB_AUTONEG_TIMEOUT / 100; i++) {
 		status = macb_mdio_read(macb, MII_BMSR);
 		if (status & BMSR_ANEGCOMPLETE)
 			break;
@@ -385,9 +434,8 @@
 	arch_get_mdio_control(netdev->name);
 #ifdef CONFIG_MACB_SEARCH_PHY
 	/* Auto-detect phy_addr */
-	if (!macb_phy_find(macb)) {
+	if (!macb_phy_find(macb))
 		return 0;
-	}
 #endif /* CONFIG_MACB_SEARCH_PHY */
 
 	/* Check if the PHY is up to snuff... */
@@ -414,7 +462,7 @@
 		/* Try to re-negotiate if we don't have link already. */
 		macb_phy_reset(macb);
 
-		for (i = 0; i < CONFIG_SYS_MACB_AUTONEG_TIMEOUT / 100; i++) {
+		for (i = 0; i < MACB_AUTONEG_TIMEOUT / 100; i++) {
 			status = macb_mdio_read(macb, MII_BMSR);
 			if (status & BMSR_LSTATUS)
 				break;
@@ -499,21 +547,28 @@
 
 	/* initialize DMA descriptors */
 	paddr = macb->rx_buffer_dma;
-	for (i = 0; i < CONFIG_SYS_MACB_RX_RING_SIZE; i++) {
-		if (i == (CONFIG_SYS_MACB_RX_RING_SIZE - 1))
+	for (i = 0; i < MACB_RX_RING_SIZE; i++) {
+		if (i == (MACB_RX_RING_SIZE - 1))
 			paddr |= RXADDR_WRAP;
 		macb->rx_ring[i].addr = paddr;
 		macb->rx_ring[i].ctrl = 0;
 		paddr += 128;
 	}
-	for (i = 0; i < CONFIG_SYS_MACB_TX_RING_SIZE; i++) {
+	macb_flush_ring_desc(macb, RX);
+	macb_flush_rx_buffer(macb);
+
+	for (i = 0; i < MACB_TX_RING_SIZE; i++) {
 		macb->tx_ring[i].addr = 0;
-		if (i == (CONFIG_SYS_MACB_TX_RING_SIZE - 1))
+		if (i == (MACB_TX_RING_SIZE - 1))
 			macb->tx_ring[i].ctrl = TXBUF_USED | TXBUF_WRAP;
 		else
 			macb->tx_ring[i].ctrl = TXBUF_USED;
 	}
-	macb->rx_tail = macb->tx_head = macb->tx_tail = 0;
+	macb_flush_ring_desc(macb, TX);
+
+	macb->rx_tail = 0;
+	macb->tx_head = 0;
+	macb->tx_tail = 0;
 
 	macb_writel(macb, RBQP, macb->rx_ring_dma);
 	macb_writel(macb, TBQP, macb->tx_ring_dma);
@@ -654,15 +709,15 @@
 
 	netdev = &macb->netdev;
 
-	macb->rx_buffer = dma_alloc_coherent(CONFIG_SYS_MACB_RX_BUFFER_SIZE,
+	macb->rx_buffer = dma_alloc_coherent(MACB_RX_BUFFER_SIZE,
 					     &macb->rx_buffer_dma);
-	macb->rx_ring = dma_alloc_coherent(CONFIG_SYS_MACB_RX_RING_SIZE
-					   * sizeof(struct macb_dma_desc),
+	macb->rx_ring = dma_alloc_coherent(MACB_RX_DMA_DESC_SIZE,
 					   &macb->rx_ring_dma);
-	macb->tx_ring = dma_alloc_coherent(CONFIG_SYS_MACB_TX_RING_SIZE
-					   * sizeof(struct macb_dma_desc),
+	macb->tx_ring = dma_alloc_coherent(MACB_TX_DMA_DESC_SIZE,
 					   &macb->tx_ring_dma);
 
+	/* TODO: we need check the rx/tx_ring_dma is dcache line aligned */
+
 	macb->regs = regs;
 	macb->phy_addr = phy_addr;
 
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 81b6af6..b587308 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -40,3 +40,4 @@
 obj-$(CONFIG_TI_QSPI) += ti_qspi.o
 obj-$(CONFIG_XILINX_SPI) += xilinx_spi.o
 obj-$(CONFIG_ZYNQ_SPI) += zynq_spi.o
+obj-$(CONFIG_FSL_QSPI) += fsl_qspi.o
diff --git a/drivers/spi/fsl_qspi.c b/drivers/spi/fsl_qspi.c
new file mode 100644
index 0000000..ba20bef
--- /dev/null
+++ b/drivers/spi/fsl_qspi.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
+ *
+ * Freescale Quad Serial Peripheral Interface (QSPI) driver
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <malloc.h>
+#include <spi.h>
+#include <asm/io.h>
+#include <linux/sizes.h>
+#include "fsl_qspi.h"
+
+#define RX_BUFFER_SIZE		0x80
+#define TX_BUFFER_SIZE		0x40
+
+#define OFFSET_BITS_MASK	0x00ffffff
+
+#define FLASH_STATUS_WEL	0x02
+
+/* SEQID */
+#define SEQID_WREN		1
+#define SEQID_FAST_READ		2
+#define SEQID_RDSR		3
+#define SEQID_SE		4
+#define SEQID_CHIP_ERASE	5
+#define SEQID_PP		6
+#define SEQID_RDID		7
+
+/* Flash opcodes */
+#define OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
+#define OPCODE_RDSR		0x05	/* Read status register */
+#define OPCODE_WREN		0x06	/* Write enable */
+#define OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define OPCODE_RDID		0x9f	/* Read JEDEC ID */
+
+/* 4-byte address opcodes - used on Spansion and some Macronix flashes */
+#define OPCODE_FAST_READ_4B	0x0c    /* Read data bytes (high frequency) */
+#define OPCODE_PP_4B		0x12    /* Page program (up to 256 bytes) */
+#define OPCODE_SE_4B		0xdc    /* Sector erase (usually 64KiB) */
+
+#ifdef CONFIG_SYS_FSL_QSPI_LE
+#define qspi_read32		in_le32
+#define qspi_write32		out_le32
+#elif defined(CONFIG_SYS_FSL_QSPI_BE)
+#define qspi_read32		in_be32
+#define qspi_write32		out_be32
+#endif
+
+static unsigned long spi_bases[] = {
+	QSPI0_BASE_ADDR,
+};
+
+static unsigned long amba_bases[] = {
+	QSPI0_AMBA_BASE,
+};
+
+struct fsl_qspi {
+	struct spi_slave slave;
+	unsigned long reg_base;
+	unsigned long amba_base;
+	u32 sf_addr;
+	u8 cur_seqid;
+};
+
+/* QSPI support swapping the flash read/write data
+ * in hardware for LS102xA, but not for VF610 */
+static inline u32 qspi_endian_xchg(u32 data)
+{
+#ifdef CONFIG_VF610
+	return swab32(data);
+#else
+	return data;
+#endif
+}
+
+static inline struct fsl_qspi *to_qspi_spi(struct spi_slave *slave)
+{
+	return container_of(slave, struct fsl_qspi, slave);
+}
+
+static void qspi_set_lut(struct fsl_qspi *qspi)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 lut_base;
+
+	/* Unlock the LUT */
+	qspi_write32(&regs->lutkey, LUT_KEY_VALUE);
+	qspi_write32(&regs->lckcr, QSPI_LCKCR_UNLOCK);
+
+	/* Write Enable */
+	lut_base = SEQID_WREN * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_WREN) |
+		PAD0(LUT_PAD1) | INSTR0(LUT_CMD));
+	qspi_write32(&regs->lut[lut_base + 1], 0);
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Fast Read */
+	lut_base = SEQID_FAST_READ * 4;
+	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_FAST_READ) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	else
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_FAST_READ_4B) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR32BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	qspi_write32(&regs->lut[lut_base + 1], OPRND0(8) | PAD0(LUT_PAD1) |
+		INSTR0(LUT_DUMMY) | OPRND1(RX_BUFFER_SIZE) | PAD1(LUT_PAD1) |
+		INSTR1(LUT_READ));
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Read Status */
+	lut_base = SEQID_RDSR * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_RDSR) |
+		PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(1) |
+		PAD1(LUT_PAD1) | INSTR1(LUT_READ));
+	qspi_write32(&regs->lut[lut_base + 1], 0);
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Erase a sector */
+	lut_base = SEQID_SE * 4;
+	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_SE) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	else
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_SE_4B) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR32BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	qspi_write32(&regs->lut[lut_base + 1], 0);
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Erase the whole chip */
+	lut_base = SEQID_CHIP_ERASE * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_CHIP_ERASE) |
+		PAD0(LUT_PAD1) | INSTR0(LUT_CMD));
+	qspi_write32(&regs->lut[lut_base + 1], 0);
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Page Program */
+	lut_base = SEQID_PP * 4;
+	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_PP) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	else
+		qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_PP_4B) |
+			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR32BIT) |
+			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+	qspi_write32(&regs->lut[lut_base + 1], OPRND0(TX_BUFFER_SIZE) |
+		PAD0(LUT_PAD1) | INSTR0(LUT_WRITE));
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* READ ID */
+	lut_base = SEQID_RDID * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(OPCODE_RDID) |
+		PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(8) |
+		PAD1(LUT_PAD1) | INSTR1(LUT_READ));
+	qspi_write32(&regs->lut[lut_base + 1], 0);
+	qspi_write32(&regs->lut[lut_base + 2], 0);
+	qspi_write32(&regs->lut[lut_base + 3], 0);
+
+	/* Lock the LUT */
+	qspi_write32(&regs->lutkey, LUT_KEY_VALUE);
+	qspi_write32(&regs->lckcr, QSPI_LCKCR_LOCK);
+}
+
+void spi_init()
+{
+	/* do nothing */
+}
+
+struct spi_slave *spi_setup_slave(unsigned int bus, unsigned int cs,
+		unsigned int max_hz, unsigned int mode)
+{
+	struct fsl_qspi *qspi;
+	struct fsl_qspi_regs *regs;
+	u32 reg_val, smpr_val;
+	u32 total_size, seq_id;
+
+	if (bus >= ARRAY_SIZE(spi_bases))
+		return NULL;
+
+	qspi = spi_alloc_slave(struct fsl_qspi, bus, cs);
+	if (!qspi)
+		return NULL;
+
+	qspi->reg_base = spi_bases[bus];
+	qspi->amba_base = amba_bases[bus];
+
+	qspi->slave.max_write_size = TX_BUFFER_SIZE;
+
+	regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	qspi_write32(&regs->mcr, QSPI_MCR_RESERVED_MASK | QSPI_MCR_MDIS_MASK);
+
+	smpr_val = qspi_read32(&regs->smpr);
+	qspi_write32(&regs->smpr, smpr_val & ~(QSPI_SMPR_FSDLY_MASK |
+		QSPI_SMPR_FSPHS_MASK | QSPI_SMPR_HSENA_MASK));
+	qspi_write32(&regs->mcr, QSPI_MCR_RESERVED_MASK);
+
+	total_size = FSL_QSPI_FLASH_SIZE * FSL_QSPI_FLASH_NUM;
+	qspi_write32(&regs->sfa1ad, FSL_QSPI_FLASH_SIZE | qspi->amba_base);
+	qspi_write32(&regs->sfa2ad, FSL_QSPI_FLASH_SIZE | qspi->amba_base);
+	qspi_write32(&regs->sfb1ad, total_size | qspi->amba_base);
+	qspi_write32(&regs->sfb2ad, total_size | qspi->amba_base);
+
+	qspi_set_lut(qspi);
+
+	smpr_val = qspi_read32(&regs->smpr);
+	smpr_val &= ~QSPI_SMPR_DDRSMP_MASK;
+	qspi_write32(&regs->smpr, smpr_val);
+	qspi_write32(&regs->mcr, QSPI_MCR_RESERVED_MASK);
+
+	seq_id = 0;
+	reg_val = qspi_read32(&regs->bfgencr);
+	reg_val &= ~QSPI_BFGENCR_SEQID_MASK;
+	reg_val |= (seq_id << QSPI_BFGENCR_SEQID_SHIFT);
+	reg_val &= ~QSPI_BFGENCR_PAR_EN_MASK;
+	qspi_write32(&regs->bfgencr, reg_val);
+
+	return &qspi->slave;
+}
+
+void spi_free_slave(struct spi_slave *slave)
+{
+	struct fsl_qspi *qspi = to_qspi_spi(slave);
+
+	free(qspi);
+}
+
+int spi_claim_bus(struct spi_slave *slave)
+{
+	return 0;
+}
+
+static void qspi_op_rdid(struct fsl_qspi *qspi, u32 *rxbuf, u32 len)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 mcr_reg, rbsr_reg, data;
+	int i, size;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	qspi_write32(&regs->sfar, qspi->amba_base);
+
+	qspi_write32(&regs->ipcr, (SEQID_RDID << QSPI_IPCR_SEQID_SHIFT) | 0);
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	i = 0;
+	size = len;
+	while ((RX_BUFFER_SIZE >= size) && (size > 0)) {
+		rbsr_reg = qspi_read32(&regs->rbsr);
+		if (rbsr_reg & QSPI_RBSR_RDBFL_MASK) {
+			data = qspi_read32(&regs->rbdr[i]);
+			data = qspi_endian_xchg(data);
+			memcpy(rxbuf, &data, 4);
+			rxbuf++;
+			size -= 4;
+			i++;
+		}
+	}
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+static void qspi_op_read(struct fsl_qspi *qspi, u32 *rxbuf, u32 len)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 mcr_reg, data;
+	int i, size;
+	u32 to_or_from;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	to_or_from = qspi->sf_addr + qspi->amba_base;
+
+	while (len > 0) {
+		qspi_write32(&regs->sfar, to_or_from);
+
+		size = (len > RX_BUFFER_SIZE) ?
+			RX_BUFFER_SIZE : len;
+
+		qspi_write32(&regs->ipcr,
+			(SEQID_FAST_READ << QSPI_IPCR_SEQID_SHIFT) | size);
+		while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+			;
+
+		to_or_from += size;
+		len -= size;
+
+		i = 0;
+		while ((RX_BUFFER_SIZE >= size) && (size > 0)) {
+			data = qspi_read32(&regs->rbdr[i]);
+			data = qspi_endian_xchg(data);
+			memcpy(rxbuf, &data, 4);
+			rxbuf++;
+			size -= 4;
+			i++;
+		}
+		qspi_write32(&regs->mcr, qspi_read32(&regs->mcr) |
+			QSPI_MCR_CLR_RXF_MASK);
+	}
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+static void qspi_op_pp(struct fsl_qspi *qspi, u32 *txbuf, u32 len)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 mcr_reg, data, reg, status_reg;
+	int i, size, tx_size;
+	u32 to_or_from = 0;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	status_reg = 0;
+	while ((status_reg & FLASH_STATUS_WEL) != FLASH_STATUS_WEL) {
+		qspi_write32(&regs->ipcr,
+			(SEQID_WREN << QSPI_IPCR_SEQID_SHIFT) | 0);
+		while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+			;
+
+		qspi_write32(&regs->ipcr,
+			(SEQID_RDSR << QSPI_IPCR_SEQID_SHIFT) | 1);
+		while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+			;
+
+		reg = qspi_read32(&regs->rbsr);
+		if (reg & QSPI_RBSR_RDBFL_MASK) {
+			status_reg = qspi_read32(&regs->rbdr[0]);
+			status_reg = qspi_endian_xchg(status_reg);
+		}
+		qspi_write32(&regs->mcr,
+			qspi_read32(&regs->mcr) | QSPI_MCR_CLR_RXF_MASK);
+	}
+
+	to_or_from = qspi->sf_addr + qspi->amba_base;
+	qspi_write32(&regs->sfar, to_or_from);
+
+	tx_size = (len > TX_BUFFER_SIZE) ?
+		TX_BUFFER_SIZE : len;
+
+	size = (tx_size + 3) / 4;
+
+	for (i = 0; i < size; i++) {
+		data = qspi_endian_xchg(*txbuf);
+		qspi_write32(&regs->tbdr, data);
+		txbuf++;
+	}
+
+	qspi_write32(&regs->ipcr,
+		(SEQID_PP << QSPI_IPCR_SEQID_SHIFT) | tx_size);
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+static void qspi_op_rdsr(struct fsl_qspi *qspi, u32 *rxbuf)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 mcr_reg, reg, data;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	qspi_write32(&regs->sfar, qspi->amba_base);
+
+	qspi_write32(&regs->ipcr,
+		(SEQID_RDSR << QSPI_IPCR_SEQID_SHIFT) | 0);
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	while (1) {
+		reg = qspi_read32(&regs->rbsr);
+		if (reg & QSPI_RBSR_RDBFL_MASK) {
+			data = qspi_read32(&regs->rbdr[0]);
+			data = qspi_endian_xchg(data);
+			memcpy(rxbuf, &data, 4);
+			qspi_write32(&regs->mcr, qspi_read32(&regs->mcr) |
+				QSPI_MCR_CLR_RXF_MASK);
+			break;
+		}
+	}
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+static void qspi_op_se(struct fsl_qspi *qspi)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 mcr_reg;
+	u32 to_or_from = 0;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	to_or_from = qspi->sf_addr + qspi->amba_base;
+	qspi_write32(&regs->sfar, to_or_from);
+
+	qspi_write32(&regs->ipcr,
+		(SEQID_WREN << QSPI_IPCR_SEQID_SHIFT) | 0);
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	qspi_write32(&regs->ipcr,
+		(SEQID_SE << QSPI_IPCR_SEQID_SHIFT) | 0);
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+int spi_xfer(struct spi_slave *slave, unsigned int bitlen,
+		const void *dout, void *din, unsigned long flags)
+{
+	struct fsl_qspi *qspi = to_qspi_spi(slave);
+	u32 bytes = DIV_ROUND_UP(bitlen, 8);
+	static u32 pp_sfaddr;
+	u32 txbuf;
+
+	if (dout) {
+		memcpy(&txbuf, dout, 4);
+		qspi->cur_seqid = *(u8 *)dout;
+
+		if (flags == SPI_XFER_END) {
+			qspi->sf_addr = pp_sfaddr;
+			qspi_op_pp(qspi, (u32 *)dout, bytes);
+			return 0;
+		}
+
+		if (qspi->cur_seqid == OPCODE_FAST_READ) {
+			qspi->sf_addr = swab32(txbuf) & OFFSET_BITS_MASK;
+		} else if (qspi->cur_seqid == OPCODE_SE) {
+			qspi->sf_addr = swab32(txbuf) & OFFSET_BITS_MASK;
+			qspi_op_se(qspi);
+		} else if (qspi->cur_seqid == OPCODE_PP) {
+			pp_sfaddr = swab32(txbuf) & OFFSET_BITS_MASK;
+		}
+	}
+
+	if (din) {
+		if (qspi->cur_seqid == OPCODE_FAST_READ)
+			qspi_op_read(qspi, din, bytes);
+		else if (qspi->cur_seqid == OPCODE_RDID)
+			qspi_op_rdid(qspi, din, bytes);
+		else if (qspi->cur_seqid == OPCODE_RDSR)
+			qspi_op_rdsr(qspi, din);
+	}
+
+	return 0;
+}
+
+void spi_release_bus(struct spi_slave *slave)
+{
+	/* Nothing to do */
+}
diff --git a/drivers/spi/fsl_qspi.h b/drivers/spi/fsl_qspi.h
new file mode 100644
index 0000000..db400e6
--- /dev/null
+++ b/drivers/spi/fsl_qspi.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
+ *
+ * Register definitions for Freescale QSPI
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _FSL_QSPI_H_
+#define _FSL_QSPI_H_
+
+struct fsl_qspi_regs {
+	u32 mcr;
+	u32 rsvd0[1];
+	u32 ipcr;
+	u32 flshcr;
+	u32 buf0cr;
+	u32 buf1cr;
+	u32 buf2cr;
+	u32 buf3cr;
+	u32 bfgencr;
+	u32 soccr;
+	u32 rsvd1[2];
+	u32 buf0ind;
+	u32 buf1ind;
+	u32 buf2ind;
+	u32 rsvd2[49];
+	u32 sfar;
+	u32 rsvd3[1];
+	u32 smpr;
+	u32 rbsr;
+	u32 rbct;
+	u32 rsvd4[15];
+	u32 tbsr;
+	u32 tbdr;
+	u32 rsvd5[1];
+	u32 sr;
+	u32 fr;
+	u32 rser;
+	u32 spndst;
+	u32 sptrclr;
+	u32 rsvd6[4];
+	u32 sfa1ad;
+	u32 sfa2ad;
+	u32 sfb1ad;
+	u32 sfb2ad;
+	u32 rsvd7[28];
+	u32 rbdr[32];
+	u32 rsvd8[32];
+	u32 lutkey;
+	u32 lckcr;
+	u32 rsvd9[2];
+	u32 lut[64];
+};
+
+#define QSPI_IPCR_SEQID_SHIFT		24
+#define QSPI_IPCR_SEQID_MASK		(0xf << QSPI_IPCR_SEQID_SHIFT)
+
+#define QSPI_MCR_END_CFD_SHIFT		2
+#define QSPI_MCR_END_CFD_MASK		(3 << QSPI_MCR_END_CFD_SHIFT)
+#define QSPI_MCR_END_CFD_LE		(1 << QSPI_MCR_END_CFD_SHIFT)
+#define QSPI_MCR_DDR_EN_SHIFT		7
+#define QSPI_MCR_DDR_EN_MASK		(1 << QSPI_MCR_DDR_EN_SHIFT)
+#define QSPI_MCR_CLR_RXF_SHIFT		10
+#define QSPI_MCR_CLR_RXF_MASK		(1 << QSPI_MCR_CLR_RXF_SHIFT)
+#define QSPI_MCR_CLR_TXF_SHIFT		11
+#define QSPI_MCR_CLR_TXF_MASK		(1 << QSPI_MCR_CLR_TXF_SHIFT)
+#define QSPI_MCR_MDIS_SHIFT		14
+#define QSPI_MCR_MDIS_MASK		(1 << QSPI_MCR_MDIS_SHIFT)
+#define QSPI_MCR_RESERVED_SHIFT		16
+#define QSPI_MCR_RESERVED_MASK		(0xf << QSPI_MCR_RESERVED_SHIFT)
+
+#define QSPI_SMPR_HSENA_SHIFT		0
+#define QSPI_SMPR_HSENA_MASK		(1 << QSPI_SMPR_HSENA_SHIFT)
+#define QSPI_SMPR_FSPHS_SHIFT		5
+#define QSPI_SMPR_FSPHS_MASK		(1 << QSPI_SMPR_FSPHS_SHIFT)
+#define QSPI_SMPR_FSDLY_SHIFT		6
+#define QSPI_SMPR_FSDLY_MASK		(1 << QSPI_SMPR_FSDLY_SHIFT)
+#define QSPI_SMPR_DDRSMP_SHIFT		16
+#define QSPI_SMPR_DDRSMP_MASK		(7 << QSPI_SMPR_DDRSMP_SHIFT)
+
+#define QSPI_BFGENCR_SEQID_SHIFT	12
+#define QSPI_BFGENCR_SEQID_MASK		(0xf << QSPI_BFGENCR_SEQID_SHIFT)
+#define QSPI_BFGENCR_PAR_EN_SHIFT	16
+#define QSPI_BFGENCR_PAR_EN_MASK	(1 << QSPI_BFGENCR_PAR_EN_SHIFT)
+
+#define QSPI_RBSR_RDBFL_SHIFT		8
+#define QSPI_RBSR_RDBFL_MASK		(0x3f << QSPI_RBSR_RDBFL_SHIFT)
+
+#define QSPI_RBCT_RXBRD_SHIFT		8
+#define QSPI_RBCT_RXBRD_USEIPS		(1 << QSPI_RBCT_RXBRD_SHIFT)
+
+#define QSPI_SR_BUSY_SHIFT		0
+#define QSPI_SR_BUSY_MASK		(1 << QSPI_SR_BUSY_SHIFT)
+
+#define QSPI_LCKCR_LOCK			0x1
+#define QSPI_LCKCR_UNLOCK		0x2
+
+#define LUT_KEY_VALUE			0x5af05af0
+
+#define OPRND0_SHIFT			0
+#define OPRND0(x)			((x) << OPRND0_SHIFT)
+#define PAD0_SHIFT			8
+#define PAD0(x)				((x) << PAD0_SHIFT)
+#define INSTR0_SHIFT			10
+#define INSTR0(x)			((x) << INSTR0_SHIFT)
+#define OPRND1_SHIFT			16
+#define OPRND1(x)			((x) << OPRND1_SHIFT)
+#define PAD1_SHIFT			24
+#define PAD1(x)				((x) << PAD1_SHIFT)
+#define INSTR1_SHIFT			26
+#define INSTR1(x)			((x) << INSTR1_SHIFT)
+
+#define LUT_CMD				1
+#define LUT_ADDR			2
+#define LUT_DUMMY			3
+#define LUT_READ			7
+#define LUT_WRITE			8
+
+#define LUT_PAD1			0
+#define LUT_PAD2			1
+#define LUT_PAD4			2
+
+#define ADDR24BIT			0x18
+#define ADDR32BIT			0x20
+
+#endif /* _FSL_QSPI_H_ */
diff --git a/drivers/video/atmel_hlcdfb.c b/drivers/video/atmel_hlcdfb.c
index bb4d7d8..935ae42 100644
--- a/drivers/video/atmel_hlcdfb.c
+++ b/drivers/video/atmel_hlcdfb.c
@@ -171,6 +171,9 @@
 			| LCDC_BASECTRL_DMAIEN | LCDC_BASECTRL_DFETCH;
 	desc->next = (u32)desc;
 
+	/* Flush the DMA descriptor if we enabled dcache */
+	flush_dcache_range((u32)desc, (u32)desc + sizeof(*desc));
+
 	lcdc_writel(&regs->lcdc_baseaddr, desc->address);
 	lcdc_writel(&regs->lcdc_basectrl, desc->control);
 	lcdc_writel(&regs->lcdc_basenext, desc->next);
@@ -194,4 +197,7 @@
 	lcdc_writel(&regs->lcdc_lcden, value | LCDC_LCDEN_PWMEN);
 	while (!(lcdc_readl(&regs->lcdc_lcdsr) & LCDC_LCDSR_PWMSTS))
 		udelay(1);
+
+	/* Enable flushing if we enabled dcache */
+	lcd_set_flush_dcache(1);
 }
diff --git a/include/configs/bcm28155_ap.h b/include/configs/bcm28155_ap.h
index e93b855..bf09939 100644
--- a/include/configs/bcm28155_ap.h
+++ b/include/configs/bcm28155_ap.h
@@ -14,6 +14,7 @@
 #define CONFIG_ARMV7
 #define CONFIG_KONA
 #define CONFIG_SKIP_LOWLEVEL_INIT
+#define CONFIG_SYS_GENERIC_BOARD
 
 /*
  * Memory configuration
diff --git a/include/configs/ethernut5.h b/include/configs/ethernut5.h
index c81fc44..4c69af6 100644
--- a/include/configs/ethernut5.h
+++ b/include/configs/ethernut5.h
@@ -12,6 +12,8 @@
 
 #include <asm/hardware.h>
 
+#define CONFIG_SYS_GENERIC_BOARD
+
 /* The first stage boot loader expects u-boot running at this address. */
 #define CONFIG_SYS_TEXT_BASE	0x27000000	/* 16MB available */
 
diff --git a/include/configs/vf610twr.h b/include/configs/vf610twr.h
index 500fd2f..0342550 100644
--- a/include/configs/vf610twr.h
+++ b/include/configs/vf610twr.h
@@ -68,6 +68,18 @@
 #define CONFIG_PHYLIB
 #define CONFIG_PHY_MICREL
 
+/* QSPI Configs*/
+#define CONFIG_FSL_QSPI
+
+#ifdef CONFIG_FSL_QSPI
+#define CONFIG_CMD_SF
+#define CONFIG_SPI_FLASH
+#define CONFIG_SPI_FLASH_SPANSION
+#define FSL_QSPI_FLASH_SIZE		(1 << 24)
+#define FSL_QSPI_FLASH_NUM		2
+#define CONFIG_SYS_FSL_QSPI_LE
+#endif
+
 /* I2C Configs */
 #define CONFIG_CMD_I2C
 #define CONFIG_SYS_I2C