arm/ls102xa: create TLB to map PCIe region

LS1021A's PCIe1 region begins at 0x40_00000000; PCIe2 begins at
0x48_00000000. In order to access a PCIe device, we must create
TLB entries that map the 40bit physical address to a 32bit virtual
address. This patch enables the MMU after DDR is available and creates
the MMU table in DRAM to map all 4G space; then it re-uses the reserved
space to map the PCIe regions. The following is the mapping layout.

VA mapping:
    -------  <---- 0GB
   |       |
   |       |
   |-------| <---- 0x24000000
   |///////|  ===> 192MB VA map for PCIe1 with offset 0x40_0000_0000
   |-------| <---- 0x30000000
   |       |
   |-------| <---- 0x34000000
   |///////|  ===> 192MB VA map for PCIe2 with offset 0x48_0000_0000
   |-------| <---- 0x40000000
   |       |
   |-------| <---- 0x80000000 DDR0 space start
   |\\\\\\\|
   |\\\\\\\|  ===> 2GB VA map for 2GB DDR0 Memory space
   |\\\\\\\|
   -------  <---- 4GB DDR0 space end

Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
Reviewed-by: York Sun <yorksun@freescale.com>
diff --git a/arch/arm/cpu/armv7/ls102xa/cpu.c b/arch/arm/cpu/armv7/ls102xa/cpu.c
index ce2d92f..18665a3 100644
--- a/arch/arm/cpu/armv7/ls102xa/cpu.c
+++ b/arch/arm/cpu/armv7/ls102xa/cpu.c
@@ -8,6 +8,8 @@
 #include <asm/arch/clock.h>
 #include <asm/io.h>
 #include <asm/arch/immap_ls102xa.h>
+#include <asm/cache.h>
+#include <asm/system.h>
 #include <tsec.h>
 #include <netdev.h>
 #include <fsl_esdhc.h>
@@ -16,6 +18,197 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#ifndef CONFIG_SYS_DCACHE_OFF
+
+/*
+ * Bit[1] of the descriptor indicates the descriptor type,
+ * and bit[0] indicates whether the descriptor is valid.
+ */
+#define PMD_TYPE_TABLE		0x3
+#define PMD_TYPE_SECT		0x1
+
+/* AttrIndx[2:0]: selects one of the MT_* attributes encoded in MAIR0/MAIR1 */
+#define PMD_ATTRINDX(t)		((t) << 2)
+
+/* Section descriptor flag; set_pgsection() sets it on every section entry */
+#define PMD_SECT_AF		(1 << 10)
+
+#define BLOCK_SIZE_L1		(1UL << 30)
+#define BLOCK_SIZE_L2		(1UL << 21)
+
+/* TTBCR flags; EAE is bit 31, so use 1U to avoid shifting into int's sign */
+#define TTBCR_EAE		(1U << 31)
+#define TTBCR_T0SZ(x)		((x) << 0)
+#define TTBCR_T1SZ(x)		((x) << 16)
+#define TTBCR_USING_TTBR0	(TTBCR_T0SZ(0) | TTBCR_T1SZ(0))
+#define TTBCR_IRGN0_NC		(0 << 8)
+#define TTBCR_IRGN0_WBWA	(1 << 8)
+#define TTBCR_IRGN0_WT		(2 << 8)
+#define TTBCR_IRGN0_WBNWA	(3 << 8)
+#define TTBCR_IRGN0_MASK	(3 << 8)
+#define TTBCR_ORGN0_NC		(0 << 10)
+#define TTBCR_ORGN0_WBWA	(1 << 10)
+#define TTBCR_ORGN0_WT		(2 << 10)
+#define TTBCR_ORGN0_WBNWA	(3 << 10)
+#define TTBCR_ORGN0_MASK	(3 << 10)
+#define TTBCR_SHARED_NON	(0 << 12)
+#define TTBCR_SHARED_OUTER	(2 << 12)
+#define TTBCR_SHARED_INNER	(3 << 12)
+#define TTBCR_EPD0		(0 << 7)
+#define TTBCR			(TTBCR_SHARED_NON | \
+				 TTBCR_ORGN0_NC	| \
+				 TTBCR_IRGN0_NC	| \
+				 TTBCR_USING_TTBR0 | \
+				 TTBCR_EAE)
+
+/*
+ * Memory region attributes for LPAE (defined in pgtable):
+ *
+ * n = AttrIndx[2:0]; MT_MAIR0 encodes n = 0..3, MT_MAIR1 encodes n = 4..7
+ *
+ *		              n       MAIR
+ *	UNCACHED              000     00000000
+ *	BUFFERABLE            001     01000100
+ *	DEV_WC                001     01000100
+ *	WRITETHROUGH          010     10101010
+ *	WRITEBACK             011     11101110
+ *	DEV_CACHED            011     11101110
+ *	DEV_SHARED            100     00000100
+ *	DEV_NONSHARED         100     00000100
+ *	unused                101
+ *	unused                110
+ *	WRITEALLOC            111     11111111
+ */
+#define MT_MAIR0		0xeeaa4400
+#define MT_MAIR1		0xff000004
+#define MT_STRONLY_ORDER	0
+#define MT_NORMAL_NC		1
+#define MT_DEVICE_MEM		4
+#define MT_NORMAL		7
+
+/* Write a table descriptor at 'index'; phy_addr must be aligned to 4KB */
+static inline void set_pgtable(u32 *page_table, u32 index, u32 phy_addr)
+{
+	u32 value = phy_addr | PMD_TYPE_TABLE;
+
+	page_table[2 * index] = value;
+	page_table[2 * index + 1] = 0;
+}
+
+/* Write a 64-bit section descriptor; phy_addr must be aligned to 4KB */
+static inline void set_pgsection(u32 *page_table, u32 index, u64 phy_addr,
+				 u32 memory_type)
+{
+	u64 value;
+
+	value = phy_addr | PMD_TYPE_SECT | PMD_SECT_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	page_table[2 * index] = value & 0xFFFFFFFF;
+	page_table[2 * index + 1] = (value >> 32) & 0xFFFFFFFF;
+}
+
+/*
+ * Start the MMU once DDR is available; the MMU tables are created in DRAM.
+ * The base address of the tables is gd->arch.tlb_addr. Two levels of
+ * translation tables are used here to cover the 40-bit address space.
+ *
+ * The tables are located at PHY 2G~4G.
+ *
+ * VA mapping:
+ *
+ *  -------  <---- 0GB
+ * |       |
+ * |       |
+ * |-------| <---- 0x24000000
+ * |///////|  ===> 192MB VA map for PCIe1 with offset 0x40_0000_0000
+ * |-------| <---- 0x30000000
+ * |       |
+ * |-------| <---- 0x34000000
+ * |///////|  ===> 192MB VA map for PCIe2 with offset 0x48_0000_0000
+ * |-------| <---- 0x40000000
+ * |       |
+ * |-------| <---- 0x80000000 DDR0 space start
+ * |\\\\\\\|
+ * |\\\\\\\|  ===> 2GB VA map for 2GB DDR0 Memory space
+ * |\\\\\\\|
+ *  -------  <---- 4GB DDR0 space end
+ */
+static void mmu_setup(void)
+{
+	u32 *level0_table = (u32 *)gd->arch.tlb_addr;
+	u32 *level1_table = (u32 *)(gd->arch.tlb_addr + 0x1000);
+	u64 va_start = 0;
+	u32 reg;
+	int i;
+
+	/* Level 0 entries 2-3 identity-map 2GB-4GB (DDR) as normal memory */
+	set_pgsection(level0_table, 3, 3 * BLOCK_SIZE_L1, MT_NORMAL);
+	set_pgsection(level0_table, 2, 2 * BLOCK_SIZE_L1, MT_NORMAL);
+	/* Level 0 entry 1 identity-maps 1GB-2GB as device memory */
+	set_pgsection(level0_table, 1, 1 * BLOCK_SIZE_L1, MT_DEVICE_MEM);
+	/* Level 0 entry 0 points to the level 1 table (devices and PCIe MEM) */
+	set_pgtable(level0_table, 0, (u32)level1_table);
+
+	/* Level 1 has 512 entries of BLOCK_SIZE_L2 each, covering VA 0-1GB */
+	for (i = 0; i < 512; i++) {
+		/* Mapping for PCIe 1 */
+		if (va_start >= CONFIG_SYS_PCIE1_VIRT_ADDR &&
+		    va_start < (CONFIG_SYS_PCIE1_VIRT_ADDR +
+				 CONFIG_SYS_PCIE_MMAP_SIZE))
+			set_pgsection(level1_table, i,
+				      CONFIG_SYS_PCIE1_PHYS_BASE + va_start,
+				      MT_DEVICE_MEM);
+		/* Mapping for PCIe 2 */
+		else if (va_start >= CONFIG_SYS_PCIE2_VIRT_ADDR &&
+			 va_start < (CONFIG_SYS_PCIE2_VIRT_ADDR +
+				     CONFIG_SYS_PCIE_MMAP_SIZE))
+			set_pgsection(level1_table, i,
+				      CONFIG_SYS_PCIE2_PHYS_BASE + va_start,
+				      MT_DEVICE_MEM);
+		else
+			set_pgsection(level1_table, i,
+				      va_start,
+				      MT_DEVICE_MEM);
+		va_start += BLOCK_SIZE_L2;
+	}
+
+	asm volatile("dsb sy;isb");
+	asm volatile("mcr p15, 0, %0, c2, c0, 2" /* Write RT to TTBCR */
+			: : "r" (TTBCR) : "memory");
+	asm volatile("mcrr p15, 0, %0, %1, c2" /* TTBR 0 */
+			: : "r" ((u32)level0_table), "r" (0) : "memory");
+	asm volatile("mcr p15, 0, %0, c10, c2, 0" /* write MAIR 0 */
+			: : "r" (MT_MAIR0) : "memory");
+	asm volatile("mcr p15, 0, %0, c10, c2, 1" /* write MAIR 1 */
+			: : "r" (MT_MAIR1) : "memory");
+
+	/* Set the access control to all-supervisor */
+	asm volatile("mcr p15, 0, %0, c3, c0, 0"
+		     : : "r" (~0));
+
+	/* Enable the mmu */
+	reg = get_cr();
+	set_cr(reg | CR_M);
+}
+
+/*
+ * This function is called from lib/board.c. It recreates the MMU
+ * table in main memory, then enables the MMU and d-cache (not i-cache).
+ */
+void enable_caches(void)
+{
+	/* Invalidate all TLB */
+	mmu_page_table_flush(gd->arch.tlb_addr,
+			     gd->arch.tlb_addr +  gd->arch.tlb_size);
+	/* Set up and enable mmu */
+	mmu_setup();
+
+	/* Invalidate & Enable d-cache */
+	invalidate_dcache_all();
+	set_cr(get_cr() | CR_C);
+}
+#endif /* #ifndef CONFIG_SYS_DCACHE_OFF */
+
 #if defined(CONFIG_DISPLAY_CPUINFO)
 int print_cpuinfo(void)
 {
@@ -78,16 +271,6 @@
 }
 #endif
 
-void enable_caches(void)
-{
-#ifndef CONFIG_SYS_ICACHE_OFF
-	icache_enable();
-#endif
-#ifndef CONFIG_SYS_DCACHE_OFF
-	dcache_enable();
-#endif
-}
-
 #ifdef CONFIG_FSL_ESDHC
 int cpu_mmc_init(bd_t *bis)
 {
diff --git a/arch/arm/include/asm/arch-ls102xa/config.h b/arch/arm/include/asm/arch-ls102xa/config.h
index 7915518..cfabdc6 100644
--- a/arch/arm/include/asm/arch-ls102xa/config.h
+++ b/arch/arm/include/asm/arch-ls102xa/config.h
@@ -61,6 +61,20 @@
 #define CONFIG_SYS_PCIE1_ADDR			(CONFIG_SYS_IMMR + 0x2400000)
 #define CONFIG_SYS_PCIE2_ADDR			(CONFIG_SYS_IMMR + 0x2500000)
 
+#define CONFIG_SYS_PCIE1_PHYS_BASE		0x4000000000ULL
+#define CONFIG_SYS_PCIE2_PHYS_BASE		0x4800000000ULL
+#define CONFIG_SYS_PCIE1_VIRT_ADDR		0x24000000UL
+#define CONFIG_SYS_PCIE2_VIRT_ADDR		0x34000000UL
+#define CONFIG_SYS_PCIE_MMAP_SIZE		(192 * 1024 * 1024) /* 192M */
+/*
+ * The MMU tables map VIRT_ADDR to (PHYS_BASE + VIRT_ADDR), so a 40-bit
+ * PCIe physical address converts directly to a 32-bit virtual address.
+ */
+#define CONFIG_SYS_PCIE1_PHYS_ADDR		(CONFIG_SYS_PCIE1_PHYS_BASE + \
+						 CONFIG_SYS_PCIE1_VIRT_ADDR)
+#define CONFIG_SYS_PCIE2_PHYS_ADDR		(CONFIG_SYS_PCIE2_PHYS_BASE + \
+						 CONFIG_SYS_PCIE2_VIRT_ADDR)
+
 #ifdef CONFIG_DDR_SPD
 #define CONFIG_SYS_FSL_DDR_BE
 #define CONFIG_VERY_BIG_RAM