Tegra114: Add AVP (arm720t) files

This provides SPL support for T114 boards - AVP early init, plus
CPU (A15) init/jump to main U-Boot.

Signed-off-by: Tom Warren <twarren@nvidia.com>
diff --git a/arch/arm/cpu/arm720t/tegra-common/cpu.c b/arch/arm/cpu/arm720t/tegra-common/cpu.c
index c32925b..119342e 100644
--- a/arch/arm/cpu/arm720t/tegra-common/cpu.c
+++ b/arch/arm/cpu/arm720t/tegra-common/cpu.c
@@ -25,30 +25,24 @@
 #include <asm/arch-tegra/scu.h>
 #include "cpu.h"
 
-enum tegra_family_t {
-	TEGRA_FAMILY_T2x,
-	TEGRA_FAMILY_T3x,
-};
-
-
-enum tegra_family_t get_family(void)
-{
-	u32 reg, chip_id;
-
-	reg = readl(NV_PA_APB_MISC_BASE + GP_HIDREV);
-
-	chip_id = reg >> 8;
-	chip_id &= 0xff;
-	debug("  tegra_get_family: chip_id = %x\n", chip_id);
-	if (chip_id == 0x30)
-		return TEGRA_FAMILY_T3x;
-	else
-		return TEGRA_FAMILY_T2x;
-}
-
 int get_num_cpus(void)
 {
-	return get_family() == TEGRA_FAMILY_T3x ? 4 : 2;
+	struct apb_misc_gp_ctlr *gp;
+	uint rev;
+
+	gp = (struct apb_misc_gp_ctlr *)NV_PA_APB_MISC_GP_BASE;
+	rev = (readl(&gp->hidrev) & HIDREV_CHIPID_MASK) >> HIDREV_CHIPID_SHIFT;
+
+	switch (rev) {
+	case CHIPID_TEGRA20:
+		return 2;
+		break;
+	case CHIPID_TEGRA30:
+	case CHIPID_TEGRA114:
+	default:
+		return 4;
+		break;
+	}
 }
 
 /*
@@ -56,6 +50,7 @@
  */
 struct clk_pll_table tegra_pll_x_table[TEGRA_SOC_CNT][CLOCK_OSC_FREQ_COUNT] = {
 	/* T20: 1 GHz */
+	/*  n,  m, p, cpcon */
 	{{ 1000, 13, 0, 12},	/* OSC 13M */
 	 { 625,  12, 0, 8},	/* OSC 19.2M */
 	 { 1000, 12, 0, 12},	/* OSC 12M */
@@ -75,6 +70,13 @@
 	 { 700, 6, 0, 8},
 	 { 700, 13, 0, 8},
 	},
+
+	/* T114: 1.4 GHz */
+	{{ 862, 8, 0, 8},
+	 { 583, 8, 0, 4},
+	 { 696, 12, 0, 8},
+	 { 700, 13, 0, 8},
+	},
 };
 
 void adjust_pllp_out_freqs(void)
@@ -159,8 +161,8 @@
 	sel = &tegra_pll_x_table[chip_type][osc];
 	pllx_set_rate(pll, sel->n, sel->m, sel->p, sel->cpcon);
 
-	/* adjust PLLP_out1-4 on T30 */
-	if (chip_type == TEGRA_SOC_T30) {
+	/* adjust PLLP_out1-4 on T30/T114 */
+	if (chip_type == TEGRA_SOC_T30 || chip_type == TEGRA_SOC_T114) {
 		debug("  init_pllx: adjusting PLLP out freqs\n");
 		adjust_pllp_out_freqs();
 	}
@@ -196,10 +198,9 @@
 	 */
 	clk = readl(&clkrst->crc_clk_cpu_cmplx);
 	clk |= 1 << CPU1_CLK_STP_SHIFT;
-#if defined(CONFIG_TEGRA30)
-	clk |= 1 << CPU2_CLK_STP_SHIFT;
-	clk |= 1 << CPU3_CLK_STP_SHIFT;
-#endif
+	if (get_num_cpus() == 4)
+		clk |= (1 << CPU2_CLK_STP_SHIFT) + (1 << CPU3_CLK_STP_SHIFT);
+
 	/* Stop/Unstop the CPU clock */
 	clk &= ~CPU0_CLK_STP_MASK;
 	clk |= !enable << CPU0_CLK_STP_SHIFT;
@@ -285,7 +286,8 @@
 
 void clock_enable_coresight(int enable)
 {
-	u32 rst, src;
+	u32 rst, src = 2;
+	int chip;
 
 	debug("clock_enable_coresight entry\n");
 	clock_set_enable(PERIPH_ID_CORESIGHT, enable);
@@ -301,20 +303,23 @@
 		 * Clock divider request for 204MHz would setup CSITE clock as
 		 * 144MHz for PLLP base 216MHz and 204MHz for PLLP base 408MHz
 		 */
-		if (tegra_get_chip_type() == TEGRA_SOC_T30)
+		chip = tegra_get_chip_type();
+		if (chip == TEGRA_SOC_T30 || chip == TEGRA_SOC_T114)
 			src = CLK_DIVIDER(NVBL_PLLP_KHZ, 204000);
-		else
+		else if (chip == TEGRA_SOC_T20 || chip == TEGRA_SOC_T25)
 			src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
+		else
+			printf("%s: Unknown chip type %X!\n", __func__, chip);
 		clock_ll_set_source_divisor(PERIPH_ID_CSI, 0, src);
 
 		/* Unlock the CPU CoreSight interfaces */
 		rst = CORESIGHT_UNLOCK;
 		writel(rst, CSITE_CPU_DBG0_LAR);
 		writel(rst, CSITE_CPU_DBG1_LAR);
-#if defined(CONFIG_TEGRA30)
-		writel(rst, CSITE_CPU_DBG2_LAR);
-		writel(rst, CSITE_CPU_DBG3_LAR);
-#endif
+		if (get_num_cpus() == 4) {
+			writel(rst, CSITE_CPU_DBG2_LAR);
+			writel(rst, CSITE_CPU_DBG3_LAR);
+		}
 	}
 }
 
diff --git a/arch/arm/cpu/arm720t/tegra-common/cpu.h b/arch/arm/cpu/arm720t/tegra-common/cpu.h
index 3e2ea3a..e8e05d7 100644
--- a/arch/arm/cpu/arm720t/tegra-common/cpu.h
+++ b/arch/arm/cpu/arm720t/tegra-common/cpu.h
@@ -26,10 +26,12 @@
 #define PLL_STABILIZATION_DELAY (300)
 #define IO_STABILIZATION_DELAY	(1000)
 
-#if defined(CONFIG_TEGRA30)
-#define NVBL_PLLP_KHZ	(408000)
-#else	/* Tegra20 */
+#if defined(CONFIG_TEGRA20)
 #define NVBL_PLLP_KHZ	(216000)
+#elif defined(CONFIG_TEGRA30) || defined(CONFIG_TEGRA114)
+#define NVBL_PLLP_KHZ	(408000)
+#else
+#error "Unknown Tegra chip!"
 #endif
 
 #define PLLX_ENABLED		(1 << 30)
diff --git a/arch/arm/cpu/arm720t/tegra114/Makefile b/arch/arm/cpu/arm720t/tegra114/Makefile
new file mode 100644
index 0000000..6cf7fe9
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra114/Makefile
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2010-2013, NVIDIA CORPORATION.  All rights reserved.
+#
+# (C) Copyright 2000-2008
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(SOC).o
+
+#COBJS-y	+= cpu.o t11x.o
+COBJS-y	+= cpu.o
+
+SRCS	:= $(COBJS-y:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS-y))
+
+all:	$(obj).depend $(LIB)
+
+$(LIB):	$(OBJS)
+	$(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/arm720t/tegra114/config.mk b/arch/arm/cpu/arm720t/tegra114/config.mk
new file mode 100644
index 0000000..7947b50
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra114/config.mk
@@ -0,0 +1,19 @@
+#
+# Copyright (c) 2010-2013, NVIDIA CORPORATION.  All rights reserved.
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+USE_PRIVATE_LIBGCC = yes
diff --git a/arch/arm/cpu/arm720t/tegra114/cpu.c b/arch/arm/cpu/arm720t/tegra114/cpu.c
new file mode 100644
index 0000000..5962e15
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra114/cpu.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2010-2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/flow.h>
+#include <asm/arch/pinmux.h>
+#include <asm/arch/tegra.h>
+#include <asm/arch-tegra/clk_rst.h>
+#include <asm/arch-tegra/pmc.h>
+#include "../tegra-common/cpu.h"
+
+/* Tegra114-specific CPU init code */
+static void enable_cpu_power_rail(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg;
+
+	debug("enable_cpu_power_rail entry\n");
+
+	/* un-tristate PWR_I2C SCL/SDA, rest of the defaults are correct */
+	pinmux_tristate_disable(PINGRP_PWR_I2C_SCL);
+	pinmux_tristate_disable(PINGRP_PWR_I2C_SDA);
+
+	/*
+	 * Set CPUPWRGOOD_TIMER - APB clock is 1/2 of SCLK (102MHz),
+	 * set it for 25ms (102MHz * .025)
+	 */
+	reg = 0x26E8F0;
+	writel(reg, &pmc->pmc_cpupwrgood_timer);
+
+	/* Set polarity to 0 (normal) and enable CPUPWRREQ_OE */
+	clrbits_le32(&pmc->pmc_cntrl, CPUPWRREQ_POL);
+	setbits_le32(&pmc->pmc_cntrl, CPUPWRREQ_OE);
+
+	/*
+	 * Set CLK_RST_CONTROLLER_CPU_SOFTRST_CTRL2_0_CAR2PMC_CPU_ACK_WIDTH
+	 * to 408 to satisfy the requirement of having at least 16 CPU clock
+	 * cycles before clamp removal.
+	 */
+
+	clrbits_le32(&clkrst->crc_cpu_softrst_ctrl2, 0xFFF);
+	setbits_le32(&clkrst->crc_cpu_softrst_ctrl2, 408);
+}
+
+static void enable_cpu_clocks(void)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg;
+
+	debug("enable_cpu_clocks entry\n");
+
+	/* Wait for PLL-X to lock */
+	do {
+		reg = readl(&clkrst->crc_pll_simple[SIMPLE_PLLX].pll_base);
+	} while ((reg & (1 << 27)) == 0);
+
+	/* Wait until all clocks are stable */
+	udelay(PLL_STABILIZATION_DELAY);
+
+	writel(CCLK_BURST_POLICY, &clkrst->crc_cclk_brst_pol);
+	writel(SUPER_CCLK_DIVIDER, &clkrst->crc_super_cclk_div);
+
+	/* Always enable the main CPU complex clocks */
+	clock_enable(PERIPH_ID_CPU);
+	clock_enable(PERIPH_ID_CPULP);
+	clock_enable(PERIPH_ID_CPUG);
+}
+
+static void remove_cpu_resets(void)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg;
+
+	debug("remove_cpu_resets entry\n");
+	/* Take the slow non-CPU partition out of reset */
+	reg = readl(&clkrst->crc_rst_cpulp_cmplx_clr);
+	writel((reg | CLR_NONCPURESET), &clkrst->crc_rst_cpulp_cmplx_clr);
+
+	/* Take the fast non-CPU partition out of reset */
+	reg = readl(&clkrst->crc_rst_cpug_cmplx_clr);
+	writel((reg | CLR_NONCPURESET), &clkrst->crc_rst_cpug_cmplx_clr);
+
+	/* Clear the SW-controlled reset of the slow cluster */
+	reg = readl(&clkrst->crc_rst_cpulp_cmplx_clr);
+	reg |= (CLR_CPURESET0+CLR_DBGRESET0+CLR_CORERESET0+CLR_CXRESET0);
+	writel(reg, &clkrst->crc_rst_cpulp_cmplx_clr);
+
+	/* Clear the SW-controlled reset of the fast cluster */
+	reg = readl(&clkrst->crc_rst_cpug_cmplx_clr);
+	reg |= (CLR_CPURESET0+CLR_DBGRESET0+CLR_CORERESET0+CLR_CXRESET0);
+	reg |= (CLR_CPURESET1+CLR_DBGRESET1+CLR_CORERESET1+CLR_CXRESET1);
+	reg |= (CLR_CPURESET2+CLR_DBGRESET2+CLR_CORERESET2+CLR_CXRESET2);
+	reg |= (CLR_CPURESET3+CLR_DBGRESET3+CLR_CORERESET3+CLR_CXRESET3);
+	writel(reg, &clkrst->crc_rst_cpug_cmplx_clr);
+}
+
+/**
+ * The T114 requires some special clock initialization, including setting up
+ * the DVC I2C, turning on MSELECT and selecting the G CPU cluster
+ */
+void t114_init_clocks(void)
+{
+	struct clk_rst_ctlr *clkrst =
+			(struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE;
+	u32 val;
+
+	debug("t114_init_clocks entry\n");
+
+	/* Set active CPU cluster to G */
+	clrbits_le32(&flow->cluster_control, 1);
+
+	/*
+	 * Switch system clock to PLLP_OUT4 (108 MHz), AVP will now run
+	 * at 108 MHz. This is glitch free as only the source is changed, no
+	 * special precaution needed.
+	 */
+	val = (SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_FIQ_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_IRQ_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_RUN_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_IDLE_SOURCE_SHIFT) |
+		(SCLK_SYS_STATE_RUN << SCLK_SYS_STATE_SHIFT);
+	writel(val, &clkrst->crc_sclk_brst_pol);
+
+	writel(SUPER_SCLK_ENB_MASK, &clkrst->crc_super_sclk_div);
+
+	debug("Setting up PLLX\n");
+	init_pllx();
+
+	val = (1 << CLK_SYS_RATE_AHB_RATE_SHIFT);
+	writel(val, &clkrst->crc_clk_sys_rate);
+
+	/* Enable clocks to required peripherals. TBD - minimize this list */
+	debug("Enabling clocks\n");
+
+	clock_set_enable(PERIPH_ID_CACHE2, 1);
+	clock_set_enable(PERIPH_ID_GPIO, 1);
+	clock_set_enable(PERIPH_ID_TMR, 1);
+	clock_set_enable(PERIPH_ID_RTC, 1);
+	clock_set_enable(PERIPH_ID_CPU, 1);
+	clock_set_enable(PERIPH_ID_EMC, 1);
+	clock_set_enable(PERIPH_ID_I2C5, 1);
+	clock_set_enable(PERIPH_ID_FUSE, 1);
+	clock_set_enable(PERIPH_ID_PMC, 1);
+	clock_set_enable(PERIPH_ID_APBDMA, 1);
+	clock_set_enable(PERIPH_ID_MEM, 1);
+	clock_set_enable(PERIPH_ID_IRAMA, 1);
+	clock_set_enable(PERIPH_ID_IRAMB, 1);
+	clock_set_enable(PERIPH_ID_IRAMC, 1);
+	clock_set_enable(PERIPH_ID_IRAMD, 1);
+	clock_set_enable(PERIPH_ID_CORESIGHT, 1);
+	clock_set_enable(PERIPH_ID_MSELECT, 1);
+	clock_set_enable(PERIPH_ID_EMC1, 1);
+	clock_set_enable(PERIPH_ID_MC1, 1);
+	clock_set_enable(PERIPH_ID_DVFS, 1);
+
+	/* Switch MSELECT clock to PLLP (00) */
+	clock_ll_set_source(PERIPH_ID_MSELECT, 0);
+
+	/*
+	 * Clock divider request for 102MHz would setup MSELECT clock as
+	 * 102MHz for PLLP base 408MHz
+	 */
+	clock_ll_set_source_divisor(PERIPH_ID_MSELECT, 0,
+		(NVBL_PLLP_KHZ/102000));
+
+	/* I2C5 (DVC) gets CLK_M and a divisor of 17 */
+	clock_ll_set_source_divisor(PERIPH_ID_I2C5, 3, 16);
+
+	/* Give clocks time to stabilize */
+	udelay(1000);
+
+	/* Take required peripherals out of reset */
+	debug("Taking periphs out of reset\n");
+	reset_set_enable(PERIPH_ID_CACHE2, 0);
+	reset_set_enable(PERIPH_ID_GPIO, 0);
+	reset_set_enable(PERIPH_ID_TMR, 0);
+	reset_set_enable(PERIPH_ID_COP, 0);
+	reset_set_enable(PERIPH_ID_EMC, 0);
+	reset_set_enable(PERIPH_ID_I2C5, 0);
+	reset_set_enable(PERIPH_ID_FUSE, 0);
+	reset_set_enable(PERIPH_ID_APBDMA, 0);
+	reset_set_enable(PERIPH_ID_MEM, 0);
+	reset_set_enable(PERIPH_ID_CORESIGHT, 0);
+	reset_set_enable(PERIPH_ID_MSELECT, 0);
+	reset_set_enable(PERIPH_ID_EMC1, 0);
+	reset_set_enable(PERIPH_ID_MC1, 0);
+
+	debug("t114_init_clocks exit\n");
+}
+
+static int is_partition_powered(u32 mask)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	/* Get power gate status */
+	reg = readl(&pmc->pmc_pwrgate_status);
+	return (reg & mask) == mask;
+}
+
+static int is_clamp_enabled(u32 mask)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	/* Get clamp status. TODO: Add pmc_clamp_status alias to pmc.h */
+	reg = readl(&pmc->pmc_pwrgate_timer_on);
+	return (reg & mask) == mask;
+}
+
+static void power_partition(u32 status, u32 partid)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+
+	debug("%s: status = %08X, part ID = %08X\n", __func__, status, partid);
+	/* Is the partition already on? */
+	if (!is_partition_powered(status)) {
+		/* No, toggle the partition power state (OFF -> ON) */
+		debug("power_partition, toggling state\n");
+		clrbits_le32(&pmc->pmc_pwrgate_toggle, 0x1F);
+		setbits_le32(&pmc->pmc_pwrgate_toggle, partid);
+		setbits_le32(&pmc->pmc_pwrgate_toggle, START_CP);
+
+		/* Wait for the power to come up */
+		while (!is_partition_powered(status))
+			;
+
+		/* Wait for the clamp status to be cleared */
+		while (is_clamp_enabled(status))
+			;
+
+		/* Give I/O signals time to stabilize */
+		udelay(IO_STABILIZATION_DELAY);
+	}
+}
+
+void powerup_cpus(void)
+{
+	debug("powerup_cpus entry\n");
+
+	/* We boot to the fast cluster */
+	debug("powerup_cpus entry: G cluster\n");
+	/* Power up the fast cluster rail partition */
+	power_partition(CRAIL, CRAILID);
+
+	/* Power up the fast cluster non-CPU partition */
+	power_partition(C0NC, C0NCID);
+
+	/* Power up the fast cluster CPU0 partition */
+	power_partition(CE0, CE0ID);
+}
+
+void start_cpu(u32 reset_vector)
+{
+	debug("start_cpu entry, reset_vector = %x\n", reset_vector);
+
+	t114_init_clocks();
+
+	/* Enable VDD_CPU */
+	enable_cpu_power_rail();
+
+	/* Get the CPU(s) running */
+	enable_cpu_clocks();
+
+	/* Enable CoreSight */
+	clock_enable_coresight(1);
+
+	/* Take CPU(s) out of reset */
+	remove_cpu_resets();
+
+	/*
+	 * Set the entry point for CPU execution from reset,
+	 *  if it's a non-zero value.
+	 */
+	if (reset_vector)
+		writel(reset_vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
+
+	/* If the CPU(s) don't already have power, power 'em up */
+	powerup_cpus();
+}