arm: imx8ulp: release and configure XRDC at early phase

Since S400 sets the memory of the SPL image to R/X, we can't write
to any data in the SPL image.

1. Save/restore the boot parameters only for U-Boot, not for SPL, to
   avoid writing data.
2. Do not use the MU DM driver; call the MU API directly to send the
   release XRDC request to S400 at early phase.
3. Configure the SPL image memory in SRAM2 as writable (R/W/X).

Signed-off-by: Ye Li <ye.li@nxp.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
diff --git a/arch/arm/include/asm/arch-imx8ulp/mu_hal.h b/arch/arm/include/asm/arch-imx8ulp/mu_hal.h
new file mode 100644
index 0000000..10d966d
--- /dev/null
+++ b/arch/arm/include/asm/arch-imx8ulp/mu_hal.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2021 NXP
+ */
+
+#ifndef __IMX8ULP_MU_HAL_H__
+#define __IMX8ULP_MU_HAL_H__
+/* Direct MU register helpers; base is the address of the MU registers */
+void mu_hal_init(ulong base);
+int mu_hal_sendmsg(ulong base, u32 reg_index, u32 msg);
+int mu_hal_receivemsg(ulong base, u32 reg_index, u32 *msg);
+#endif /* __IMX8ULP_MU_HAL_H__ */
diff --git a/arch/arm/mach-imx/imx8ulp/lowlevel_init.S b/arch/arm/mach-imx/imx8ulp/lowlevel_init.S
index 7d81a75..791c264 100644
--- a/arch/arm/mach-imx/imx8ulp/lowlevel_init.S
+++ b/arch/arm/mach-imx/imx8ulp/lowlevel_init.S
@@ -16,17 +16,11 @@
 
 .global save_boot_params
 save_boot_params:
+#ifndef CONFIG_SPL_BUILD /* SPL image is R/X (set by S400): skip stores */
 	/* The firmware provided ATAG/FDT address can be found in r2/x0 */
 	adr	x0, rom_pointer
 	stp	x1, x2, [x0], #16
 	stp	x3, x4, [x0], #16
-
+#endif /* !CONFIG_SPL_BUILD */
 	/* Returns */
 	b	save_boot_params_ret
-
-.global restore_boot_params
-restore_boot_params:
-	adr	x0, rom_pointer
-	ldp	x1, x2, [x0], #16
-	ldp	x3, x4, [x0], #16
-	ret
diff --git a/arch/arm/mach-imx/imx8ulp/soc.c b/arch/arm/mach-imx/imx8ulp/soc.c
index 5e7bf57..62c02a6 100644
--- a/arch/arm/mach-imx/imx8ulp/soc.c
+++ b/arch/arm/mach-imx/imx8ulp/soc.c
@@ -11,6 +11,10 @@
 #include <asm/mach-imx/boot_mode.h>
 #include <efi_loader.h>
 #include <spl.h>
+#include <asm/arch/s400_api.h>
+#include <asm/arch/mu_hal.h>
+#include <cpu_func.h>
+#include <asm/setup.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -340,9 +344,89 @@
 	setbits_le32(SIM1_BASE_ADDR + 0x8, (0x1 << 26));
 }
 
+/* Request S400 to release the A35 XRDC. Returns 0 or a negative errno. */
+static int release_xrdc(void)
+{
+	ulong s_mu_base = 0x27020000UL;
+	struct imx8ulp_s400_msg msg;
+	int ret;
+
+	msg.version = AHAB_VERSION;
+	msg.tag = AHAB_CMD_TAG;
+	msg.size = 2;
+	msg.command = AHAB_RELEASE_RDC_REQ_CID;
+	msg.data[0] = (0x78 << 8) | 0x2; /* A35 XRDC */
+
+	mu_hal_init(s_mu_base);
+	ret = mu_hal_sendmsg(s_mu_base, 0, *((u32 *)&msg));
+	if (!ret)
+		ret = mu_hal_sendmsg(s_mu_base, 1, msg.data[0]);
+	if (ret)
+		return ret;
+
+	/* Read the two response words; only the second is checked below */
+	ret = mu_hal_receivemsg(s_mu_base, 0, (u32 *)&msg);
+	if (!ret)
+		ret = mu_hal_receivemsg(s_mu_base, 1, &msg.data[0]);
+	if (ret)
+		return ret;
+
+	/* A zero low byte in the second response word means success */
+	return (msg.data[0] & 0xff) == 0 ? 0 : -EIO;
+}
+
+/*
+ * For MRC @mrc_index, find the first region descriptor whose range contains
+ * @addr and put the low 12 bits of @access into its ACCESS1 or ACCESS2 field.
+ */
+static void xrdc_mrc_region_set_access(int mrc_index, u32 addr, u32 access)
+{
+	ulong xrdc_base = 0x292f0000, desc;
+	u8 nregions, idx, w, dsel;
+	u32 words[5];
+
+	nregions = readb(xrdc_base + 0x140 + mrc_index) & 0x1f;
+
+	for (idx = 0; idx < nregions; idx++) {
+		desc = 0x2000 + mrc_index * 0x200 + idx * 0x20;
+
+		for (w = 0; w < 5; w++)
+			words[w] = readl(xrdc_base + desc + w * 4);
+
+		debug("MRC [%u][%u]\n", mrc_index, idx);
+		debug("0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n",
+		      words[0], words[1], words[2], words[3], words[4]);
+
+		/* keep looking unless addr lies in [words[0], words[1]] */
+		if (addr < words[0] || addr > words[1])
+			continue;
+
+		/* domain 7 DSEL picks which access field to rewrite */
+		dsel = (words[2] >> 21) & 0x7;
+		if (dsel == 1) {
+			words[4] &= ~0xFFF;
+			words[4] |= (access & 0xFFF);
+		} else if (dsel == 2) {
+			words[4] &= ~0xFFF0000;
+			words[4] |= ((access & 0xFFF) << 16);
+		}
+
+		/* S400 only sets ACCESS1/2; otherwise word 4 is written as read */
+		writel(words[4], xrdc_base + desc + 0x10);
+		return;
+	}
+}
+
 int arch_cpu_init(void)
 {
 	if (IS_ENABLED(CONFIG_SPL_BUILD)) {
+		/* Disable wdog */
+		init_wdog();
+
+		/* release xrdc, then allow A35 to write SRAM2 */
+		release_xrdc();
+		xrdc_mrc_region_set_access(2, CONFIG_SPL_TEXT_BASE, 0xE00);
+
 		clock_init();
 	} else {
 		/* reconfigure core0 reset vector to ROM */
diff --git a/drivers/misc/imx8ulp/imx8ulp_mu.c b/drivers/misc/imx8ulp/imx8ulp_mu.c
index f3ca547..913ebe7 100644
--- a/drivers/misc/imx8ulp/imx8ulp_mu.c
+++ b/drivers/misc/imx8ulp/imx8ulp_mu.c
@@ -42,24 +42,27 @@
 #define MU_TR_COUNT		4
 #define MU_RR_COUNT		4
 
-static inline void mu_hal_init(struct mu_type *base)
+void mu_hal_init(ulong base)
 {
-	writel(0, &base->tcr);
-	writel(0, &base->rcr);
+	struct mu_type *mu_base = (struct mu_type *)base;
+
+	writel(0, &mu_base->tcr); /* presumably masks TX irqs - TODO confirm */
+	writel(0, &mu_base->rcr); /* presumably masks RX irqs - TODO confirm */
 }
 
-static int mu_hal_sendmsg(struct mu_type *base, u32 reg_index, u32 msg)
+int mu_hal_sendmsg(ulong base, u32 reg_index, u32 msg)
 {
+	struct mu_type *mu_base = (struct mu_type *)base;
 	u32 mask = MU_SR_TE0_MASK << reg_index;
 	u32 val;
 	int ret;
 
 	assert(reg_index < MU_TR_COUNT);
 
-	debug("sendmsg sr 0x%x\n", readl(&base->sr));
+	debug("sendmsg sr 0x%x\n", readl(&mu_base->sr));
 
 	/* Wait TX register to be empty. */
-	ret = readl_poll_timeout(&base->tsr, val, val & mask, 10000);
+	ret = readl_poll_timeout(&mu_base->tsr, val, val & mask, 10000);
 	if (ret < 0) {
 		debug("%s timeout\n", __func__);
 		return -ETIMEDOUT;
@@ -67,29 +70,30 @@
 
 	debug("tr[%d] 0x%x\n", reg_index, msg);
 
-	writel(msg, &base->tr[reg_index]);
+	writel(msg, &mu_base->tr[reg_index]);
 
 	return 0;
 }
 
-static int mu_hal_receivemsg(struct mu_type *base, u32 reg_index, u32 *msg)
+int mu_hal_receivemsg(ulong base, u32 reg_index, u32 *msg)
 {
+	struct mu_type *mu_base = (struct mu_type *)base;
 	u32 mask = MU_SR_RF0_MASK << reg_index;
 	u32 val;
 	int ret;
 
 	assert(reg_index < MU_TR_COUNT);
 
-	debug("receivemsg sr 0x%x\n", readl(&base->sr));
+	debug("receivemsg sr 0x%x\n", readl(&mu_base->sr));
 
 	/* Wait RX register to be full. */
-	ret = readl_poll_timeout(&base->rsr, val, val & mask, 10000);
+	ret = readl_poll_timeout(&mu_base->rsr, val, val & mask, 10000);
 	if (ret < 0) {
 		debug("%s timeout\n", __func__);
 		return -ETIMEDOUT;
 	}
 
-	*msg = readl(&base->rr[reg_index]);
+	*msg = readl(&mu_base->rr[reg_index]);
 
 	debug("rr[%d] 0x%x\n", reg_index, *msg);
 
@@ -106,7 +110,7 @@
 		return -EINVAL;
 
 	/* Read first word */
-	ret = mu_hal_receivemsg(base, 0, (u32 *)msg);
+	ret = mu_hal_receivemsg((ulong)base, 0, (u32 *)msg);
 	if (ret)
 		return ret;
 	count++;
@@ -119,7 +123,7 @@
 
 	/* Read remaining words */
 	while (count < msg->size) {
-		ret = mu_hal_receivemsg(base, count % MU_RR_COUNT,
+		ret = mu_hal_receivemsg((ulong)base, count % MU_RR_COUNT,
 					&msg->data[count - 1]);
 		if (ret)
 			return ret;
@@ -143,14 +147,14 @@
 		return -EINVAL;
 
 	/* Write first word */
-	ret = mu_hal_sendmsg(base, 0, *((u32 *)msg));
+	ret = mu_hal_sendmsg((ulong)base, 0, *((u32 *)msg));
 	if (ret)
 		return ret;
 	count++;
 
 	/* Write remaining words */
 	while (count < msg->size) {
-		ret = mu_hal_sendmsg(base, count % MU_TR_COUNT,
+		ret = mu_hal_sendmsg((ulong)base, count % MU_TR_COUNT,
 				     msg->data[count - 1]);
 		if (ret)
 			return ret;
@@ -207,7 +211,7 @@
 	debug("mu base 0x%lx\n", (ulong)priv->base);
 
 	/* U-Boot not enable interrupts, so need to enable RX interrupts */
-	mu_hal_init(priv->base);
+	mu_hal_init((ulong)priv->base);
 
 	gd->arch.s400_dev = dev;