ARM: HYP/non-sec: allow relocation to secure RAM

The current non-sec switching code suffers from one major issue:
it cannot run from secure RAM, as a large part of U-Boot still needs
to run after we have switched to non-secure.

This patch reworks the whole HYP/non-secure strategy by:
- making sure the secure code is the *last* thing u-boot executes
  before entering the payload
- performing an exception return from secure mode directly into
  the payload (see the sketch below)
- allowing the code to be dynamically relocated to secure RAM
  before switching to non-secure.
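
The exception return is driven from C through the new _do_nonsec_entry
helper added below: the target PC goes in ip, and r0-r2 are passed
through to the payload. A minimal sketch of a call site, assuming
illustrative names (kernel_entry, machid and atags are placeholders,
not part of this patch):

  /* hypothetical prototype for the helper defined below */
  extern void _do_nonsec_entry(void *target_pc, unsigned long r0,
                               unsigned long r1, unsigned long r2);

  static void boot_nonsec(void *kernel_entry, unsigned long machid,
                          unsigned long atags)
  {
          /* traps to the monitor, which ERETs into the payload */
          _do_nonsec_entry(kernel_entry, 0, machid, atags);
  }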

This involves quite a bit of horrible code, especially as U-Boot's
relocation support is quite primitive.
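
As an illustration of the relocation step, a minimal sketch, assuming
a board-provided CONFIG_ARMV7_SECURE_BASE, linker-generated
__secure_start/__secure_end markers around ._secure.text (the names
here are illustrative) and the usual U-Boot cache helpers:

  extern char __secure_start[], __secure_end[];

  static void relocate_secure_section(void)
  {
  #ifdef CONFIG_ARMV7_SECURE_BASE
          size_t sz = __secure_end - __secure_start;

          /* copy the secure payload and make it visible to the I-side */
          memcpy((void *)CONFIG_ARMV7_SECURE_BASE, __secure_start, sz);
          flush_dcache_range(CONFIG_ARMV7_SECURE_BASE,
                             CONFIG_ARMV7_SECURE_BASE + sz);
          invalidate_icache_all();
  #endif
  }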

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Ian Campbell <ijc@hellion.org.uk>
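---

For context, a hedged sketch of the matching linker-script fragment
that collects the new ._secure.text input section and provides the
start/end markers used in the relocation sketch above (illustrative
only; the real u-boot.lds change may additionally link the section at
the secure RAM address):

  ._secure.text : {
          __secure_start = .;
          *(._secure.text)
          __secure_end = .;
  }
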
diff --git a/arch/arm/cpu/armv7/nonsec_virt.S b/arch/arm/cpu/armv7/nonsec_virt.S
index b5c946f..2a43e3c 100644
--- a/arch/arm/cpu/armv7/nonsec_virt.S
+++ b/arch/arm/cpu/armv7/nonsec_virt.S
@@ -10,10 +10,13 @@
 #include <linux/linkage.h>
 #include <asm/gic.h>
 #include <asm/armv7.h>
+#include <asm/proc-armv/ptrace.h>
 
 .arch_extension sec
 .arch_extension virt
 
+	.pushsection ._secure.text, "ax"
+
 	.align	5
 /* the vector table for secure state and HYP mode */
 _monitor_vectors:
@@ -22,51 +25,86 @@
 	adr pc, _secure_monitor
 	.word 0
 	.word 0
-	adr pc, _hyp_trap
 	.word 0
 	.word 0
+	.word 0
+
+.macro is_cpu_virt_capable	tmp
+	mrc	p15, 0, \tmp, c0, c1, 1		@ read ID_PFR1
+	and	\tmp, \tmp, #CPUID_ARM_VIRT_MASK	@ mask virtualization bits
+	cmp	\tmp, #(1 << CPUID_ARM_VIRT_SHIFT)
+.endm
 
 /*
  * secure monitor handler
  * U-boot calls this "software interrupt" in start.S
  * This is executed on a "smc" instruction, we use a "smc #0" to switch
  * to non-secure state.
- * We use only r0 and r1 here, due to constraints in the caller.
+ * r0, r1, r2: passed to the callee
+ * ip: target PC
  */
 _secure_monitor:
-	mrc	p15, 0, r1, c1, c1, 0		@ read SCR
-	bic	r1, r1, #0x4e			@ clear IRQ, FIQ, EA, nET bits
-	orr	r1, r1, #0x31			@ enable NS, AW, FW bits
+	mrc	p15, 0, r5, c1, c1, 0		@ read SCR
+	bic	r5, r5, #0x4e			@ clear IRQ, FIQ, EA, nET bits
+	orr	r5, r5, #0x31			@ enable NS, AW, FW bits
 
-	mrc	p15, 0, r0, c0, c1, 1		@ read ID_PFR1
-	and	r0, r0, #CPUID_ARM_VIRT_MASK	@ mask virtualization bits
-	cmp	r0, #(1 << CPUID_ARM_VIRT_SHIFT)
+	mov	r6, #SVC_MODE			@ default mode is SVC
+	is_cpu_virt_capable r4
 #ifdef CONFIG_ARMV7_VIRT
-	orreq	r1, r1, #0x100			@ allow HVC instruction
+	orreq	r5, r5, #0x100			@ allow HVC instruction
+	moveq	r6, #HYP_MODE			@ Enter the kernel as HYP
 #endif
 
-	mcr	p15, 0, r1, c1, c1, 0		@ write SCR (with NS bit set)
+	mcr	p15, 0, r5, c1, c1, 0		@ write SCR (with NS bit set)
 	isb
 
-#ifdef CONFIG_ARMV7_VIRT
-	mrceq	p15, 0, r0, c12, c0, 1		@ get MVBAR value
-	mcreq	p15, 4, r0, c12, c0, 0		@ write HVBAR
-#endif
 	bne	1f
 
 	@ Reset CNTVOFF to 0 before leaving monitor mode
-	mrc	p15, 0, r0, c0, c1, 1		@ read ID_PFR1
-	ands	r0, r0, #CPUID_ARM_GENTIMER_MASK	@ test arch timer bits
-	movne	r0, #0
-	mcrrne	p15, 4, r0, r0, c14		@ Reset CNTVOFF to zero
+	mrc	p15, 0, r4, c0, c1, 1		@ read ID_PFR1
+	ands	r4, r4, #CPUID_ARM_GENTIMER_MASK	@ test arch timer bits
+	movne	r4, #0
+	mcrrne	p15, 4, r4, r4, c14		@ Reset CNTVOFF to zero
 1:
-	movs	pc, lr				@ return to non-secure SVC
+	mov	lr, ip
+	mov	ip, #(F_BIT | I_BIT | A_BIT)	@ Set A, I and F
+	tst	lr, #1				@ Check for Thumb PC
+	orrne	ip, ip, #T_BIT			@ Set T if Thumb
+	orr	ip, ip, r6			@ Slot target mode in
+	msr	spsr_cxfs, ip			@ Set full SPSR
+	movs	pc, lr				@ ERET to non-secure
 
-_hyp_trap:
-	mrs	lr, elr_hyp	@ for older asm: .byte 0x00, 0xe3, 0x0e, 0xe1
-	mov pc, lr				@ do no switch modes, but
-						@ return to caller
+ENTRY(_do_nonsec_entry)
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, r2
+	mov	r2, r3
+	smc	#0
+ENDPROC(_do_nonsec_entry)
 
+.macro get_cbar_addr	addr
+#ifdef CONFIG_ARM_GIC_BASE_ADDRESS
+	ldr	\addr, =CONFIG_ARM_GIC_BASE_ADDRESS
+#else
+	mrc	p15, 4, \addr, c15, c0, 0	@ read CBAR
+	bfc	\addr, #0, #15			@ clear reserved bits
+#endif
+.endm
+
+.macro get_gicd_addr	addr
+	get_cbar_addr	\addr
+	add	\addr, \addr, #GIC_DIST_OFFSET	@ GIC dist i/f offset
+.endm
+
+.macro get_gicc_addr	addr, tmp
+	get_cbar_addr	\addr
+	is_cpu_virt_capable \tmp
+	movne	\tmp, #GIC_CPU_OFFSET_A9	@ GIC CPU offset for A9
+	moveq	\tmp, #GIC_CPU_OFFSET_A15	@ GIC CPU offset for A15/A7
+	add	\addr, \addr, \tmp
+.endm
+
+#ifndef CONFIG_ARMV7_PSCI
 /*
  * Secondary CPUs start here and call the code for the core specific parts
  * of the non-secure and HYP mode transition. The GIC distributor specific
@@ -74,31 +112,21 @@
  * Then they go back to wfi and wait to be woken up by the kernel again.
  */
 ENTRY(_smp_pen)
-	mrs	r0, cpsr
-	orr	r0, r0, #0xc0
-	msr	cpsr, r0			@ disable interrupts
-	ldr	r1, =_start
-	mcr	p15, 0, r1, c12, c0, 0		@ set VBAR
+	cpsid	i
+	cpsid	f
 
 	bl	_nonsec_init
-	mov	r12, r0				@ save GICC address
-#ifdef CONFIG_ARMV7_VIRT
-	bl	_switch_to_hyp
-#endif
-
-	ldr	r1, [r12, #GICC_IAR]		@ acknowledge IPI
-	str	r1, [r12, #GICC_EOIR]		@ signal end of interrupt
 
 	adr	r0, _smp_pen			@ do not use this address again
 	b	smp_waitloop			@ wait for IPIs, board specific
 ENDPROC(_smp_pen)
+#endif
 
 /*
  * Switch a core to non-secure state.
  *
  *  1. initialize the GIC per-core interface
  *  2. allow coprocessor access in non-secure modes
- *  3. switch the cpu mode (by calling "smc #0")
  *
  * Called from smp_pen by secondary cores and directly by the BSP.
  * Do not assume that the stack is available and only use registers
@@ -108,38 +136,23 @@
  * though, but we check this in C before calling this function.
  */
 ENTRY(_nonsec_init)
-#ifdef CONFIG_ARM_GIC_BASE_ADDRESS
-	ldr	r2, =CONFIG_ARM_GIC_BASE_ADDRESS
-#else
-	mrc	p15, 4, r2, c15, c0, 0		@ read CBAR
-	bfc	r2, #0, #15			@ clear reserved bits
-#endif
-	add	r3, r2, #GIC_DIST_OFFSET	@ GIC dist i/f offset
+	get_gicd_addr	r3
+
 	mvn	r1, #0				@ all bits to 1
 	str	r1, [r3, #GICD_IGROUPRn]	@ allow private interrupts
 
-	mrc	p15, 0, r0, c0, c0, 0		@ read MIDR
-	ldr	r1, =MIDR_PRIMARY_PART_MASK
-	and	r0, r0, r1			@ mask out variant and revision
+	get_gicc_addr	r3, r1
 
-	ldr	r1, =MIDR_CORTEX_A7_R0P0 & MIDR_PRIMARY_PART_MASK
-	cmp	r0, r1				@ check for Cortex-A7
-
-	ldr	r1, =MIDR_CORTEX_A15_R0P0 & MIDR_PRIMARY_PART_MASK
-	cmpne	r0, r1				@ check for Cortex-A15
-
-	movne	r1, #GIC_CPU_OFFSET_A9		@ GIC CPU offset for A9
-	moveq	r1, #GIC_CPU_OFFSET_A15		@ GIC CPU offset for A15/A7
-	add	r3, r2, r1			@ r3 = GIC CPU i/f addr
-
-	mov	r1, #1				@ set GICC_CTLR[enable]
+	mov	r1, #3				@ Enable both groups
 	str	r1, [r3, #GICC_CTLR]		@ and clear all other bits
 	mov	r1, #0xff
 	str	r1, [r3, #GICC_PMR]		@ set priority mask register
 
+	mrc	p15, 0, r0, c1, c1, 2
 	movw	r1, #0x3fff
-	movt	r1, #0x0006
-	mcr	p15, 0, r1, c1, c1, 2		@ NSACR = all copros to non-sec
+	movt	r1, #0x0004
+	orr	r0, r0, r1
+	mcr	p15, 0, r0, c1, c1, 2		@ NSACR = all copros to non-sec
 
 /* The CNTFRQ register of the generic timer needs to be
  * programmed in secure state. Some primary bootloaders / firmware
@@ -157,21 +170,9 @@
 
 	adr	r1, _monitor_vectors
 	mcr	p15, 0, r1, c12, c0, 1		@ set MVBAR to secure vectors
-
-	mrc	p15, 0, ip, c12, c0, 0		@ save secure copy of VBAR
-
 	isb
-	smc	#0				@ call into MONITOR mode
-
-	mcr	p15, 0, ip, c12, c0, 0		@ write non-secure copy of VBAR
-
-	mov	r1, #1
-	str	r1, [r3, #GICC_CTLR]		@ enable non-secure CPU i/f
-	add	r2, r2, #GIC_DIST_OFFSET
-	str	r1, [r2, #GICD_CTLR]		@ allow private interrupts
 
 	mov	r0, r3				@ return GICC address
-
 	bx	lr
 ENDPROC(_nonsec_init)
 
@@ -183,18 +184,10 @@
 	ldr	r1, [r1]
 	cmp	r0, r1			@ make sure we dont execute this code
 	beq	smp_waitloop		@ again (due to a spurious wakeup)
-	mov	pc, r1
+	mov	r0, r1
+	b	_do_nonsec_entry
 ENDPROC(smp_waitloop)
 .weak smp_waitloop
 #endif
 
-ENTRY(_switch_to_hyp)
-	mov	r0, lr
-	mov	r1, sp				@ save SVC copy of LR and SP
-	isb
-	hvc #0			 @ for older asm: .byte 0x70, 0x00, 0x40, 0xe1
-	mov	sp, r1
-	mov	lr, r0				@ restore SVC copy of LR and SP
-
-	bx	lr
-ENDPROC(_switch_to_hyp)
+	.popsection