avr32: Add simple paging support

Use the MMU hardware to set up 1:1 mappings between physical and virtual
addresses. This allows us to bypass the cache when accessing the flash
without having to do any physical-to-virtual address mapping in the CFI
driver.

The virtual memory mappings are defined at compile time through a sorted
array of virtual memory range objects. When a TLB miss exception
happens, the exception handler does a binary search through the array
until it finds a matching entry and loads it into the TLB. The u-boot
image itself is covered by a fixed TLB entry which is never replaced.
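
Each board supplies the table itself. As a rough sketch (the flash and
SDRAM base/size macros below are only placeholders, not names
introduced by this patch), a board with parallel flash and SDRAM might
define something like:

	#include <asm/arch/mmu.h>

	struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
		/* Keep the entries sorted by virt_pgno */
		{
			/* Flash is mapped uncached so CFI accesses hit the chip */
			.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
			.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
			.phys		= (CONFIG_SYS_FLASH_BASE & PAGE_ADDR_MASK)
						| MMU_VMR_CACHE_NONE,
		}, {
			/* SDRAM is mapped 1:1 with write-back caching */
			.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
			.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
			.phys		= (CONFIG_SYS_SDRAM_BASE & PAGE_ADDR_MASK)
						| MMU_VMR_CACHE_WRBACK,
		},
	};

CONFIG_SYS_NR_VM_REGIONS must match the number of entries in the table,
since the TLB miss handler searches exactly that many entries.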

This makes the 'saveenv' command work again on ATNGW100 and other boards
using the CFI driver, hopefully without violating u-boot's assumption
that virtual and physical addresses are always the same.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
diff --git a/arch/avr32/cpu/at32ap700x/Makefile b/arch/avr32/cpu/at32ap700x/Makefile
index 46e6ef6..30ea925 100644
--- a/arch/avr32/cpu/at32ap700x/Makefile
+++ b/arch/avr32/cpu/at32ap700x/Makefile
@@ -24,7 +24,7 @@
 
 LIB	:= $(obj)lib$(SOC).a
 
-COBJS	:= portmux.o clk.o
+COBJS	:= portmux.o clk.o mmu.o
 SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
 OBJS	:= $(addprefix $(obj),$(SOBJS) $(COBJS))
 
diff --git a/arch/avr32/cpu/at32ap700x/mmu.c b/arch/avr32/cpu/at32ap700x/mmu.c
new file mode 100644
index 0000000..c3a1b93
--- /dev/null
+++ b/arch/avr32/cpu/at32ap700x/mmu.c
@@ -0,0 +1,84 @@
+#include <common.h>
+#include <asm/arch/mmu.h>
+#include <asm/sysreg.h>
+
+void mmu_init_r(unsigned long dest_addr)
+{
+	uintptr_t	vmr_table_addr;
+
+	/* Round monitor address down to the nearest page boundary */
+	dest_addr &= PAGE_ADDR_MASK;
+
+	/* Initialize TLB entry 0 to cover the monitor, and lock it */
+	sysreg_write(TLBEHI, dest_addr | SYSREG_BIT(TLBEHI_V));
+	sysreg_write(TLBELO, dest_addr | MMU_VMR_CACHE_WRBACK);
+	sysreg_write(MMUCR, SYSREG_BF(DRP, 0) | SYSREG_BF(DLA, 1)
+			| SYSREG_BIT(MMUCR_S) | SYSREG_BIT(M));
+	__builtin_tlbw();
+
+	/*
+	 * Calculate the address of the VM range table in a PC-relative
+	 * manner to make sure we hit the SDRAM and not the flash.
+	 */
+	asm("sub	%0, pc, . - mmu_vmr_table" : "=r"(vmr_table_addr));
+	sysreg_write(PTBR, vmr_table_addr);
+	printf("VMR table @ 0x%08lx\n", (unsigned long)vmr_table_addr);
+
+	/* Enable paging */
+	sysreg_write(MMUCR, SYSREG_BF(DRP, 1) | SYSREG_BF(DLA, 1)
+			| SYSREG_BIT(MMUCR_S) | SYSREG_BIT(M) | SYSREG_BIT(E));
+}
+
+int mmu_handle_tlb_miss(void)
+{
+	const struct mmu_vm_range *vmr_table;
+	const struct mmu_vm_range *vmr;
+	unsigned int fault_pgno;
+	int first, last;
+
+	fault_pgno = sysreg_read(TLBEAR) >> PAGE_SHIFT;
+	vmr_table = (const struct mmu_vm_range *)sysreg_read(PTBR);
+
+	/* Do a binary search through the VM ranges */
+	first = 0;
+	last = CONFIG_SYS_NR_VM_REGIONS;
+	while (first < last) {
+		unsigned int start;
+		int middle;
+
+		/* Pick the entry in the middle of the remaining range */
+		middle = (first + last) >> 1;
+		vmr = &vmr_table[middle];
+		start = vmr->virt_pgno;
+
+		/* Do the bisection thing */
+		if (fault_pgno < start) {
+			last = middle;
+		} else if (fault_pgno >= (start + vmr->nr_pages)) {
+			first = middle + 1;
+		} else {
+			/* Got it; let's slam it into the TLB */
+			uint32_t tlbelo;
+
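+			/*
+			 * Mappings are 1:1: the physical frame
+			 * number equals the faulting page number;
+			 * phys only supplies the TLBELO cache and
+			 * access bits.
+			 */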
+			tlbelo = vmr->phys & ~PAGE_ADDR_MASK;
+			tlbelo |= fault_pgno << PAGE_SHIFT;
+			sysreg_write(TLBELO, tlbelo);
+			__builtin_tlbw();
+
+			/* Zero means success */
+			return 0;
+		}
+	}
+
+	/*
+	 * Didn't find any matching entries. Return a nonzero value to
+	 * indicate that this should be treated as a fatal exception.
+	 */
+	return -1;
+}
diff --git a/arch/avr32/cpu/start.S b/arch/avr32/cpu/start.S
index 99c9e06..06bf4c6 100644
--- a/arch/avr32/cpu/start.S
+++ b/arch/avr32/cpu/start.S
@@ -82,12 +82,20 @@
 	.org	0x44
 	rjmp	unknown_exception	/* DTLB Modified */
 
-	.org	0x50
-	rjmp	unknown_exception	/* ITLB Miss */
-	.org	0x60
-	rjmp	unknown_exception	/* DTLB Miss (read) */
-	.org	0x70
-	rjmp	unknown_exception	/* DTLB Miss (write) */
+	.org	0x50			/* ITLB Miss */
+	pushm	r8-r12,lr
+	rjmp	1f
+	.org	0x60			/* DTLB Miss (read) */
+	pushm	r8-r12,lr
+	rjmp	1f
+	.org	0x70			/* DTLB Miss (write) */
+	pushm	r8-r12,lr
+1:	mov	r12, sp
+	rcall	mmu_handle_tlb_miss
+	cp.w	r12, 0			/* test the return value */
+	popm	r8-r12,lr
+	brne	unknown_exception
+	rete
 
 	.size	_evba, . - _evba
 
diff --git a/arch/avr32/include/asm/arch-at32ap700x/addrspace.h b/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
index 409eee3..4edc1bd 100644
--- a/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
+++ b/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
@@ -75,10 +75,11 @@
 static inline void *
 map_physmem(phys_addr_t paddr, unsigned long len, unsigned long flags)
 {
-	if (flags == MAP_WRBACK)
-		return (void *)P1SEGADDR(paddr);
-	else
-		return (void *)P2SEGADDR(paddr);
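+	/*
+	 * The MMU maps all virtual addresses 1:1 to physical addresses;
+	 * the cacheability of each range comes from the TLB entries.
+	 */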
+	return (void *)paddr;
 }
 
 #endif /* __ASM_AVR32_ADDRSPACE_H */
diff --git a/arch/avr32/include/asm/arch-at32ap700x/mmu.h b/arch/avr32/include/asm/arch-at32ap700x/mmu.h
new file mode 100644
index 0000000..fcd9a05
--- /dev/null
+++ b/arch/avr32/include/asm/arch-at32ap700x/mmu.h
@@ -0,0 +1,66 @@
+/*
+ * In order to deal with the hardcoded u-boot requirement that virtual
+ * addresses are always mapped 1:1 with physical addresses, we implement
+ * a small virtual memory manager so that we can use the MMU hardware in
+ * order to get the caching properties right.
+ *
+ * A few pages (or possibly just one) are locked in the TLB permanently
+ * in order to avoid recursive TLB misses, but most pages are faulted in
+ * on demand.
+ */
+#ifndef __ASM_ARCH_MMU_H
+#define __ASM_ARCH_MMU_H
+
+#include <asm/sysreg.h>
+
+#define PAGE_SHIFT	20
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_ADDR_MASK	(~(PAGE_SIZE - 1))
+
+#define MMU_VMR_CACHE_NONE						\
+	(SYSREG_BF(AP, 3) | SYSREG_BF(SZ, 3) | SYSREG_BIT(TLBELO_D))
+#define MMU_VMR_CACHE_WBUF						\
+	(MMU_VMR_CACHE_NONE | SYSREG_BIT(B))
+#define MMU_VMR_CACHE_WRTHRU						\
+	(MMU_VMR_CACHE_NONE | SYSREG_BIT(TLBELO_C) | SYSREG_BIT(W))
+#define MMU_VMR_CACHE_WRBACK						\
+	(MMU_VMR_CACHE_WBUF | SYSREG_BIT(TLBELO_C))
+
+/*
+ * This structure is used in our "page table". Instead of the usual
+ * x86-inspired radix tree, we let each entry cover an arbitrary-sized
+ * virtual address range and store them in a sorted array. This is
+ * somewhat slower, but should use significantly less RAM, and we
+ * shouldn't get many TLB misses when using 1 MB pages anyway.
+ *
+ * With 1 MB pages, we need 12 bits to store the page number. In
+ * addition, we stick an Invalid bit in the high bit of virt_pgno (if
+ * set, it cannot possibly match any faulting page), and all the bits
+ * that need to be written to TLBELO in phys.
+ */
+struct mmu_vm_range {
+	uint16_t	virt_pgno;
+	uint16_t	nr_pages;
+	uint32_t	phys;
+};
+
+/*
+ * An array of mmu_vm_range objects describing all pageable addresses.
+ * The array is sorted by virt_pgno so that the TLB miss exception
+ * handler can do a binary search to find the correct entry.
+ */
+extern struct mmu_vm_range mmu_vmr_table[];
+
+/*
+ * Initialize the MMU. This will set up a fixed TLB entry for the static
+ * u-boot image at dest_addr and enable paging.
+ */
+void mmu_init_r(unsigned long dest_addr);
+
+/*
+ * Handle a TLB miss exception. This function is called directly from
+ * the exception vector table written in assembly.
+ */
+int mmu_handle_tlb_miss(void);
+
+#endif /* __ASM_ARCH_MMU_H */
diff --git a/arch/avr32/lib/board.c b/arch/avr32/lib/board.c
index 9e741d2..aa589bb 100644
--- a/arch/avr32/lib/board.c
+++ b/arch/avr32/lib/board.c
@@ -33,6 +33,7 @@
 
 #include <asm/initcalls.h>
 #include <asm/sections.h>
+#include <asm/arch/mmu.h>
 
 #ifndef CONFIG_IDENT_STRING
 #define CONFIG_IDENT_STRING ""
@@ -265,6 +266,9 @@
 	gd->flags |= GD_FLG_RELOC;
 	gd->reloc_off = dest_addr - CONFIG_SYS_MONITOR_BASE;
 
+	/* Enable the MMU so that we can keep u-boot simple */
+	mmu_init_r(dest_addr);
+
 	board_early_init_r();
 
 	monitor_flash_len = _edata - _text;