Blackfin: pull io funcs from linux

Some common code uses more of the io.h funcs than we currently provide, so
pull in all of the ones from the linux kernel.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
diff --git a/include/asm-blackfin/config-pre.h b/include/asm-blackfin/config-pre.h
index b1d3a94..1170a2a 100644
--- a/include/asm-blackfin/config-pre.h
+++ b/include/asm-blackfin/config-pre.h
@@ -71,4 +71,7 @@
 # define BFIN_BOOT_SPI_SSEL 1
 #endif
 
+/* We rarely use interrupts, so favor throughput over latency */
+#define CONFIG_BFIN_INS_LOWOVERHEAD
+
 #endif
diff --git a/include/asm-blackfin/io.h b/include/asm-blackfin/io.h
index 9cb07d4..75244a0 100644
--- a/include/asm-blackfin/io.h
+++ b/include/asm-blackfin/io.h
@@ -1,25 +1,9 @@
 /*
  * U-boot - io.h IO routines
  *
- * Copyright (c) 2005-2007 Analog Devices Inc.
+ * Copyright 2004-2009 Analog Devices Inc.
  *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- * MA 02110-1301 USA
+ * Licensed under the GPL-2 or later.
  */
 
 #ifndef _BLACKFIN_IO_H
@@ -29,6 +13,8 @@
 
 #include <asm/blackfin.h>
 
+#define __iomem
+
 static inline void sync(void)
 {
 	SSYNC();
@@ -70,70 +56,154 @@
  *
  * readX/writeX() are used to access memory mapped devices. On some
  * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the m68k architecture, we just read/write the
+ * differently. On the bfin architecture, we just read/write the
  * memory location directly.
  */
-
 #ifndef __ASSEMBLY__
 
-static inline unsigned char readb(const volatile void *addr)
+static inline unsigned char readb(const volatile void __iomem *addr)
 {
 	unsigned int val;
 	int tmp;
 
-	__asm__ __volatile__ ("cli %1;\n\t"
-			"NOP; NOP; SSYNC;\n\t"
-			"%0 = b [%2] (z);\n\t"
-			"sti %1;\n\t"
-			: "=d"(val), "=d"(tmp): "a"(addr));
+	__asm__ __volatile__ (
+		"cli %1;"
+		"NOP; NOP; SSYNC;"
+		"%0 = b [%2] (z);"
+		"sti %1;"
+		: "=d"(val), "=d"(tmp)
+		: "a"(addr)
+	);
 
 	return (unsigned char) val;
 }
 
-static inline unsigned short readw(const volatile void *addr)
+static inline unsigned short readw(const volatile void __iomem *addr)
 {
 	unsigned int val;
 	int tmp;
 
-	__asm__ __volatile__ ("cli %1;\n\t"
-			"NOP; NOP; SSYNC;\n\t"
-			"%0 = w [%2] (z);\n\t"
-			"sti %1;\n\t"
-			: "=d"(val), "=d"(tmp): "a"(addr));
+	__asm__ __volatile__ (
+		"cli %1;"
+		"NOP; NOP; SSYNC;"
+		"%0 = w [%2] (z);"
+		"sti %1;"
+		: "=d"(val), "=d"(tmp)
+		: "a"(addr)
+	);
 
 	return (unsigned short) val;
 }
 
-static inline unsigned int readl(const volatile void *addr)
+static inline unsigned int readl(const volatile void __iomem *addr)
 {
 	unsigned int val;
 	int tmp;
 
-	__asm__ __volatile__ ("cli %1;\n\t"
-			"NOP; NOP; SSYNC;\n\t"
-			"%0 = [%2];\n\t"
-			"sti %1;\n\t"
-			: "=d"(val), "=d"(tmp): "a"(addr));
+	__asm__ __volatile__ (
+		"cli %1;"
+		"NOP; NOP; SSYNC;"
+		"%0 = [%2];"
+		"sti %1;"
+		: "=d"(val), "=d"(tmp)
+		: "a"(addr)
+	);
+
 	return val;
 }
 
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-
 #endif /*  __ASSEMBLY__ */
 
 #define writeb(b, addr) (void)((*(volatile unsigned char *) (addr)) = (b))
 #define writew(b, addr) (void)((*(volatile unsigned short *) (addr)) = (b))
 #define writel(b, addr) (void)((*(volatile unsigned int *) (addr)) = (b))
+
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
 #define __raw_writeb writeb
 #define __raw_writew writew
 #define __raw_writel writel
-
 #define memset_io(a, b, c)	memset((void *)(a), (b), (c))
 #define memcpy_fromio(a, b, c)	memcpy((a), (void *)(b), (c))
 #define memcpy_toio(a, b, c)	memcpy((void *)(a), (b), (c))
 
+/* Convert "I/O port addresses" to actual addresses.  i.e. ugly casts. */
+#define __io(port) ((void *)(unsigned long)(port))
+
+#define inb(port)    readb(__io(port))
+#define inw(port)    readw(__io(port))
+#define inl(port)    readl(__io(port))
+#define outb(x, port) writeb(x, __io(port))
+#define outw(x, port) writew(x, __io(port))
+#define outl(x, port) writel(x, __io(port))
+
+#define inb_p(port)    inb(__io(port))
+#define inw_p(port)    inw(__io(port))
+#define inl_p(port)    inl(__io(port))
+#define outb_p(x, port) outb(x, __io(port))
+#define outw_p(x, port) outw(x, __io(port))
+#define outl_p(x, port) outl(x, __io(port))
+
+#define ioread8_rep(a, d, c)	readsb(a, d, c)
+#define ioread16_rep(a, d, c)	readsw(a, d, c)
+#define ioread32_rep(a, d, c)	readsl(a, d, c)
+#define iowrite8_rep(a, s, c)	writesb(a, s, c)
+#define iowrite16_rep(a, s, c)	writesw(a, s, c)
+#define iowrite32_rep(a, s, c)	writesl(a, s, c)
+
+#define ioread8(x)			readb(x)
+#define ioread16(x)			readw(x)
+#define ioread32(x)			readl(x)
+#define iowrite8(val, x)		writeb(val, x)
+#define iowrite16(val, x)		writew(val, x)
+#define iowrite32(val, x)		writel(val, x)
+
+#define mmiowb() wmb()
+
+#ifndef __ASSEMBLY__
+
+extern void outsb(unsigned long port, const void *addr, unsigned long count);
+extern void outsw(unsigned long port, const void *addr, unsigned long count);
+extern void outsw_8(unsigned long port, const void *addr, unsigned long count);
+extern void outsl(unsigned long port, const void *addr, unsigned long count);
+
+extern void insb(unsigned long port, void *addr, unsigned long count);
+extern void insw(unsigned long port, void *addr, unsigned long count);
+extern void insw_8(unsigned long port, void *addr, unsigned long count);
+extern void insl(unsigned long port, void *addr, unsigned long count);
+extern void insl_16(unsigned long port, void *addr, unsigned long count);
+
+static inline void readsl(const void __iomem *addr, void *buf, int len)
+{
+	insl((unsigned long)addr, buf, len);
+}
+
+static inline void readsw(const void __iomem *addr, void *buf, int len)
+{
+	insw((unsigned long)addr, buf, len);
+}
+
+static inline void readsb(const void __iomem *addr, void *buf, int len)
+{
+	insb((unsigned long)addr, buf, len);
+}
+
+static inline void writesl(const void __iomem *addr, const void *buf, int len)
+{
+	outsl((unsigned long)addr, buf, len);
+}
+
+static inline void writesw(const void __iomem *addr, const void *buf, int len)
+{
+	outsw((unsigned long)addr, buf, len);
+}
+
+static inline void writesb(const void __iomem *addr, const void *buf, int len)
+{
+	outsb((unsigned long)addr, buf, len);
+}
+
 #if defined(CONFIG_STAMP_CF) || defined(CONFIG_BFIN_IDE)
 /* This hack for CF/IDE needs to be addressed at some point */
 extern void cf_outsw(unsigned short *addr, unsigned short *sect_buf, int words);
@@ -151,4 +221,7 @@
 #endif
 
 #endif
+
+#endif				/* __KERNEL__ */
+
 #endif
diff --git a/lib_blackfin/Makefile b/lib_blackfin/Makefile
index cbf47f0..eebb131 100644
--- a/lib_blackfin/Makefile
+++ b/lib_blackfin/Makefile
@@ -31,10 +31,12 @@
 
 LIB	= $(obj)lib$(ARCH).a
 
+SOBJS-y	+= ins.o
 SOBJS-y	+= memcmp.o
 SOBJS-y	+= memcpy.o
 SOBJS-y	+= memmove.o
 SOBJS-y	+= memset.o
+SOBJS-y	+= outs.o
 
 COBJS-y	+= board.o
 COBJS-y	+= boot.o
diff --git a/lib_blackfin/ins.S b/lib_blackfin/ins.S
new file mode 100644
index 0000000..4519596
--- /dev/null
+++ b/lib_blackfin/ins.S
@@ -0,0 +1,117 @@
+/*
+ * arch/blackfin/lib/ins.S - ins{bwl} using hardware loops
+ *
+ * Copyright 2004-2008 Analog Devices Inc.
+ * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <asm/blackfin.h>
+
+.align 2
+
+#ifdef CONFIG_IPIPE
+# define DO_CLI \
+	[--sp] = rets; \
+	[--sp] = (P5:0); \
+	sp += -12; \
+	call ___ipipe_disable_root_irqs_hw; \
+	sp += 12; \
+	(P5:0) = [sp++];
+# define CLI_INNER_NOP
+#else
+# define DO_CLI cli R3;
+# define CLI_INNER_NOP nop; nop; nop;
+#endif
+
+#ifdef CONFIG_IPIPE
+# define DO_STI \
+	sp += -12; \
+	call ___ipipe_enable_root_irqs_hw; \
+	sp += 12; \
+2:	rets = [sp++];
+#else
+# define DO_STI 2: sti R3;
+#endif
+
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
+# define CLI_OUTER DO_CLI;
+# define STI_OUTER DO_STI;
+# define CLI_INNER 1:
+# if ANOMALY_05000416
+#  define STI_INNER nop; 2: nop;
+# else
+#  define STI_INNER 2:
+# endif
+#else
+# define CLI_OUTER
+# define STI_OUTER
+# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP;
+# define STI_INNER DO_STI;
+#endif
+
+/*
+ * Reads on the Blackfin are speculative. In Blackfin terms, this means they
+ * can be interrupted at any time (even after they have been issued on to the
+ * external bus), and re-issued after the interrupt occurs.
+ *
+ * If a FIFO is sitting on the end of the read, it will see two reads,
+ * when the core only sees one. The FIFO receives the read which is cancelled,
+ * and not delivered to the core.
+ *
+ * To solve this, interrupts are turned off before reads occur to I/O space.
+ * There are 3 versions of all these functions
+ *  - turns interrupts off every read (higher overhead, but lower latency)
+ *  - turns interrupts off every loop (low overhead, but longer latency)
+ *  - DMA version, which do not suffer from this issue. DMA versions have
+ *      different name (prefixed by dma_ ), and are located in
+ *      ../kernel/bfin_dma_5xx.c
+ * Using the dma related functions are recommended for transfering large
+ * buffers in/out of FIFOs.
+ */
+
+#define COMMON_INS(func, ops) \
+ENTRY(_ins##func) \
+	P0 = R0;	/* P0 = port */ \
+	CLI_OUTER;	/* 3 instructions before first read access */ \
+	P1 = R1;	/* P1 = address */ \
+	P2 = R2;	/* P2 = count */ \
+	SSYNC; \
+ \
+	LSETUP(1f, 2f) LC0 = P2; \
+	CLI_INNER; \
+	ops; \
+	STI_INNER; \
+ \
+	STI_OUTER; \
+	RTS; \
+ENDPROC(_ins##func)
+
+COMMON_INS(l, \
+	R0 = [P0]; \
+	[P1++] = R0; \
+)
+
+COMMON_INS(w, \
+	R0 = W[P0]; \
+	W[P1++] = R0; \
+)
+
+COMMON_INS(w_8, \
+	R0 = W[P0]; \
+	B[P1++] = R0; \
+	R0 = R0 >> 8; \
+	B[P1++] = R0; \
+)
+
+COMMON_INS(b, \
+	R0 = B[P0]; \
+	B[P1++] = R0; \
+)
+
+COMMON_INS(l_16, \
+	R0 = [P0]; \
+	W[P1++] = R0; \
+	R0 = R0 >> 16; \
+	W[P1++] = R0; \
+)
diff --git a/lib_blackfin/outs.S b/lib_blackfin/outs.S
new file mode 100644
index 0000000..90c6033
--- /dev/null
+++ b/lib_blackfin/outs.S
@@ -0,0 +1,60 @@
+/*
+ * Implementation of outs{bwl} for BlackFin processors using zero overhead loops.
+ *
+ * Copyright 2005-2009 Analog Devices Inc.
+ *                2005 BuyWays BV
+ *                      Bas Vermeulen <bas@buyways.nl>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <asm/linkage.h>
+
+.align 2
+
+ENTRY(_outsl)
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+
+	LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
+.Llong_loop_s: R0 = [P1++];
+.Llong_loop_e: [P0] = R0;
+	RTS;
+ENDPROC(_outsl)
+
+ENTRY(_outsw)
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+
+	LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
+.Lword_loop_s: R0 = W[P1++];
+.Lword_loop_e: W[P0] = R0;
+	RTS;
+ENDPROC(_outsw)
+
+ENTRY(_outsb)
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+
+	LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
+.Lbyte_loop_s: R0 = B[P1++];
+.Lbyte_loop_e: B[P0] = R0;
+	RTS;
+ENDPROC(_outsb)
+
+ENTRY(_outsw_8)
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+
+	LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
+.Lword8_loop_s: R1 = B[P1++];
+		R0 = B[P1++];
+		R0 = R0 << 8;
+		R0 = R0 + R1;
+.Lword8_loop_e: W[P0] = R0;
+	RTS;
+ENDPROC(_outsw_8)