arc: build libgcc in U-Boot

This way we may have very limited set of functions implemented so we
save some space.

Also it allows us to build U-Boot for any ARC core with the same one
toolchain because we don't rely on pre-built libgcc.

For example:
 * we may use little-endian toolchain but build U-Boot for ether
endianess
 * we may use non-multilibbed uClibc toolchain but build U-Boot for
whatever ARC CPU flavour that current GCC supports

Private libgcc built from generic C implementation contributes only 144
bytes to .text section so we don't see significant degradation of size:
--->8---
$ arc-linux-size u-boot.libgcc-prebuilt
   text	   data	    bss	    dec	    hex	filename
 222217	  24912	 214820	 461949	  70c7d	u-boot.libgcc-prebuilt

$ arc-linux-size u-boot.libgcc-private
   text	   data	    bss	    dec	    hex	filename
 222361	  24912	 214820	 462093	  70d0d	u-boot.libgcc-private
--->8---

Also I don't notice visible performance degradation compared to
pre-built libgcc (where at least "*div*" functions are had-written in
assembly) on typical operations of downloading 10Mb uImage over TFTP and
bootm.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index bae4419..b8028c9 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -20,3 +20,5 @@
 obj-y += timer.o
 
 obj-$(CONFIG_CMD_BOOTM) += bootm.o
+
+lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _millicodethunk.o libgcc2.o
diff --git a/arch/arc/lib/_millicodethunk.S b/arch/arc/lib/_millicodethunk.S
new file mode 100644
index 0000000..b332416
--- /dev/null
+++ b/arch/arc/lib/_millicodethunk.S
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 1995, 1997, 2007-2013 Free Software Foundation, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+ /* ANSI concatenation macros.  */
+
+ #define CONCAT1(a, b) CONCAT2(a, b)
+ #define CONCAT2(a, b) a ## b
+
+ /* Use the right prefix for global labels.  */
+
+ #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+#ifndef WORKING_ASSEMBLER
+#define abs_l abs
+#define asl_l asl
+#define mov_l mov
+#endif
+
+#define FUNC(X)         .type SYM(X),@function
+#define HIDDEN_FUNC(X)	FUNC(X)` .hidden X
+#define ENDFUNC0(X)     .Lfe_##X: .size X,.Lfe_##X-X
+#define ENDFUNC(X)      ENDFUNC0(X)
+
+	.section .text
+	.align 4
+	.global SYM(__st_r13_to_r15)
+	.global SYM(__st_r13_to_r16)
+	.global SYM(__st_r13_to_r17)
+	.global SYM(__st_r13_to_r18)
+	.global SYM(__st_r13_to_r19)
+	.global SYM(__st_r13_to_r20)
+	.global SYM(__st_r13_to_r21)
+	.global SYM(__st_r13_to_r22)
+	.global SYM(__st_r13_to_r23)
+	.global SYM(__st_r13_to_r24)
+	.global SYM(__st_r13_to_r25)
+	HIDDEN_FUNC(__st_r13_to_r15)
+	HIDDEN_FUNC(__st_r13_to_r16)
+	HIDDEN_FUNC(__st_r13_to_r17)
+	HIDDEN_FUNC(__st_r13_to_r18)
+	HIDDEN_FUNC(__st_r13_to_r19)
+	HIDDEN_FUNC(__st_r13_to_r20)
+	HIDDEN_FUNC(__st_r13_to_r21)
+	HIDDEN_FUNC(__st_r13_to_r22)
+	HIDDEN_FUNC(__st_r13_to_r23)
+	HIDDEN_FUNC(__st_r13_to_r24)
+	HIDDEN_FUNC(__st_r13_to_r25)
+	.align 4
+SYM(__st_r13_to_r25):
+	st r25, [sp,48]
+SYM(__st_r13_to_r24):
+	st r24, [sp,44]
+SYM(__st_r13_to_r23):
+	st r23, [sp,40]
+SYM(__st_r13_to_r22):
+	st r22, [sp,36]
+SYM(__st_r13_to_r21):
+	st r21, [sp,32]
+SYM(__st_r13_to_r20):
+	st r20, [sp,28]
+SYM(__st_r13_to_r19):
+	st r19, [sp,24]
+SYM(__st_r13_to_r18):
+	st r18, [sp,20]
+SYM(__st_r13_to_r17):
+	st r17, [sp,16]
+SYM(__st_r13_to_r16):
+	st r16, [sp,12]
+SYM(__st_r13_to_r15):
+#ifdef __ARC700__
+	st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
+#else
+	st_s r15, [sp,8]
+#endif
+	st_s r14, [sp,4]
+	j_s.d [%blink]
+	st_s r13, [sp,0]
+	ENDFUNC(__st_r13_to_r15)
+	ENDFUNC(__st_r13_to_r16)
+	ENDFUNC(__st_r13_to_r17)
+	ENDFUNC(__st_r13_to_r18)
+	ENDFUNC(__st_r13_to_r19)
+	ENDFUNC(__st_r13_to_r20)
+	ENDFUNC(__st_r13_to_r21)
+	ENDFUNC(__st_r13_to_r22)
+	ENDFUNC(__st_r13_to_r23)
+	ENDFUNC(__st_r13_to_r24)
+	ENDFUNC(__st_r13_to_r25)
+
+	.section .text
+	.align 4
+;	==================================
+;	the loads
+
+	.global SYM(__ld_r13_to_r15)
+	.global SYM(__ld_r13_to_r16)
+	.global SYM(__ld_r13_to_r17)
+	.global SYM(__ld_r13_to_r18)
+	.global SYM(__ld_r13_to_r19)
+	.global SYM(__ld_r13_to_r20)
+	.global SYM(__ld_r13_to_r21)
+	.global SYM(__ld_r13_to_r22)
+	.global SYM(__ld_r13_to_r23)
+	.global SYM(__ld_r13_to_r24)
+	.global SYM(__ld_r13_to_r25)
+	HIDDEN_FUNC(__ld_r13_to_r15)
+	HIDDEN_FUNC(__ld_r13_to_r16)
+	HIDDEN_FUNC(__ld_r13_to_r17)
+	HIDDEN_FUNC(__ld_r13_to_r18)
+	HIDDEN_FUNC(__ld_r13_to_r19)
+	HIDDEN_FUNC(__ld_r13_to_r20)
+	HIDDEN_FUNC(__ld_r13_to_r21)
+	HIDDEN_FUNC(__ld_r13_to_r22)
+	HIDDEN_FUNC(__ld_r13_to_r23)
+	HIDDEN_FUNC(__ld_r13_to_r24)
+	HIDDEN_FUNC(__ld_r13_to_r25)
+SYM(__ld_r13_to_r25):
+	ld r25, [sp,48]
+SYM(__ld_r13_to_r24):
+	ld r24, [sp,44]
+SYM(__ld_r13_to_r23):
+	ld r23, [sp,40]
+SYM(__ld_r13_to_r22):
+	ld r22, [sp,36]
+SYM(__ld_r13_to_r21):
+	ld r21, [sp,32]
+SYM(__ld_r13_to_r20):
+	ld r20, [sp,28]
+SYM(__ld_r13_to_r19):
+	ld r19, [sp,24]
+SYM(__ld_r13_to_r18):
+	ld r18, [sp,20]
+SYM(__ld_r13_to_r17):
+	ld r17, [sp,16]
+SYM(__ld_r13_to_r16):
+	ld r16, [sp,12]
+SYM(__ld_r13_to_r15):
+#ifdef __ARC700__
+	ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
+#else
+	ld_s r15, [sp,8]
+#endif
+	ld_s r14, [sp,4]
+	j_s.d [%blink]
+	ld_s r13, [sp,0]
+	ENDFUNC(__ld_r13_to_r15)
+	ENDFUNC(__ld_r13_to_r16)
+	ENDFUNC(__ld_r13_to_r17)
+	ENDFUNC(__ld_r13_to_r18)
+	ENDFUNC(__ld_r13_to_r19)
+	ENDFUNC(__ld_r13_to_r20)
+	ENDFUNC(__ld_r13_to_r21)
+	ENDFUNC(__ld_r13_to_r22)
+	ENDFUNC(__ld_r13_to_r23)
+	ENDFUNC(__ld_r13_to_r24)
+	ENDFUNC(__ld_r13_to_r25)
+
+	.global SYM(__ld_r13_to_r14_ret)
+	.global SYM(__ld_r13_to_r15_ret)
+	.global SYM(__ld_r13_to_r16_ret)
+	.global SYM(__ld_r13_to_r17_ret)
+	.global SYM(__ld_r13_to_r18_ret)
+	.global SYM(__ld_r13_to_r19_ret)
+	.global SYM(__ld_r13_to_r20_ret)
+	.global SYM(__ld_r13_to_r21_ret)
+	.global SYM(__ld_r13_to_r22_ret)
+	.global SYM(__ld_r13_to_r23_ret)
+	.global SYM(__ld_r13_to_r24_ret)
+	.global SYM(__ld_r13_to_r25_ret)
+	HIDDEN_FUNC(__ld_r13_to_r14_ret)
+	HIDDEN_FUNC(__ld_r13_to_r15_ret)
+	HIDDEN_FUNC(__ld_r13_to_r16_ret)
+	HIDDEN_FUNC(__ld_r13_to_r17_ret)
+	HIDDEN_FUNC(__ld_r13_to_r18_ret)
+	HIDDEN_FUNC(__ld_r13_to_r19_ret)
+	HIDDEN_FUNC(__ld_r13_to_r20_ret)
+	HIDDEN_FUNC(__ld_r13_to_r21_ret)
+	HIDDEN_FUNC(__ld_r13_to_r22_ret)
+	HIDDEN_FUNC(__ld_r13_to_r23_ret)
+	HIDDEN_FUNC(__ld_r13_to_r24_ret)
+	HIDDEN_FUNC(__ld_r13_to_r25_ret)
+	.section .text
+	.align 4
+SYM(__ld_r13_to_r25_ret):
+	ld r25, [sp,48]
+SYM(__ld_r13_to_r24_ret):
+	ld r24, [sp,44]
+SYM(__ld_r13_to_r23_ret):
+	ld r23, [sp,40]
+SYM(__ld_r13_to_r22_ret):
+	ld r22, [sp,36]
+SYM(__ld_r13_to_r21_ret):
+	ld r21, [sp,32]
+SYM(__ld_r13_to_r20_ret):
+	ld r20, [sp,28]
+SYM(__ld_r13_to_r19_ret):
+	ld r19, [sp,24]
+SYM(__ld_r13_to_r18_ret):
+	ld r18, [sp,20]
+SYM(__ld_r13_to_r17_ret):
+	ld r17, [sp,16]
+SYM(__ld_r13_to_r16_ret):
+	ld r16, [sp,12]
+SYM(__ld_r13_to_r15_ret):
+	ld r15, [sp,8]
+SYM(__ld_r13_to_r14_ret):
+	ld blink,[sp,r12]
+	ld_s r14, [sp,4]
+	ld.ab r13, [sp,r12]
+	j_s.d [%blink]
+	add_s sp,sp,4
+	ENDFUNC(__ld_r13_to_r14_ret)
+	ENDFUNC(__ld_r13_to_r15_ret)
+	ENDFUNC(__ld_r13_to_r16_ret)
+	ENDFUNC(__ld_r13_to_r17_ret)
+	ENDFUNC(__ld_r13_to_r18_ret)
+	ENDFUNC(__ld_r13_to_r19_ret)
+	ENDFUNC(__ld_r13_to_r20_ret)
+	ENDFUNC(__ld_r13_to_r21_ret)
+	ENDFUNC(__ld_r13_to_r22_ret)
+	ENDFUNC(__ld_r13_to_r23_ret)
+	ENDFUNC(__ld_r13_to_r24_ret)
+	ENDFUNC(__ld_r13_to_r25_ret)
diff --git a/arch/arc/lib/libgcc2.c b/arch/arc/lib/libgcc2.c
new file mode 100644
index 0000000..d5ad327
--- /dev/null
+++ b/arch/arc/lib/libgcc2.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 1989-2013 Free Software Foundation, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include "libgcc2.h"
+
+DWtype
+__ashldi3(DWtype u, shift_count_type b)
+{
+	if (b == 0)
+		return u;
+
+	const DWunion uu = {.ll = u};
+	const shift_count_type bm = W_TYPE_SIZE - b;
+	DWunion w;
+
+	if (bm <= 0) {
+		w.s.low = 0;
+		w.s.high = (UWtype)uu.s.low << -bm;
+	} else {
+		const UWtype carries = (UWtype) uu.s.low >> bm;
+
+		w.s.low = (UWtype)uu.s.low << b;
+		w.s.high = ((UWtype)uu.s.high << b) | carries;
+	}
+
+	return w.ll;
+}
+
+DWtype
+__ashrdi3(DWtype u, shift_count_type b)
+{
+	if (b == 0)
+		return u;
+
+	const DWunion uu = {.ll = u};
+	const shift_count_type bm = W_TYPE_SIZE - b;
+	DWunion w;
+
+	if (bm <= 0) {
+		/* w.s.high = 1..1 or 0..0 */
+		w.s.high = uu.s.high >> (W_TYPE_SIZE - 1);
+		w.s.low = uu.s.high >> -bm;
+	} else {
+		const UWtype carries = (UWtype) uu.s.high << bm;
+
+		w.s.high = uu.s.high >> b;
+		w.s.low = ((UWtype)uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
+
+DWtype
+__lshrdi3(DWtype u, shift_count_type b)
+{
+	if (b == 0)
+		return u;
+
+	const DWunion uu = {.ll = u};
+	const shift_count_type bm = W_TYPE_SIZE - b;
+	DWunion w;
+
+	if (bm <= 0) {
+		w.s.high = 0;
+		w.s.low = (UWtype)uu.s.high >> -bm;
+	} else {
+		const UWtype carries = (UWtype)uu.s.high << bm;
+
+		w.s.high = (UWtype)uu.s.high >> b;
+		w.s.low = ((UWtype)uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
+
+unsigned long
+udivmodsi4(unsigned long num, unsigned long den, int modwanted)
+{
+	unsigned long bit = 1;
+	unsigned long res = 0;
+
+	while (den < num && bit && !(den & (1L<<31))) {
+		den <<= 1;
+		bit <<= 1;
+	}
+
+	while (bit) {
+		if (num >= den) {
+			num -= den;
+			res |= bit;
+		}
+		bit >>= 1;
+		den >>= 1;
+	}
+
+	if (modwanted)
+		return num;
+
+	return res;
+}
+
+long
+__divsi3(long a, long b)
+{
+	int neg = 0;
+	long res;
+
+	if (a < 0) {
+		a = -a;
+		neg = !neg;
+	}
+
+	if (b < 0) {
+		b = -b;
+		neg = !neg;
+	}
+
+	res = udivmodsi4(a, b, 0);
+
+	if (neg)
+		res = -res;
+
+	return res;
+}
+
+long
+__modsi3(long a, long b)
+{
+	int neg = 0;
+	long res;
+
+	if (a < 0) {
+		a = -a;
+		neg = 1;
+	}
+
+	if (b < 0)
+		b = -b;
+
+	res = udivmodsi4(a, b, 1);
+
+	if (neg)
+		res = -res;
+
+	return res;
+}
+
+long
+__udivsi3(long a, long b)
+{
+	return udivmodsi4(a, b, 0);
+}
+
+long
+__umodsi3(long a, long b)
+{
+	return udivmodsi4(a, b, 1);
+}
diff --git a/arch/arc/lib/libgcc2.h b/arch/arc/lib/libgcc2.h
new file mode 100644
index 0000000..8813c3b
--- /dev/null
+++ b/arch/arc/lib/libgcc2.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 1989-2013 Free Software Foundation, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef __ASM_LIBGCC_H
+#define __ASM_LIBGCC_H
+
+#define UNITS_PER_WORD 4	/* for ARC */
+#define BITS_PER_UNIT 8		/* for ARC */
+
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+
+/* Work out the largest "word" size that we can deal with on this target.  */
+#if MIN_UNITS_PER_WORD > 4
+# define LIBGCC2_MAX_UNITS_PER_WORD 8
+#elif (MIN_UNITS_PER_WORD > 2 \
+       || (MIN_UNITS_PER_WORD > 1 && __SIZEOF_LONG_LONG__ > 4))
+# define LIBGCC2_MAX_UNITS_PER_WORD 4
+#else
+# define LIBGCC2_MAX_UNITS_PER_WORD MIN_UNITS_PER_WORD
+#endif
+
+/* Work out what word size we are using for this compilation.
+   The value can be set on the command line.  */
+#ifndef LIBGCC2_UNITS_PER_WORD
+#define LIBGCC2_UNITS_PER_WORD LIBGCC2_MAX_UNITS_PER_WORD
+#endif
+
+typedef		 int QItype	__attribute__ ((mode (QI)));
+typedef unsigned int UQItype	__attribute__ ((mode (QI)));
+typedef		 int HItype	__attribute__ ((mode (HI)));
+typedef unsigned int UHItype	__attribute__ ((mode (HI)));
+#if MIN_UNITS_PER_WORD > 1
+/* These typedefs are usually forbidden on dsp's with UNITS_PER_WORD 1.  */
+typedef 	 int SItype	__attribute__ ((mode (SI)));
+typedef unsigned int USItype	__attribute__ ((mode (SI)));
+#if __SIZEOF_LONG_LONG__ > 4
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 2.  */
+typedef		 int DItype	__attribute__ ((mode (DI)));
+typedef unsigned int UDItype	__attribute__ ((mode (DI)));
+#if MIN_UNITS_PER_WORD > 4
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 4.  */
+typedef		 int TItype	__attribute__ ((mode (TI)));
+typedef unsigned int UTItype	__attribute__ ((mode (TI)));
+#endif
+#endif
+#endif
+
+#if LIBGCC2_UNITS_PER_WORD == 8
+#define W_TYPE_SIZE (8 * BITS_PER_UNIT)
+#define Wtype	DItype
+#define UWtype	UDItype
+#define HWtype	DItype
+#define UHWtype	UDItype
+#define DWtype	TItype
+#define UDWtype	UTItype
+#ifdef LIBGCC2_GNU_PREFIX
+#define __NW(a,b)	__gnu_ ## a ## di ## b
+#define __NDW(a,b)	__gnu_ ## a ## ti ## b
+#else
+#define __NW(a,b)	__ ## a ## di ## b
+#define __NDW(a,b)	__ ## a ## ti ## b
+#endif
+#elif LIBGCC2_UNITS_PER_WORD == 4
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+#define Wtype	SItype
+#define UWtype	USItype
+#define HWtype	SItype
+#define UHWtype	USItype
+#define DWtype	DItype
+#define UDWtype	UDItype
+#ifdef LIBGCC2_GNU_PREFIX
+#define __NW(a,b)	__gnu_ ## a ## si ## b
+#define __NDW(a,b)	__gnu_ ## a ## di ## b
+#else
+#define __NW(a,b)	__ ## a ## si ## b
+#define __NDW(a,b)	__ ## a ## di ## b
+#endif
+#elif LIBGCC2_UNITS_PER_WORD == 2
+#define W_TYPE_SIZE (2 * BITS_PER_UNIT)
+#define Wtype	HItype
+#define UWtype	UHItype
+#define HWtype	HItype
+#define UHWtype	UHItype
+#define DWtype	SItype
+#define UDWtype	USItype
+#ifdef LIBGCC2_GNU_PREFIX
+#define __NW(a,b)	__gnu_ ## a ## hi ## b
+#define __NDW(a,b)	__gnu_ ## a ## si ## b
+#else
+#define __NW(a,b)	__ ## a ## hi ## b
+#define __NDW(a,b)	__ ## a ## si ## b
+#endif
+#else
+#define W_TYPE_SIZE BITS_PER_UNIT
+#define Wtype	QItype
+#define UWtype  UQItype
+#define HWtype	QItype
+#define UHWtype	UQItype
+#define DWtype	HItype
+#define UDWtype	UHItype
+#ifdef LIBGCC2_GNU_PREFIX
+#define __NW(a,b)	__gnu_ ## a ## qi ## b
+#define __NDW(a,b)	__gnu_ ## a ## hi ## b
+#else
+#define __NW(a,b)	__ ## a ## qi ## b
+#define __NDW(a,b)	__ ## a ## hi ## b
+#endif
+#endif
+
+typedef int shift_count_type __attribute__((mode (__libgcc_shift_count__)));
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+	struct DWstruct {Wtype high, low;};
+#else
+	struct DWstruct {Wtype low, high;};
+#endif
+
+/* We need this union to unpack/pack DImode values, since we don't have
+   any arithmetic yet.  Incoming DImode parameters are stored into the
+   `ll' field, and the unpacked result is read from the struct `s'.  */
+
+typedef union {
+	struct DWstruct s;
+	DWtype ll;
+} DWunion;
+
+#endif /* __ASM_LIBGCC_H */