blob: e6b359a344b56cc45bd5da002b0008715114bc82 [file] [log] [blame]
/*
 * File: memcpy.S
 *
 * Copyright 2004-2007 Analog Devices Inc.
 * Enter bugs at http://blackfin.uclinux.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * _memcpy_ASM - copy a block of memory (Blackfin assembly)
 *
 * In:    R0 = dst  (destination address)
 *        R1 = src  (source address)
 *        R2 = len  (byte count, compared as a signed value)
 * Out:   R0 = dst on every path (R0 is never written by this routine)
 * Clobb: R1, R3, P0, P1, P2, I1, hardware loop 0 (via LSETUP ... LC0), CC
 *
 * Behaviour:
 *   - len <= 0: returns immediately, nothing is copied.
 *   - src < dst < src + len: the regions overlap such that a forward copy
 *     would overwrite source bytes before they are read, so the copy runs
 *     backwards, one byte at a time.
 *   - both addresses 4-byte aligned and len >= 12: whole words are copied
 *     with a software-pipelined word loop, then any 1-3 trailing bytes.
 *   - everything else: forward byte-by-byte copy.
 *
 * NOTE(review): the word loop post-increments I1; this presumably relies
 * on L1 being 0 (no circular addressing) on entry - confirm against the
 * calling convention in use.
 */
.align 2

.globl _memcpy_ASM;
.type _memcpy_ASM, STT_FUNC;
_memcpy_ASM:
	CC = R2 <= 0;			/* length not positive? */
	IF CC JUMP .Ldone;		/* nothing to do */

	P0 = R0;			/* dst */
	P1 = R1;			/* src */
	P2 = R2;			/* length */

	/* Check for overlapping data that needs a backwards copy. */
	CC = R1 < R0;			/* src < dst */
	IF !CC JUMP .Lno_overlap;
	R3 = R1 + R2;
	CC = R0 < R3;			/* and dst < src+len */
	IF CC JUMP .Lhas_overlap;

.Lno_overlap:
	/*
	 * Check for aligned data.  src already lives in P1, so R1 is free
	 * to hold the mask; R0 must stay untouched because it is the
	 * return value (dst).
	 */
	R3 = R1 | R0;
	R1 = 0x3;
	R3 = R3 & R1;
	CC = R3;			/* low bits set on either address? */
	IF CC JUMP .Lnot_aligned;

	/*
	 * Both addresses are word-aligned, so at least part of the data
	 * can be copied a word at a time.
	 */
	P2 = P2 >> 2;			/* P2 = number of whole words */
	CC = P2 <= 2;
	IF !CC JUMP .Lword_copy;

	/* Fewer than three whole words (len <= 11): just copy bytes. */
	P2 = R2;
	LSETUP(.Lshort_start, .Lshort_end) LC0=P2;
.Lshort_start:
	R3 = B[P1++] (X);
.Lshort_end:
	B[P0++] = R3;

	RTS;

.Lword_copy:
	/* At least three whole words (len >= 12) to copy. */
	P2 += -1;			/* one iteration is unrolled */
	LSETUP(.Lword_loop, .Lword_loop) LC0=P2;
	I1 = P1;
	R3 = [I1++];			/* preload the first word */
.Lword_loop:
	/* store the word loaded last time round while loading the next */
	MNOP || [P0++] = R3 || R3 = [I1++];

	[P0++] = R3;			/* store the final word */

	/* Any remaining bytes to copy? */
	R3 = 0x3;
	R3 = R2 & R3;			/* R3 = len % 4 */
	CC = R3 == 0;
	P1 = I1;			/* byte loop reads through P1 */
	IF !CC JUMP .Lbytes_left;
	RTS;

.Lbytes_left:
	P2 = R3;			/* only the 1-3 trailing bytes remain */
.Lnot_aligned:
	/* From here on the copy is byte-by-byte; P2 = bytes to copy. */
	LSETUP(.Lbyte_start, .Lbyte_end) LC0=P2;
.Lbyte_start:
	R1 = B[P1++] (X);
.Lbyte_end:
	B[P0++] = R1;

.Ldone:
	RTS;

.Lhas_overlap:
	/*
	 * Need to reverse the copying, because the dst would clobber
	 * the src.  Don't bother to work out alignment for the reverse
	 * case: start at the last byte of each region and walk down.
	 */
	P0 = P0 + P2;
	P0 += -1;			/* P0 = dst + len - 1 */
	P1 = P1 + P2;
	P1 += -1;			/* P1 = src + len - 1 */
	LSETUP(.Lover_start, .Lover_end) LC0=P2;
.Lover_start:
	R1 = B[P1--] (X);
.Lover_end:
	B[P0--] = R1;

	RTS;

.size _memcpy_ASM, .-_memcpy_ASM