]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/mips/mips64/memcpy.S
Replace FSF snail mail address by URL.
[thirdparty/glibc.git] / sysdeps / mips / mips64 / memcpy.S
1 /* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
4 Ported to mips3 n32/n64 by Alexandre Oliva <aoliva@redhat.com>
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21 #include <endian.h>
22 #include <sys/asm.h>
23
24
25 /* void *memcpy(void *s1, const void *s2, size_t n);
26
27 This could probably be optimized further. */
28
/* Partial-doubleword access macros, selected by target byte order.
   Described in terms of memory, independent of endianness:
     LDHI/SDHI reg, p   access the bytes from address p up to the end of
                        the aligned doubleword that contains p;
     LDLO/SDLO reg, p   access the bytes from the start of the aligned
                        doubleword that contains p up through p.
   An LDHI at p followed by an LDLO at p+7 into the same register thus
   assembles one complete doubleword from a possibly unaligned address.
   SDLO is defined here but is not referenced by the memcpy code that
   follows in this file.  */
29 #if __BYTE_ORDER == __BIG_ENDIAN
30 # define LDHI ldl /* high part is left in big-endian */
31 # define SDHI sdl /* high part is left in big-endian */
32 # define LDLO ldr /* low part is right in big-endian */
33 # define SDLO sdr /* low part is right in big-endian */
34 #else /* little-endian: the left/right instructions swap roles */
35 # define LDHI ldr /* high part is right in little-endian */
36 # define SDHI sdr /* high part is right in little-endian */
37 # define LDLO ldl /* low part is left in little-endian */
38 # define SDLO sdl /* low part is left in little-endian */
39 #endif /* __BYTE_ORDER == __BIG_ENDIAN */
40
/* memcpy body.

   Register roles, as used by the code below:
     a0  destination cursor (s1 of the prototype on entry)
     a1  source cursor (s2 of the prototype on entry)
     a2  bytes still to copy (n of the prototype on entry)
     a3  end address of the current loop, or an alignment byte count
     v0  return value: the destination pointer as passed in
     t0-t3, ta0-ta3  scratch data registers

   The whole body is assembled under .set noreorder, so the instruction
   written directly after each branch or jump is its delay slot and is
   executed whether or not the branch is taken.

   Control flow:
     - fewer than 16 bytes: byte loop at L(last16);
     - source and destination agree in their low three address bits:
       copy up to the next 8-byte boundary, then 64-byte blocks
       (L(lop8w)), then single doublewords (L(lop1w)), then the tail
       bytes (L(last16));
     - otherwise L(shift): align the destination only, then copy with
       unaligned loads and aligned stores (L(shfth)), then the tail.

   All three multi-byte loops test their condition at the bottom and so
   run at least once; each is entered only when at least one full
   iteration of data is known to remain.  */
41 ENTRY (memcpy)
42 .set noreorder # Delay slots below are filled by hand
43
44 slti t0, a2, 16 # t0 = (n < 16), signed compare
45 bne t0, zero, L(last16) # Short copy: go straight to the byte loop
46 move v0, a0 # Delay slot: return value = original dest
47
48 xor t0, a1, a0 # Find a0/a1 displacement
49 andi t0, 0x7 # t0 = (src ^ dest) & 7
50 bne t0, zero, L(shift) # Low bits differ: go handle the unaligned case
51 PTR_SUBU t1, zero, a1 # Delay slot: t1 = -src
52 andi t1, 0x7 # t1 = bytes from src up to the next 8-byte boundary
53 beq t1, zero, L(chk8w) # Zero means already on a doubleword boundary
54 PTR_SUBU a2, t1 # Delay slot: n -= t1 (no change when t1 is 0)
55 LDHI t0, 0(a1) # Load the t1 bytes that precede the boundary
56 PTR_ADDU a1, t1 # src += t1, now doubleword aligned
57 SDHI t0, 0(a0) # Store the same t1 bytes (dest has the same low bits)
58 PTR_ADDU a0, t1 # dest += t1, now doubleword aligned
59
60 L(chk8w):
61 andi t0, a2, 0x3f # t0 = n % 64; 64 or more bytes left?
62 beq t0, a2, L(chk1w) # No (n equals its own remainder): skip block loop
63 PTR_SUBU a3, a2, t0 # Delay slot: a3 = n rounded down to a multiple of 64
64 PTR_ADDU a3, a1 # a3 = end address of loop
65 move a2, t0 # a2 = what will be left after loop
66 L(lop8w):
67 ld t0, 0(a1) # Loop taking 8 doublewords (64 bytes) at a time
68 ld t1, 8(a1)
69 ld t2, 16(a1)
70 ld t3, 24(a1)
71 ld ta0, 32(a1)
72 ld ta1, 40(a1)
73 ld ta2, 48(a1)
74 ld ta3, 56(a1)
75 PTR_ADDIU a0, 64 # Pointers advance before the stores,
76 PTR_ADDIU a1, 64 # hence the negative store offsets
77 sd t0, -64(a0)
78 sd t1, -56(a0)
79 sd t2, -48(a0)
80 sd t3, -40(a0)
81 sd ta0, -32(a0)
82 sd ta1, -24(a0)
83 sd ta2, -16(a0)
84 bne a1, a3, L(lop8w) # Repeat until src reaches the end address
85 sd ta3, -8(a0) # Delay slot: last store of the block
86
87 L(chk1w):
88 andi t0, a2, 0x7 # t0 = n % 8; 8 or more bytes left?
89 beq t0, a2, L(last16) # No (n equals its own remainder): only tail bytes
90 PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time
91 PTR_ADDU a3, a1 # a3 again end address
92 move a2, t0 # a2 = tail bytes left after this loop
93 L(lop1w):
94 ld t0, 0(a1)
95 PTR_ADDIU a0, 8
96 PTR_ADDIU a1, 8
97 bne a1, a3, L(lop1w) # Repeat until src reaches the end address
98 sd t0, -8(a0) # Delay slot: store the doubleword just loaded
99
100 L(last16):
101 blez a2, L(lst16e) # Done if nothing left; else copy the remaining (fewer than 16) bytes one at a time
102 PTR_ADDU a3, a2, a1 # Delay slot: a3 = src end address
103 L(lst16l):
104 lb t0, 0(a1)
105 PTR_ADDIU a0, 1
106 PTR_ADDIU a1, 1
107 bne a1, a3, L(lst16l) # Repeat until src reaches the end address
108 sb t0, -1(a0) # Delay slot: store the byte just loaded
109 L(lst16e):
110 jr ra # Bye, bye (v0 was set at entry)
111 nop # Delay slot: nothing left to do
112
113 L(shift):
114 PTR_SUBU a3, zero, a0 # Src and Dest unaligned
115 andi a3, 0x7 # a3 = bytes needed to align dest (unoptimized case...)
116 beq a3, zero, L(shft1) # Zero means dest is already doubleword aligned
117 PTR_SUBU a2, a3 # a2 = bytes left
118 LDHI t0, 0(a1) # Take care of first odd part:
119 LDLO t0, 7(a1) # t0 = full doubleword from the unaligned src
120 PTR_ADDU a1, a3 # src += a3
121 SDHI t0, 0(a0) # Store only the a3 bytes up to the dest boundary
122 PTR_ADDU a0, a3 # dest += a3, now doubleword aligned
123 L(shft1): # Here a2 >= 9: it was >= 16 at entry and at most 7 were consumed
124 andi t0, a2, 0x7 # t0 = tail bytes left for the byte loop
125 PTR_SUBU a3, a2, t0 # a3 = byte count to copy as doublewords
126 PTR_ADDU a3, a1 # a3 = src end address for the loop
127 L(shfth):
128 LDHI t1, 0(a1) # Limp through, dword by dword
129 LDLO t1, 7(a1) # (two partial loads form one unaligned doubleword)
130 PTR_ADDIU a0, 8
131 PTR_ADDIU a1, 8
132 bne a1, a3, L(shfth) # Repeat until src reaches the end address
133 sd t1, -8(a0) # Delay slot: aligned store to dest
134 b L(last16) # Handle anything which may be left
135 move a2, t0 # Delay slot: a2 = tail byte count
136
137 .set reorder
138 END (memcpy)
139 libc_hidden_builtin_def (memcpy)