]>
Commit | Line | Data |
---|---|---|
bb769ab6 UD |
1 | /* Copy SIZE bytes from SRC to DEST. |
2 | For UltraSPARC-III. | |
62f29da7 | 3 | Copyright (C) 2001, 2003 Free Software Foundation, Inc. |
bb769ab6 UD |
4 | This file is part of the GNU C Library. |
5 | Contributed by David S. Miller (davem@redhat.com) | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
bb769ab6 UD |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 15 | Lesser General Public License for more details. |
bb769ab6 | 16 | |
41bdb6e2 AJ |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, write to the Free | |
19 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
20 | 02111-1307 USA. */ | |
bb769ab6 UD |
21 | |
22 | #include <sysdep.h> | |
3ee3a002 | 23 | |
bb769ab6 UD |
24 | #define ASI_BLK_P 0xf0 |
25 | #define FPRS_FEF 0x04 | |
26 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs | |
27 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | |
bb769ab6 UD |
28 | |
29 | #ifndef XCC | |
30 | #define USE_BPR | |
31 | #define XCC xcc | |
32 | #endif | |
33 | ||
3ee3a002 UD |
34 | .register %g2,#scratch |
35 | .register %g3,#scratch | |
36 | .register %g6,#scratch | |
37 | ||
bb769ab6 UD |
38 | .text |
39 | .align 32 | |
40 | ||
41 | ENTRY(bcopy) /* on entry: %o0 = src, %o1 = dst, %o2 = len */ | |
3ee3a002 UD |
42 | sub %o1, %o0, %o4 /* %o4 = dst - src */ |
43 | mov %o0, %g4 /* %g4 = src, held while the two pointers are swapped */ | |
44 | cmp %o4, %o2 /* compare (dst - src) against len */ | |
45 | mov %o1, %o0 /* %o0 = dst, the register order label 100 expects */ | |
46 | bgeu,pt %XCC, 100f /* unsigned (dst - src) >= len: take the ascending copy at 100 in memcpy */ | |
47 | mov %g4, %o1 /* delay slot: %o1 = src, completing the swap */ | |
bb769ab6 | 48 | #ifndef USE_BPR |
3ee3a002 | 49 | srl %o2, 0, %o2 /* NOTE(review): presumably clears the upper 32 bits of len when XCC is predefined -- confirm */ |
bb769ab6 | 50 | #endif |
3ee3a002 UD |
51 | brnz,pn %o2, 220f /* len != 0: continue at 220 in memmove, which copies from the end downwards */ |
52 | add %o0, %o2, %o0 /* delay slot: %o0 = dst + len */ | |
bb769ab6 UD |
53 | retl /* len == 0: nothing to copy */ |
54 | nop | |
55 | END(bcopy) | |
56 | ||
57 | /* Special/non-trivial issues of this code: | |
58 | * | |
59 | * 1) %o5 is preserved from VISEntryHalf to VISExitHalf | |
60 | * 2) Only low 32 FPU registers are used so that only the | |
61 | * lower half of the FPU register set is dirtied by this | |
62 | * code. This is especially important in the kernel. | |
63 | * 3) This code never prefetches cachelines past the end | |
64 | * of the source buffer. | |
65 | * | |
66 | * The cheetah's flexible spine, oversized liver, enlarged heart, | |
67 | * slender muscular body, and claws make it the swiftest hunter | |
68 | * in Africa and the fastest animal on land. Can reach speeds | |
69 | * of up to 2.4GB per second. | |
70 | */ | |
71 | .align 32 | |
72 | ENTRY(memcpy) | |
73 | ||
74 | 100: /* %o0=dst, %o1=src, %o2=len */ | |
3ee3a002 UD |
75 | mov %o0, %g5 |
76 | cmp %o2, 0 | |
77 | be,pn %XCC, out | |
78 | 218: or %o0, %o1, %o3 | |
79 | cmp %o2, 16 | |
80 | bleu,a,pn %XCC, small_copy | |
81 | or %o3, %o2, %o3 | |
bb769ab6 | 82 | |
3ee3a002 UD |
83 | cmp %o2, 256 |
84 | blu,pt %XCC, medium_copy | |
85 | andcc %o3, 0x7, %g0 | |
bb769ab6 | 86 | |
3ee3a002 UD |
87 | ba,pt %xcc, enter |
88 | andcc %o0, 0x3f, %g2 | |
bb769ab6 | 89 | |
3ee3a002 | 90 | /* Here len >= 256 and condition codes reflect execution |
bb769ab6 UD |
91 | * of "andcc %o0, 0x3f, %g2", done by caller. |
92 | */ | |
93 | .align 64 | |
3ee3a002 | 94 | enter: |
bb769ab6 | 95 | /* Is 'dst' already aligned on a 64-byte boundary? */ |
3ee3a002 | 96 | be,pt %XCC, 2f |
bb769ab6 UD |
97 | |
98 | /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number | |
99 | * of bytes to copy to make 'dst' 64-byte aligned. We pre- | |
100 | * subtract this from 'len'. | |
101 | */ | |
3ee3a002 UD |
102 | sub %g2, 0x40, %g2 |
103 | sub %g0, %g2, %g2 | |
104 | sub %o2, %g2, %o2 | |
bb769ab6 UD |
105 | |
106 | /* Copy %g2 bytes from src to dst, one byte at a time. */ | |
3ee3a002 UD |
107 | 1: ldub [%o1 + 0x00], %o3 |
108 | add %o1, 0x1, %o1 | |
109 | add %o0, 0x1, %o0 | |
110 | subcc %g2, 0x1, %g2 | |
bb769ab6 | 111 | |
3ee3a002 UD |
112 | bg,pt %XCC, 1b |
113 | stb %o3, [%o0 + -1] | |
bb769ab6 | 114 | |
3ee3a002 UD |
115 | 2: VISEntryHalf |
116 | and %o1, 0x7, %g1 | |
117 | ba,pt %xcc, begin | |
118 | alignaddr %o1, %g0, %o1 | |
bb769ab6 UD |
119 | |
120 | .align 64 | |
3ee3a002 UD |
121 | begin: |
122 | prefetch [%o1 + 0x000], #one_read | |
123 | prefetch [%o1 + 0x040], #one_read | |
124 | andn %o2, (0x40 - 1), %o4 | |
125 | prefetch [%o1 + 0x080], #one_read | |
126 | prefetch [%o1 + 0x0c0], #one_read | |
127 | ldd [%o1 + 0x000], %f0 | |
128 | prefetch [%o1 + 0x100], #one_read | |
129 | ldd [%o1 + 0x008], %f2 | |
130 | prefetch [%o1 + 0x140], #one_read | |
131 | ldd [%o1 + 0x010], %f4 | |
132 | prefetch [%o1 + 0x180], #one_read | |
133 | faligndata %f0, %f2, %f16 | |
134 | ldd [%o1 + 0x018], %f6 | |
135 | faligndata %f2, %f4, %f18 | |
136 | ldd [%o1 + 0x020], %f8 | |
137 | faligndata %f4, %f6, %f20 | |
138 | ldd [%o1 + 0x028], %f10 | |
139 | faligndata %f6, %f8, %f22 | |
140 | ||
141 | ldd [%o1 + 0x030], %f12 | |
142 | faligndata %f8, %f10, %f24 | |
143 | ldd [%o1 + 0x038], %f14 | |
144 | faligndata %f10, %f12, %f26 | |
145 | ldd [%o1 + 0x040], %f0 | |
146 | ||
147 | sub %o4, 0x80, %o4 | |
148 | add %o1, 0x40, %o1 | |
149 | ba,pt %xcc, loop | |
150 | srl %o4, 6, %o3 | |
151 | ||
152 | .align 64 | |
153 | loop: | |
154 | ldd [%o1 + 0x008], %f2 | |
155 | faligndata %f12, %f14, %f28 | |
156 | ldd [%o1 + 0x010], %f4 | |
157 | faligndata %f14, %f0, %f30 | |
158 | stda %f16, [%o0] ASI_BLK_P | |
159 | ldd [%o1 + 0x018], %f6 | |
160 | faligndata %f0, %f2, %f16 | |
161 | ||
162 | ldd [%o1 + 0x020], %f8 | |
163 | faligndata %f2, %f4, %f18 | |
164 | ldd [%o1 + 0x028], %f10 | |
165 | faligndata %f4, %f6, %f20 | |
166 | ldd [%o1 + 0x030], %f12 | |
167 | faligndata %f6, %f8, %f22 | |
168 | ldd [%o1 + 0x038], %f14 | |
169 | faligndata %f8, %f10, %f24 | |
170 | ||
171 | ldd [%o1 + 0x040], %f0 | |
172 | prefetch [%o1 + 0x180], #one_read | |
173 | faligndata %f10, %f12, %f26 | |
174 | subcc %o3, 0x01, %o3 | |
175 | add %o1, 0x40, %o1 | |
176 | bg,pt %XCC, loop | |
177 | add %o0, 0x40, %o0 | |
bb769ab6 UD |
178 | |
179 | /* Finally we copy the last full 64-byte block. */ | |
3ee3a002 UD |
180 | loopfini: |
181 | ldd [%o1 + 0x008], %f2 | |
182 | faligndata %f12, %f14, %f28 | |
183 | ldd [%o1 + 0x010], %f4 | |
184 | faligndata %f14, %f0, %f30 | |
185 | stda %f16, [%o0] ASI_BLK_P | |
186 | ldd [%o1 + 0x018], %f6 | |
187 | faligndata %f0, %f2, %f16 | |
188 | ldd [%o1 + 0x020], %f8 | |
189 | faligndata %f2, %f4, %f18 | |
190 | ldd [%o1 + 0x028], %f10 | |
191 | faligndata %f4, %f6, %f20 | |
192 | ldd [%o1 + 0x030], %f12 | |
193 | faligndata %f6, %f8, %f22 | |
194 | ldd [%o1 + 0x038], %f14 | |
195 | faligndata %f8, %f10, %f24 | |
196 | cmp %g1, 0 | |
197 | be,pt %XCC, 1f | |
198 | add %o0, 0x40, %o0 | |
199 | ldd [%o1 + 0x040], %f0 | |
200 | 1: faligndata %f10, %f12, %f26 | |
201 | faligndata %f12, %f14, %f28 | |
202 | faligndata %f14, %f0, %f30 | |
203 | stda %f16, [%o0] ASI_BLK_P | |
204 | add %o0, 0x40, %o0 | |
205 | add %o1, 0x40, %o1 | |
206 | membar #Sync | |
bb769ab6 UD |
207 | |
208 | /* Now we copy the (len modulo 64) bytes at the end. | |
209 | * Note how we borrow the %f0 loaded above. | |
210 | * | |
211 | * Also notice how this code is careful not to perform a | |
3ee3a002 | 212 | * load past the end of the src buffer. |
bb769ab6 | 213 | */ |
3ee3a002 UD |
214 | loopend: |
215 | and %o2, 0x3f, %o2 | |
216 | andcc %o2, 0x38, %g2 | |
217 | be,pn %XCC, endcruft | |
218 | subcc %g2, 0x8, %g2 | |
219 | be,pn %XCC, endcruft | |
220 | cmp %g1, 0 | |
221 | ||
222 | be,a,pt %XCC, 1f | |
223 | ldd [%o1 + 0x00], %f0 | |
224 | ||
225 | 1: ldd [%o1 + 0x08], %f2 | |
226 | add %o1, 0x8, %o1 | |
227 | sub %o2, 0x8, %o2 | |
228 | subcc %g2, 0x8, %g2 | |
229 | faligndata %f0, %f2, %f8 | |
230 | std %f8, [%o0 + 0x00] | |
231 | be,pn %XCC, endcruft | |
232 | add %o0, 0x8, %o0 | |
233 | ldd [%o1 + 0x08], %f0 | |
234 | add %o1, 0x8, %o1 | |
235 | sub %o2, 0x8, %o2 | |
236 | subcc %g2, 0x8, %g2 | |
237 | faligndata %f2, %f0, %f8 | |
238 | std %f8, [%o0 + 0x00] | |
239 | bne,pn %XCC, 1b | |
240 | add %o0, 0x8, %o0 | |
bb769ab6 UD |
241 | |
242 | /* If anything is left, we copy it one byte at a time. | |
243 | * Note that %g1 is (src & 0x7) saved above before the | |
244 | * alignaddr was performed. | |
245 | */ | |
3ee3a002 | 246 | endcruft: |
bb769ab6 UD |
247 | cmp %o2, 0 |
248 | add %o1, %g1, %o1 | |
249 | VISExitHalf | |
3ee3a002 UD |
250 | be,pn %XCC, out |
251 | sub %o0, %o1, %o3 | |
bb769ab6 | 252 | |
3ee3a002 UD |
253 | andcc %g1, 0x7, %g0 |
254 | bne,pn %icc, small_copy_unaligned | |
255 | andcc %o2, 0x8, %g0 | |
256 | be,pt %icc, 1f | |
257 | nop | |
258 | ldx [%o1], %o5 | |
259 | stx %o5, [%o1 + %o3] | |
260 | add %o1, 0x8, %o1 | |
bb769ab6 | 261 | |
3ee3a002 UD |
262 | 1: andcc %o2, 0x4, %g0 |
263 | be,pt %icc, 1f | |
264 | nop | |
265 | lduw [%o1], %o5 | |
266 | stw %o5, [%o1 + %o3] | |
267 | add %o1, 0x4, %o1 | |
bb769ab6 | 268 | |
3ee3a002 UD |
269 | 1: andcc %o2, 0x2, %g0 |
270 | be,pt %icc, 1f | |
271 | nop | |
272 | lduh [%o1], %o5 | |
273 | sth %o5, [%o1 + %o3] | |
274 | add %o1, 0x2, %o1 | |
bb769ab6 | 275 | |
3ee3a002 UD |
276 | 1: andcc %o2, 0x1, %g0 |
277 | be,pt %icc, out | |
278 | nop | |
279 | ldub [%o1], %o5 | |
280 | ba,pt %xcc, out | |
281 | stb %o5, [%o1 + %o3] | |
282 | ||
283 | medium_copy: /* 16 < len < 256; flags are from "andcc %o3, 0x7, %g0" in the caller's delay slot, where %o3 = dst OR src */ | |
284 | bne,pn %XCC, small_copy_unaligned /* dst or src is not 8-byte aligned: copy byte by byte */ | |
285 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src, so [%o1 + %o3] addresses dst */ | |
286 | ||
287 | medium_copy_aligned: | |
288 | andn %o2, 0x7, %o4 /* %o4 = len rounded down to a multiple of 8 */ | |
289 | and %o2, 0x7, %o2 /* %o2 = len & 7, the tail left after the 8-byte loop */ | |
290 | 1: subcc %o4, 0x8, %o4 /* count down the 8-byte chunks; these flags drive the bgu below */ | |
291 | ldx [%o1], %o5 | |
292 | stx %o5, [%o1 + %o3] | |
293 | bgu,pt %XCC, 1b /* loop while the remaining chunk count is still above zero */ | |
294 | add %o1, 0x8, %o1 /* delay slot: advance src (dst follows through %o3) */ | |
295 | andcc %o2, 0x4, %g0 /* does the tail contain a 4-byte word? */ | |
296 | be,pt %XCC, 1f | |
297 | nop | |
298 | sub %o2, 0x4, %o2 /* account for the word copied below */ | |
299 | lduw [%o1], %o5 | |
300 | stw %o5, [%o1 + %o3] | |
301 | add %o1, 0x4, %o1 | |
302 | 1: cmp %o2, 0 /* any bytes still left? */ | |
303 | be,pt %XCC, out | |
304 | nop | |
305 | ba,pt %xcc, small_copy_unaligned /* finish the remaining bytes one at a time */ | |
306 | nop | |
bb769ab6 | 307 | |
3ee3a002 UD |
308 | small_copy: /* 0 < len <= 16; %o3 = dst OR src OR len, from the caller's annulled delay slot */ |
309 | andcc %o3, 0x3, %g0 /* is any of dst, src, len not a multiple of 4? */ | |
310 | bne,pn %XCC, small_copy_unaligned /* yes: copy byte by byte */ | |
311 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src, so [%o1 + %o3] addresses dst */ | |
bb769ab6 | 312 | |
3ee3a002 UD |
313 | small_copy_aligned: /* word loop; len is a multiple of 4 on this path */ |
314 | subcc %o2, 4, %o2 /* len -= 4; these flags drive the bgu below */ | |
315 | lduw [%o1], %g1 | |
316 | stw %g1, [%o1 + %o3] | |
317 | bgu,pt %XCC, small_copy_aligned /* more words remain */ | |
318 | add %o1, 4, %o1 /* delay slot: advance src (dst follows through %o3) */ | |
bb769ab6 | 319 | |
3ee3a002 UD |
320 | out: retl /* common exit: return the dst pointer saved in %g5 at entry */ |
321 | mov %g5, %o0 | |
bb769ab6 | 322 | |
3ee3a002 UD |
323 | .align 32 |
324 | small_copy_unaligned: /* byte loop; expects %o2 = bytes left (non-zero) and %o3 = dst - src */ | |
325 | subcc %o2, 1, %o2 /* len -= 1; these flags drive the bgu below */ | |
326 | ldub [%o1], %g1 | |
327 | stb %g1, [%o1 + %o3] | |
328 | bgu,pt %XCC, small_copy_unaligned /* more bytes remain */ | |
329 | add %o1, 1, %o1 /* delay slot: advance src */ | |
330 | retl /* return the dst pointer saved in %g5 at entry */ | |
331 | mov %g5, %o0 | |
bb769ab6 | 332 | |
bb769ab6 UD |
333 | END(memcpy) |
334 | ||
335 | #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
336 | ldx [%src - offset - 0x20], %t0; \ | |
337 | ldx [%src - offset - 0x18], %t1; \ | |
338 | ldx [%src - offset - 0x10], %t2; \ | |
339 | ldx [%src - offset - 0x08], %t3; \ | |
340 | stw %t0, [%dst - offset - 0x1c]; \ | |
341 | srlx %t0, 32, %t0; \ | |
342 | stw %t0, [%dst - offset - 0x20]; \ | |
343 | stw %t1, [%dst - offset - 0x14]; \ | |
344 | srlx %t1, 32, %t1; \ | |
345 | stw %t1, [%dst - offset - 0x18]; \ | |
346 | stw %t2, [%dst - offset - 0x0c]; \ | |
347 | srlx %t2, 32, %t2; \ | |
348 | stw %t2, [%dst - offset - 0x10]; \ | |
349 | stw %t3, [%dst - offset - 0x04]; \ | |
350 | srlx %t3, 32, %t3; \ | |
351 | stw %t3, [%dst - offset - 0x08]; | |
352 | ||
353 | #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
354 | ldx [%src - offset - 0x20], %t0; \ | |
355 | ldx [%src - offset - 0x18], %t1; \ | |
356 | ldx [%src - offset - 0x10], %t2; \ | |
357 | ldx [%src - offset - 0x08], %t3; \ | |
358 | stx %t0, [%dst - offset - 0x20]; \ | |
359 | stx %t1, [%dst - offset - 0x18]; \ | |
360 | stx %t2, [%dst - offset - 0x10]; \ | |
361 | stx %t3, [%dst - offset - 0x08]; \ | |
362 | ldx [%src - offset - 0x40], %t0; \ | |
363 | ldx [%src - offset - 0x38], %t1; \ | |
364 | ldx [%src - offset - 0x30], %t2; \ | |
365 | ldx [%src - offset - 0x28], %t3; \ | |
366 | stx %t0, [%dst - offset - 0x40]; \ | |
367 | stx %t1, [%dst - offset - 0x38]; \ | |
368 | stx %t2, [%dst - offset - 0x30]; \ | |
369 | stx %t3, [%dst - offset - 0x28]; | |
370 | ||
371 | #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
372 | ldx [%src + offset + 0x00], %t0; \ | |
373 | ldx [%src + offset + 0x08], %t1; \ | |
374 | stw %t0, [%dst + offset + 0x04]; \ | |
375 | srlx %t0, 32, %t2; \ | |
376 | stw %t2, [%dst + offset + 0x00]; \ | |
377 | stw %t1, [%dst + offset + 0x0c]; \ | |
378 | srlx %t1, 32, %t3; \ | |
379 | stw %t3, [%dst + offset + 0x08]; | |
380 | ||
381 | #define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ | |
382 | ldx [%src + offset + 0x00], %t0; \ | |
383 | ldx [%src + offset + 0x08], %t1; \ | |
384 | stx %t0, [%dst + offset + 0x00]; \ | |
385 | stx %t1, [%dst + offset + 0x08]; | |
386 | ||
387 | .align 32 | |
388 | 228: andcc %o2, 1, %g0 /* IEU1 Group */ | |
389 | be,pt %icc, 2f+4 /* CTI */ | |
390 | 1: ldub [%o1 - 1], %o5 /* LOAD Group */ | |
391 | sub %o1, 1, %o1 /* IEU0 */ | |
392 | sub %o0, 1, %o0 /* IEU1 */ | |
393 | subcc %o2, 1, %o2 /* IEU1 Group */ | |
394 | be,pn %xcc, 229f /* CTI */ | |
395 | stb %o5, [%o0] /* Store */ | |
396 | 2: ldub [%o1 - 1], %o5 /* LOAD Group */ | |
397 | sub %o0, 2, %o0 /* IEU0 */ | |
398 | ldub [%o1 - 2], %g5 /* LOAD Group */ | |
399 | sub %o1, 2, %o1 /* IEU0 */ | |
400 | subcc %o2, 2, %o2 /* IEU1 Group */ | |
401 | stb %o5, [%o0 + 1] /* Store */ | |
402 | bne,pt %xcc, 2b /* CTI */ | |
403 | stb %g5, [%o0] /* Store */ | |
404 | 229: retl | |
405 | mov %g4, %o0 | |
406 | ||
407 | .align 32 | |
408 | ENTRY(memmove) | |
3ee3a002 | 409 | mov %o0, %g5 |
bb769ab6 UD |
410 | #ifndef USE_BPR |
411 | srl %o2, 0, %o2 /* IEU1 Group */ | |
412 | #endif | |
3ee3a002 | 413 | brz,pn %o2, out /* CTI Group */ |
bb769ab6 UD |
414 | sub %o0, %o1, %o4 /* IEU0 */ |
415 | cmp %o4, %o2 /* IEU1 Group */ | |
416 | bgeu,pt %XCC, 218b /* CTI */ | |
417 | mov %o0, %g4 /* IEU0 */ | |
418 | add %o0, %o2, %o0 /* IEU0 Group */ | |
419 | 220: add %o1, %o2, %o1 /* IEU1 */ | |
420 | cmp %o2, 15 /* IEU1 Group */ | |
421 | bleu,pn %xcc, 228b /* CTI */ | |
422 | andcc %o0, 7, %g2 /* IEU1 Group */ | |
423 | sub %o0, %o1, %g5 /* IEU0 */ | |
424 | andcc %g5, 3, %o5 /* IEU1 Group */ | |
425 | bne,pn %xcc, 232f /* CTI */ | |
426 | andcc %o1, 3, %g0 /* IEU1 Group */ | |
427 | be,a,pt %xcc, 236f /* CTI */ | |
428 | andcc %o1, 4, %g0 /* IEU1 Group */ | |
429 | andcc %o1, 1, %g0 /* IEU1 Group */ | |
430 | be,pn %xcc, 4f /* CTI */ | |
431 | andcc %o1, 2, %g0 /* IEU1 Group */ | |
432 | ldub [%o1 - 1], %g2 /* Load Group */ | |
433 | sub %o1, 1, %o1 /* IEU0 */ | |
434 | sub %o0, 1, %o0 /* IEU1 */ | |
435 | sub %o2, 1, %o2 /* IEU0 Group */ | |
436 | be,pn %xcc, 5f /* CTI Group */ | |
437 | stb %g2, [%o0] /* Store */ | |
438 | 4: lduh [%o1 - 2], %g2 /* Load Group */ | |
439 | sub %o1, 2, %o1 /* IEU0 */ | |
440 | sub %o0, 2, %o0 /* IEU1 */ | |
441 | sub %o2, 2, %o2 /* IEU0 */ | |
442 | sth %g2, [%o0] /* Store Group + bubble */ | |
443 | 5: andcc %o1, 4, %g0 /* IEU1 */ | |
444 | 236: be,a,pn %xcc, 2f /* CTI */ | |
62f29da7 | 445 | andcc %o2, -128, %g6 /* IEU1 Group */ |
bb769ab6 UD |
446 | lduw [%o1 - 4], %g5 /* Load Group */ |
447 | sub %o1, 4, %o1 /* IEU0 */ | |
448 | sub %o0, 4, %o0 /* IEU1 */ | |
449 | sub %o2, 4, %o2 /* IEU0 Group */ | |
450 | stw %g5, [%o0] /* Store */ | |
62f29da7 | 451 | andcc %o2, -128, %g6 /* IEU1 Group */ |
bb769ab6 UD |
452 | 2: be,pn %xcc, 235f /* CTI */ |
453 | andcc %o0, 4, %g0 /* IEU1 Group */ | |
454 | be,pn %xcc, 282f + 4 /* CTI Group */ | |
455 | 5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) | |
456 | RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) | |
457 | RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) | |
458 | RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) | |
62f29da7 | 459 | subcc %g6, 128, %g6 /* IEU1 Group */ |
bb769ab6 UD |
460 | sub %o1, 128, %o1 /* IEU0 */ |
461 | bne,pt %xcc, 5b /* CTI */ | |
462 | sub %o0, 128, %o0 /* IEU0 Group */ | |
62f29da7 | 463 | 235: andcc %o2, 0x70, %g6 /* IEU1 Group */ |
bb769ab6 UD |
464 | 41: be,pn %xcc, 280f /* CTI */ |
465 | andcc %o2, 8, %g0 /* IEU1 Group */ | |
466 | /* Clk1 8-( */ | |
467 | /* Clk2 8-( */ | |
468 | /* Clk3 8-( */ | |
469 | /* Clk4 8-( */ | |
470 | 279: rd %pc, %o5 /* PDU Group */ | |
62f29da7 UD |
471 | sll %g6, 1, %g5 /* IEU0 Group */ |
472 | sub %o1, %g6, %o1 /* IEU1 */ | |
bb769ab6 UD |
473 | sub %o5, %g5, %o5 /* IEU0 Group */ |
474 | jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ | |
62f29da7 | 475 | sub %o0, %g6, %o0 /* IEU0 Group */ |
bb769ab6 UD |
476 | RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) |
477 | RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) | |
478 | RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) | |
479 | RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) | |
480 | RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) | |
481 | RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) | |
482 | RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) | |
483 | 280: be,pt %xcc, 281f /* CTI */ | |
484 | andcc %o2, 4, %g0 /* IEU1 */ | |
485 | ldx [%o1 - 8], %g2 /* Load Group */ | |
486 | sub %o0, 8, %o0 /* IEU0 */ | |
487 | stw %g2, [%o0 + 4] /* Store Group */ | |
488 | sub %o1, 8, %o1 /* IEU1 */ | |
489 | srlx %g2, 32, %g2 /* IEU0 Group */ | |
490 | stw %g2, [%o0] /* Store */ | |
491 | 281: be,pt %xcc, 1f /* CTI */ | |
492 | andcc %o2, 2, %g0 /* IEU1 Group */ | |
493 | lduw [%o1 - 4], %g2 /* Load Group */ | |
494 | sub %o1, 4, %o1 /* IEU0 */ | |
495 | stw %g2, [%o0 - 4] /* Store Group */ | |
496 | sub %o0, 4, %o0 /* IEU0 */ | |
497 | 1: be,pt %xcc, 1f /* CTI */ | |
498 | andcc %o2, 1, %g0 /* IEU1 Group */ | |
499 | lduh [%o1 - 2], %g2 /* Load Group */ | |
500 | sub %o1, 2, %o1 /* IEU0 */ | |
501 | sth %g2, [%o0 - 2] /* Store Group */ | |
502 | sub %o0, 2, %o0 /* IEU0 */ | |
503 | 1: be,pt %xcc, 211f /* CTI */ | |
504 | nop /* IEU1 */ | |
505 | ldub [%o1 - 1], %g2 /* Load Group */ | |
506 | stb %g2, [%o0 - 1] /* Store Group + bubble */ | |
507 | 211: retl | |
508 | mov %g4, %o0 | |
509 | ||
510 | 282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) | |
511 | RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) | |
62f29da7 | 512 | subcc %g6, 128, %g6 /* IEU1 Group */ |
bb769ab6 UD |
513 | sub %o1, 128, %o1 /* IEU0 */ |
514 | bne,pt %xcc, 282b /* CTI */ | |
515 | sub %o0, 128, %o0 /* IEU0 Group */ | |
62f29da7 | 516 | andcc %o2, 0x70, %g6 /* IEU1 */ |
bb769ab6 UD |
517 | be,pn %xcc, 284f /* CTI */ |
518 | andcc %o2, 8, %g0 /* IEU1 Group */ | |
519 | /* Clk1 8-( */ | |
520 | /* Clk2 8-( */ | |
521 | /* Clk3 8-( */ | |
522 | /* Clk4 8-( */ | |
523 | 283: rd %pc, %o5 /* PDU Group */ | |
62f29da7 UD |
524 | sub %o1, %g6, %o1 /* IEU0 Group */ |
525 | sub %o5, %g6, %o5 /* IEU1 */ | |
bb769ab6 | 526 | jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ |
62f29da7 | 527 | sub %o0, %g6, %o0 /* IEU0 Group */ |
bb769ab6 UD |
528 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) |
529 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) | |
530 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) | |
531 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) | |
532 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) | |
533 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) | |
534 | RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) | |
535 | 284: be,pt %xcc, 285f /* CTI Group */ | |
536 | andcc %o2, 4, %g0 /* IEU1 */ | |
537 | ldx [%o1 - 8], %g2 /* Load Group */ | |
538 | sub %o0, 8, %o0 /* IEU0 */ | |
539 | sub %o1, 8, %o1 /* IEU0 Group */ | |
540 | stx %g2, [%o0] /* Store */ | |
541 | 285: be,pt %xcc, 1f /* CTI */ | |
542 | andcc %o2, 2, %g0 /* IEU1 Group */ | |
543 | lduw [%o1 - 4], %g2 /* Load Group */ | |
544 | sub %o0, 4, %o0 /* IEU0 */ | |
545 | sub %o1, 4, %o1 /* IEU0 Group */ | |
546 | stw %g2, [%o0] /* Store */ | |
547 | 1: be,pt %xcc, 1f /* CTI */ | |
548 | andcc %o2, 1, %g0 /* IEU1 Group */ | |
549 | lduh [%o1 - 2], %g2 /* Load Group */ | |
550 | sub %o0, 2, %o0 /* IEU0 */ | |
551 | sub %o1, 2, %o1 /* IEU0 Group */ | |
552 | sth %g2, [%o0] /* Store */ | |
553 | 1: be,pt %xcc, 1f /* CTI */ | |
554 | nop /* IEU0 Group */ | |
555 | ldub [%o1 - 1], %g2 /* Load Group */ | |
556 | stb %g2, [%o0 - 1] /* Store Group + bubble */ | |
557 | 1: retl | |
558 | mov %g4, %o0 | |
559 | ||
560 | 232: brz,pt %g2, 2f /* CTI Group */ | |
561 | sub %o2, %g2, %o2 /* IEU0 Group */ | |
562 | 1: ldub [%o1 - 1], %g5 /* Load Group */ | |
563 | sub %o1, 1, %o1 /* IEU0 */ | |
564 | sub %o0, 1, %o0 /* IEU1 */ | |
565 | subcc %g2, 1, %g2 /* IEU1 Group */ | |
566 | bne,pt %xcc, 1b /* CTI */ | |
567 | stb %g5, [%o0] /* Store */ | |
568 | 2: andn %o2, 7, %g5 /* IEU0 Group */ | |
569 | and %o2, 7, %o2 /* IEU1 */ | |
570 | fmovd %f0, %f2 /* FPU */ | |
571 | alignaddr %o1, %g0, %g1 /* GRU Group */ | |
572 | ldd [%g1], %f4 /* Load Group */ | |
573 | 1: ldd [%g1 - 8], %f6 /* Load Group */ | |
574 | sub %g1, 8, %g1 /* IEU0 Group */ | |
575 | subcc %g5, 8, %g5 /* IEU1 */ | |
576 | faligndata %f6, %f4, %f0 /* GRU Group */ | |
577 | std %f0, [%o0 - 8] /* Store */ | |
578 | sub %o1, 8, %o1 /* IEU0 Group */ | |
579 | be,pn %xcc, 233f /* CTI */ | |
580 | sub %o0, 8, %o0 /* IEU1 */ | |
581 | ldd [%g1 - 8], %f4 /* Load Group */ | |
582 | sub %g1, 8, %g1 /* IEU0 */ | |
583 | subcc %g5, 8, %g5 /* IEU1 */ | |
584 | faligndata %f4, %f6, %f0 /* GRU Group */ | |
585 | std %f0, [%o0 - 8] /* Store */ | |
586 | sub %o1, 8, %o1 /* IEU0 */ | |
587 | bne,pn %xcc, 1b /* CTI Group */ | |
588 | sub %o0, 8, %o0 /* IEU0 */ | |
589 | 233: brz,pn %o2, 234f /* CTI Group */ | |
590 | nop /* IEU0 */ | |
591 | 237: ldub [%o1 - 1], %g5 /* LOAD */ | |
592 | sub %o1, 1, %o1 /* IEU0 */ | |
593 | sub %o0, 1, %o0 /* IEU1 */ | |
594 | subcc %o2, 1, %o2 /* IEU1 */ | |
595 | bne,pt %xcc, 237b /* CTI */ | |
596 | stb %g5, [%o0] /* Store Group */ | |
597 | 234: wr %g0, FPRS_FEF, %fprs | |
598 | retl | |
599 | mov %g4, %o0 | |
600 | END(memmove) | |
601 | ||
602 | #ifdef USE_BPR | |
a334319f UD |
603 | weak_alias(memcpy, __align_cpy_1) |
604 | weak_alias(memcpy, __align_cpy_2) | |
605 | weak_alias(memcpy, __align_cpy_4) | |
606 | weak_alias(memcpy, __align_cpy_8) | |
607 | weak_alias(memcpy, __align_cpy_16) | |
bb769ab6 | 608 | #endif |
85dd1003 UD |
609 | libc_hidden_builtin_def (memcpy) |
610 | libc_hidden_builtin_def (memmove) |