]>
git.ipfire.org Git - thirdparty/glibc.git/blob - ports/sysdeps/tile/wordcopy.c
1 /* wordcopy.c -- subroutines for memory copy functions. Tile version.
2 Copyright (C) 1991-2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 /* To optimize for tile, we make the following changes from the
20 default glibc version:
21 - Use the double align instruction instead of the MERGE macro.
22 - Since we don't have offset addressing mode, make sure the loads /
23 stores in the inner loop always have indices of 0.
24 - Use post-increment addresses in the inner loops, which yields better scheduling. */
27 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
32 /* Provide the appropriate dblalign builtin to shift two registers
33 based on the alignment of a pointer held in a third register. */
35 #define DBLALIGN __insn_dblalign
37 #define DBLALIGN __insn_dword_align
40 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
41 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
42 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
45 _wordcopy_fwd_aligned (dstp
, srcp
, len
)
55 a0
= ((op_t
*) srcp
)[0];
60 a1
= ((op_t
*) srcp
)[0];
65 a0
= ((op_t
*) srcp
)[0];
70 a1
= ((op_t
*) srcp
)[0];
75 a0
= ((op_t
*) srcp
)[0];
80 a1
= ((op_t
*) srcp
)[0];
86 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
88 a0
= ((op_t
*) srcp
)[0];
92 a1
= ((op_t
*) srcp
)[0];
95 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
97 goto do8
; /* No-op. */
103 a0
= ((op_t
*) srcp
)[0];
104 ((op_t
*) dstp
)[0] = a1
;
108 a1
= ((op_t
*) srcp
)[0];
109 ((op_t
*) dstp
)[0] = a0
;
113 a0
= ((op_t
*) srcp
)[0];
114 ((op_t
*) dstp
)[0] = a1
;
118 a1
= ((op_t
*) srcp
)[0];
119 ((op_t
*) dstp
)[0] = a0
;
123 a0
= ((op_t
*) srcp
)[0];
124 ((op_t
*) dstp
)[0] = a1
;
128 a1
= ((op_t
*) srcp
)[0];
129 ((op_t
*) dstp
)[0] = a0
;
133 a0
= ((op_t
*) srcp
)[0];
134 ((op_t
*) dstp
)[0] = a1
;
138 a1
= ((op_t
*) srcp
)[0];
139 ((op_t
*) dstp
)[0] = a0
;
147 /* This is the right position for do0. Please don't move it into the loop. */
150 ((op_t
*) dstp
)[0] = a1
;
153 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
154 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
155 DSTP should be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. */
159 _wordcopy_fwd_dest_aligned (dstp
, srcp
, len
)
167 /* Save the initial source pointer so we know the number of bytes to
168 shift for merging two unaligned results. */
169 srci
= (void *) srcp
;
171 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
172 it points in the middle of. */
178 a1
= ((op_t
*) srcp
)[0];
179 a2
= ((op_t
*) srcp
)[1];
184 a0
= ((op_t
*) srcp
)[0];
185 a1
= ((op_t
*) srcp
)[1];
190 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
192 a3
= ((op_t
*) srcp
)[0];
193 a0
= ((op_t
*) srcp
)[1];
198 a2
= ((op_t
*) srcp
)[0];
199 a3
= ((op_t
*) srcp
)[1];
202 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
204 goto do4
; /* No-op. */
210 a0
= ((op_t
*) srcp
)[0];
211 a2
= DBLALIGN (a2
, a3
, srci
);
212 ((op_t
*) dstp
)[0] = a2
;
216 a1
= ((op_t
*) srcp
)[0];
217 a3
= DBLALIGN (a3
, a0
, srci
);
218 ((op_t
*) dstp
)[0] = a3
;
222 a2
= ((op_t
*) srcp
)[0];
223 a0
= DBLALIGN (a0
, a1
, srci
);
224 ((op_t
*) dstp
)[0] = a0
;
228 a3
= ((op_t
*) srcp
)[0];
229 a1
= DBLALIGN (a1
, a2
, srci
);
230 ((op_t
*) dstp
)[0] = a1
;
237 /* This is the right position for do0. Please don't move it into the loop. */
240 ((op_t
*) dstp
)[0] = DBLALIGN (a2
, a3
, srci
);
243 /* _wordcopy_bwd_aligned -- Copy block finishing right before
244 SRCP to block finishing right before DSTP with LEN `op_t' words
245 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
246 operations on `op_t's. */
249 _wordcopy_bwd_aligned (dstp
, srcp
, len
)
257 srcp1
= srcp
- 1 * OPSIZ
;
264 a0
= ((op_t
*) srcp1
)[0];
268 a1
= ((op_t
*) srcp1
)[0];
272 a0
= ((op_t
*) srcp1
)[0];
276 a1
= ((op_t
*) srcp1
)[0];
280 a0
= ((op_t
*) srcp1
)[0];
284 a1
= ((op_t
*) srcp1
)[0];
289 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
291 a0
= ((op_t
*) srcp1
)[0];
294 a1
= ((op_t
*) srcp1
)[0];
296 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
298 goto do8
; /* No-op. */
304 a0
= ((op_t
*) srcp
)[0];
305 ((op_t
*) dstp
)[0] = a1
;
309 a1
= ((op_t
*) srcp
)[0];
310 ((op_t
*) dstp
)[0] = a0
;
314 a0
= ((op_t
*) srcp
)[0];
315 ((op_t
*) dstp
)[0] = a1
;
319 a1
= ((op_t
*) srcp
)[0];
320 ((op_t
*) dstp
)[0] = a0
;
324 a0
= ((op_t
*) srcp
)[0];
325 ((op_t
*) dstp
)[0] = a1
;
329 a1
= ((op_t
*) srcp
)[0];
330 ((op_t
*) dstp
)[0] = a0
;
334 a0
= ((op_t
*) srcp
)[0];
335 ((op_t
*) dstp
)[0] = a1
;
339 a1
= ((op_t
*) srcp
)[0];
340 ((op_t
*) dstp
)[0] = a0
;
348 /* This is the right position for do0. Please don't move it into the loop. */
351 ((op_t
*) dstp
)[0] = a1
;
354 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
355 before SRCP to block finishing right before DSTP with LEN `op_t'
356 words (not LEN bytes!). DSTP should be aligned for memory
357 operations on `op_t', but SRCP must *not* be aligned. */
360 _wordcopy_bwd_dest_aligned (dstp
, srcp
, len
)
369 /* Save the initial source pointer so we know the number of bytes to
370 shift for merging two unaligned results. */
371 srci
= (void *) srcp
;
373 /* Make SRCP aligned by rounding it down to the beginning of the op_t
374 it points in the middle of. */
383 b2
= ((op_t
*) srcp
)[2];
384 b1
= a1
= ((op_t
*) srcp
)[1];
390 b3
= ((op_t
*) srcp
)[2];
391 b2
= a2
= ((op_t
*) srcp
)[1];
395 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
399 b0
= ((op_t
*) srcp
)[2];
400 b3
= a3
= ((op_t
*) srcp
)[1];
405 b1
= ((op_t
*) srcp
)[2];
406 b0
= a0
= ((op_t
*) srcp
)[1];
408 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
410 goto do4
; /* No-op. */
416 b3
= a3
= ((op_t
*) srcp
)[0];
417 a0
= DBLALIGN (a0
, b1
, srci
);
418 ((op_t
*) dstp
)[0] = a0
;
422 b2
= a2
= ((op_t
*) srcp
)[0];
423 a3
= DBLALIGN (a3
, b0
, srci
);
424 ((op_t
*) dstp
)[0] = a3
;
428 b1
= a1
= ((op_t
*) srcp
)[0];
429 a2
= DBLALIGN (a2
, b3
, srci
);
430 ((op_t
*) dstp
)[0] = a2
;
434 b0
= a0
= ((op_t
*) srcp
)[0];
435 a1
= DBLALIGN (a1
, b2
, srci
);
436 ((op_t
*) dstp
)[0] = a1
;
444 /* This is the right position for do0. Please don't move it into the loop. */
447 a0
= DBLALIGN (a0
, b1
, srci
);
448 ((op_t
*) dstp
)[0] = a0
;