]> git.ipfire.org Git - thirdparty/glibc.git/blob - ports/sysdeps/tile/wordcopy.c
57898a53c17352f36888e01ebca0fdbc2badf2a3
[thirdparty/glibc.git] / ports / sysdeps / tile / wordcopy.c
1 /* wordcopy.c -- subroutines for memory copy functions. Tile version.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 /* To optimize for tile, we make the following changes from the
20 default glibc version:
21 - Use the double align instruction instead of the MERGE macro.
22 - Since we don't have offset addressing mode, make sure the loads /
23 stores in the inner loop always have indices of 0.
24 - Use post-increment addresses in the inner loops, which yields
25 better scheduling. */
26
27 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
28
29 #include <stddef.h>
30 #include <memcopy.h>
31
/* Provide the appropriate dblalign builtin to shift two registers
   based on the alignment of a pointer held in a third register.
   DBLALIGN expands to __insn_dblalign when __tilegx__ is defined and
   to __insn_dword_align otherwise.  The copy routines in this file
   invoke it as DBLALIGN (word, word, pointer).  */
#ifdef __tilegx__
#define DBLALIGN __insn_dblalign
#else
#define DBLALIGN __insn_dword_align
#endif
39
40 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
41 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
42 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
43
44 void
45 _wordcopy_fwd_aligned (dstp, srcp, len)
46 long int dstp;
47 long int srcp;
48 size_t len;
49 {
50 op_t a0, a1;
51
52 switch (len % 8)
53 {
54 case 2:
55 a0 = ((op_t *) srcp)[0];
56 srcp += OPSIZ;
57 len += 6;
58 goto do1;
59 case 3:
60 a1 = ((op_t *) srcp)[0];
61 srcp += OPSIZ;
62 len += 5;
63 goto do2;
64 case 4:
65 a0 = ((op_t *) srcp)[0];
66 srcp += OPSIZ;
67 len += 4;
68 goto do3;
69 case 5:
70 a1 = ((op_t *) srcp)[0];
71 srcp += OPSIZ;
72 len += 3;
73 goto do4;
74 case 6:
75 a0 = ((op_t *) srcp)[0];
76 srcp += OPSIZ;
77 len += 2;
78 goto do5;
79 case 7:
80 a1 = ((op_t *) srcp)[0];
81 srcp += OPSIZ;
82 len += 1;
83 goto do6;
84
85 case 0:
86 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
87 return;
88 a0 = ((op_t *) srcp)[0];
89 srcp += OPSIZ;
90 goto do7;
91 case 1:
92 a1 = ((op_t *) srcp)[0];
93 srcp += OPSIZ;
94 len -= 1;
95 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
96 goto do0;
97 goto do8; /* No-op. */
98 }
99
100 do
101 {
102 do8:
103 a0 = ((op_t *) srcp)[0];
104 ((op_t *) dstp)[0] = a1;
105 srcp += OPSIZ;
106 dstp += OPSIZ;
107 do7:
108 a1 = ((op_t *) srcp)[0];
109 ((op_t *) dstp)[0] = a0;
110 srcp += OPSIZ;
111 dstp += OPSIZ;
112 do6:
113 a0 = ((op_t *) srcp)[0];
114 ((op_t *) dstp)[0] = a1;
115 srcp += OPSIZ;
116 dstp += OPSIZ;
117 do5:
118 a1 = ((op_t *) srcp)[0];
119 ((op_t *) dstp)[0] = a0;
120 srcp += OPSIZ;
121 dstp += OPSIZ;
122 do4:
123 a0 = ((op_t *) srcp)[0];
124 ((op_t *) dstp)[0] = a1;
125 srcp += OPSIZ;
126 dstp += OPSIZ;
127 do3:
128 a1 = ((op_t *) srcp)[0];
129 ((op_t *) dstp)[0] = a0;
130 srcp += OPSIZ;
131 dstp += OPSIZ;
132 do2:
133 a0 = ((op_t *) srcp)[0];
134 ((op_t *) dstp)[0] = a1;
135 srcp += OPSIZ;
136 dstp += OPSIZ;
137 do1:
138 a1 = ((op_t *) srcp)[0];
139 ((op_t *) dstp)[0] = a0;
140 srcp += OPSIZ;
141 dstp += OPSIZ;
142
143 len -= 8;
144 }
145 while (len != 0);
146
147 /* This is the right position for do0. Please don't move
148 it into the loop. */
149 do0:
150 ((op_t *) dstp)[0] = a1;
151 }
152
153 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
154 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
155 DSTP should be aligned for memory operations on `op_t's, but SRCP must
156 *not* be aligned. */
157
158 void
159 _wordcopy_fwd_dest_aligned (dstp, srcp, len)
160 long int dstp;
161 long int srcp;
162 size_t len;
163 {
164 void * srci;
165 op_t a0, a1, a2, a3;
166
167 /* Save the initial source pointer so we know the number of bytes to
168 shift for merging two unaligned results. */
169 srci = (void *) srcp;
170
171 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
172 it points in the middle of. */
173 srcp &= -OPSIZ;
174
175 switch (len % 4)
176 {
177 case 2:
178 a1 = ((op_t *) srcp)[0];
179 a2 = ((op_t *) srcp)[1];
180 len += 2;
181 srcp += 2 * OPSIZ;
182 goto do1;
183 case 3:
184 a0 = ((op_t *) srcp)[0];
185 a1 = ((op_t *) srcp)[1];
186 len += 1;
187 srcp += 2 * OPSIZ;
188 goto do2;
189 case 0:
190 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
191 return;
192 a3 = ((op_t *) srcp)[0];
193 a0 = ((op_t *) srcp)[1];
194 len += 0;
195 srcp += 2 * OPSIZ;
196 goto do3;
197 case 1:
198 a2 = ((op_t *) srcp)[0];
199 a3 = ((op_t *) srcp)[1];
200 srcp += 2 * OPSIZ;
201 len -= 1;
202 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
203 goto do0;
204 goto do4; /* No-op. */
205 }
206
207 do
208 {
209 do4:
210 a0 = ((op_t *) srcp)[0];
211 a2 = DBLALIGN (a2, a3, srci);
212 ((op_t *) dstp)[0] = a2;
213 srcp += OPSIZ;
214 dstp += OPSIZ;
215 do3:
216 a1 = ((op_t *) srcp)[0];
217 a3 = DBLALIGN (a3, a0, srci);
218 ((op_t *) dstp)[0] = a3;
219 srcp += OPSIZ;
220 dstp += OPSIZ;
221 do2:
222 a2 = ((op_t *) srcp)[0];
223 a0 = DBLALIGN (a0, a1, srci);
224 ((op_t *) dstp)[0] = a0;
225 srcp += OPSIZ;
226 dstp += OPSIZ;
227 do1:
228 a3 = ((op_t *) srcp)[0];
229 a1 = DBLALIGN (a1, a2, srci);
230 ((op_t *) dstp)[0] = a1;
231 srcp += OPSIZ;
232 dstp += OPSIZ;
233 len -= 4;
234 }
235 while (len != 0);
236
237 /* This is the right position for do0. Please don't move
238 it into the loop. */
239 do0:
240 ((op_t *) dstp)[0] = DBLALIGN (a2, a3, srci);
241 }
242
243 /* _wordcopy_bwd_aligned -- Copy block finishing right before
244 SRCP to block finishing right before DSTP with LEN `op_t' words
245 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
246 operations on `op_t's. */
247
248 void
249 _wordcopy_bwd_aligned (dstp, srcp, len)
250 long int dstp;
251 long int srcp;
252 size_t len;
253 {
254 op_t a0, a1;
255 long int srcp1;
256
257 srcp1 = srcp - 1 * OPSIZ;
258 srcp -= 2 * OPSIZ;
259 dstp -= 1 * OPSIZ;
260
261 switch (len % 8)
262 {
263 case 2:
264 a0 = ((op_t *) srcp1)[0];
265 len += 6;
266 goto do1;
267 case 3:
268 a1 = ((op_t *) srcp1)[0];
269 len += 5;
270 goto do2;
271 case 4:
272 a0 = ((op_t *) srcp1)[0];
273 len += 4;
274 goto do3;
275 case 5:
276 a1 = ((op_t *) srcp1)[0];
277 len += 3;
278 goto do4;
279 case 6:
280 a0 = ((op_t *) srcp1)[0];
281 len += 2;
282 goto do5;
283 case 7:
284 a1 = ((op_t *) srcp1)[0];
285 len += 1;
286 goto do6;
287
288 case 0:
289 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
290 return;
291 a0 = ((op_t *) srcp1)[0];
292 goto do7;
293 case 1:
294 a1 = ((op_t *) srcp1)[0];
295 len -= 1;
296 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
297 goto do0;
298 goto do8; /* No-op. */
299 }
300
301 do
302 {
303 do8:
304 a0 = ((op_t *) srcp)[0];
305 ((op_t *) dstp)[0] = a1;
306 srcp -= OPSIZ;
307 dstp -= OPSIZ;
308 do7:
309 a1 = ((op_t *) srcp)[0];
310 ((op_t *) dstp)[0] = a0;
311 srcp -= OPSIZ;
312 dstp -= OPSIZ;
313 do6:
314 a0 = ((op_t *) srcp)[0];
315 ((op_t *) dstp)[0] = a1;
316 srcp -= OPSIZ;
317 dstp -= OPSIZ;
318 do5:
319 a1 = ((op_t *) srcp)[0];
320 ((op_t *) dstp)[0] = a0;
321 srcp -= OPSIZ;
322 dstp -= OPSIZ;
323 do4:
324 a0 = ((op_t *) srcp)[0];
325 ((op_t *) dstp)[0] = a1;
326 srcp -= OPSIZ;
327 dstp -= OPSIZ;
328 do3:
329 a1 = ((op_t *) srcp)[0];
330 ((op_t *) dstp)[0] = a0;
331 srcp -= OPSIZ;
332 dstp -= OPSIZ;
333 do2:
334 a0 = ((op_t *) srcp)[0];
335 ((op_t *) dstp)[0] = a1;
336 srcp -= OPSIZ;
337 dstp -= OPSIZ;
338 do1:
339 a1 = ((op_t *) srcp)[0];
340 ((op_t *) dstp)[0] = a0;
341 srcp -= OPSIZ;
342 dstp -= OPSIZ;
343
344 len -= 8;
345 }
346 while (len != 0);
347
348 /* This is the right position for do0. Please don't move
349 it into the loop. */
350 do0:
351 ((op_t *) dstp)[0] = a1;
352 }
353
354 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
355 before SRCP to block finishing right before DSTP with LEN `op_t'
356 words (not LEN bytes!). DSTP should be aligned for memory
357 operations on `op_t', but SRCP must *not* be aligned. */
358
359 void
360 _wordcopy_bwd_dest_aligned (dstp, srcp, len)
361 long int dstp;
362 long int srcp;
363 size_t len;
364 {
365 void * srci;
366 op_t a0, a1, a2, a3;
367 op_t b0, b1, b2, b3;
368
369 /* Save the initial source pointer so we know the number of bytes to
370 shift for merging two unaligned results. */
371 srci = (void *) srcp;
372
373 /* Make SRCP aligned by rounding it down to the beginning of the op_t
374 it points in the middle of. */
375 srcp &= -OPSIZ;
376 srcp += OPSIZ;
377
378 switch (len % 4)
379 {
380 case 2:
381 srcp -= 3 * OPSIZ;
382 dstp -= 1 * OPSIZ;
383 b2 = ((op_t *) srcp)[2];
384 b1 = a1 = ((op_t *) srcp)[1];
385 len += 2;
386 goto do1;
387 case 3:
388 srcp -= 3 * OPSIZ;
389 dstp -= 1 * OPSIZ;
390 b3 = ((op_t *) srcp)[2];
391 b2 = a2 = ((op_t *) srcp)[1];
392 len += 1;
393 goto do2;
394 case 0:
395 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
396 return;
397 srcp -= 3 * OPSIZ;
398 dstp -= 1 * OPSIZ;
399 b0 = ((op_t *) srcp)[2];
400 b3 = a3 = ((op_t *) srcp)[1];
401 goto do3;
402 case 1:
403 srcp -= 3 * OPSIZ;
404 dstp -= 1 * OPSIZ;
405 b1 = ((op_t *) srcp)[2];
406 b0 = a0 = ((op_t *) srcp)[1];
407 len -= 1;
408 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
409 goto do0;
410 goto do4; /* No-op. */
411 }
412
413 do
414 {
415 do4:
416 b3 = a3 = ((op_t *) srcp)[0];
417 a0 = DBLALIGN (a0, b1, srci);
418 ((op_t *) dstp)[0] = a0;
419 srcp -= OPSIZ;
420 dstp -= OPSIZ;
421 do3:
422 b2 = a2 = ((op_t *) srcp)[0];
423 a3 = DBLALIGN (a3, b0, srci);
424 ((op_t *) dstp)[0] = a3;
425 srcp -= OPSIZ;
426 dstp -= OPSIZ;
427 do2:
428 b1 = a1 = ((op_t *) srcp)[0];
429 a2 = DBLALIGN (a2, b3, srci);
430 ((op_t *) dstp)[0] = a2;
431 srcp -= OPSIZ;
432 dstp -= OPSIZ;
433 do1:
434 b0 = a0 = ((op_t *) srcp)[0];
435 a1 = DBLALIGN (a1, b2, srci);
436 ((op_t *) dstp)[0] = a1;
437 srcp -= OPSIZ;
438 dstp -= OPSIZ;
439
440 len -= 4;
441 }
442 while (len != 0);
443
444 /* This is the right position for do0. Please don't move
445 it into the loop. */
446 do0:
447 a0 = DBLALIGN (a0, b1, srci);
448 ((op_t *) dstp)[0] = a0;
449 }