]>
Commit | Line | Data |
---|---|---|
28f540f4 | 1 | /* _memcopy.c -- subroutines for memory copy functions. |
b168057a | 2 | Copyright (C) 1991-2015 Free Software Foundation, Inc. |
41bdb6e2 | 3 | This file is part of the GNU C Library. |
28f540f4 RM |
4 | Contributed by Torbjorn Granlund (tege@sics.se). |
5 | ||
01c901a5 | 6 | The GNU C Library is free software; you can redistribute it and/or |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
28f540f4 | 10 | |
01c901a5 UD |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
28f540f4 | 15 | |
41bdb6e2 | 16 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
28f540f4 RM |
19 | |
20 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ | |
21 | ||
28f540f4 RM |
22 | #include <stddef.h> |
23 | #include <memcopy.h> | |
24 | ||
25 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to | |
26 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
27 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. DSTP and SRCP are addresses carried as long int and cast to op_t pointers at every access. The copy is unrolled eight words per iteration and is entered part-way through according to LEN % 8; each step loads one word and stores the word loaded by the previous step. */ | |
28 | ||
e28bcd42 AZ |
29 | #ifndef WORDCOPY_FWD_ALIGNED |
30 | # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned | |
31 | #endif | |
32 | ||
28f540f4 | 33 | void |
e28bcd42 | 34 | WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) |
28f540f4 RM |
35 | { |
36 | op_t a0, a1; /* Two-word pipeline: one holds the word just loaded, the other the word about to be stored. */ | |
37 | ||
38 | switch (len % 8) /* Pick the entry label of the unrolled loop from LEN % 8. */ | |
39 | { | |
40 | case 2: | |
41 | a0 = ((op_t *) srcp)[0]; /* Preload the first source word. */ | |
42 | srcp -= 6 * OPSIZ; /* Bias SRCP so that index 7 at do1 is the second source word. */ | |
43 | dstp -= 7 * OPSIZ; /* Bias DSTP so that index 7 at do1 is the first destination word. */ | |
44 | len += 6; /* Round LEN up to a multiple of 8 so the loop test reaches zero. */ | |
45 | goto do1; | |
46 | case 3: /* Same biasing scheme, entering at do2. */ | |
47 | a1 = ((op_t *) srcp)[0]; | |
48 | srcp -= 5 * OPSIZ; | |
49 | dstp -= 6 * OPSIZ; | |
50 | len += 5; | |
51 | goto do2; | |
52 | case 4: /* Same biasing scheme, entering at do3. */ | |
53 | a0 = ((op_t *) srcp)[0]; | |
54 | srcp -= 4 * OPSIZ; | |
55 | dstp -= 5 * OPSIZ; | |
56 | len += 4; | |
57 | goto do3; | |
58 | case 5: /* Same biasing scheme, entering at do4. */ | |
59 | a1 = ((op_t *) srcp)[0]; | |
60 | srcp -= 3 * OPSIZ; | |
61 | dstp -= 4 * OPSIZ; | |
62 | len += 3; | |
63 | goto do4; | |
64 | case 6: /* Same biasing scheme, entering at do5. */ | |
65 | a0 = ((op_t *) srcp)[0]; | |
66 | srcp -= 2 * OPSIZ; | |
67 | dstp -= 3 * OPSIZ; | |
68 | len += 2; | |
69 | goto do5; | |
70 | case 7: /* Same biasing scheme, entering at do6. */ | |
71 | a1 = ((op_t *) srcp)[0]; | |
72 | srcp -= 1 * OPSIZ; | |
73 | dstp -= 2 * OPSIZ; | |
74 | len += 1; | |
75 | goto do6; | |
01c901a5 | 76 | |
28f540f4 RM |
77 | case 0: |
78 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Zero words: nothing to copy. The test is only made when OP_T_THRES <= 3 * OPSIZ; OP_T_THRES is defined outside this file. */ | |
79 | return; | |
80 | a0 = ((op_t *) srcp)[0]; | |
81 | srcp -= 0 * OPSIZ; /* Subtracts zero: SRCP is unchanged. */ | |
82 | dstp -= 1 * OPSIZ; | |
83 | goto do7; /* LEN is already a multiple of 8, so it is not adjusted. */ | |
84 | case 1: | |
85 | a1 = ((op_t *) srcp)[0]; | |
86 | srcp -=-1 * OPSIZ; /* Subtracting a negated word size: net effect is SRCP moving forward one word. */ | |
87 | dstp -= 0 * OPSIZ; /* Subtracts zero: DSTP is unchanged. */ | |
88 | len -= 1; /* LEN % 8 was 1, so LEN is now a multiple of 8. */ | |
89 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Single-word copy: only the final store at do0 remains. */ | |
90 | goto do0; | |
91 | goto do8; /* No-op. */ | |
92 | } | |
93 | ||
94 | do | |
95 | { | |
96 | do8: | |
97 | a0 = ((op_t *) srcp)[0]; /* Load the next source word ... */ | |
98 | ((op_t *) dstp)[0] = a1; /* ... and store the word loaded one step earlier. */ | |
99 | do7: | |
100 | a1 = ((op_t *) srcp)[1]; | |
101 | ((op_t *) dstp)[1] = a0; | |
102 | do6: | |
103 | a0 = ((op_t *) srcp)[2]; | |
104 | ((op_t *) dstp)[2] = a1; | |
105 | do5: | |
106 | a1 = ((op_t *) srcp)[3]; | |
107 | ((op_t *) dstp)[3] = a0; | |
108 | do4: | |
109 | a0 = ((op_t *) srcp)[4]; | |
110 | ((op_t *) dstp)[4] = a1; | |
111 | do3: | |
112 | a1 = ((op_t *) srcp)[5]; | |
113 | ((op_t *) dstp)[5] = a0; | |
114 | do2: | |
115 | a0 = ((op_t *) srcp)[6]; | |
116 | ((op_t *) dstp)[6] = a1; | |
117 | do1: | |
118 | a1 = ((op_t *) srcp)[7]; | |
119 | ((op_t *) dstp)[7] = a0; | |
120 | ||
121 | srcp += 8 * OPSIZ; /* Step both addresses past the eight word slots of this iteration. */ | |
122 | dstp += 8 * OPSIZ; | |
123 | len -= 8; | |
124 | } | |
125 | while (len != 0); | |
126 | ||
127 | /* This is the right position for do0. Please don't move | |
128 | it into the loop. */ | |
129 | do0: | |
130 | ((op_t *) dstp)[0] = a1; /* Store the last loaded word, which is still pending in a1. */ | |
131 | } | |
132 | ||
133 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to | |
134 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
135 | DSTP should be aligned for memory operations on `op_t's, but SRCP must | |
136 | *not* be aligned. SRCP is rounded down to a word boundary and every destination word is produced by MERGE from two neighbouring aligned source words. The copy is unrolled four words per iteration and entered according to LEN % 4. MERGE, OPSIZ and OP_T_THRES are defined outside this file. NOTE(review): an aligned SRCP would make sh_2 equal the full bit width of a word; presumably that is an invalid shift count inside MERGE -- confirm against its definition. */ | |
137 | ||
e28bcd42 AZ |
138 | #ifndef WORDCOPY_FWD_DEST_ALIGNED |
139 | # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned | |
140 | #endif | |
141 | ||
28f540f4 | 142 | void |
e28bcd42 | 143 | WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
28f540f4 RM |
144 | { |
145 | op_t a0, a1, a2, a3; /* Rotating window of aligned source words; each output merges two neighbours. */ | |
146 | int sh_1, sh_2; /* Bit counts handed to MERGE. */ | |
147 | ||
148 | /* Calculate how to shift a word read at the memory operation | |
149 | aligned srcp to make it aligned for copy. */ | |
150 | ||
151 | sh_1 = 8 * (srcp % OPSIZ); /* Offset of SRCP inside its word, counted as 8 bits per byte. */ | |
152 | sh_2 = 8 * OPSIZ - sh_1; /* The complementary count: the rest of the word. */ | |
153 | ||
154 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' | |
155 | it points in the middle of. */ | |
156 | srcp &= -OPSIZ; | |
157 | ||
158 | switch (len % 4) /* Pick the entry label of the unrolled loop from LEN % 4. */ | |
159 | { | |
160 | case 2: | |
161 | a1 = ((op_t *) srcp)[0]; /* Preload the first two aligned source words. */ | |
162 | a2 = ((op_t *) srcp)[1]; | |
163 | srcp -= 1 * OPSIZ; /* Bias SRCP so that index 3 at do1 is the third aligned source word. */ | |
164 | dstp -= 3 * OPSIZ; /* Bias DSTP so that index 3 at do1 is the first destination word. */ | |
165 | len += 2; /* Round LEN up to a multiple of 4 so the loop test reaches zero. */ | |
166 | goto do1; | |
167 | case 3: /* Same biasing scheme, entering at do2. */ | |
168 | a0 = ((op_t *) srcp)[0]; | |
169 | a1 = ((op_t *) srcp)[1]; | |
170 | srcp -= 0 * OPSIZ; /* Subtracts zero: SRCP is unchanged. */ | |
171 | dstp -= 2 * OPSIZ; | |
172 | len += 1; | |
173 | goto do2; | |
174 | case 0: | |
175 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Zero words: nothing to copy. The test is only made when OP_T_THRES <= 3 * OPSIZ. */ | |
176 | return; | |
177 | a3 = ((op_t *) srcp)[0]; | |
178 | a0 = ((op_t *) srcp)[1]; | |
179 | srcp -=-1 * OPSIZ; /* Subtracting a negated word size: net effect is SRCP moving forward one word. */ | |
180 | dstp -= 1 * OPSIZ; | |
181 | len += 0; /* Adds zero: LEN is already a multiple of 4. */ | |
182 | goto do3; | |
183 | case 1: | |
184 | a2 = ((op_t *) srcp)[0]; | |
185 | a3 = ((op_t *) srcp)[1]; | |
186 | srcp -=-2 * OPSIZ; /* Net effect: SRCP moves forward two words. */ | |
187 | dstp -= 0 * OPSIZ; /* Subtracts zero: DSTP is unchanged. */ | |
188 | len -= 1; /* LEN % 4 was 1, so LEN is now a multiple of 4. */ | |
189 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Single-word copy: only the final merge and store at do0 remains. */ | |
190 | goto do0; | |
191 | goto do4; /* No-op. */ | |
192 | } | |
193 | ||
194 | do | |
195 | { | |
196 | do4: | |
197 | a0 = ((op_t *) srcp)[0]; /* Load the next aligned source word ... */ | |
198 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); /* ... and store a word merged from the two loaded before it. */ | |
199 | do3: | |
200 | a1 = ((op_t *) srcp)[1]; | |
201 | ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); | |
202 | do2: | |
203 | a2 = ((op_t *) srcp)[2]; | |
204 | ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); | |
205 | do1: | |
206 | a3 = ((op_t *) srcp)[3]; | |
207 | ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); | |
208 | ||
209 | srcp += 4 * OPSIZ; /* Step both addresses past the four word slots of this iteration. */ | |
210 | dstp += 4 * OPSIZ; | |
211 | len -= 4; | |
212 | } | |
213 | while (len != 0); | |
214 | ||
215 | /* This is the right position for do0. Please don't move | |
216 | it into the loop. */ | |
217 | do0: | |
218 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); /* Final destination word, merged from the last two loaded words. */ | |
219 | } | |
220 | ||
221 | /* _wordcopy_bwd_aligned -- Copy block finishing right before | |
222 | SRCP to block finishing right before DSTP with LEN `op_t' words | |
223 | (not LEN bytes!). Both SRCP and DSTP should be aligned for memory | |
224 | operations on `op_t's. DSTP and SRCP are addresses carried as long int and cast to op_t pointers at every access. Words are handled from the highest address down to the lowest; the copy is unrolled eight words per iteration and entered according to LEN % 8, each step loading one word and storing the word loaded by the previous step. */ | |
225 | ||
e28bcd42 AZ |
226 | #ifndef WORDCOPY_BWD_ALIGNED |
227 | # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned | |
228 | #endif | |
229 | ||
28f540f4 | 230 | void |
e28bcd42 | 231 | WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) |
28f540f4 RM |
232 | { |
233 | op_t a0, a1; /* Two-word pipeline: one holds the word just loaded, the other the word about to be stored. */ | |
234 | ||
235 | switch (len % 8) /* Pick the entry label of the unrolled loop from LEN % 8. */ | |
236 | { | |
237 | case 2: | |
238 | srcp -= 2 * OPSIZ; /* Bias SRCP so that index 0 at do1 is the second-to-last source word. */ | |
239 | dstp -= 1 * OPSIZ; /* Bias DSTP so that index 0 at do1 is the last destination word. */ | |
240 | a0 = ((op_t *) srcp)[1]; /* Preload the last source word. */ | |
241 | len += 6; /* Round LEN up to a multiple of 8 so the loop test reaches zero. */ | |
242 | goto do1; | |
243 | case 3: /* Same biasing scheme, entering at do2. */ | |
244 | srcp -= 3 * OPSIZ; | |
245 | dstp -= 2 * OPSIZ; | |
246 | a1 = ((op_t *) srcp)[2]; | |
247 | len += 5; | |
248 | goto do2; | |
249 | case 4: /* Same biasing scheme, entering at do3. */ | |
250 | srcp -= 4 * OPSIZ; | |
251 | dstp -= 3 * OPSIZ; | |
252 | a0 = ((op_t *) srcp)[3]; | |
253 | len += 4; | |
254 | goto do3; | |
255 | case 5: /* Same biasing scheme, entering at do4. */ | |
256 | srcp -= 5 * OPSIZ; | |
257 | dstp -= 4 * OPSIZ; | |
258 | a1 = ((op_t *) srcp)[4]; | |
259 | len += 3; | |
260 | goto do4; | |
261 | case 6: /* Same biasing scheme, entering at do5. */ | |
262 | srcp -= 6 * OPSIZ; | |
263 | dstp -= 5 * OPSIZ; | |
264 | a0 = ((op_t *) srcp)[5]; | |
265 | len += 2; | |
266 | goto do5; | |
267 | case 7: /* Same biasing scheme, entering at do6. */ | |
268 | srcp -= 7 * OPSIZ; | |
269 | dstp -= 6 * OPSIZ; | |
270 | a1 = ((op_t *) srcp)[6]; | |
271 | len += 1; | |
272 | goto do6; | |
01c901a5 | 273 | |
28f540f4 RM |
274 | case 0: |
275 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Zero words: nothing to copy. The test is only made when OP_T_THRES <= 3 * OPSIZ; OP_T_THRES is defined outside this file. */ | |
276 | return; | |
277 | srcp -= 8 * OPSIZ; | |
278 | dstp -= 7 * OPSIZ; | |
279 | a0 = ((op_t *) srcp)[7]; | |
280 | goto do7; /* LEN is already a multiple of 8, so it is not adjusted. */ | |
281 | case 1: | |
282 | srcp -= 9 * OPSIZ; | |
283 | dstp -= 8 * OPSIZ; | |
284 | a1 = ((op_t *) srcp)[8]; /* Index 8 after the bias is the last source word. */ | |
285 | len -= 1; /* LEN % 8 was 1, so LEN is now a multiple of 8. */ | |
286 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Single-word copy: only the final store at do0 remains. */ | |
287 | goto do0; | |
288 | goto do8; /* No-op. */ | |
289 | } | |
290 | ||
291 | do | |
292 | { | |
293 | do8: | |
294 | a0 = ((op_t *) srcp)[7]; /* Load the next lower source word ... */ | |
295 | ((op_t *) dstp)[7] = a1; /* ... and store the word loaded one step earlier. */ | |
296 | do7: | |
297 | a1 = ((op_t *) srcp)[6]; | |
298 | ((op_t *) dstp)[6] = a0; | |
299 | do6: | |
300 | a0 = ((op_t *) srcp)[5]; | |
301 | ((op_t *) dstp)[5] = a1; | |
302 | do5: | |
303 | a1 = ((op_t *) srcp)[4]; | |
304 | ((op_t *) dstp)[4] = a0; | |
305 | do4: | |
306 | a0 = ((op_t *) srcp)[3]; | |
307 | ((op_t *) dstp)[3] = a1; | |
308 | do3: | |
309 | a1 = ((op_t *) srcp)[2]; | |
310 | ((op_t *) dstp)[2] = a0; | |
311 | do2: | |
312 | a0 = ((op_t *) srcp)[1]; | |
313 | ((op_t *) dstp)[1] = a1; | |
314 | do1: | |
315 | a1 = ((op_t *) srcp)[0]; | |
316 | ((op_t *) dstp)[0] = a0; | |
317 | ||
318 | srcp -= 8 * OPSIZ; /* Step both addresses down past the eight word slots of this iteration. */ | |
319 | dstp -= 8 * OPSIZ; | |
320 | len -= 8; | |
321 | } | |
322 | while (len != 0); | |
323 | ||
324 | /* This is the right position for do0. Please don't move | |
325 | it into the loop. */ | |
326 | do0: | |
327 | ((op_t *) dstp)[7] = a1; /* Store the last loaded word, which is still pending in a1. */ | |
328 | } | |
329 | ||
330 | /* _wordcopy_bwd_dest_aligned -- Copy block finishing right | |
331 | before SRCP to block finishing right before DSTP with LEN `op_t' | |
332 | words (not LEN bytes!). DSTP should be aligned for memory | |
333 | operations on `op_t', but SRCP must *not* be aligned. SRCP is moved to the word boundary following it and every destination word is produced by MERGE from two neighbouring aligned source words, working from the highest address down. The copy is unrolled four words per iteration and entered according to LEN % 4. MERGE, OPSIZ and OP_T_THRES are defined outside this file. NOTE(review): an aligned SRCP would make sh_2 equal the full bit width of a word; presumably that is an invalid shift count inside MERGE -- confirm against its definition. */ | |
334 | ||
e28bcd42 AZ |
335 | #ifndef WORDCOPY_BWD_DEST_ALIGNED |
336 | # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned | |
337 | #endif | |
338 | ||
28f540f4 | 339 | void |
e28bcd42 | 340 | WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
28f540f4 RM |
341 | { |
342 | op_t a0, a1, a2, a3; /* Rotating window of aligned source words; each output merges two neighbours. */ | |
343 | int sh_1, sh_2; /* Bit counts handed to MERGE. */ | |
344 | ||
345 | /* Calculate how to shift a word read at the memory operation | |
346 | aligned srcp to make it aligned for copy. */ | |
347 | ||
348 | sh_1 = 8 * (srcp % OPSIZ); /* Offset of SRCP inside its word, counted as 8 bits per byte. */ | |
349 | sh_2 = 8 * OPSIZ - sh_1; /* The complementary count: the rest of the word. */ | |
350 | ||
351 | /* Make srcp aligned by rounding it down to the beginning of the op_t | |
352 | it points in the middle of. */ | |
353 | srcp &= -OPSIZ; | |
354 | srcp += OPSIZ; /* Then step to the start of the following op_t, so SRCP ends up just past the word holding the final source bytes. */ | |
355 | ||
356 | switch (len % 4) /* Pick the entry label of the unrolled loop from LEN % 4. */ | |
357 | { | |
358 | case 2: | |
359 | srcp -= 3 * OPSIZ; /* Bias SRCP so that index 2 is the last aligned source word. */ | |
360 | dstp -= 1 * OPSIZ; /* Bias DSTP so that index 0 at do1 is the last destination word. */ | |
361 | a2 = ((op_t *) srcp)[2]; /* Preload the two highest aligned source words. */ | |
362 | a1 = ((op_t *) srcp)[1]; | |
363 | len += 2; /* Round LEN up to a multiple of 4 so the loop test reaches zero. */ | |
364 | goto do1; | |
365 | case 3: /* Same biasing scheme, entering at do2. */ | |
366 | srcp -= 4 * OPSIZ; | |
367 | dstp -= 2 * OPSIZ; | |
368 | a3 = ((op_t *) srcp)[3]; | |
369 | a2 = ((op_t *) srcp)[2]; | |
370 | len += 1; | |
371 | goto do2; | |
372 | case 0: | |
373 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Zero words: nothing to copy. The test is only made when OP_T_THRES <= 3 * OPSIZ. */ | |
374 | return; | |
375 | srcp -= 5 * OPSIZ; | |
376 | dstp -= 3 * OPSIZ; | |
377 | a0 = ((op_t *) srcp)[4]; | |
378 | a3 = ((op_t *) srcp)[3]; | |
379 | goto do3; /* LEN is already a multiple of 4, so it is not adjusted. */ | |
380 | case 1: | |
381 | srcp -= 6 * OPSIZ; | |
382 | dstp -= 4 * OPSIZ; | |
383 | a1 = ((op_t *) srcp)[5]; | |
384 | a0 = ((op_t *) srcp)[4]; | |
385 | len -= 1; /* LEN % 4 was 1, so LEN is now a multiple of 4. */ | |
386 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Single-word copy: only the final merge and store at do0 remains. */ | |
387 | goto do0; | |
388 | goto do4; /* No-op. */ | |
389 | } | |
390 | ||
391 | do | |
392 | { | |
393 | do4: | |
394 | a3 = ((op_t *) srcp)[3]; /* Load the next lower aligned source word ... */ | |
395 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); /* ... and store a word merged from the two loaded before it. */ | |
396 | do3: | |
397 | a2 = ((op_t *) srcp)[2]; | |
398 | ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); | |
399 | do2: | |
400 | a1 = ((op_t *) srcp)[1]; | |
401 | ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); | |
402 | do1: | |
403 | a0 = ((op_t *) srcp)[0]; | |
404 | ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); | |
405 | ||
406 | srcp -= 4 * OPSIZ; /* Step both addresses down past the four word slots of this iteration. */ | |
407 | dstp -= 4 * OPSIZ; | |
408 | len -= 4; | |
409 | } | |
410 | while (len != 0); | |
411 | ||
412 | /* This is the right position for do0. Please don't move | |
413 | it into the loop. */ | |
414 | do0: | |
415 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); /* Final destination word, merged from the last two loaded words. */ | |
416 | } |