]>
Commit | Line | Data |
---|---|---|
3af48cbd | 1 | /* memset with SSE2 |
bfff8b1b | 2 | Copyright (C) 2010-2017 Free Software Foundation, Inc. |
3af48cbd L |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
3af48cbd | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
3af48cbd L |
21 | |
22 | #include <sysdep.h> | |
23 | #include "asm-syntax.h" | |
24 | ||
25 | #define CFI_PUSH(REG) /* Unwind annotations for a 4-byte push of REG. */ \ | |
26 | cfi_adjust_cfa_offset (4); /* The push moved ESP down by 4. */ \ | |
27 | cfi_rel_offset (REG, 0) /* Record REG as saved at offset 0. */ | |
28 | ||
29 | #define CFI_POP(REG) /* Unwind annotations for a 4-byte pop of REG. */ \ | |
30 | cfi_adjust_cfa_offset (-4); /* The pop moved ESP up by 4. */ \ | |
31 | cfi_restore (REG) | |
32 | ||
33 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) /* Push REG and emit the matching unwind annotations. */ | |
34 | #define POP(REG) popl REG; CFI_POP (REG) /* Pop REG and emit the matching unwind annotations. */ | |
35 | ||
36 | #ifdef USE_AS_BZERO /* bzero build: the stack arguments are DEST and LEN only. */ | |
37 | # define DEST PARMS | |
38 | # define LEN DEST+4 | |
39 | # define SETRTNVAL /* Expands to nothing: EAX is not loaded with a return value. */ | |
40 | #else /* memset build: the stack arguments are DEST, CHR and LEN, 4 bytes apart. */ | |
41 | # define DEST PARMS | |
42 | # define CHR DEST+4 | |
43 | # define LEN CHR+4 | |
44 | # define SETRTNVAL movl DEST(%esp), %eax /* Reload the DEST argument into EAX as the return value. */ | |
45 | #endif | |
46 | ||
47 | #ifdef SHARED /* PIC build: EBX is used to locate the jump tables, so it is saved on entry. */ | |
48 | # define ENTRANCE PUSH (%ebx); | |
49 | # define RETURN_END POP (%ebx); ret | |
50 | # define RETURN RETURN_END; CFI_PUSH (%ebx) /* After the ret, re-assert the saved-EBX unwind state for the code that follows. */ | |
51 | # define PARMS 8 /* The return address and the saved EBX precede the arguments. */ | |
52 | # define JMPTBL(I, B) I - B /* Table entries are offsets from the table base B. */ | |
53 | ||
54 | /* Load an entry in a jump table into EBX and branch to it. TABLE is a | |
55 | jump table with relative offsets. ECX is the entry index and is also added to EDX before the jump. */ | |
56 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ | |
57 | /* We first load PC into EBX. */ \ | |
9a1d9254 | 58 | SETUP_PIC_REG(bx); \ |
3af48cbd L |
59 | /* Get the address of the jump table. */ \ |
60 | add $(TABLE - .), %ebx; \ | |
61 | /* Get the entry and convert the relative offset to the \ | |
62 | absolute address. */ \ | |
63 | add (%ebx,%ecx,4), %ebx; \ | |
64 | add %ecx, %edx; \ | |
c0c3f78a | 65 | /* We loaded the jump table and adjusted EDX. Go. */ \ |
3af48cbd | 66 | jmp *%ebx |
3af48cbd L |
67 | #else |
68 | # define ENTRANCE | |
69 | # define RETURN_END ret | |
70 | # define RETURN RETURN_END | |
71 | # define PARMS 4 /* Only the return address precedes the arguments. */ | |
72 | # define JMPTBL(I, B) I /* Table entries are absolute addresses. */ | |
73 | ||
74 | /* Branch to an entry in a jump table. TABLE is a jump table with | |
75 | absolute offsets. ECX is the entry index and is also added to EDX before the jump. */ | |
76 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ | |
77 | add %ecx, %edx; \ | |
78 | jmp *TABLE(,%ecx,4) | |
79 | #endif | |
80 | ||
81 | .section .text.sse2,"ax",@progbits | |
4f41c682 | 82 | #if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO |
3af48cbd L |
83 | ENTRY (__memset_chk_sse2) /* Checked entry point: compares two stack arguments, then continues into __memset_sse2. */ |
84 | movl 12(%esp), %eax /* EAX = third stack argument (the slot LEN names when PARMS is 4). */ | |
85 | cmpl %eax, 16(%esp) /* Compare the fourth stack argument (presumably the destination object size; confirm against callers) with EAX. */ | |
86 | jb HIDDEN_JUMPTARGET (__chk_fail) /* Unsigned: fourth argument < third argument, so report the failure. */ | |
87 | END (__memset_chk_sse2) /* No ret above: when the check passes, execution falls through into __memset_sse2 below. */ | |
88 | #endif | |
89 | ENTRY (__memset_sse2) /* Fill LEN bytes at DEST with the byte CHR (zero in the bzero build). ECX = length, EDX = destination, EAX = fill pattern. */ | |
90 | ENTRANCE /* Saves EBX in SHARED builds; expands to nothing otherwise. */ | |
91 | ||
92 | movl LEN(%esp), %ecx /* ECX = number of bytes to set. */ | |
93 | #ifdef USE_AS_BZERO | |
94 | xor %eax, %eax /* The fill pattern is zero. */ | |
95 | #else | |
96 | movzbl CHR(%esp), %eax /* EAX = fill byte, zero-extended. */ | |
97 | movb %al, %ah /* AX = fill byte in both halves. */ | |
98 | /* Fill the whole EAX with pattern. */ | |
99 | movl %eax, %edx /* EDX is a scratch copy here; it is reloaded with DEST below. */ | |
100 | shl $16, %eax | |
101 | or %edx, %eax /* EAX = fill byte replicated into all four bytes. */ | |
102 | #endif | |
103 | movl DEST(%esp), %edx /* EDX = destination pointer. */ | |
104 | cmp $32, %ecx | |
105 | jae L(32bytesormore) /* Unsigned: lengths of 32 or more take the SSE2 path. */ | |
106 | ||
107 | L(write_less32bytes): /* ECX is 0..31 here: dispatch on the exact length. */ | |
108 | BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) | |
109 | ||
110 | ||
111 | .pushsection .rodata.sse2,"a",@progbits | |
112 | ALIGN (2) | |
113 | L(table_less_32bytes): /* Entry N (N = 0..31) targets L(write_Nbytes); the entry encoding is given by JMPTBL. */ | |
114 | .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) | |
115 | .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) | |
116 | .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) | |
117 | .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) | |
118 | .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) | |
119 | .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) | |
120 | .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) | |
121 | .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) | |
122 | .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) | |
123 | .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) | |
124 | .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) | |
125 | .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) | |
126 | .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) | |
127 | .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) | |
128 | .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) | |
129 | .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) | |
130 | .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) | |
131 | .int JMPTBL (L(write_17bytes), L(table_less_32bytes)) | |
132 | .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) | |
133 | .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) | |
134 | .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) | |
135 | .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) | |
136 | .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) | |
137 | .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) | |
138 | .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) | |
139 | .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) | |
140 | .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) | |
141 | .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) | |
142 | .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) | |
143 | .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) | |
144 | .int JMPTBL (L(write_30bytes), L(table_less_32bytes)) | |
145 | .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) | |
146 | .popsection | |
147 | ||
148 | ALIGN (4) /* Tails for lengths 0, 4, ..., 28. EDX is one past the end of the buffer; each entry stores one dword and falls through to the next shorter length. */ | |
149 | L(write_28bytes): | |
150 | movl %eax, -28(%edx) | |
151 | L(write_24bytes): | |
152 | movl %eax, -24(%edx) | |
153 | L(write_20bytes): | |
154 | movl %eax, -20(%edx) | |
155 | L(write_16bytes): | |
156 | movl %eax, -16(%edx) | |
157 | L(write_12bytes): | |
158 | movl %eax, -12(%edx) | |
159 | L(write_8bytes): | |
160 | movl %eax, -8(%edx) | |
161 | L(write_4bytes): | |
162 | movl %eax, -4(%edx) | |
163 | L(write_0bytes): /* Nothing left to store. */ | |
164 | SETRTNVAL | |
165 | RETURN | |
166 | ||
167 | ALIGN (4) /* Tails for lengths 1, 5, ..., 29. EDX is one past the end of the buffer; fall-through dword stores, finished by a single byte store. */ | |
168 | L(write_29bytes): | |
169 | movl %eax, -29(%edx) | |
170 | L(write_25bytes): | |
171 | movl %eax, -25(%edx) | |
172 | L(write_21bytes): | |
173 | movl %eax, -21(%edx) | |
174 | L(write_17bytes): | |
175 | movl %eax, -17(%edx) | |
176 | L(write_13bytes): | |
177 | movl %eax, -13(%edx) | |
178 | L(write_9bytes): | |
179 | movl %eax, -9(%edx) | |
180 | L(write_5bytes): | |
181 | movl %eax, -5(%edx) | |
182 | L(write_1bytes): | |
183 | movb %al, -1(%edx) /* The last byte. */ | |
184 | SETRTNVAL | |
185 | RETURN | |
186 | ||
187 | ALIGN (4) /* Tails for lengths 2, 6, ..., 30. EDX is one past the end of the buffer; fall-through dword stores, finished by a single word store. */ | |
188 | L(write_30bytes): | |
189 | movl %eax, -30(%edx) | |
190 | L(write_26bytes): | |
191 | movl %eax, -26(%edx) | |
192 | L(write_22bytes): | |
193 | movl %eax, -22(%edx) | |
194 | L(write_18bytes): | |
195 | movl %eax, -18(%edx) | |
196 | L(write_14bytes): | |
197 | movl %eax, -14(%edx) | |
198 | L(write_10bytes): | |
199 | movl %eax, -10(%edx) | |
200 | L(write_6bytes): | |
201 | movl %eax, -6(%edx) | |
202 | L(write_2bytes): | |
203 | movw %ax, -2(%edx) /* The last two bytes. */ | |
204 | SETRTNVAL | |
205 | RETURN | |
206 | ||
207 | ALIGN (4) /* Tails for lengths 3, 7, ..., 31. EDX is one past the end of the buffer; fall-through dword stores, finished by a word store and a byte store. */ | |
208 | L(write_31bytes): | |
209 | movl %eax, -31(%edx) | |
210 | L(write_27bytes): | |
211 | movl %eax, -27(%edx) | |
212 | L(write_23bytes): | |
213 | movl %eax, -23(%edx) | |
214 | L(write_19bytes): | |
215 | movl %eax, -19(%edx) | |
216 | L(write_15bytes): | |
217 | movl %eax, -15(%edx) | |
218 | L(write_11bytes): | |
219 | movl %eax, -11(%edx) | |
220 | L(write_7bytes): | |
221 | movl %eax, -7(%edx) | |
222 | L(write_3bytes): | |
223 | movw %ax, -3(%edx) /* Bytes -3 and -2. */ | |
224 | movb %al, -1(%edx) /* The last byte. */ | |
225 | SETRTNVAL | |
226 | RETURN | |
227 | ||
228 | ALIGN (4) | |
229 | /* ECX >= 32. EDX is the destination and may have any alignment. */ | |
230 | L(32bytesormore): | |
231 | /* Fill xmm0 with the pattern. */ | |
232 | #ifdef USE_AS_BZERO | |
233 | pxor %xmm0, %xmm0 | |
234 | #else | |
235 | movd %eax, %xmm0 | |
3af48cbd L |
236 | pshufd $0, %xmm0, %xmm0 /* Broadcast the low dword to all four dwords of xmm0. */ |
237 | #endif | |
238 | testl $0xf, %edx | |
239 | jz L(aligned_16) /* Low four address bits clear: EDX is already 16-byte aligned. */ | |
240 | /* ECX >= 32 and EDX is not 16 byte aligned. */ | |
241 | L(not_aligned_16): | |
242 | movdqu %xmm0, (%edx) /* Unaligned store covering the first 16 bytes. */ | |
243 | movl %edx, %eax /* EAX = original destination (the pattern stays in xmm0). */ | |
244 | and $-16, %edx | |
245 | add $16, %edx /* EDX = next 16-byte boundary above the original destination. */ | |
246 | sub %edx, %eax /* EAX = -(bytes skipped to reach that boundary). */ | |
247 | add %eax, %ecx /* ECX = bytes remaining from the aligned EDX. */ | |
248 | movd %xmm0, %eax /* Restore the dword fill pattern in EAX for the tail stores. */ | |
249 | ||
250 | ALIGN (4) | |
251 | L(aligned_16): /* EDX is 16-byte aligned from here on. */ | |
252 | cmp $128, %ecx | |
cc50f1a4 | 253 | jae L(128bytesormore) |
3af48cbd L |
254 |
255 | L(aligned_16_less128bytes): /* ECX is below 128 here: dispatch on the exact remaining length. */ | |
256 | BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) | |
257 | ||
258 | ALIGN (4) | |
259 | L(128bytesormore): /* ECX >= 128, EDX 16-byte aligned. Choose a store loop by comparing ECX with two cache-size thresholds. */ | |
260 | #ifdef SHARED_CACHE_SIZE | |
261 | PUSH (%ebx) /* EBX is used as scratch for the threshold. */ | |
262 | mov $SHARED_CACHE_SIZE, %ebx | |
263 | #else | |
264 | # ifdef SHARED | |
9a1d9254 | 265 | SETUP_PIC_REG(bx) |
3af48cbd L |
266 | add $_GLOBAL_OFFSET_TABLE_, %ebx |
267 | mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx /* No PUSH on this path: ENTRANCE already saved EBX in SHARED builds. */ | |
268 | # else | |
269 | PUSH (%ebx) /* EBX is used as scratch for the threshold. */ | |
270 | mov __x86_shared_cache_size, %ebx | |
271 | # endif | |
272 | #endif | |
273 | cmp %ebx, %ecx | |
274 | jae L(128bytesormore_nt_start) /* Unsigned: ECX >= shared-cache threshold; EBX still holds the threshold there. */ | |
275 | ||
f9a97dda | 276 |
3af48cbd L |
277 | #ifdef DATA_CACHE_SIZE |
278 | POP (%ebx) /* The scratch EBX is no longer needed on this path. */ | |
cc50f1a4 | 279 | # define RESTORE_EBX_STATE CFI_PUSH (%ebx) |
3af48cbd L |
280 | cmp $DATA_CACHE_SIZE, %ecx |
281 | #else | |
282 | # ifdef SHARED | |
cc50f1a4 | 283 | # define RESTORE_EBX_STATE |
9a1d9254 | 284 | SETUP_PIC_REG(bx) |
3af48cbd L |
285 | add $_GLOBAL_OFFSET_TABLE_, %ebx |
286 | cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx | |
287 | # else | |
288 | POP (%ebx) /* The scratch EBX is no longer needed on this path. */ | |
cc50f1a4 | 289 | # define RESTORE_EBX_STATE CFI_PUSH (%ebx) |
3af48cbd L |
290 | cmp __x86_data_cache_size, %ecx |
291 | # endif | |
292 | #endif | |
293 | ||
294 | jae L(128bytes_L2_normal) /* Unsigned: ECX >= data-cache threshold, so use the prefetching loop. */ | |
295 | subl $128, %ecx /* Bias ECX by -128 (ECX >= 128 here), so a borrow from the in-loop sub means fewer than 128 bytes remain after the current chunk. */ | |
296 | L(128bytesormore_normal): /* Loop unrolled twice; each half stores 128 bytes with aligned stores. */ | |
297 | sub $128, %ecx | |
298 | movdqa %xmm0, (%edx) | |
299 | movdqa %xmm0, 0x10(%edx) | |
300 | movdqa %xmm0, 0x20(%edx) | |
301 | movdqa %xmm0, 0x30(%edx) | |
302 | movdqa %xmm0, 0x40(%edx) | |
303 | movdqa %xmm0, 0x50(%edx) | |
304 | movdqa %xmm0, 0x60(%edx) | |
305 | movdqa %xmm0, 0x70(%edx) | |
306 | lea 128(%edx), %edx /* lea advances EDX without changing the flags set by the sub above. */ | |
cc50f1a4 | 307 | jb L(128bytesless_normal) |
3af48cbd L |
308 |
309 | ||
310 | sub $128, %ecx | |
311 | movdqa %xmm0, (%edx) | |
312 | movdqa %xmm0, 0x10(%edx) | |
313 | movdqa %xmm0, 0x20(%edx) | |
314 | movdqa %xmm0, 0x30(%edx) | |
315 | movdqa %xmm0, 0x40(%edx) | |
316 | movdqa %xmm0, 0x50(%edx) | |
317 | movdqa %xmm0, 0x60(%edx) | |
318 | movdqa %xmm0, 0x70(%edx) | |
319 | lea 128(%edx), %edx /* Again lea, so jae tests the flags from the sub. */ | |
cc50f1a4 | 320 | jae L(128bytesormore_normal) |
3af48cbd L |
321 |
322 | L(128bytesless_normal): | |
cc50f1a4 | 323 | add $128, %ecx /* Undo the bias: ECX = bytes still to write (below 128). */ |
3af48cbd L |
324 | BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) |
325 | ||
326 | ALIGN (4) | |
327 | L(128bytes_L2_normal): /* Loop for ECX at or above the data-cache threshold but below the shared-cache threshold: 128 bytes per iteration with prefetch. */ | |
328 | prefetcht0 0x380(%edx) /* Prefetch the two 64-byte-apart addresses 896 and 960 bytes ahead. */ | |
329 | prefetcht0 0x3c0(%edx) | |
330 | sub $128, %ecx | |
331 | movdqa %xmm0, (%edx) | |
332 | movaps %xmm0, 0x10(%edx) | |
333 | movaps %xmm0, 0x20(%edx) | |
334 | movaps %xmm0, 0x30(%edx) | |
335 | movaps %xmm0, 0x40(%edx) | |
336 | movaps %xmm0, 0x50(%edx) | |
337 | movaps %xmm0, 0x60(%edx) | |
338 | movaps %xmm0, 0x70(%edx) | |
339 | add $128, %edx | |
f9a97dda | 340 | cmp $128, %ecx |
cc50f1a4 | 341 | jae L(128bytes_L2_normal) /* Unsigned: repeat while at least 128 bytes remain. */ |
3af48cbd L |
342 |
343 | L(128bytesless_L2_normal): /* ECX is below 128 here: finish through the tail table. */ | |
344 | BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) | |
345 | ||
cc50f1a4 | 346 | RESTORE_EBX_STATE /* Unwind annotations only: the label below is entered with EBX still pushed in the builds where the preceding code popped it. */ |
3af48cbd L |
347 | L(128bytesormore_nt_start): /* ECX >= EBX, where EBX is the shared-cache threshold. */ |
348 | sub %ebx, %ecx /* ECX = bytes beyond the threshold; EBX = bytes to write with ordinary stores first. NOTE(review): a remainder of EBX below 128 is never added back to ECX, so this appears to rely on the threshold being a multiple of 128; confirm. */ | |
349 | ALIGN (4) | |
350 | L(128bytesormore_shared_cache_loop): /* Ordinary aligned stores, 128 bytes per iteration, counted down in EBX. */ | |
351 | prefetcht0 0x3c0(%edx) | |
352 | prefetcht0 0x380(%edx) | |
353 | sub $0x80, %ebx | |
354 | movdqa %xmm0, (%edx) | |
355 | movdqa %xmm0, 0x10(%edx) | |
356 | movdqa %xmm0, 0x20(%edx) | |
357 | movdqa %xmm0, 0x30(%edx) | |
358 | movdqa %xmm0, 0x40(%edx) | |
359 | movdqa %xmm0, 0x50(%edx) | |
360 | movdqa %xmm0, 0x60(%edx) | |
361 | movdqa %xmm0, 0x70(%edx) | |
362 | add $0x80, %edx | |
363 | cmp $0x80, %ebx | |
cc50f1a4 | 364 | jae L(128bytesormore_shared_cache_loop) |
3af48cbd L |
365 | cmp $0x80, %ecx |
366 | jb L(shared_cache_loop_end) /* Fewer than 128 bytes beyond the threshold: skip the non-temporal loop. */ | |
367 | ALIGN (4) | |
368 | L(128bytesormore_nt): /* Non-temporal stores for the bytes beyond the threshold, 128 per iteration. */ | |
369 | sub $0x80, %ecx | |
370 | movntdq %xmm0, (%edx) | |
371 | movntdq %xmm0, 0x10(%edx) | |
372 | movntdq %xmm0, 0x20(%edx) | |
373 | movntdq %xmm0, 0x30(%edx) | |
374 | movntdq %xmm0, 0x40(%edx) | |
375 | movntdq %xmm0, 0x50(%edx) | |
376 | movntdq %xmm0, 0x60(%edx) | |
377 | movntdq %xmm0, 0x70(%edx) | |
378 | add $0x80, %edx | |
379 | cmp $0x80, %ecx | |
cc50f1a4 | 380 | jae L(128bytesormore_nt) |
3af48cbd L |
381 | sfence /* Store fence after the non-temporal stores. */ |
382 | L(shared_cache_loop_end): /* ECX is below 128 here. */ | |
383 | #if defined DATA_CACHE_SIZE || !defined SHARED | |
384 | POP (%ebx) /* NOTE(review): the PUSH at L(128bytesormore) is guarded by SHARED_CACHE_SIZE / !SHARED, while this POP is guarded by DATA_CACHE_SIZE / !SHARED; presumably both macros are always defined together, confirm. */ | |
385 | #endif | |
386 | BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) | |
387 | ||
388 | ||
389 | .pushsection .rodata.sse2,"a",@progbits | |
390 | ALIGN (2) | |
391 | L(table_16_128bytes): /* Entry N (N = 0..127) targets L(aligned_16_Nbytes); the entry encoding is given by JMPTBL. */ | |
392 | .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) | |
393 | .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) | |
394 | .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) | |
395 | .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) | |
396 | .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) | |
397 | .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) | |
398 | .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) | |
399 | .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) | |
400 | .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) | |
401 | .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) | |
402 | .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) | |
403 | .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes)) | |
404 | .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes)) | |
405 | .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) | |
406 | .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) | |
407 | .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) | |
408 | .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) | |
409 | .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) | |
410 | .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) | |
411 | .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) | |
412 | .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) | |
413 | .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) | |
414 | .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) | |
415 | .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) | |
416 | .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) | |
417 | .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) | |
418 | .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) | |
419 | .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) | |
420 | .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) | |
421 | .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) | |
422 | .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) | |
423 | .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) | |
424 | .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) | |
425 | .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) | |
426 | .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) | |
427 | .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) | |
428 | .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) | |
429 | .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) | |
430 | .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) | |
431 | .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) | |
432 | .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) | |
433 | .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) | |
434 | .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) | |
435 | .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) | |
436 | .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes)) | |
437 | .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes)) | |
438 | .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) | |
439 | .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) | |
440 | .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) | |
441 | .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) | |
442 | .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) | |
443 | .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) | |
444 | .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) | |
445 | .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) | |
446 | .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) | |
447 | .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) | |
448 | .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) | |
449 | .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) | |
450 | .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) | |
451 | .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) | |
452 | .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) | |
453 | .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) | |
454 | .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) | |
455 | .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) | |
456 | .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) | |
457 | .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) | |
458 | .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) | |
459 | .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) | |
460 | .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) | |
461 | .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) | |
462 | .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) | |
463 | .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) | |
464 | .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) | |
465 | .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) | |
466 | .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) | |
467 | .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) | |
468 | .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) | |
469 | .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes)) | |
470 | .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes)) | |
471 | .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) | |
472 | .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) | |
473 | .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) | |
474 | .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) | |
475 | .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) | |
476 | .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) | |
477 | .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) | |
478 | .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) | |
479 | .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) | |
480 | .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) | |
481 | .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) | |
482 | .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) | |
483 | .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) | |
484 | .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) | |
485 | .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) | |
486 | .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) | |
487 | .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) | |
488 | .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) | |
489 | .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) | |
490 | .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) | |
491 | .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) | |
492 | .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) | |
493 | .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) | |
494 | .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) | |
495 | .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) | |
496 | .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) | |
497 | .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) | |
498 | .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) | |
499 | .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) | |
500 | .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) | |
501 | .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) | |
502 | .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes)) | |
503 | .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes)) | |
504 | .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) | |
505 | .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) | |
506 | .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) | |
507 | .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) | |
508 | .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) | |
509 | .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) | |
510 | .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) | |
511 | .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) | |
512 | .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) | |
513 | .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) | |
514 | .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) | |
515 | .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) | |
516 | .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) | |
517 | .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) | |
518 | .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) | |
519 | .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) | |
520 | .popsection | |
521 | ||
522 | ALIGN (4) /* Tails for 16k remaining bytes (k = 0..7). EDX is one past the end of the buffer and is 16-byte aligned here; each entry stores 16 bytes and falls through. */ | |
523 | L(aligned_16_112bytes): | |
524 | movdqa %xmm0, -112(%edx) | |
525 | L(aligned_16_96bytes): | |
526 | movdqa %xmm0, -96(%edx) | |
527 | L(aligned_16_80bytes): | |
528 | movdqa %xmm0, -80(%edx) | |
529 | L(aligned_16_64bytes): | |
530 | movdqa %xmm0, -64(%edx) | |
531 | L(aligned_16_48bytes): | |
532 | movdqa %xmm0, -48(%edx) | |
533 | L(aligned_16_32bytes): | |
534 | movdqa %xmm0, -32(%edx) | |
535 | L(aligned_16_16bytes): | |
536 | movdqa %xmm0, -16(%edx) | |
537 | L(aligned_16_0bytes): /* Nothing left to store. */ | |
538 | SETRTNVAL | |
539 | RETURN | |
540 | ||
541 | ALIGN (4) /* Tails for 16k+1 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last byte is stored separately. */ | |
542 | L(aligned_16_113bytes): | |
543 | movdqa %xmm0, -113(%edx) | |
544 | L(aligned_16_97bytes): | |
545 | movdqa %xmm0, -97(%edx) | |
546 | L(aligned_16_81bytes): | |
547 | movdqa %xmm0, -81(%edx) | |
548 | L(aligned_16_65bytes): | |
549 | movdqa %xmm0, -65(%edx) | |
550 | L(aligned_16_49bytes): | |
551 | movdqa %xmm0, -49(%edx) | |
552 | L(aligned_16_33bytes): | |
553 | movdqa %xmm0, -33(%edx) | |
554 | L(aligned_16_17bytes): | |
555 | movdqa %xmm0, -17(%edx) | |
556 | L(aligned_16_1bytes): | |
557 | movb %al, -1(%edx) /* The last byte. */ | |
558 | SETRTNVAL | |
559 | RETURN | |
560 | ||
561 | ALIGN (4) /* Tails for 16k+2 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last two bytes are stored as one word. */ | |
562 | L(aligned_16_114bytes): | |
563 | movdqa %xmm0, -114(%edx) | |
564 | L(aligned_16_98bytes): | |
565 | movdqa %xmm0, -98(%edx) | |
566 | L(aligned_16_82bytes): | |
567 | movdqa %xmm0, -82(%edx) | |
568 | L(aligned_16_66bytes): | |
569 | movdqa %xmm0, -66(%edx) | |
570 | L(aligned_16_50bytes): | |
571 | movdqa %xmm0, -50(%edx) | |
572 | L(aligned_16_34bytes): | |
573 | movdqa %xmm0, -34(%edx) | |
574 | L(aligned_16_18bytes): | |
575 | movdqa %xmm0, -18(%edx) | |
576 | L(aligned_16_2bytes): | |
577 | movw %ax, -2(%edx) /* The last two bytes. */ | |
578 | SETRTNVAL | |
579 | RETURN | |
580 | ||
581 | ALIGN (4) /* Tails for 16k+3 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last three bytes are a word plus a byte. */ | |
582 | L(aligned_16_115bytes): | |
583 | movdqa %xmm0, -115(%edx) | |
584 | L(aligned_16_99bytes): | |
585 | movdqa %xmm0, -99(%edx) | |
586 | L(aligned_16_83bytes): | |
587 | movdqa %xmm0, -83(%edx) | |
588 | L(aligned_16_67bytes): | |
589 | movdqa %xmm0, -67(%edx) | |
590 | L(aligned_16_51bytes): | |
591 | movdqa %xmm0, -51(%edx) | |
592 | L(aligned_16_35bytes): | |
593 | movdqa %xmm0, -35(%edx) | |
594 | L(aligned_16_19bytes): | |
595 | movdqa %xmm0, -19(%edx) | |
596 | L(aligned_16_3bytes): | |
597 | movw %ax, -3(%edx) /* Bytes -3 and -2. */ | |
598 | movb %al, -1(%edx) /* The last byte. */ | |
599 | SETRTNVAL | |
600 | RETURN | |
601 | ||
602 | ALIGN (4) /* Tails for 16k+4 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last four bytes are stored as one dword. */ | |
603 | L(aligned_16_116bytes): | |
604 | movdqa %xmm0, -116(%edx) | |
605 | L(aligned_16_100bytes): | |
606 | movdqa %xmm0, -100(%edx) | |
607 | L(aligned_16_84bytes): | |
608 | movdqa %xmm0, -84(%edx) | |
609 | L(aligned_16_68bytes): | |
610 | movdqa %xmm0, -68(%edx) | |
611 | L(aligned_16_52bytes): | |
612 | movdqa %xmm0, -52(%edx) | |
613 | L(aligned_16_36bytes): | |
614 | movdqa %xmm0, -36(%edx) | |
615 | L(aligned_16_20bytes): | |
616 | movdqa %xmm0, -20(%edx) | |
617 | L(aligned_16_4bytes): | |
618 | movl %eax, -4(%edx) /* The last four bytes. */ | |
619 | SETRTNVAL | |
620 | RETURN | |
621 | ||
622 | ALIGN (4) /* Tails for 16k+5 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last five bytes are a dword plus a byte. */ | |
623 | L(aligned_16_117bytes): | |
624 | movdqa %xmm0, -117(%edx) | |
625 | L(aligned_16_101bytes): | |
626 | movdqa %xmm0, -101(%edx) | |
627 | L(aligned_16_85bytes): | |
628 | movdqa %xmm0, -85(%edx) | |
629 | L(aligned_16_69bytes): | |
630 | movdqa %xmm0, -69(%edx) | |
631 | L(aligned_16_53bytes): | |
632 | movdqa %xmm0, -53(%edx) | |
633 | L(aligned_16_37bytes): | |
634 | movdqa %xmm0, -37(%edx) | |
635 | L(aligned_16_21bytes): | |
636 | movdqa %xmm0, -21(%edx) | |
637 | L(aligned_16_5bytes): | |
638 | movl %eax, -5(%edx) /* Bytes -5..-2. */ | |
639 | movb %al, -1(%edx) /* The last byte. */ | |
640 | SETRTNVAL | |
641 | RETURN | |
642 | ||
643 | ALIGN (4) /* Tails for 16k+6 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last six bytes are a dword plus a word. */ | |
644 | L(aligned_16_118bytes): | |
645 | movdqa %xmm0, -118(%edx) | |
646 | L(aligned_16_102bytes): | |
647 | movdqa %xmm0, -102(%edx) | |
648 | L(aligned_16_86bytes): | |
649 | movdqa %xmm0, -86(%edx) | |
650 | L(aligned_16_70bytes): | |
651 | movdqa %xmm0, -70(%edx) | |
652 | L(aligned_16_54bytes): | |
653 | movdqa %xmm0, -54(%edx) | |
654 | L(aligned_16_38bytes): | |
655 | movdqa %xmm0, -38(%edx) | |
656 | L(aligned_16_22bytes): | |
657 | movdqa %xmm0, -22(%edx) | |
658 | L(aligned_16_6bytes): | |
659 | movl %eax, -6(%edx) /* Bytes -6..-3. */ | |
660 | movw %ax, -2(%edx) /* The last two bytes. */ | |
661 | SETRTNVAL | |
662 | RETURN | |
663 | ||
664 | ALIGN (4) /* Tails for 16k+7 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last seven bytes are a dword, a word and a byte. */ | |
665 | L(aligned_16_119bytes): | |
666 | movdqa %xmm0, -119(%edx) | |
667 | L(aligned_16_103bytes): | |
668 | movdqa %xmm0, -103(%edx) | |
669 | L(aligned_16_87bytes): | |
670 | movdqa %xmm0, -87(%edx) | |
671 | L(aligned_16_71bytes): | |
672 | movdqa %xmm0, -71(%edx) | |
673 | L(aligned_16_55bytes): | |
674 | movdqa %xmm0, -55(%edx) | |
675 | L(aligned_16_39bytes): | |
676 | movdqa %xmm0, -39(%edx) | |
677 | L(aligned_16_23bytes): | |
678 | movdqa %xmm0, -23(%edx) | |
679 | L(aligned_16_7bytes): | |
680 | movl %eax, -7(%edx) /* Bytes -7..-4. */ | |
681 | movw %ax, -3(%edx) /* Bytes -3 and -2. */ | |
682 | movb %al, -1(%edx) /* The last byte. */ | |
683 | SETRTNVAL | |
684 | RETURN | |
685 | ||
686 | ALIGN (4) /* Tails for 16k+8 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last eight bytes come from the low quadword of xmm0. */ | |
687 | L(aligned_16_120bytes): | |
688 | movdqa %xmm0, -120(%edx) | |
689 | L(aligned_16_104bytes): | |
690 | movdqa %xmm0, -104(%edx) | |
691 | L(aligned_16_88bytes): | |
692 | movdqa %xmm0, -88(%edx) | |
693 | L(aligned_16_72bytes): | |
694 | movdqa %xmm0, -72(%edx) | |
695 | L(aligned_16_56bytes): | |
696 | movdqa %xmm0, -56(%edx) | |
697 | L(aligned_16_40bytes): | |
698 | movdqa %xmm0, -40(%edx) | |
699 | L(aligned_16_24bytes): | |
700 | movdqa %xmm0, -24(%edx) | |
701 | L(aligned_16_8bytes): | |
702 | movq %xmm0, -8(%edx) /* The last eight bytes. */ | |
703 | SETRTNVAL | |
704 | RETURN | |
705 | ||
706 | ALIGN (4) /* Tails for 16k+9 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last nine bytes are a quadword plus a byte. */ | |
707 | L(aligned_16_121bytes): | |
708 | movdqa %xmm0, -121(%edx) | |
709 | L(aligned_16_105bytes): | |
710 | movdqa %xmm0, -105(%edx) | |
711 | L(aligned_16_89bytes): | |
712 | movdqa %xmm0, -89(%edx) | |
713 | L(aligned_16_73bytes): | |
714 | movdqa %xmm0, -73(%edx) | |
715 | L(aligned_16_57bytes): | |
716 | movdqa %xmm0, -57(%edx) | |
717 | L(aligned_16_41bytes): | |
718 | movdqa %xmm0, -41(%edx) | |
719 | L(aligned_16_25bytes): | |
720 | movdqa %xmm0, -25(%edx) | |
721 | L(aligned_16_9bytes): | |
722 | movq %xmm0, -9(%edx) /* Bytes -9..-2. */ | |
723 | movb %al, -1(%edx) /* The last byte. */ | |
724 | SETRTNVAL | |
725 | RETURN | |
726 | ||
727 | ALIGN (4) /* Tails for 16k+10 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last ten bytes are a quadword plus a word. */ | |
728 | L(aligned_16_122bytes): | |
729 | movdqa %xmm0, -122(%edx) | |
730 | L(aligned_16_106bytes): | |
731 | movdqa %xmm0, -106(%edx) | |
732 | L(aligned_16_90bytes): | |
733 | movdqa %xmm0, -90(%edx) | |
734 | L(aligned_16_74bytes): | |
735 | movdqa %xmm0, -74(%edx) | |
736 | L(aligned_16_58bytes): | |
737 | movdqa %xmm0, -58(%edx) | |
738 | L(aligned_16_42bytes): | |
739 | movdqa %xmm0, -42(%edx) | |
740 | L(aligned_16_26bytes): | |
741 | movdqa %xmm0, -26(%edx) | |
742 | L(aligned_16_10bytes): | |
743 | movq %xmm0, -10(%edx) /* Bytes -10..-3. */ | |
744 | movw %ax, -2(%edx) /* The last two bytes. */ | |
745 | SETRTNVAL | |
746 | RETURN | |
747 | ||
748 | ALIGN (4) /* Tails for 16k+11 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last eleven bytes are a quadword, a word and a byte. */ | |
749 | L(aligned_16_123bytes): | |
750 | movdqa %xmm0, -123(%edx) | |
751 | L(aligned_16_107bytes): | |
752 | movdqa %xmm0, -107(%edx) | |
753 | L(aligned_16_91bytes): | |
754 | movdqa %xmm0, -91(%edx) | |
755 | L(aligned_16_75bytes): | |
756 | movdqa %xmm0, -75(%edx) | |
757 | L(aligned_16_59bytes): | |
758 | movdqa %xmm0, -59(%edx) | |
759 | L(aligned_16_43bytes): | |
760 | movdqa %xmm0, -43(%edx) | |
761 | L(aligned_16_27bytes): | |
762 | movdqa %xmm0, -27(%edx) | |
763 | L(aligned_16_11bytes): | |
764 | movq %xmm0, -11(%edx) /* Bytes -11..-4. */ | |
765 | movw %ax, -3(%edx) /* Bytes -3 and -2. */ | |
766 | movb %al, -1(%edx) /* The last byte. */ | |
767 | SETRTNVAL | |
768 | RETURN | |
769 | ||
770 | ALIGN (4) /* Tails for 16k+12 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last twelve bytes are a quadword plus a dword. */ | |
771 | L(aligned_16_124bytes): | |
772 | movdqa %xmm0, -124(%edx) | |
773 | L(aligned_16_108bytes): | |
774 | movdqa %xmm0, -108(%edx) | |
775 | L(aligned_16_92bytes): | |
776 | movdqa %xmm0, -92(%edx) | |
777 | L(aligned_16_76bytes): | |
778 | movdqa %xmm0, -76(%edx) | |
779 | L(aligned_16_60bytes): | |
780 | movdqa %xmm0, -60(%edx) | |
781 | L(aligned_16_44bytes): | |
782 | movdqa %xmm0, -44(%edx) | |
783 | L(aligned_16_28bytes): | |
784 | movdqa %xmm0, -28(%edx) | |
785 | L(aligned_16_12bytes): | |
786 | movq %xmm0, -12(%edx) /* Bytes -12..-5. */ | |
787 | movl %eax, -4(%edx) /* The last four bytes. */ | |
788 | SETRTNVAL | |
789 | RETURN | |
790 | ||
791 | ALIGN (4) /* Tails for 16k+13 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last thirteen bytes are a quadword, a dword and a byte. */ | |
792 | L(aligned_16_125bytes): | |
793 | movdqa %xmm0, -125(%edx) | |
794 | L(aligned_16_109bytes): | |
795 | movdqa %xmm0, -109(%edx) | |
796 | L(aligned_16_93bytes): | |
797 | movdqa %xmm0, -93(%edx) | |
798 | L(aligned_16_77bytes): | |
799 | movdqa %xmm0, -77(%edx) | |
800 | L(aligned_16_61bytes): | |
801 | movdqa %xmm0, -61(%edx) | |
802 | L(aligned_16_45bytes): | |
803 | movdqa %xmm0, -45(%edx) | |
804 | L(aligned_16_29bytes): | |
805 | movdqa %xmm0, -29(%edx) | |
806 | L(aligned_16_13bytes): | |
807 | movq %xmm0, -13(%edx) /* Bytes -13..-6. */ | |
808 | movl %eax, -5(%edx) /* Bytes -5..-2. */ | |
809 | movb %al, -1(%edx) /* The last byte. */ | |
810 | SETRTNVAL | |
811 | RETURN | |
812 | ||
813 | ALIGN (4) /* Tails for 16k+14 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last fourteen bytes are a quadword, a dword and a word. */ | |
814 | L(aligned_16_126bytes): | |
815 | movdqa %xmm0, -126(%edx) | |
816 | L(aligned_16_110bytes): | |
817 | movdqa %xmm0, -110(%edx) | |
818 | L(aligned_16_94bytes): | |
819 | movdqa %xmm0, -94(%edx) | |
820 | L(aligned_16_78bytes): | |
821 | movdqa %xmm0, -78(%edx) | |
822 | L(aligned_16_62bytes): | |
823 | movdqa %xmm0, -62(%edx) | |
824 | L(aligned_16_46bytes): | |
825 | movdqa %xmm0, -46(%edx) | |
826 | L(aligned_16_30bytes): | |
827 | movdqa %xmm0, -30(%edx) | |
828 | L(aligned_16_14bytes): | |
829 | movq %xmm0, -14(%edx) /* Bytes -14..-7. */ | |
830 | movl %eax, -6(%edx) /* Bytes -6..-3. */ | |
831 | movw %ax, -2(%edx) /* The last two bytes. */ | |
832 | SETRTNVAL | |
833 | RETURN | |
834 | ||
835 | ALIGN (4) /* Tails for 16k+15 remaining bytes. EDX = 16-byte-aligned cursor + count, so each movdqa address below is 16-byte aligned; the last fifteen bytes are a quadword, a dword, a word and a byte. */ | |
836 | L(aligned_16_127bytes): | |
837 | movdqa %xmm0, -127(%edx) | |
838 | L(aligned_16_111bytes): | |
839 | movdqa %xmm0, -111(%edx) | |
840 | L(aligned_16_95bytes): | |
841 | movdqa %xmm0, -95(%edx) | |
842 | L(aligned_16_79bytes): | |
843 | movdqa %xmm0, -79(%edx) | |
844 | L(aligned_16_63bytes): | |
845 | movdqa %xmm0, -63(%edx) | |
846 | L(aligned_16_47bytes): | |
847 | movdqa %xmm0, -47(%edx) | |
848 | L(aligned_16_31bytes): | |
849 | movdqa %xmm0, -31(%edx) | |
850 | L(aligned_16_15bytes): | |
851 | movq %xmm0, -15(%edx) /* Bytes -15..-8. */ | |
852 | movl %eax, -7(%edx) /* Bytes -7..-4. */ | |
853 | movw %ax, -3(%edx) /* Bytes -3 and -2. */ | |
854 | movb %al, -1(%edx) /* The last byte. */ | |
855 | SETRTNVAL | |
856 | RETURN_END /* Final return of the function: RETURN_END rather than RETURN, so no unwind state is re-asserted after it. */ | |
857 | ||
858 | END (__memset_sse2) | |
859 | ||
860 | #endif |