]>
Commit | Line | Data |
---|---|---|
be13f7bf | 1 | /* memcmp with SSE4.1, wmemcmp with SSE4.1 |
04277e02 | 2 | Copyright (C) 2010-2019 Free Software Foundation, Inc. |
404a6e32 L |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
404a6e32 | 19 | |
#if IS_IN (libc)

# include <sysdep.h>

/* MEMCMP is the symbol this file defines; it defaults to the SSE4.1
   memcmp variant unless the including build (e.g. wmemcmp) overrides
   it.  */
# ifndef MEMCMP
#  define MEMCMP	__memcmp_sse4_1
# endif

/* Jump-table entries are emitted as 32-bit offsets of entry I relative
   to the table base B, keeping the table position independent.  */
# define JMPTBL(I, B)	(I - B)

/* Dispatch through a JMPTBL-style table: load the sign-extended 32-bit
   offset at TABLE[INDEX*SCALE], add the table base back to form an
   absolute address, and jump there indirectly.  Clobbers %r11 (table
   base) and %rcx (target).  _CET_NOTRACK marks the indirect jump as
   exempt from CET indirect-branch tracking (the targets carry no
   ENDBR); the trailing ud2 traps if control ever falls through.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
  lea		TABLE(%rip), %r11;				\
  movslq	(%r11, INDEX, SCALE), %rcx;			\
  add		%r11, %rcx;					\
  _CET_NOTRACK jmp *%rcx;					\
  ud2
36 | ||
be13f7bf LD |
37 | /* Warning! |
38 | wmemcmp has to use SIGNED comparison for elements. | |
39 | memcmp has to use UNSIGNED comparison for elements. | |
40 | */ | |
41 | ||
404a6e32 L |
42 | .section .text.sse4.1,"ax",@progbits |
43 | ENTRY (MEMCMP) | |
be13f7bf | 44 | # ifdef USE_AS_WMEMCMP |
b304fc20 L |
45 | shl $2, %RDX_LP |
46 | # elif defined __ILP32__ | |
47 | /* Clear the upper 32 bits. */ | |
48 | mov %edx, %edx | |
be13f7bf | 49 | # endif |
404a6e32 | 50 | pxor %xmm0, %xmm0 |
b304fc20 | 51 | cmp $79, %RDX_LP |
404a6e32 | 52 | ja L(79bytesormore) |
be13f7bf | 53 | # ifndef USE_AS_WMEMCMP |
b304fc20 | 54 | cmp $1, %RDX_LP |
404a6e32 | 55 | je L(firstbyte) |
be13f7bf | 56 | # endif |
404a6e32 L |
57 | add %rdx, %rsi |
58 | add %rdx, %rdi | |
59 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
60 | ||
be13f7bf | 61 | # ifndef USE_AS_WMEMCMP |
e7044ea7 | 62 | .p2align 4 |
404a6e32 L |
63 | L(firstbyte): |
64 | movzbl (%rdi), %eax | |
65 | movzbl (%rsi), %ecx | |
66 | sub %ecx, %eax | |
67 | ret | |
be13f7bf | 68 | # endif |
404a6e32 | 69 | |
e7044ea7 | 70 | .p2align 4 |
404a6e32 L |
71 | L(79bytesormore): |
72 | movdqu (%rsi), %xmm1 | |
73 | movdqu (%rdi), %xmm2 | |
74 | pxor %xmm1, %xmm2 | |
75 | ptest %xmm2, %xmm0 | |
76 | jnc L(16bytesin256) | |
77 | mov %rsi, %rcx | |
78 | and $-16, %rsi | |
79 | add $16, %rsi | |
80 | sub %rsi, %rcx | |
81 | ||
82 | sub %rcx, %rdi | |
83 | add %rcx, %rdx | |
dd37cd1a L |
84 | test $0xf, %rdi |
85 | jz L(2aligned) | |
404a6e32 L |
86 | |
87 | cmp $128, %rdx | |
88 | ja L(128bytesormore) | |
89 | L(less128bytes): | |
90 | sub $64, %rdx | |
91 | ||
92 | movdqu (%rdi), %xmm2 | |
93 | pxor (%rsi), %xmm2 | |
94 | ptest %xmm2, %xmm0 | |
95 | jnc L(16bytesin256) | |
96 | ||
97 | movdqu 16(%rdi), %xmm2 | |
98 | pxor 16(%rsi), %xmm2 | |
99 | ptest %xmm2, %xmm0 | |
100 | jnc L(32bytesin256) | |
101 | ||
102 | movdqu 32(%rdi), %xmm2 | |
103 | pxor 32(%rsi), %xmm2 | |
104 | ptest %xmm2, %xmm0 | |
105 | jnc L(48bytesin256) | |
106 | ||
107 | movdqu 48(%rdi), %xmm2 | |
108 | pxor 48(%rsi), %xmm2 | |
109 | ptest %xmm2, %xmm0 | |
110 | jnc L(64bytesin256) | |
111 | cmp $32, %rdx | |
112 | jb L(less32bytesin64) | |
113 | ||
114 | movdqu 64(%rdi), %xmm2 | |
115 | pxor 64(%rsi), %xmm2 | |
116 | ptest %xmm2, %xmm0 | |
117 | jnc L(80bytesin256) | |
118 | ||
119 | movdqu 80(%rdi), %xmm2 | |
120 | pxor 80(%rsi), %xmm2 | |
121 | ptest %xmm2, %xmm0 | |
122 | jnc L(96bytesin256) | |
123 | sub $32, %rdx | |
124 | add $32, %rdi | |
125 | add $32, %rsi | |
126 | L(less32bytesin64): | |
127 | add $64, %rdi | |
128 | add $64, %rsi | |
129 | add %rdx, %rsi | |
130 | add %rdx, %rdi | |
131 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
132 | ||
133 | L(128bytesormore): | |
dd37cd1a L |
134 | cmp $512, %rdx |
135 | ja L(512bytesormore) | |
404a6e32 | 136 | cmp $256, %rdx |
dd37cd1a | 137 | ja L(less512bytes) |
404a6e32 L |
138 | L(less256bytes): |
139 | sub $128, %rdx | |
140 | ||
141 | movdqu (%rdi), %xmm2 | |
142 | pxor (%rsi), %xmm2 | |
143 | ptest %xmm2, %xmm0 | |
144 | jnc L(16bytesin256) | |
145 | ||
146 | movdqu 16(%rdi), %xmm2 | |
147 | pxor 16(%rsi), %xmm2 | |
148 | ptest %xmm2, %xmm0 | |
149 | jnc L(32bytesin256) | |
150 | ||
151 | movdqu 32(%rdi), %xmm2 | |
152 | pxor 32(%rsi), %xmm2 | |
153 | ptest %xmm2, %xmm0 | |
154 | jnc L(48bytesin256) | |
155 | ||
156 | movdqu 48(%rdi), %xmm2 | |
157 | pxor 48(%rsi), %xmm2 | |
158 | ptest %xmm2, %xmm0 | |
159 | jnc L(64bytesin256) | |
160 | ||
161 | movdqu 64(%rdi), %xmm2 | |
162 | pxor 64(%rsi), %xmm2 | |
163 | ptest %xmm2, %xmm0 | |
164 | jnc L(80bytesin256) | |
165 | ||
166 | movdqu 80(%rdi), %xmm2 | |
167 | pxor 80(%rsi), %xmm2 | |
168 | ptest %xmm2, %xmm0 | |
169 | jnc L(96bytesin256) | |
170 | ||
171 | movdqu 96(%rdi), %xmm2 | |
172 | pxor 96(%rsi), %xmm2 | |
173 | ptest %xmm2, %xmm0 | |
174 | jnc L(112bytesin256) | |
175 | ||
176 | movdqu 112(%rdi), %xmm2 | |
177 | pxor 112(%rsi), %xmm2 | |
178 | ptest %xmm2, %xmm0 | |
179 | jnc L(128bytesin256) | |
180 | ||
181 | add $128, %rsi | |
182 | add $128, %rdi | |
183 | ||
184 | cmp $64, %rdx | |
185 | jae L(less128bytes) | |
186 | ||
187 | cmp $32, %rdx | |
188 | jb L(less32bytesin128) | |
189 | ||
190 | movdqu (%rdi), %xmm2 | |
191 | pxor (%rsi), %xmm2 | |
192 | ptest %xmm2, %xmm0 | |
193 | jnc L(16bytesin256) | |
194 | ||
195 | movdqu 16(%rdi), %xmm2 | |
196 | pxor 16(%rsi), %xmm2 | |
197 | ptest %xmm2, %xmm0 | |
198 | jnc L(32bytesin256) | |
199 | sub $32, %rdx | |
200 | add $32, %rdi | |
201 | add $32, %rsi | |
202 | L(less32bytesin128): | |
203 | add %rdx, %rsi | |
204 | add %rdx, %rdi | |
205 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
206 | ||
404a6e32 L |
207 | L(less512bytes): |
208 | sub $256, %rdx | |
209 | movdqu (%rdi), %xmm2 | |
210 | pxor (%rsi), %xmm2 | |
211 | ptest %xmm2, %xmm0 | |
212 | jnc L(16bytesin256) | |
213 | ||
214 | movdqu 16(%rdi), %xmm2 | |
215 | pxor 16(%rsi), %xmm2 | |
216 | ptest %xmm2, %xmm0 | |
217 | jnc L(32bytesin256) | |
218 | ||
219 | movdqu 32(%rdi), %xmm2 | |
220 | pxor 32(%rsi), %xmm2 | |
221 | ptest %xmm2, %xmm0 | |
222 | jnc L(48bytesin256) | |
223 | ||
224 | movdqu 48(%rdi), %xmm2 | |
225 | pxor 48(%rsi), %xmm2 | |
226 | ptest %xmm2, %xmm0 | |
227 | jnc L(64bytesin256) | |
228 | ||
229 | movdqu 64(%rdi), %xmm2 | |
230 | pxor 64(%rsi), %xmm2 | |
231 | ptest %xmm2, %xmm0 | |
232 | jnc L(80bytesin256) | |
233 | ||
234 | movdqu 80(%rdi), %xmm2 | |
235 | pxor 80(%rsi), %xmm2 | |
236 | ptest %xmm2, %xmm0 | |
237 | jnc L(96bytesin256) | |
238 | ||
239 | movdqu 96(%rdi), %xmm2 | |
240 | pxor 96(%rsi), %xmm2 | |
241 | ptest %xmm2, %xmm0 | |
242 | jnc L(112bytesin256) | |
243 | ||
244 | movdqu 112(%rdi), %xmm2 | |
245 | pxor 112(%rsi), %xmm2 | |
246 | ptest %xmm2, %xmm0 | |
247 | jnc L(128bytesin256) | |
248 | ||
249 | movdqu 128(%rdi), %xmm2 | |
250 | pxor 128(%rsi), %xmm2 | |
251 | ptest %xmm2, %xmm0 | |
252 | jnc L(144bytesin256) | |
253 | ||
254 | movdqu 144(%rdi), %xmm2 | |
255 | pxor 144(%rsi), %xmm2 | |
256 | ptest %xmm2, %xmm0 | |
257 | jnc L(160bytesin256) | |
258 | ||
259 | movdqu 160(%rdi), %xmm2 | |
260 | pxor 160(%rsi), %xmm2 | |
261 | ptest %xmm2, %xmm0 | |
262 | jnc L(176bytesin256) | |
263 | ||
264 | movdqu 176(%rdi), %xmm2 | |
265 | pxor 176(%rsi), %xmm2 | |
266 | ptest %xmm2, %xmm0 | |
267 | jnc L(192bytesin256) | |
268 | ||
269 | movdqu 192(%rdi), %xmm2 | |
270 | pxor 192(%rsi), %xmm2 | |
271 | ptest %xmm2, %xmm0 | |
272 | jnc L(208bytesin256) | |
273 | ||
274 | movdqu 208(%rdi), %xmm2 | |
275 | pxor 208(%rsi), %xmm2 | |
276 | ptest %xmm2, %xmm0 | |
277 | jnc L(224bytesin256) | |
278 | ||
279 | movdqu 224(%rdi), %xmm2 | |
280 | pxor 224(%rsi), %xmm2 | |
281 | ptest %xmm2, %xmm0 | |
282 | jnc L(240bytesin256) | |
283 | ||
284 | movdqu 240(%rdi), %xmm2 | |
285 | pxor 240(%rsi), %xmm2 | |
286 | ptest %xmm2, %xmm0 | |
287 | jnc L(256bytesin256) | |
288 | ||
289 | add $256, %rsi | |
290 | add $256, %rdi | |
291 | ||
292 | cmp $128, %rdx | |
293 | jae L(less256bytes) | |
294 | ||
295 | cmp $64, %rdx | |
296 | jae L(less128bytes) | |
297 | ||
298 | cmp $32, %rdx | |
299 | jb L(less32bytesin256) | |
300 | ||
301 | movdqu (%rdi), %xmm2 | |
302 | pxor (%rsi), %xmm2 | |
303 | ptest %xmm2, %xmm0 | |
304 | jnc L(16bytesin256) | |
305 | ||
306 | movdqu 16(%rdi), %xmm2 | |
307 | pxor 16(%rsi), %xmm2 | |
308 | ptest %xmm2, %xmm0 | |
309 | jnc L(32bytesin256) | |
310 | sub $32, %rdx | |
311 | add $32, %rdi | |
312 | add $32, %rsi | |
313 | L(less32bytesin256): | |
314 | add %rdx, %rsi | |
315 | add %rdx, %rdi | |
316 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
317 | ||
e7044ea7 | 318 | .p2align 4 |
404a6e32 | 319 | L(512bytesormore): |
be13f7bf | 320 | # ifdef DATA_CACHE_SIZE_HALF |
8a17f349 | 321 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
be13f7bf | 322 | # else |
afec409a | 323 | mov __x86_data_cache_size_half(%rip), %R8_LP |
be13f7bf | 324 | # endif |
dd37cd1a L |
325 | mov %r8, %r9 |
326 | shr $1, %r8 | |
327 | add %r9, %r8 | |
328 | cmp %r8, %rdx | |
329 | ja L(L2_L3_cache_unaglined) | |
404a6e32 | 330 | sub $64, %rdx |
e7044ea7 | 331 | .p2align 4 |
404a6e32 L |
332 | L(64bytesormore_loop): |
333 | movdqu (%rdi), %xmm2 | |
334 | pxor (%rsi), %xmm2 | |
335 | movdqa %xmm2, %xmm1 | |
336 | ||
337 | movdqu 16(%rdi), %xmm3 | |
338 | pxor 16(%rsi), %xmm3 | |
339 | por %xmm3, %xmm1 | |
340 | ||
341 | movdqu 32(%rdi), %xmm4 | |
342 | pxor 32(%rsi), %xmm4 | |
343 | por %xmm4, %xmm1 | |
344 | ||
345 | movdqu 48(%rdi), %xmm5 | |
346 | pxor 48(%rsi), %xmm5 | |
347 | por %xmm5, %xmm1 | |
348 | ||
349 | ptest %xmm1, %xmm0 | |
350 | jnc L(64bytesormore_loop_end) | |
351 | add $64, %rsi | |
352 | add $64, %rdi | |
353 | sub $64, %rdx | |
354 | jae L(64bytesormore_loop) | |
355 | ||
356 | add $64, %rdx | |
357 | add %rdx, %rsi | |
358 | add %rdx, %rdi | |
359 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
360 | ||
dd37cd1a L |
361 | L(L2_L3_cache_unaglined): |
362 | sub $64, %rdx | |
e7044ea7 | 363 | .p2align 4 |
dd37cd1a L |
364 | L(L2_L3_unaligned_128bytes_loop): |
365 | prefetchnta 0x1c0(%rdi) | |
366 | prefetchnta 0x1c0(%rsi) | |
367 | movdqu (%rdi), %xmm2 | |
368 | pxor (%rsi), %xmm2 | |
369 | movdqa %xmm2, %xmm1 | |
370 | ||
371 | movdqu 16(%rdi), %xmm3 | |
372 | pxor 16(%rsi), %xmm3 | |
373 | por %xmm3, %xmm1 | |
374 | ||
375 | movdqu 32(%rdi), %xmm4 | |
376 | pxor 32(%rsi), %xmm4 | |
377 | por %xmm4, %xmm1 | |
378 | ||
379 | movdqu 48(%rdi), %xmm5 | |
380 | pxor 48(%rsi), %xmm5 | |
381 | por %xmm5, %xmm1 | |
382 | ||
383 | ptest %xmm1, %xmm0 | |
384 | jnc L(64bytesormore_loop_end) | |
385 | add $64, %rsi | |
386 | add $64, %rdi | |
387 | sub $64, %rdx | |
388 | jae L(L2_L3_unaligned_128bytes_loop) | |
389 | ||
390 | add $64, %rdx | |
391 | add %rdx, %rsi | |
392 | add %rdx, %rdi | |
393 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
394 | ||
395 | /* | |
396 | * This case is for machines which are sensitive to unaligned instructions. | |
397 | */ | |
e7044ea7 | 398 | .p2align 4 |
dd37cd1a L |
399 | L(2aligned): |
400 | cmp $128, %rdx | |
401 | ja L(128bytesormorein2aligned) | |
402 | L(less128bytesin2aligned): | |
403 | sub $64, %rdx | |
404 | ||
405 | movdqa (%rdi), %xmm2 | |
406 | pxor (%rsi), %xmm2 | |
407 | ptest %xmm2, %xmm0 | |
408 | jnc L(16bytesin256) | |
409 | ||
410 | movdqa 16(%rdi), %xmm2 | |
411 | pxor 16(%rsi), %xmm2 | |
412 | ptest %xmm2, %xmm0 | |
413 | jnc L(32bytesin256) | |
414 | ||
415 | movdqa 32(%rdi), %xmm2 | |
416 | pxor 32(%rsi), %xmm2 | |
417 | ptest %xmm2, %xmm0 | |
418 | jnc L(48bytesin256) | |
419 | ||
420 | movdqa 48(%rdi), %xmm2 | |
421 | pxor 48(%rsi), %xmm2 | |
422 | ptest %xmm2, %xmm0 | |
423 | jnc L(64bytesin256) | |
424 | cmp $32, %rdx | |
425 | jb L(less32bytesin64in2alinged) | |
426 | ||
427 | movdqa 64(%rdi), %xmm2 | |
428 | pxor 64(%rsi), %xmm2 | |
429 | ptest %xmm2, %xmm0 | |
430 | jnc L(80bytesin256) | |
431 | ||
432 | movdqa 80(%rdi), %xmm2 | |
433 | pxor 80(%rsi), %xmm2 | |
434 | ptest %xmm2, %xmm0 | |
435 | jnc L(96bytesin256) | |
436 | sub $32, %rdx | |
437 | add $32, %rdi | |
438 | add $32, %rsi | |
439 | L(less32bytesin64in2alinged): | |
440 | add $64, %rdi | |
441 | add $64, %rsi | |
442 | add %rdx, %rsi | |
443 | add %rdx, %rdi | |
444 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
445 | ||
e7044ea7 | 446 | .p2align 4 |
dd37cd1a L |
447 | L(128bytesormorein2aligned): |
448 | cmp $512, %rdx | |
449 | ja L(512bytesormorein2aligned) | |
450 | cmp $256, %rdx | |
451 | ja L(256bytesormorein2aligned) | |
452 | L(less256bytesin2alinged): | |
453 | sub $128, %rdx | |
454 | ||
455 | movdqa (%rdi), %xmm2 | |
456 | pxor (%rsi), %xmm2 | |
457 | ptest %xmm2, %xmm0 | |
458 | jnc L(16bytesin256) | |
459 | ||
460 | movdqa 16(%rdi), %xmm2 | |
461 | pxor 16(%rsi), %xmm2 | |
462 | ptest %xmm2, %xmm0 | |
463 | jnc L(32bytesin256) | |
464 | ||
465 | movdqa 32(%rdi), %xmm2 | |
466 | pxor 32(%rsi), %xmm2 | |
467 | ptest %xmm2, %xmm0 | |
468 | jnc L(48bytesin256) | |
469 | ||
470 | movdqa 48(%rdi), %xmm2 | |
471 | pxor 48(%rsi), %xmm2 | |
472 | ptest %xmm2, %xmm0 | |
473 | jnc L(64bytesin256) | |
474 | ||
475 | movdqa 64(%rdi), %xmm2 | |
476 | pxor 64(%rsi), %xmm2 | |
477 | ptest %xmm2, %xmm0 | |
478 | jnc L(80bytesin256) | |
479 | ||
480 | movdqa 80(%rdi), %xmm2 | |
481 | pxor 80(%rsi), %xmm2 | |
482 | ptest %xmm2, %xmm0 | |
483 | jnc L(96bytesin256) | |
484 | ||
485 | movdqa 96(%rdi), %xmm2 | |
486 | pxor 96(%rsi), %xmm2 | |
487 | ptest %xmm2, %xmm0 | |
488 | jnc L(112bytesin256) | |
489 | ||
490 | movdqa 112(%rdi), %xmm2 | |
491 | pxor 112(%rsi), %xmm2 | |
492 | ptest %xmm2, %xmm0 | |
493 | jnc L(128bytesin256) | |
494 | ||
495 | add $128, %rsi | |
496 | add $128, %rdi | |
497 | ||
498 | cmp $64, %rdx | |
499 | jae L(less128bytesin2aligned) | |
500 | ||
501 | cmp $32, %rdx | |
502 | jb L(less32bytesin128in2aligned) | |
503 | ||
504 | movdqu (%rdi), %xmm2 | |
505 | pxor (%rsi), %xmm2 | |
506 | ptest %xmm2, %xmm0 | |
507 | jnc L(16bytesin256) | |
508 | ||
509 | movdqu 16(%rdi), %xmm2 | |
510 | pxor 16(%rsi), %xmm2 | |
511 | ptest %xmm2, %xmm0 | |
512 | jnc L(32bytesin256) | |
513 | sub $32, %rdx | |
514 | add $32, %rdi | |
515 | add $32, %rsi | |
516 | L(less32bytesin128in2aligned): | |
517 | add %rdx, %rsi | |
518 | add %rdx, %rdi | |
519 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
520 | ||
e7044ea7 | 521 | .p2align 4 |
dd37cd1a L |
522 | L(256bytesormorein2aligned): |
523 | ||
524 | sub $256, %rdx | |
525 | movdqa (%rdi), %xmm2 | |
526 | pxor (%rsi), %xmm2 | |
527 | ptest %xmm2, %xmm0 | |
528 | jnc L(16bytesin256) | |
529 | ||
530 | movdqa 16(%rdi), %xmm2 | |
531 | pxor 16(%rsi), %xmm2 | |
532 | ptest %xmm2, %xmm0 | |
533 | jnc L(32bytesin256) | |
534 | ||
535 | movdqa 32(%rdi), %xmm2 | |
536 | pxor 32(%rsi), %xmm2 | |
537 | ptest %xmm2, %xmm0 | |
538 | jnc L(48bytesin256) | |
539 | ||
540 | movdqa 48(%rdi), %xmm2 | |
541 | pxor 48(%rsi), %xmm2 | |
542 | ptest %xmm2, %xmm0 | |
543 | jnc L(64bytesin256) | |
544 | ||
545 | movdqa 64(%rdi), %xmm2 | |
546 | pxor 64(%rsi), %xmm2 | |
547 | ptest %xmm2, %xmm0 | |
548 | jnc L(80bytesin256) | |
549 | ||
550 | movdqa 80(%rdi), %xmm2 | |
551 | pxor 80(%rsi), %xmm2 | |
552 | ptest %xmm2, %xmm0 | |
553 | jnc L(96bytesin256) | |
554 | ||
555 | movdqa 96(%rdi), %xmm2 | |
556 | pxor 96(%rsi), %xmm2 | |
557 | ptest %xmm2, %xmm0 | |
558 | jnc L(112bytesin256) | |
559 | ||
560 | movdqa 112(%rdi), %xmm2 | |
561 | pxor 112(%rsi), %xmm2 | |
562 | ptest %xmm2, %xmm0 | |
563 | jnc L(128bytesin256) | |
564 | ||
565 | movdqa 128(%rdi), %xmm2 | |
566 | pxor 128(%rsi), %xmm2 | |
567 | ptest %xmm2, %xmm0 | |
568 | jnc L(144bytesin256) | |
569 | ||
570 | movdqa 144(%rdi), %xmm2 | |
571 | pxor 144(%rsi), %xmm2 | |
572 | ptest %xmm2, %xmm0 | |
573 | jnc L(160bytesin256) | |
574 | ||
575 | movdqa 160(%rdi), %xmm2 | |
576 | pxor 160(%rsi), %xmm2 | |
577 | ptest %xmm2, %xmm0 | |
578 | jnc L(176bytesin256) | |
579 | ||
580 | movdqa 176(%rdi), %xmm2 | |
581 | pxor 176(%rsi), %xmm2 | |
582 | ptest %xmm2, %xmm0 | |
583 | jnc L(192bytesin256) | |
584 | ||
585 | movdqa 192(%rdi), %xmm2 | |
586 | pxor 192(%rsi), %xmm2 | |
587 | ptest %xmm2, %xmm0 | |
588 | jnc L(208bytesin256) | |
589 | ||
590 | movdqa 208(%rdi), %xmm2 | |
591 | pxor 208(%rsi), %xmm2 | |
592 | ptest %xmm2, %xmm0 | |
593 | jnc L(224bytesin256) | |
594 | ||
595 | movdqa 224(%rdi), %xmm2 | |
596 | pxor 224(%rsi), %xmm2 | |
597 | ptest %xmm2, %xmm0 | |
598 | jnc L(240bytesin256) | |
599 | ||
600 | movdqa 240(%rdi), %xmm2 | |
601 | pxor 240(%rsi), %xmm2 | |
602 | ptest %xmm2, %xmm0 | |
603 | jnc L(256bytesin256) | |
604 | ||
605 | add $256, %rsi | |
606 | add $256, %rdi | |
607 | ||
608 | cmp $128, %rdx | |
609 | jae L(less256bytesin2alinged) | |
610 | ||
611 | cmp $64, %rdx | |
612 | jae L(less128bytesin2aligned) | |
613 | ||
614 | cmp $32, %rdx | |
615 | jb L(less32bytesin256in2alinged) | |
616 | ||
617 | movdqa (%rdi), %xmm2 | |
618 | pxor (%rsi), %xmm2 | |
619 | ptest %xmm2, %xmm0 | |
620 | jnc L(16bytesin256) | |
621 | ||
622 | movdqa 16(%rdi), %xmm2 | |
623 | pxor 16(%rsi), %xmm2 | |
624 | ptest %xmm2, %xmm0 | |
625 | jnc L(32bytesin256) | |
626 | sub $32, %rdx | |
627 | add $32, %rdi | |
628 | add $32, %rsi | |
629 | L(less32bytesin256in2alinged): | |
630 | add %rdx, %rsi | |
631 | add %rdx, %rdi | |
632 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
633 | ||
e7044ea7 | 634 | .p2align 4 |
dd37cd1a | 635 | L(512bytesormorein2aligned): |
be13f7bf | 636 | # ifdef DATA_CACHE_SIZE_HALF |
8a17f349 | 637 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
be13f7bf | 638 | # else |
afec409a | 639 | mov __x86_data_cache_size_half(%rip), %R8_LP |
be13f7bf | 640 | # endif |
dd37cd1a L |
641 | mov %r8, %r9 |
642 | shr $1, %r8 | |
643 | add %r9, %r8 | |
644 | cmp %r8, %rdx | |
645 | ja L(L2_L3_cache_aglined) | |
646 | ||
647 | sub $64, %rdx | |
e7044ea7 | 648 | .p2align 4 |
dd37cd1a L |
649 | L(64bytesormore_loopin2aligned): |
650 | movdqa (%rdi), %xmm2 | |
651 | pxor (%rsi), %xmm2 | |
652 | movdqa %xmm2, %xmm1 | |
653 | ||
654 | movdqa 16(%rdi), %xmm3 | |
655 | pxor 16(%rsi), %xmm3 | |
656 | por %xmm3, %xmm1 | |
657 | ||
658 | movdqa 32(%rdi), %xmm4 | |
659 | pxor 32(%rsi), %xmm4 | |
660 | por %xmm4, %xmm1 | |
661 | ||
662 | movdqa 48(%rdi), %xmm5 | |
663 | pxor 48(%rsi), %xmm5 | |
664 | por %xmm5, %xmm1 | |
665 | ||
666 | ptest %xmm1, %xmm0 | |
667 | jnc L(64bytesormore_loop_end) | |
668 | add $64, %rsi | |
669 | add $64, %rdi | |
670 | sub $64, %rdx | |
671 | jae L(64bytesormore_loopin2aligned) | |
672 | ||
673 | add $64, %rdx | |
674 | add %rdx, %rsi | |
675 | add %rdx, %rdi | |
676 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
677 | L(L2_L3_cache_aglined): | |
678 | sub $64, %rdx | |
be13f7bf | 679 | |
e7044ea7 | 680 | .p2align 4 |
dd37cd1a L |
681 | L(L2_L3_aligned_128bytes_loop): |
682 | prefetchnta 0x1c0(%rdi) | |
683 | prefetchnta 0x1c0(%rsi) | |
684 | movdqa (%rdi), %xmm2 | |
685 | pxor (%rsi), %xmm2 | |
686 | movdqa %xmm2, %xmm1 | |
687 | ||
688 | movdqa 16(%rdi), %xmm3 | |
689 | pxor 16(%rsi), %xmm3 | |
690 | por %xmm3, %xmm1 | |
691 | ||
692 | movdqa 32(%rdi), %xmm4 | |
693 | pxor 32(%rsi), %xmm4 | |
694 | por %xmm4, %xmm1 | |
695 | ||
696 | movdqa 48(%rdi), %xmm5 | |
697 | pxor 48(%rsi), %xmm5 | |
698 | por %xmm5, %xmm1 | |
699 | ||
700 | ptest %xmm1, %xmm0 | |
701 | jnc L(64bytesormore_loop_end) | |
702 | add $64, %rsi | |
703 | add $64, %rdi | |
704 | sub $64, %rdx | |
705 | jae L(L2_L3_aligned_128bytes_loop) | |
706 | ||
707 | add $64, %rdx | |
708 | add %rdx, %rsi | |
709 | add %rdx, %rdi | |
710 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) | |
711 | ||
712 | ||
e7044ea7 | 713 | .p2align 4 |
404a6e32 L |
714 | L(64bytesormore_loop_end): |
715 | add $16, %rdi | |
716 | add $16, %rsi | |
717 | ptest %xmm2, %xmm0 | |
718 | jnc L(16bytes) | |
719 | ||
720 | add $16, %rdi | |
721 | add $16, %rsi | |
722 | ptest %xmm3, %xmm0 | |
723 | jnc L(16bytes) | |
724 | ||
725 | add $16, %rdi | |
726 | add $16, %rsi | |
727 | ptest %xmm4, %xmm0 | |
728 | jnc L(16bytes) | |
729 | ||
730 | add $16, %rdi | |
731 | add $16, %rsi | |
732 | jmp L(16bytes) | |
733 | ||
734 | L(256bytesin256): | |
735 | add $256, %rdi | |
736 | add $256, %rsi | |
737 | jmp L(16bytes) | |
738 | L(240bytesin256): | |
739 | add $240, %rdi | |
740 | add $240, %rsi | |
741 | jmp L(16bytes) | |
742 | L(224bytesin256): | |
743 | add $224, %rdi | |
744 | add $224, %rsi | |
745 | jmp L(16bytes) | |
746 | L(208bytesin256): | |
747 | add $208, %rdi | |
748 | add $208, %rsi | |
749 | jmp L(16bytes) | |
750 | L(192bytesin256): | |
751 | add $192, %rdi | |
752 | add $192, %rsi | |
753 | jmp L(16bytes) | |
754 | L(176bytesin256): | |
755 | add $176, %rdi | |
756 | add $176, %rsi | |
757 | jmp L(16bytes) | |
758 | L(160bytesin256): | |
759 | add $160, %rdi | |
760 | add $160, %rsi | |
761 | jmp L(16bytes) | |
762 | L(144bytesin256): | |
763 | add $144, %rdi | |
764 | add $144, %rsi | |
765 | jmp L(16bytes) | |
766 | L(128bytesin256): | |
767 | add $128, %rdi | |
768 | add $128, %rsi | |
769 | jmp L(16bytes) | |
770 | L(112bytesin256): | |
771 | add $112, %rdi | |
772 | add $112, %rsi | |
773 | jmp L(16bytes) | |
774 | L(96bytesin256): | |
775 | add $96, %rdi | |
776 | add $96, %rsi | |
777 | jmp L(16bytes) | |
778 | L(80bytesin256): | |
779 | add $80, %rdi | |
780 | add $80, %rsi | |
781 | jmp L(16bytes) | |
782 | L(64bytesin256): | |
783 | add $64, %rdi | |
784 | add $64, %rsi | |
785 | jmp L(16bytes) | |
786 | L(48bytesin256): | |
787 | add $16, %rdi | |
788 | add $16, %rsi | |
789 | L(32bytesin256): | |
790 | add $16, %rdi | |
791 | add $16, %rsi | |
792 | L(16bytesin256): | |
793 | add $16, %rdi | |
794 | add $16, %rsi | |
795 | L(16bytes): | |
796 | mov -16(%rdi), %rax | |
797 | mov -16(%rsi), %rcx | |
798 | cmp %rax, %rcx | |
799 | jne L(diffin8bytes) | |
800 | L(8bytes): | |
801 | mov -8(%rdi), %rax | |
802 | mov -8(%rsi), %rcx | |
803 | cmp %rax, %rcx | |
804 | jne L(diffin8bytes) | |
805 | xor %eax, %eax | |
806 | ret | |
807 | ||
e7044ea7 | 808 | .p2align 4 |
404a6e32 L |
809 | L(12bytes): |
810 | mov -12(%rdi), %rax | |
811 | mov -12(%rsi), %rcx | |
812 | cmp %rax, %rcx | |
813 | jne L(diffin8bytes) | |
814 | L(4bytes): | |
815 | mov -4(%rsi), %ecx | |
be13f7bf | 816 | # ifndef USE_AS_WMEMCMP |
404a6e32 L |
817 | mov -4(%rdi), %eax |
818 | cmp %eax, %ecx | |
be13f7bf LD |
819 | # else |
820 | cmp -4(%rdi), %ecx | |
821 | # endif | |
404a6e32 L |
822 | jne L(diffin4bytes) |
823 | L(0bytes): | |
824 | xor %eax, %eax | |
825 | ret | |
826 | ||
be13f7bf LD |
827 | # ifndef USE_AS_WMEMCMP |
828 | /* unreal case for wmemcmp */ | |
e7044ea7 | 829 | .p2align 4 |
404a6e32 L |
830 | L(65bytes): |
831 | movdqu -65(%rdi), %xmm1 | |
832 | movdqu -65(%rsi), %xmm2 | |
833 | mov $-65, %dl | |
834 | pxor %xmm1, %xmm2 | |
835 | ptest %xmm2, %xmm0 | |
836 | jnc L(less16bytes) | |
837 | L(49bytes): | |
838 | movdqu -49(%rdi), %xmm1 | |
839 | movdqu -49(%rsi), %xmm2 | |
840 | mov $-49, %dl | |
841 | pxor %xmm1, %xmm2 | |
842 | ptest %xmm2, %xmm0 | |
843 | jnc L(less16bytes) | |
844 | L(33bytes): | |
845 | movdqu -33(%rdi), %xmm1 | |
846 | movdqu -33(%rsi), %xmm2 | |
847 | mov $-33, %dl | |
848 | pxor %xmm1, %xmm2 | |
849 | ptest %xmm2, %xmm0 | |
850 | jnc L(less16bytes) | |
851 | L(17bytes): | |
852 | mov -17(%rdi), %rax | |
853 | mov -17(%rsi), %rcx | |
854 | cmp %rax, %rcx | |
855 | jne L(diffin8bytes) | |
856 | L(9bytes): | |
857 | mov -9(%rdi), %rax | |
858 | mov -9(%rsi), %rcx | |
859 | cmp %rax, %rcx | |
860 | jne L(diffin8bytes) | |
861 | movzbl -1(%rdi), %eax | |
862 | movzbl -1(%rsi), %edx | |
863 | sub %edx, %eax | |
864 | ret | |
865 | ||
e7044ea7 | 866 | .p2align 4 |
404a6e32 L |
867 | L(13bytes): |
868 | mov -13(%rdi), %rax | |
869 | mov -13(%rsi), %rcx | |
870 | cmp %rax, %rcx | |
871 | jne L(diffin8bytes) | |
872 | mov -8(%rdi), %rax | |
873 | mov -8(%rsi), %rcx | |
874 | cmp %rax, %rcx | |
875 | jne L(diffin8bytes) | |
876 | xor %eax, %eax | |
877 | ret | |
878 | ||
e7044ea7 | 879 | .p2align 4 |
404a6e32 L |
880 | L(5bytes): |
881 | mov -5(%rdi), %eax | |
882 | mov -5(%rsi), %ecx | |
883 | cmp %eax, %ecx | |
884 | jne L(diffin4bytes) | |
885 | movzbl -1(%rdi), %eax | |
886 | movzbl -1(%rsi), %edx | |
887 | sub %edx, %eax | |
888 | ret | |
889 | ||
e7044ea7 | 890 | .p2align 4 |
404a6e32 L |
891 | L(66bytes): |
892 | movdqu -66(%rdi), %xmm1 | |
893 | movdqu -66(%rsi), %xmm2 | |
894 | mov $-66, %dl | |
895 | pxor %xmm1, %xmm2 | |
896 | ptest %xmm2, %xmm0 | |
897 | jnc L(less16bytes) | |
898 | L(50bytes): | |
899 | movdqu -50(%rdi), %xmm1 | |
900 | movdqu -50(%rsi), %xmm2 | |
901 | mov $-50, %dl | |
902 | pxor %xmm1, %xmm2 | |
903 | ptest %xmm2, %xmm0 | |
904 | jnc L(less16bytes) | |
905 | L(34bytes): | |
906 | movdqu -34(%rdi), %xmm1 | |
907 | movdqu -34(%rsi), %xmm2 | |
908 | mov $-34, %dl | |
909 | pxor %xmm1, %xmm2 | |
910 | ptest %xmm2, %xmm0 | |
911 | jnc L(less16bytes) | |
912 | L(18bytes): | |
913 | mov -18(%rdi), %rax | |
914 | mov -18(%rsi), %rcx | |
915 | cmp %rax, %rcx | |
916 | jne L(diffin8bytes) | |
917 | L(10bytes): | |
918 | mov -10(%rdi), %rax | |
919 | mov -10(%rsi), %rcx | |
920 | cmp %rax, %rcx | |
921 | jne L(diffin8bytes) | |
922 | movzwl -2(%rdi), %eax | |
923 | movzwl -2(%rsi), %ecx | |
924 | cmp %cl, %al | |
925 | jne L(end) | |
926 | and $0xffff, %eax | |
927 | and $0xffff, %ecx | |
928 | sub %ecx, %eax | |
929 | ret | |
930 | ||
e7044ea7 | 931 | .p2align 4 |
404a6e32 L |
932 | L(14bytes): |
933 | mov -14(%rdi), %rax | |
934 | mov -14(%rsi), %rcx | |
935 | cmp %rax, %rcx | |
936 | jne L(diffin8bytes) | |
937 | mov -8(%rdi), %rax | |
938 | mov -8(%rsi), %rcx | |
939 | cmp %rax, %rcx | |
940 | jne L(diffin8bytes) | |
941 | xor %eax, %eax | |
942 | ret | |
943 | ||
e7044ea7 | 944 | .p2align 4 |
404a6e32 L |
945 | L(6bytes): |
946 | mov -6(%rdi), %eax | |
947 | mov -6(%rsi), %ecx | |
948 | cmp %eax, %ecx | |
949 | jne L(diffin4bytes) | |
950 | L(2bytes): | |
951 | movzwl -2(%rsi), %ecx | |
952 | movzwl -2(%rdi), %eax | |
953 | cmp %cl, %al | |
954 | jne L(end) | |
955 | and $0xffff, %eax | |
956 | and $0xffff, %ecx | |
957 | sub %ecx, %eax | |
958 | ret | |
959 | ||
e7044ea7 | 960 | .p2align 4 |
404a6e32 L |
961 | L(67bytes): |
962 | movdqu -67(%rdi), %xmm2 | |
963 | movdqu -67(%rsi), %xmm1 | |
964 | mov $-67, %dl | |
965 | pxor %xmm1, %xmm2 | |
966 | ptest %xmm2, %xmm0 | |
967 | jnc L(less16bytes) | |
968 | L(51bytes): | |
969 | movdqu -51(%rdi), %xmm2 | |
970 | movdqu -51(%rsi), %xmm1 | |
971 | mov $-51, %dl | |
972 | pxor %xmm1, %xmm2 | |
973 | ptest %xmm2, %xmm0 | |
974 | jnc L(less16bytes) | |
975 | L(35bytes): | |
976 | movdqu -35(%rsi), %xmm1 | |
977 | movdqu -35(%rdi), %xmm2 | |
978 | mov $-35, %dl | |
979 | pxor %xmm1, %xmm2 | |
980 | ptest %xmm2, %xmm0 | |
981 | jnc L(less16bytes) | |
982 | L(19bytes): | |
983 | mov -19(%rdi), %rax | |
984 | mov -19(%rsi), %rcx | |
985 | cmp %rax, %rcx | |
986 | jne L(diffin8bytes) | |
987 | L(11bytes): | |
988 | mov -11(%rdi), %rax | |
989 | mov -11(%rsi), %rcx | |
990 | cmp %rax, %rcx | |
991 | jne L(diffin8bytes) | |
992 | mov -4(%rdi), %eax | |
993 | mov -4(%rsi), %ecx | |
994 | cmp %eax, %ecx | |
995 | jne L(diffin4bytes) | |
996 | xor %eax, %eax | |
997 | ret | |
998 | ||
e7044ea7 | 999 | .p2align 4 |
404a6e32 L |
1000 | L(15bytes): |
1001 | mov -15(%rdi), %rax | |
1002 | mov -15(%rsi), %rcx | |
1003 | cmp %rax, %rcx | |
1004 | jne L(diffin8bytes) | |
1005 | mov -8(%rdi), %rax | |
1006 | mov -8(%rsi), %rcx | |
1007 | cmp %rax, %rcx | |
1008 | jne L(diffin8bytes) | |
1009 | xor %eax, %eax | |
1010 | ret | |
1011 | ||
e7044ea7 | 1012 | .p2align 4 |
404a6e32 L |
1013 | L(7bytes): |
1014 | mov -7(%rdi), %eax | |
1015 | mov -7(%rsi), %ecx | |
1016 | cmp %eax, %ecx | |
1017 | jne L(diffin4bytes) | |
1018 | mov -4(%rdi), %eax | |
1019 | mov -4(%rsi), %ecx | |
1020 | cmp %eax, %ecx | |
1021 | jne L(diffin4bytes) | |
1022 | xor %eax, %eax | |
1023 | ret | |
1024 | ||
e7044ea7 | 1025 | .p2align 4 |
404a6e32 L |
1026 | L(3bytes): |
1027 | movzwl -3(%rdi), %eax | |
1028 | movzwl -3(%rsi), %ecx | |
1029 | cmp %eax, %ecx | |
1030 | jne L(diffin2bytes) | |
1031 | L(1bytes): | |
1032 | movzbl -1(%rdi), %eax | |
1033 | movzbl -1(%rsi), %ecx | |
1034 | sub %ecx, %eax | |
1035 | ret | |
be13f7bf | 1036 | # endif |
404a6e32 | 1037 | |
e7044ea7 | 1038 | .p2align 4 |
404a6e32 L |
1039 | L(68bytes): |
1040 | movdqu -68(%rdi), %xmm2 | |
1041 | movdqu -68(%rsi), %xmm1 | |
1042 | mov $-68, %dl | |
1043 | pxor %xmm1, %xmm2 | |
1044 | ptest %xmm2, %xmm0 | |
1045 | jnc L(less16bytes) | |
1046 | L(52bytes): | |
1047 | movdqu -52(%rdi), %xmm2 | |
1048 | movdqu -52(%rsi), %xmm1 | |
1049 | mov $-52, %dl | |
1050 | pxor %xmm1, %xmm2 | |
1051 | ptest %xmm2, %xmm0 | |
1052 | jnc L(less16bytes) | |
1053 | L(36bytes): | |
1054 | movdqu -36(%rdi), %xmm2 | |
1055 | movdqu -36(%rsi), %xmm1 | |
1056 | mov $-36, %dl | |
1057 | pxor %xmm1, %xmm2 | |
1058 | ptest %xmm2, %xmm0 | |
1059 | jnc L(less16bytes) | |
1060 | L(20bytes): | |
1061 | movdqu -20(%rdi), %xmm2 | |
1062 | movdqu -20(%rsi), %xmm1 | |
1063 | mov $-20, %dl | |
1064 | pxor %xmm1, %xmm2 | |
1065 | ptest %xmm2, %xmm0 | |
1066 | jnc L(less16bytes) | |
404a6e32 | 1067 | mov -4(%rsi), %ecx |
be13f7bf LD |
1068 | |
1069 | # ifndef USE_AS_WMEMCMP | |
1070 | mov -4(%rdi), %eax | |
404a6e32 | 1071 | cmp %eax, %ecx |
be13f7bf LD |
1072 | # else |
1073 | cmp -4(%rdi), %ecx | |
1074 | # endif | |
404a6e32 L |
1075 | jne L(diffin4bytes) |
1076 | xor %eax, %eax | |
1077 | ret | |
1078 | ||
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
/* Tail-compare chains for lengths 69/53/37/21, 70/54/38/22 and
   71/55/39/23.  Dispatched to from the jump table; each entry loads
   the 16-byte chunk ending "length" bytes before the (already
   advanced) end pointers, records that negative offset in %dl for
   L(less16bytes), and falls through to the next smaller case.
   %xmm0 is presumably all-zero (set in the prologue, outside this
   view -- TODO confirm), so ptest sets CF iff the xor of the two
   chunks is zero; jnc is taken when the chunks differ.  */
	.p2align 4
L(69bytes):
	movdqu	-69(%rsi), %xmm1
	movdqu	-69(%rdi), %xmm2
	mov	$-69, %dl		/* chunk offset for L(less16bytes) */
	pxor	%xmm1, %xmm2		/* xmm2 = byte-wise difference */
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)		/* CF clear => chunks differ */
L(53bytes):
	movdqu	-53(%rsi), %xmm1
	movdqu	-53(%rdi), %xmm2
	mov	$-53, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(37bytes):
	movdqu	-37(%rsi), %xmm1
	movdqu	-37(%rdi), %xmm2
	mov	$-37, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(21bytes):
	movdqu	-21(%rsi), %xmm1
	movdqu	-21(%rdi), %xmm2
	mov	$-21, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* Last 5 bytes: compare the final (overlapping) 8 bytes.  */
	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax		/* buffers equal */
	ret

	.p2align 4
L(70bytes):
	movdqu	-70(%rsi), %xmm1
	movdqu	-70(%rdi), %xmm2
	mov	$-70, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(54bytes):
	movdqu	-54(%rsi), %xmm1
	movdqu	-54(%rdi), %xmm2
	mov	$-54, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(38bytes):
	movdqu	-38(%rsi), %xmm1
	movdqu	-38(%rdi), %xmm2
	mov	$-38, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(22bytes):
	movdqu	-22(%rsi), %xmm1
	movdqu	-22(%rdi), %xmm2
	mov	$-22, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* Last 6 bytes via the final (overlapping) 8 bytes.  */
	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax
	ret

	.p2align 4
L(71bytes):
	movdqu	-71(%rsi), %xmm1
	movdqu	-71(%rdi), %xmm2
	mov	$-71, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(55bytes):
	movdqu	-55(%rdi), %xmm2
	movdqu	-55(%rsi), %xmm1
	mov	$-55, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(39bytes):
	movdqu	-39(%rdi), %xmm2
	movdqu	-39(%rsi), %xmm1
	mov	$-39, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(23bytes):
	movdqu	-23(%rdi), %xmm2
	movdqu	-23(%rsi), %xmm1
	mov	$-23, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* Last 7 bytes via the final (overlapping) 8 bytes.  */
	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax
	ret
# endif

/* Tail-compare chain for lengths 72/56/40/24 (multiples of 4, so it
   is used by both memcmp and wmemcmp).  Same scheme as the other
   chains: %dl records the negative chunk offset for L(less16bytes),
   ptest against %xmm0 tests the xor result for all-zero.  */
	.p2align 4
L(72bytes):
	movdqu	-72(%rsi), %xmm1
	movdqu	-72(%rdi), %xmm2
	mov	$-72, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(56bytes):
	movdqu	-56(%rdi), %xmm2
	movdqu	-56(%rsi), %xmm1
	mov	$-56, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(40bytes):
	movdqu	-40(%rdi), %xmm2
	movdqu	-40(%rsi), %xmm1
	mov	$-40, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(24bytes):
	movdqu	-24(%rdi), %xmm2
	movdqu	-24(%rsi), %xmm1
	mov	$-24, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	/* Remaining 8 bytes compared directly.  */
	mov	-8(%rsi), %rcx
	mov	-8(%rdi), %rax
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax		/* buffers equal */
	ret

# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
/* Tail-compare chains for lengths 73/57/41/25, 74/58/42/26 and
   75/59/43/27.  Each tail resolves the remaining 9/10/11 bytes with
   one overlapping 8-byte compare plus a 1-, 2- or 4-byte compare.  */
	.p2align 4
L(73bytes):
	movdqu	-73(%rsi), %xmm1
	movdqu	-73(%rdi), %xmm2
	mov	$-73, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(57bytes):
	movdqu	-57(%rdi), %xmm2
	movdqu	-57(%rsi), %xmm1
	mov	$-57, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(41bytes):
	movdqu	-41(%rdi), %xmm2
	movdqu	-41(%rsi), %xmm1
	mov	$-41, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(25bytes):
	movdqu	-25(%rdi), %xmm2
	movdqu	-25(%rsi), %xmm1
	mov	$-25, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 9 bytes left: 8-byte compare, then the final byte.  */
	mov	-9(%rdi), %rax
	mov	-9(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	movzbl	-1(%rdi), %eax		/* unsigned byte compare for memcmp */
	movzbl	-1(%rsi), %ecx
	sub	%ecx, %eax
	ret

	.p2align 4
L(74bytes):
	movdqu	-74(%rsi), %xmm1
	movdqu	-74(%rdi), %xmm2
	mov	$-74, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(58bytes):
	movdqu	-58(%rdi), %xmm2
	movdqu	-58(%rsi), %xmm1
	mov	$-58, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(42bytes):
	movdqu	-42(%rdi), %xmm2
	movdqu	-42(%rsi), %xmm1
	mov	$-42, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(26bytes):
	movdqu	-26(%rdi), %xmm2
	movdqu	-26(%rsi), %xmm1
	mov	$-26, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 10 bytes left: 8-byte compare, then the final 2 bytes.  */
	mov	-10(%rdi), %rax
	mov	-10(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	movzwl	-2(%rdi), %eax
	movzwl	-2(%rsi), %ecx
	jmp	L(diffin2bytes)		/* shared word-difference resolver */

	.p2align 4
L(75bytes):
	movdqu	-75(%rsi), %xmm1
	movdqu	-75(%rdi), %xmm2
	mov	$-75, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(59bytes):
	movdqu	-59(%rdi), %xmm2
	movdqu	-59(%rsi), %xmm1
	mov	$-59, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(43bytes):
	movdqu	-43(%rdi), %xmm2
	movdqu	-43(%rsi), %xmm1
	mov	$-43, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(27bytes):
	movdqu	-27(%rdi), %xmm2
	movdqu	-27(%rsi), %xmm1
	mov	$-27, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 11 bytes left: 8-byte compare, then the final (overlapping)
	   4 bytes.  */
	mov	-11(%rdi), %rax
	mov	-11(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	mov	-4(%rdi), %eax
	mov	-4(%rsi), %ecx
	cmp	%eax, %ecx
	jne	L(diffin4bytes)
	xor	%eax, %eax
	ret
# endif
/* Tail-compare chain for lengths 76/60/44/28 (multiples of 4, shared
   by memcmp and wmemcmp).  The final 4-byte compare differs: memcmp
   needs the values in registers for the unsigned byte-wise resolver,
   wmemcmp only needs flags from one signed dword compare.  */
	.p2align 4
L(76bytes):
	movdqu	-76(%rsi), %xmm1
	movdqu	-76(%rdi), %xmm2
	mov	$-76, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(60bytes):
	movdqu	-60(%rdi), %xmm2
	movdqu	-60(%rsi), %xmm1
	mov	$-60, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(44bytes):
	movdqu	-44(%rdi), %xmm2
	movdqu	-44(%rsi), %xmm1
	mov	$-44, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(28bytes):
	movdqu	-28(%rdi), %xmm2
	movdqu	-28(%rsi), %xmm1
	mov	$-28, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 12 bytes left: 8-byte compare, then the final 4 bytes.  */
	mov	-12(%rdi), %rax
	mov	-12(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	mov	-4(%rsi), %ecx
# ifndef USE_AS_WMEMCMP
	mov	-4(%rdi), %eax
	cmp	%eax, %ecx
# else
	cmp	-4(%rdi), %ecx		/* flags feed the signed resolver */
# endif
	jne	L(diffin4bytes)
	xor	%eax, %eax
	ret

# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
/* Tail-compare chains for lengths 77/61/45/29, 78/62/46/30 and
   79/63/47/31.  The 13-15 remaining bytes are resolved by two
   overlapping 8-byte compares.  */
	.p2align 4
L(77bytes):
	movdqu	-77(%rsi), %xmm1
	movdqu	-77(%rdi), %xmm2
	mov	$-77, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(61bytes):
	movdqu	-61(%rdi), %xmm2
	movdqu	-61(%rsi), %xmm1
	mov	$-61, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(45bytes):
	movdqu	-45(%rdi), %xmm2
	movdqu	-45(%rsi), %xmm1
	mov	$-45, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(29bytes):
	movdqu	-29(%rdi), %xmm2
	movdqu	-29(%rsi), %xmm1
	mov	$-29, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	/* 13 bytes left: two overlapping 8-byte compares.  */
	mov	-13(%rdi), %rax
	mov	-13(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)

	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax
	ret

	.p2align 4
L(78bytes):
	movdqu	-78(%rsi), %xmm1
	movdqu	-78(%rdi), %xmm2
	mov	$-78, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(62bytes):
	movdqu	-62(%rdi), %xmm2
	movdqu	-62(%rsi), %xmm1
	mov	$-62, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(46bytes):
	movdqu	-46(%rdi), %xmm2
	movdqu	-46(%rsi), %xmm1
	mov	$-46, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(30bytes):
	movdqu	-30(%rdi), %xmm2
	movdqu	-30(%rsi), %xmm1
	mov	$-30, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 14 bytes left: two overlapping 8-byte compares.  */
	mov	-14(%rdi), %rax
	mov	-14(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax
	ret

	.p2align 4
L(79bytes):
	movdqu	-79(%rsi), %xmm1
	movdqu	-79(%rdi), %xmm2
	mov	$-79, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(63bytes):
	movdqu	-63(%rdi), %xmm2
	movdqu	-63(%rsi), %xmm1
	mov	$-63, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(47bytes):
	movdqu	-47(%rdi), %xmm2
	movdqu	-47(%rsi), %xmm1
	mov	$-47, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(31bytes):
	movdqu	-31(%rdi), %xmm2
	movdqu	-31(%rsi), %xmm1
	mov	$-31, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
	/* 15 bytes left: two overlapping 8-byte compares.  */
	mov	-15(%rdi), %rax
	mov	-15(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax
	ret
# endif
/* Tail-compare chain for lengths 64/48/32 (multiples of 4, shared by
   memcmp and wmemcmp); the last 16 bytes are resolved with two exact
   8-byte compares.  */
	.p2align 4
L(64bytes):
	movdqu	-64(%rdi), %xmm2
	movdqu	-64(%rsi), %xmm1
	mov	$-64, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(48bytes):
	movdqu	-48(%rdi), %xmm2
	movdqu	-48(%rsi), %xmm1
	mov	$-48, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)
L(32bytes):
	movdqu	-32(%rdi), %xmm2
	movdqu	-32(%rsi), %xmm1
	mov	$-32, %dl
	pxor	%xmm1, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

	mov	-16(%rdi), %rax
	mov	-16(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)

	mov	-8(%rdi), %rax
	mov	-8(%rsi), %rcx
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	xor	%eax, %eax		/* buffers equal */
	ret

/*
 * Aligned 8 bytes to avoid 2 branch "taken" in one 16 aligned code block.
 */
/* Difference resolver.  On entry %dl holds the (negative) offset,
   from the end pointers, of a 16-byte chunk known to differ.  The
   chunk is narrowed 8 -> 4 -> 2 -> 1 bytes.  memcmp resolves the
   differing unit with UNSIGNED byte order; wmemcmp uses one SIGNED
   32-bit compare, as the file header requires.  */
	.p2align 3
L(less16bytes):
	movsbq	%dl, %rdx		/* sign-extend chunk offset */
	mov	(%rsi, %rdx), %rcx	/* first 8 bytes of the chunk */
	mov	(%rdi, %rdx), %rax
	cmp	%rax, %rcx
	jne	L(diffin8bytes)
	mov	8(%rsi, %rdx), %rcx	/* difference must be in the high 8 */
	mov	8(%rdi, %rdx), %rax
L(diffin8bytes):
	cmp	%eax, %ecx		/* low dwords differ? */
	jne	L(diffin4bytes)
	shr	$32, %rcx		/* no: look at the high dwords */
	shr	$32, %rax

# ifdef USE_AS_WMEMCMP
/* for wmemcmp */
	cmp	%eax, %ecx
	jne	L(diffin4bytes)
	xor	%eax, %eax
	ret
# endif

L(diffin4bytes):
# ifndef USE_AS_WMEMCMP
	cmp	%cx, %ax		/* low words differ? */
	jne	L(diffin2bytes)
	shr	$16, %ecx
	shr	$16, %eax
L(diffin2bytes):
	cmp	%cl, %al		/* low bytes differ? */
	jne	L(end)
	/* Difference is in the high byte of the word: an unsigned
	   16-bit subtraction yields the correct sign.  */
	and	$0xffff, %eax
	and	$0xffff, %ecx
	sub	%ecx, %eax
	ret

	.p2align 4
L(end):
	/* Difference is in the low byte: unsigned 8-bit subtraction.  */
	and	$0xff, %eax
	and	$0xff, %ecx
	sub	%ecx, %eax
	ret
# else

/* for wmemcmp */
	/* Flags are from the signed dword compare (%ecx - %eax,
	   i.e. s2 - s1): s2 < s1 => return 1, else return -1.  */
	mov	$1, %eax
	jl	L(nequal_bigger)
	neg	%eax
	ret

	.p2align 4
L(nequal_bigger):
	ret

L(unreal_case):
	/* Jump-table filler for lengths that are not a multiple of 4;
	   wmemcmp can never be called with them.  */
	xor	%eax, %eax
	ret
# endif

END (MEMCMP)

	.section .rodata.sse4.1,"a",@progbits
	.p2align 3
# ifndef USE_AS_WMEMCMP
/* Jump table indexed by byte length (0..79).  Each entry is a 32-bit
   offset of the handler label relative to the table start (see
   JMPTBL/BRANCH_TO_JMPTBL_ENTRY), keeping the table position
   independent.  */
L(table_64bytes):
	.int	JMPTBL (L(0bytes), L(table_64bytes))
	.int	JMPTBL (L(1bytes), L(table_64bytes))
	.int	JMPTBL (L(2bytes), L(table_64bytes))
	.int	JMPTBL (L(3bytes), L(table_64bytes))
	.int	JMPTBL (L(4bytes), L(table_64bytes))
	.int	JMPTBL (L(5bytes), L(table_64bytes))
	.int	JMPTBL (L(6bytes), L(table_64bytes))
	.int	JMPTBL (L(7bytes), L(table_64bytes))
	.int	JMPTBL (L(8bytes), L(table_64bytes))
	.int	JMPTBL (L(9bytes), L(table_64bytes))
	.int	JMPTBL (L(10bytes), L(table_64bytes))
	.int	JMPTBL (L(11bytes), L(table_64bytes))
	.int	JMPTBL (L(12bytes), L(table_64bytes))
	.int	JMPTBL (L(13bytes), L(table_64bytes))
	.int	JMPTBL (L(14bytes), L(table_64bytes))
	.int	JMPTBL (L(15bytes), L(table_64bytes))
	.int	JMPTBL (L(16bytes), L(table_64bytes))
	.int	JMPTBL (L(17bytes), L(table_64bytes))
	.int	JMPTBL (L(18bytes), L(table_64bytes))
	.int	JMPTBL (L(19bytes), L(table_64bytes))
	.int	JMPTBL (L(20bytes), L(table_64bytes))
	.int	JMPTBL (L(21bytes), L(table_64bytes))
	.int	JMPTBL (L(22bytes), L(table_64bytes))
	.int	JMPTBL (L(23bytes), L(table_64bytes))
	.int	JMPTBL (L(24bytes), L(table_64bytes))
	.int	JMPTBL (L(25bytes), L(table_64bytes))
	.int	JMPTBL (L(26bytes), L(table_64bytes))
	.int	JMPTBL (L(27bytes), L(table_64bytes))
	.int	JMPTBL (L(28bytes), L(table_64bytes))
	.int	JMPTBL (L(29bytes), L(table_64bytes))
	.int	JMPTBL (L(30bytes), L(table_64bytes))
	.int	JMPTBL (L(31bytes), L(table_64bytes))
	.int	JMPTBL (L(32bytes), L(table_64bytes))
	.int	JMPTBL (L(33bytes), L(table_64bytes))
	.int	JMPTBL (L(34bytes), L(table_64bytes))
	.int	JMPTBL (L(35bytes), L(table_64bytes))
	.int	JMPTBL (L(36bytes), L(table_64bytes))
	.int	JMPTBL (L(37bytes), L(table_64bytes))
	.int	JMPTBL (L(38bytes), L(table_64bytes))
	.int	JMPTBL (L(39bytes), L(table_64bytes))
	.int	JMPTBL (L(40bytes), L(table_64bytes))
	.int	JMPTBL (L(41bytes), L(table_64bytes))
	.int	JMPTBL (L(42bytes), L(table_64bytes))
	.int	JMPTBL (L(43bytes), L(table_64bytes))
	.int	JMPTBL (L(44bytes), L(table_64bytes))
	.int	JMPTBL (L(45bytes), L(table_64bytes))
	.int	JMPTBL (L(46bytes), L(table_64bytes))
	.int	JMPTBL (L(47bytes), L(table_64bytes))
	.int	JMPTBL (L(48bytes), L(table_64bytes))
	.int	JMPTBL (L(49bytes), L(table_64bytes))
	.int	JMPTBL (L(50bytes), L(table_64bytes))
	.int	JMPTBL (L(51bytes), L(table_64bytes))
	.int	JMPTBL (L(52bytes), L(table_64bytes))
	.int	JMPTBL (L(53bytes), L(table_64bytes))
	.int	JMPTBL (L(54bytes), L(table_64bytes))
	.int	JMPTBL (L(55bytes), L(table_64bytes))
	.int	JMPTBL (L(56bytes), L(table_64bytes))
	.int	JMPTBL (L(57bytes), L(table_64bytes))
	.int	JMPTBL (L(58bytes), L(table_64bytes))
	.int	JMPTBL (L(59bytes), L(table_64bytes))
	.int	JMPTBL (L(60bytes), L(table_64bytes))
	.int	JMPTBL (L(61bytes), L(table_64bytes))
	.int	JMPTBL (L(62bytes), L(table_64bytes))
	.int	JMPTBL (L(63bytes), L(table_64bytes))
	.int	JMPTBL (L(64bytes), L(table_64bytes))
	.int	JMPTBL (L(65bytes), L(table_64bytes))
	.int	JMPTBL (L(66bytes), L(table_64bytes))
	.int	JMPTBL (L(67bytes), L(table_64bytes))
	.int	JMPTBL (L(68bytes), L(table_64bytes))
	.int	JMPTBL (L(69bytes), L(table_64bytes))
	.int	JMPTBL (L(70bytes), L(table_64bytes))
	.int	JMPTBL (L(71bytes), L(table_64bytes))
	.int	JMPTBL (L(72bytes), L(table_64bytes))
	.int	JMPTBL (L(73bytes), L(table_64bytes))
	.int	JMPTBL (L(74bytes), L(table_64bytes))
	.int	JMPTBL (L(75bytes), L(table_64bytes))
	.int	JMPTBL (L(76bytes), L(table_64bytes))
	.int	JMPTBL (L(77bytes), L(table_64bytes))
	.int	JMPTBL (L(78bytes), L(table_64bytes))
	.int	JMPTBL (L(79bytes), L(table_64bytes))
# else
/* wmemcmp variant: the byte length is always a multiple of 4 (wchar
   count scaled at entry), so only every fourth slot is reachable;
   the rest point at L(unreal_case) as filler.  */
L(table_64bytes):
	.int	JMPTBL (L(0bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(4bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(8bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(12bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(16bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(20bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(24bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(28bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(32bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(36bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(40bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(44bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(48bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(52bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(56bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(60bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(64bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(68bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(72bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(76bytes), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
	.int	JMPTBL (L(unreal_case), L(table_64bytes))
# endif
#endif