]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/memcmp-sse4.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / memcmp-sse4.S
CommitLineData
be13f7bf 1/* memcmp with SSE4.2, wmemcmp with SSE4.2
04277e02 2 Copyright (C) 2010-2019 Free Software Foundation, Inc.
904057bc
L
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
904057bc 19
4f41c682 20#if IS_IN (libc)
904057bc 21
be13f7bf 22# include <sysdep.h>
904057bc 23
be13f7bf
LD
24# ifndef MEMCMP
25# define MEMCMP __memcmp_sse4_2
26# endif
904057bc 27
be13f7bf
LD
/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA offset grows
   by 4 and REG is recorded as saved at offset 0 from the stack pointer. */
28# define CFI_PUSH(REG) \
29 cfi_adjust_cfa_offset (4); \
30 cfi_rel_offset (REG, 0)
904057bc 31
be13f7bf
LD
/* Unwind-info bookkeeping for the matching pop: the CFA offset shrinks
   by 4 and REG is marked as restored. */
32# define CFI_POP(REG) \
33 cfi_adjust_cfa_offset (-4); \
34 cfi_restore (REG)
904057bc 35
be13f7bf
LD
/* pushl/popl paired with the corresponding CFI annotations. */
36# define PUSH(REG) pushl REG; CFI_PUSH (REG)
37# define POP(REG) popl REG; CFI_POP (REG)
904057bc 38
be13f7bf
LD
/* Offsets from %esp of the three arguments; the entry code reads them
   before anything is pushed.  PARMS is 4, presumably to skip the return
   address -- confirm against the i386 calling convention. */
39# define PARMS 4
40# define BLK1 PARMS
41# define BLK2 BLK1 + 4
42# define LEN BLK2 + 4
/* Restore EBX and return.  The CFI_PUSH after the ret emits no
   instruction; it switches the unwind annotations back to the "EBX is
   pushed" state for whatever code follows the RETURN in the file. */
43# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
904057bc
L
44
45
dfc93c41 46# ifdef PIC
be13f7bf 47# define JMPTBL(I, B) I - B
904057bc
L
48
49/* Load an entry in a jump table into EBX and branch to it. TABLE is a
be13f7bf
LD
50 jump table with relative offsets. INDEX is a register containing the
51 index into the jump table. SCALE is the scale of INDEX. */
52
53# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
54/* We first load PC into EBX. */ \
9a1d9254 55 SETUP_PIC_REG(bx); \
be13f7bf
LD
56/* Get the address of the jump table. */ \
57 addl $(TABLE - .), %ebx; \
58/* Get the entry and convert the relative offset to the \
59 absolute address. */ \
60 addl (%ebx,INDEX,SCALE), %ebx; \
c0c3f78a 61/* We loaded the jump table and adjusted EAX/EDX. Go. */ \
177824e2 62 _CET_NOTRACK jmp *%ebx
be13f7bf
LD
63# else
64# define JMPTBL(I, B) I
904057bc
L
65
66/* Branch through an entry in a jump table. TABLE is a
be13f7bf
LD
67 jump table of absolute addresses. INDEX is a register containing the
68 index into the jump table. SCALE is the scale of INDEX. */
69# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
177824e2 70 _CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
be13f7bf
LD
71# endif
72
73
74/* Warning!
75 wmemcmp has to use SIGNED comparison for elements.
76 memcmp has to use UNSIGNED comparison for elements.
77*/
904057bc
L
78
79 .section .text.sse4.2,"ax",@progbits
/* Entry point.  Loads the two buffer pointers into EAX and EDX and the
   length into ECX, then picks a strategy from the byte length:
     - memcmp with length 0 or 1       -> L(less1bytes)
     - wmemcmp with a zero length      -> L(return0)
     - more than 64 bytes              -> L(64bytesormore) loop
     - fewer than 8 bytes              -> L(less8bytes)
     - otherwise (8..64 bytes)         -> jump table L(table_64bytes).  */
80ENTRY (MEMCMP)
81 movl BLK1(%esp), %eax
82 movl BLK2(%esp), %edx
83 movl LEN(%esp), %ecx
be13f7bf
LD
84
85# ifdef USE_AS_WMEMCMP
/* wmemcmp: multiply the length by 4 so that ECX is a byte count
   (presumably converting from 4-byte wide-character elements). */
86 shl $2, %ecx
87 test %ecx, %ecx
88 jz L(return0)
89# else
904057bc
L
/* memcmp: lengths 0 and 1 are handled separately; L(less1bytes) reuses
   the flags of this cmp to tell the two cases apart. */
90 cmp $1, %ecx
91 jbe L(less1bytes)
be13f7bf
LD
92# endif
93
904057bc
L
/* XMM0 = 0; it is the second operand of every ptest in this file. */
94 pxor %xmm0, %xmm0
95 cmp $64, %ecx
96 ja L(64bytesormore)
/* The flags of this cmp are consumed by the jb below.  In the memcmp
   variant a PUSH sits in between; it expands to pushl plus CFI
   directives, and pushl leaves the flags untouched. */
97 cmp $8, %ecx
be13f7bf
LD
98
99# ifndef USE_AS_WMEMCMP
/* memcmp's L(less8bytes) uses BL as scratch and pops EBX on exit, so
   EBX is saved before branching. */
100 PUSH (%ebx)
101 jb L(less8bytes)
102# else
/* wmemcmp's L(less8bytes) does not touch EBX, so branch first and save
   EBX only on the jump-table path. */
904057bc 103 jb L(less8bytes)
be13f7bf
LD
104 PUSH (%ebx)
105# endif
106
904057bc
L
/* Advance both pointers by the length so they point just past the end
   of each buffer, then dispatch on the byte length in ECX.  The
   L(NNbytes) handlers address the data with negative offsets. */
107 add %ecx, %edx
108 add %ecx, %eax
109 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
904057bc 110
be13f7bf
LD
/* memcmp only: byte-by-byte comparison for lengths below 8.
   EAX and EDX still point at the start of the buffers and EBX has
   already been pushed by the entry code.  ECX is in 2..7 here, because
   lengths 0 and 1 went to L(less1bytes), which is why bytes 0 and 1 are
   compared before the first length check. */
111# ifndef USE_AS_WMEMCMP
112 .p2align 4
020ecba7 113L(less8bytes):
904057bc
L
114 mov (%eax), %bl
115 cmpb (%edx), %bl
116 jne L(nonzero)
117
118 mov 1(%eax), %bl
119 cmpb 1(%edx), %bl
120 jne L(nonzero)
6bb74d9f
UD
121
/* After each matching byte, stop with "equal" if that was the last one. */
122 cmp $2, %ecx
904057bc
L
123 jz L(0bytes)
124
125 mov 2(%eax), %bl
126 cmpb 2(%edx), %bl
127 jne L(nonzero)
6bb74d9f
UD
128
129 cmp $3, %ecx
904057bc 130 jz L(0bytes)
6bb74d9f 131
904057bc
L
132 mov 3(%eax), %bl
133 cmpb 3(%edx), %bl
134 jne L(nonzero)
6bb74d9f
UD
135
136 cmp $4, %ecx
904057bc 137 jz L(0bytes)
6bb74d9f 138
904057bc
L
139 mov 4(%eax), %bl
140 cmpb 4(%edx), %bl
141 jne L(nonzero)
142
6bb74d9f 143 cmp $5, %ecx
904057bc 144 jz L(0bytes)
6bb74d9f 145
904057bc
L
146 mov 5(%eax), %bl
147 cmpb 5(%edx), %bl
148 jne L(nonzero)
149
6bb74d9f 150 cmp $6, %ecx
904057bc 151 jz L(0bytes)
6bb74d9f 152
904057bc
L
/* Byte 6 is the last possible one (length 7): equal means the buffers
   match, otherwise fall through into L(nonzero). */
153 mov 6(%eax), %bl
154 cmpb 6(%edx), %bl
155 je L(0bytes)
be13f7bf 156
/* A byte differed.  The flags still come from the cmpb above (first
   buffer's byte minus second buffer's byte): popl and mov do not change
   them.  Return 1 if the first buffer's byte is larger as an unsigned
   value, otherwise -1. */
904057bc 157L(nonzero):
be13f7bf 158 POP (%ebx)
904057bc
L
159 mov $1, %eax
160 ja L(above)
161 neg %eax
162L(above):
163 ret
/* No instruction: the code that follows runs with EBX pushed again. */
164 CFI_PUSH (%ebx)
be13f7bf 165# endif
904057bc 166
/* "Buffers are equal" exit for paths that have EBX pushed: restore EBX
   and return 0.  It is also entry 0 of L(table_64bytes). */
be13f7bf 167 .p2align 4
904057bc 168L(0bytes):
be13f7bf 169 POP (%ebx)
904057bc
L
170 xor %eax, %eax
171 ret
6bb74d9f 172
be13f7bf
LD
173# ifdef USE_AS_WMEMCMP
174
175/* for wmemcmp, case N == 1 */
/* Reached with a byte length below 8 and EBX not pushed, so these exits
   use a plain ret.  A single 4-byte element is compared as a signed
   value: return 0 if equal, 1 if the first buffer's element is greater
   (jg), otherwise -1. */
176
177 .p2align 4
178L(less8bytes):
179 mov (%eax), %ecx
180 cmp (%edx), %ecx
181 je L(return0)
/* mov does not modify the flags, so jg still tests the cmp above. */
182 mov $1, %eax
183 jg L(find_diff_bigger)
184 neg %eax
185 ret
186
187 .p2align 4
188L(find_diff_bigger):
189 ret
190
/* Return 0 without touching the stack; the entry code also branches
   here for a zero length. */
191 .p2align 4
192L(return0):
193 xor %eax, %eax
194 ret
195# endif
196
/* memcmp only: lengths 0 and 1.  Entered through the jbe that follows
   "cmp $1, %ecx" in the entry code, so the flags still describe
   length - 1 and EBX has not been pushed. */
197# ifndef USE_AS_WMEMCMP
198 .p2align 4
904057bc
L
199L(less1bytes):
/* Carry set means the length was below 1, i.e. 0. */
200 jb L(0bytesend)
/* Length 1: return the difference of the two zero-extended bytes. */
201 movzbl (%eax), %eax
202 movzbl (%edx), %edx
203 sub %edx, %eax
204 ret
205
be13f7bf 206 .p2align 4
904057bc
L
/* Length 0: the buffers compare equal. */
207L(0bytesend):
208 xor %eax, %eax
209 ret
be13f7bf
LD
210# endif
/* Main loop for lengths above 64 bytes: 64 bytes per iteration as four
   unaligned 16-byte chunks.
   Register roles inside the loop:
     EAX, EDX - current position in the first / second buffer
     ECX      - constant 64 (stride; also used by L(find_NNdiff))
     EBX      - bytes remaining minus 64
     XMM0     - zero (cleared in the entry code)
   Each chunk pair is XORed; "ptest %xmm2, %xmm0" sets CF when
   (XMM2 AND NOT XMM0) is all zero, which with XMM0 = 0 means the chunks
   are identical.  jnc therefore branches when a difference was found. */
211 .p2align 4
904057bc 212L(64bytesormore):
be13f7bf 213 PUSH (%ebx)
904057bc
L
214 mov %ecx, %ebx
215 mov $64, %ecx
216 sub $64, %ebx
217L(64bytesormore_loop):
218 movdqu (%eax), %xmm1
219 movdqu (%edx), %xmm2
220 pxor %xmm1, %xmm2
221 ptest %xmm2, %xmm0
222 jnc L(find_16diff)
223
224 movdqu 16(%eax), %xmm1
225 movdqu 16(%edx), %xmm2
226 pxor %xmm1, %xmm2
227 ptest %xmm2, %xmm0
228 jnc L(find_32diff)
229
230 movdqu 32(%eax), %xmm1
231 movdqu 32(%edx), %xmm2
232 pxor %xmm1, %xmm2
233 ptest %xmm2, %xmm0
234 jnc L(find_48diff)
235
236 movdqu 48(%eax), %xmm1
237 movdqu 48(%edx), %xmm2
238 pxor %xmm1, %xmm2
239 ptest %xmm2, %xmm0
240 jnc L(find_64diff)
/* All 64 bytes matched: advance, and continue while at least 64 bytes
   remain (the sub does not borrow). */
241 add %ecx, %eax
242 add %ecx, %edx
243 sub %ecx, %ebx
244 jae L(64bytesormore_loop)
/* EBX is now (remaining - 64), so ECX = 64 + EBX = remaining bytes
   (0..63).  Point both registers past the end of the buffers and let the
   jump table handle the tail. */
245 add %ebx, %ecx
246 add %ecx, %edx
247 add %ecx, %eax
248 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
020ecba7 249
be13f7bf
LD
250# ifdef USE_AS_WMEMCMP
251
252/* Label needed only to fill the unused slots of table_64bytes */
253L(unreal_case):
254/* no code here */
255
256# endif
/* Targets of the jnc branches in L(64bytesormore_loop).  ECX is 64 on
   entry and EAX/EDX point at the start of the current 64-byte block.
   Each label falls through into the next, so ECX ends up as 16, 32, 48
   or 64 -- the offset just past the 16-byte chunk that differed.  After
   adding it, the differing chunk is the 16 bytes immediately below
   EAX/EDX, and execution continues into L(16bytes), which follows this
   code in the file. */
257 .p2align 4
904057bc
L
258L(find_16diff):
259 sub $16, %ecx
260L(find_32diff):
261 sub $16, %ecx
262L(find_48diff):
263 sub $16, %ecx
264L(find_64diff):
265 add %ecx, %edx
266 add %ecx, %eax
904057bc 267
be13f7bf
LD
268# ifndef USE_AS_WMEMCMP
269 .p2align 4
904057bc
L
270L(16bytes):
271 mov -16(%eax), %ecx
272 mov -16(%edx), %ebx
273 cmp %ebx, %ecx
274 jne L(find_diff)
275L(12bytes):
276 mov -12(%eax), %ecx
277 mov -12(%edx), %ebx
278 cmp %ebx, %ecx
279 jne L(find_diff)
280L(8bytes):
281 mov -8(%eax), %ecx
282 mov -8(%edx), %ebx
283 cmp %ebx, %ecx
284 jne L(find_diff)
285L(4bytes):
286 mov -4(%eax), %ecx
287 mov -4(%edx), %ebx
288 cmp %ebx, %ecx
289 mov $0, %eax
290 jne L(find_diff)
291 RETURN
be13f7bf
LD
292# else
293 .p2align 4
294L(16bytes):
295 mov -16(%eax), %ecx
296 cmp -16(%edx), %ecx
297 jne L(find_diff)
298L(12bytes):
299 mov -12(%eax), %ecx
300 cmp -12(%edx), %ecx
301 jne L(find_diff)
302L(8bytes):
303 mov -8(%eax), %ecx
304 cmp -8(%edx), %ecx
305 jne L(find_diff)
306L(4bytes):
307 mov -4(%eax), %ecx
308 cmp -4(%edx), %ecx
309 mov $0, %eax
310 jne L(find_diff)
311 RETURN
312# endif
904057bc 313
be13f7bf
LD
314# ifndef USE_AS_WMEMCMP
315 .p2align 4
904057bc
L
316L(49bytes):
317 movdqu -49(%eax), %xmm1
318 movdqu -49(%edx), %xmm2
319 mov $-49, %ebx
320 pxor %xmm1, %xmm2
321 ptest %xmm2, %xmm0
322 jnc L(less16bytes)
323L(33bytes):
324 movdqu -33(%eax), %xmm1
325 movdqu -33(%edx), %xmm2
326 mov $-33, %ebx
327 pxor %xmm1, %xmm2
328 ptest %xmm2, %xmm0
329 jnc L(less16bytes)
330L(17bytes):
331 mov -17(%eax), %ecx
332 mov -17(%edx), %ebx
333 cmp %ebx, %ecx
334 jne L(find_diff)
335L(13bytes):
336 mov -13(%eax), %ecx
337 mov -13(%edx), %ebx
338 cmp %ebx, %ecx
339 jne L(find_diff)
340L(9bytes):
341 mov -9(%eax), %ecx
342 mov -9(%edx), %ebx
343 cmp %ebx, %ecx
344 jne L(find_diff)
345L(5bytes):
346 mov -5(%eax), %ecx
347 mov -5(%edx), %ebx
348 cmp %ebx, %ecx
349 jne L(find_diff)
350 movzbl -1(%eax), %ecx
351 cmp -1(%edx), %cl
352 mov $0, %eax
353 jne L(end)
354 RETURN
355
be13f7bf 356 .p2align 4
904057bc
L
357L(50bytes):
358 mov $-50, %ebx
359 movdqu -50(%eax), %xmm1
360 movdqu -50(%edx), %xmm2
361 pxor %xmm1, %xmm2
362 ptest %xmm2, %xmm0
363 jnc L(less16bytes)
364L(34bytes):
365 mov $-34, %ebx
366 movdqu -34(%eax), %xmm1
367 movdqu -34(%edx), %xmm2
368 pxor %xmm1, %xmm2
369 ptest %xmm2, %xmm0
370 jnc L(less16bytes)
371L(18bytes):
372 mov -18(%eax), %ecx
373 mov -18(%edx), %ebx
374 cmp %ebx, %ecx
375 jne L(find_diff)
376L(14bytes):
377 mov -14(%eax), %ecx
378 mov -14(%edx), %ebx
379 cmp %ebx, %ecx
380 jne L(find_diff)
381L(10bytes):
382 mov -10(%eax), %ecx
383 mov -10(%edx), %ebx
384 cmp %ebx, %ecx
385 jne L(find_diff)
386L(6bytes):
387 mov -6(%eax), %ecx
388 mov -6(%edx), %ebx
389 cmp %ebx, %ecx
390 jne L(find_diff)
391L(2bytes):
392 movzwl -2(%eax), %ecx
393 movzwl -2(%edx), %ebx
394 cmp %bl, %cl
395 jne L(end)
396 cmp %bh, %ch
397 mov $0, %eax
398 jne L(end)
399 RETURN
400
be13f7bf 401 .p2align 4
904057bc
L
402L(51bytes):
403 mov $-51, %ebx
404 movdqu -51(%eax), %xmm1
405 movdqu -51(%edx), %xmm2
406 pxor %xmm1, %xmm2
407 ptest %xmm2, %xmm0
408 jnc L(less16bytes)
409L(35bytes):
410 mov $-35, %ebx
411 movdqu -35(%eax), %xmm1
412 movdqu -35(%edx), %xmm2
413 pxor %xmm1, %xmm2
414 ptest %xmm2, %xmm0
415 jnc L(less16bytes)
416L(19bytes):
417 movl -19(%eax), %ecx
418 movl -19(%edx), %ebx
419 cmp %ebx, %ecx
420 jne L(find_diff)
421L(15bytes):
422 movl -15(%eax), %ecx
423 movl -15(%edx), %ebx
424 cmp %ebx, %ecx
425 jne L(find_diff)
426L(11bytes):
427 movl -11(%eax), %ecx
428 movl -11(%edx), %ebx
429 cmp %ebx, %ecx
430 jne L(find_diff)
431L(7bytes):
432 movl -7(%eax), %ecx
433 movl -7(%edx), %ebx
434 cmp %ebx, %ecx
435 jne L(find_diff)
436L(3bytes):
437 movzwl -3(%eax), %ecx
438 movzwl -3(%edx), %ebx
439 cmpb %bl, %cl
440 jne L(end)
441 cmp %bx, %cx
442 jne L(end)
443L(1bytes):
444 movzbl -1(%eax), %eax
445 cmpb -1(%edx), %al
446 mov $0, %eax
447 jne L(end)
448 RETURN
be13f7bf
LD
449# endif
450 .p2align 4
904057bc
L
451L(52bytes):
452 movdqu -52(%eax), %xmm1
453 movdqu -52(%edx), %xmm2
454 mov $-52, %ebx
455 pxor %xmm1, %xmm2
456 ptest %xmm2, %xmm0
457 jnc L(less16bytes)
458L(36bytes):
459 movdqu -36(%eax), %xmm1
460 movdqu -36(%edx), %xmm2
461 mov $-36, %ebx
462 pxor %xmm1, %xmm2
463 ptest %xmm2, %xmm0
464 jnc L(less16bytes)
465L(20bytes):
466 movdqu -20(%eax), %xmm1
467 movdqu -20(%edx), %xmm2
468 mov $-20, %ebx
469 pxor %xmm1, %xmm2
470 ptest %xmm2, %xmm0
471 jnc L(less16bytes)
472 mov -4(%eax), %ecx
be13f7bf 473# ifndef USE_AS_WMEMCMP
904057bc
L
474 mov -4(%edx), %ebx
475 cmp %ebx, %ecx
be13f7bf
LD
476# else
477 cmp -4(%edx), %ecx
478# endif
904057bc
L
479 mov $0, %eax
480 jne L(find_diff)
481 RETURN
482
be13f7bf
LD
483# ifndef USE_AS_WMEMCMP
484 .p2align 4
904057bc
L
485L(53bytes):
486 movdqu -53(%eax), %xmm1
487 movdqu -53(%edx), %xmm2
488 mov $-53, %ebx
489 pxor %xmm1, %xmm2
490 ptest %xmm2, %xmm0
491 jnc L(less16bytes)
492L(37bytes):
493 mov $-37, %ebx
494 movdqu -37(%eax), %xmm1
495 movdqu -37(%edx), %xmm2
496 pxor %xmm1, %xmm2
497 ptest %xmm2, %xmm0
498 jnc L(less16bytes)
499L(21bytes):
500 mov $-21, %ebx
501 movdqu -21(%eax), %xmm1
502 movdqu -21(%edx), %xmm2
503 pxor %xmm1, %xmm2
504 ptest %xmm2, %xmm0
505 jnc L(less16bytes)
506 mov -5(%eax), %ecx
507 mov -5(%edx), %ebx
508 cmp %ebx, %ecx
509 jne L(find_diff)
510 movzbl -1(%eax), %ecx
511 cmp -1(%edx), %cl
512 mov $0, %eax
513 jne L(end)
514 RETURN
515
be13f7bf 516 .p2align 4
904057bc
L
517L(54bytes):
518 movdqu -54(%eax), %xmm1
519 movdqu -54(%edx), %xmm2
520 mov $-54, %ebx
521 pxor %xmm1, %xmm2
522 ptest %xmm2, %xmm0
523 jnc L(less16bytes)
524L(38bytes):
525 mov $-38, %ebx
526 movdqu -38(%eax), %xmm1
527 movdqu -38(%edx), %xmm2
528 pxor %xmm1, %xmm2
529 ptest %xmm2, %xmm0
530 jnc L(less16bytes)
531L(22bytes):
532 mov $-22, %ebx
533 movdqu -22(%eax), %xmm1
534 movdqu -22(%edx), %xmm2
535 pxor %xmm1, %xmm2
536 ptest %xmm2, %xmm0
537 jnc L(less16bytes)
538
539 mov -6(%eax), %ecx
540 mov -6(%edx), %ebx
541 cmp %ebx, %ecx
542 jne L(find_diff)
543 movzwl -2(%eax), %ecx
544 movzwl -2(%edx), %ebx
545 cmp %bl, %cl
546 jne L(end)
547 cmp %bh, %ch
548 mov $0, %eax
549 jne L(end)
550 RETURN
551
be13f7bf 552 .p2align 4
904057bc
L
553L(55bytes):
554 movdqu -55(%eax), %xmm1
555 movdqu -55(%edx), %xmm2
556 mov $-55, %ebx
557 pxor %xmm1, %xmm2
558 ptest %xmm2, %xmm0
559 jnc L(less16bytes)
560L(39bytes):
561 mov $-39, %ebx
562 movdqu -39(%eax), %xmm1
563 movdqu -39(%edx), %xmm2
564 pxor %xmm1, %xmm2
565 ptest %xmm2, %xmm0
566 jnc L(less16bytes)
567L(23bytes):
568 mov $-23, %ebx
569 movdqu -23(%eax), %xmm1
570 movdqu -23(%edx), %xmm2
571 pxor %xmm1, %xmm2
572 ptest %xmm2, %xmm0
573 jnc L(less16bytes)
574 movl -7(%eax), %ecx
575 movl -7(%edx), %ebx
576 cmp %ebx, %ecx
577 jne L(find_diff)
578 movzwl -3(%eax), %ecx
579 movzwl -3(%edx), %ebx
580 cmpb %bl, %cl
581 jne L(end)
582 cmp %bx, %cx
583 jne L(end)
584 movzbl -1(%eax), %eax
585 cmpb -1(%edx), %al
586 mov $0, %eax
587 jne L(end)
588 RETURN
be13f7bf
LD
589# endif
590 .p2align 4
904057bc
L
591L(56bytes):
592 movdqu -56(%eax), %xmm1
593 movdqu -56(%edx), %xmm2
594 mov $-56, %ebx
595 pxor %xmm1, %xmm2
596 ptest %xmm2, %xmm0
597 jnc L(less16bytes)
598L(40bytes):
599 mov $-40, %ebx
600 movdqu -40(%eax), %xmm1
601 movdqu -40(%edx), %xmm2
602 pxor %xmm1, %xmm2
603 ptest %xmm2, %xmm0
604 jnc L(less16bytes)
605L(24bytes):
606 mov $-24, %ebx
607 movdqu -24(%eax), %xmm1
608 movdqu -24(%edx), %xmm2
609 pxor %xmm1, %xmm2
610 ptest %xmm2, %xmm0
611 jnc L(less16bytes)
612
613 mov -8(%eax), %ecx
be13f7bf 614# ifndef USE_AS_WMEMCMP
904057bc
L
615 mov -8(%edx), %ebx
616 cmp %ebx, %ecx
be13f7bf
LD
617# else
618 cmp -8(%edx), %ecx
619# endif
904057bc
L
620 jne L(find_diff)
621
622 mov -4(%eax), %ecx
be13f7bf 623# ifndef USE_AS_WMEMCMP
904057bc
L
624 mov -4(%edx), %ebx
625 cmp %ebx, %ecx
be13f7bf
LD
626# else
627 cmp -4(%edx), %ecx
628# endif
904057bc
L
629 mov $0, %eax
630 jne L(find_diff)
631 RETURN
632
be13f7bf
LD
633# ifndef USE_AS_WMEMCMP
634 .p2align 4
904057bc
L
635L(57bytes):
636 movdqu -57(%eax), %xmm1
637 movdqu -57(%edx), %xmm2
638 mov $-57, %ebx
639 pxor %xmm1, %xmm2
640 ptest %xmm2, %xmm0
641 jnc L(less16bytes)
642L(41bytes):
643 mov $-41, %ebx
644 movdqu -41(%eax), %xmm1
645 movdqu -41(%edx), %xmm2
646 pxor %xmm1, %xmm2
647 ptest %xmm2, %xmm0
648 jnc L(less16bytes)
649L(25bytes):
650 mov $-25, %ebx
651 movdqu -25(%eax), %xmm1
652 movdqu -25(%edx), %xmm2
653 pxor %xmm1, %xmm2
654 ptest %xmm2, %xmm0
655 jnc L(less16bytes)
656 mov -9(%eax), %ecx
657 mov -9(%edx), %ebx
658 cmp %ebx, %ecx
659 jne L(find_diff)
660 mov -5(%eax), %ecx
661 mov -5(%edx), %ebx
662 cmp %ebx, %ecx
663 jne L(find_diff)
664 movzbl -1(%eax), %ecx
665 cmp -1(%edx), %cl
666 mov $0, %eax
667 jne L(end)
668 RETURN
669
be13f7bf 670 .p2align 4
904057bc
L
671L(58bytes):
672 movdqu -58(%eax), %xmm1
673 movdqu -58(%edx), %xmm2
674 mov $-58, %ebx
675 pxor %xmm1, %xmm2
676 ptest %xmm2, %xmm0
677 jnc L(less16bytes)
678L(42bytes):
679 mov $-42, %ebx
680 movdqu -42(%eax), %xmm1
681 movdqu -42(%edx), %xmm2
682 pxor %xmm1, %xmm2
683 ptest %xmm2, %xmm0
684 jnc L(less16bytes)
685L(26bytes):
686 mov $-26, %ebx
687 movdqu -26(%eax), %xmm1
688 movdqu -26(%edx), %xmm2
689 pxor %xmm1, %xmm2
690 ptest %xmm2, %xmm0
691 jnc L(less16bytes)
692
693 mov -10(%eax), %ecx
694 mov -10(%edx), %ebx
695 cmp %ebx, %ecx
696 jne L(find_diff)
697
698 mov -6(%eax), %ecx
699 mov -6(%edx), %ebx
700 cmp %ebx, %ecx
701 jne L(find_diff)
6bb74d9f 702
904057bc
L
703 movzwl -2(%eax), %ecx
704 movzwl -2(%edx), %ebx
705 cmp %bl, %cl
706 jne L(end)
707 cmp %bh, %ch
708 mov $0, %eax
709 jne L(end)
710 RETURN
711
be13f7bf 712 .p2align 4
904057bc
L
713L(59bytes):
714 movdqu -59(%eax), %xmm1
715 movdqu -59(%edx), %xmm2
716 mov $-59, %ebx
717 pxor %xmm1, %xmm2
718 ptest %xmm2, %xmm0
719 jnc L(less16bytes)
720L(43bytes):
721 mov $-43, %ebx
722 movdqu -43(%eax), %xmm1
723 movdqu -43(%edx), %xmm2
724 pxor %xmm1, %xmm2
725 ptest %xmm2, %xmm0
726 jnc L(less16bytes)
727L(27bytes):
728 mov $-27, %ebx
729 movdqu -27(%eax), %xmm1
730 movdqu -27(%edx), %xmm2
731 pxor %xmm1, %xmm2
732 ptest %xmm2, %xmm0
733 jnc L(less16bytes)
734 movl -11(%eax), %ecx
735 movl -11(%edx), %ebx
736 cmp %ebx, %ecx
737 jne L(find_diff)
738 movl -7(%eax), %ecx
739 movl -7(%edx), %ebx
740 cmp %ebx, %ecx
741 jne L(find_diff)
742 movzwl -3(%eax), %ecx
743 movzwl -3(%edx), %ebx
744 cmpb %bl, %cl
745 jne L(end)
746 cmp %bx, %cx
747 jne L(end)
748 movzbl -1(%eax), %eax
749 cmpb -1(%edx), %al
750 mov $0, %eax
751 jne L(end)
752 RETURN
be13f7bf
LD
753# endif
754 .p2align 4
904057bc
L
755L(60bytes):
756 movdqu -60(%eax), %xmm1
757 movdqu -60(%edx), %xmm2
758 mov $-60, %ebx
759 pxor %xmm1, %xmm2
760 ptest %xmm2, %xmm0
761 jnc L(less16bytes)
762L(44bytes):
763 mov $-44, %ebx
764 movdqu -44(%eax), %xmm1
765 movdqu -44(%edx), %xmm2
766 pxor %xmm1, %xmm2
767 ptest %xmm2, %xmm0
768 jnc L(less16bytes)
769L(28bytes):
770 mov $-28, %ebx
771 movdqu -28(%eax), %xmm1
772 movdqu -28(%edx), %xmm2
773 pxor %xmm1, %xmm2
774 ptest %xmm2, %xmm0
775 jnc L(less16bytes)
be13f7bf 776
904057bc 777 mov -12(%eax), %ecx
be13f7bf 778# ifndef USE_AS_WMEMCMP
904057bc
L
779 mov -12(%edx), %ebx
780 cmp %ebx, %ecx
be13f7bf
LD
781# else
782 cmp -12(%edx), %ecx
783# endif
904057bc 784 jne L(find_diff)
be13f7bf 785
904057bc 786 mov -8(%eax), %ecx
be13f7bf 787# ifndef USE_AS_WMEMCMP
904057bc
L
788 mov -8(%edx), %ebx
789 cmp %ebx, %ecx
be13f7bf
LD
790# else
791 cmp -8(%edx), %ecx
792# endif
904057bc 793 jne L(find_diff)
be13f7bf 794
904057bc 795 mov -4(%eax), %ecx
be13f7bf 796# ifndef USE_AS_WMEMCMP
904057bc
L
797 mov -4(%edx), %ebx
798 cmp %ebx, %ecx
be13f7bf
LD
799# else
800 cmp -4(%edx), %ecx
801# endif
904057bc
L
802 mov $0, %eax
803 jne L(find_diff)
804 RETURN
805
be13f7bf
LD
806# ifndef USE_AS_WMEMCMP
807 .p2align 4
904057bc
L
808L(61bytes):
809 movdqu -61(%eax), %xmm1
810 movdqu -61(%edx), %xmm2
811 mov $-61, %ebx
812 pxor %xmm1, %xmm2
813 ptest %xmm2, %xmm0
814 jnc L(less16bytes)
815L(45bytes):
816 mov $-45, %ebx
817 movdqu -45(%eax), %xmm1
818 movdqu -45(%edx), %xmm2
819 pxor %xmm1, %xmm2
820 ptest %xmm2, %xmm0
821 jnc L(less16bytes)
822L(29bytes):
823 mov $-29, %ebx
824 movdqu -29(%eax), %xmm1
825 movdqu -29(%edx), %xmm2
826 pxor %xmm1, %xmm2
827 ptest %xmm2, %xmm0
828 jnc L(less16bytes)
829
830 mov -13(%eax), %ecx
831 mov -13(%edx), %ebx
832 cmp %ebx, %ecx
833 jne L(find_diff)
834
835 mov -9(%eax), %ecx
836 mov -9(%edx), %ebx
837 cmp %ebx, %ecx
838 jne L(find_diff)
839
840 mov -5(%eax), %ecx
841 mov -5(%edx), %ebx
842 cmp %ebx, %ecx
843 jne L(find_diff)
844 movzbl -1(%eax), %ecx
845 cmp -1(%edx), %cl
846 mov $0, %eax
847 jne L(end)
848 RETURN
849
be13f7bf 850 .p2align 4
904057bc
L
851L(62bytes):
852 movdqu -62(%eax), %xmm1
853 movdqu -62(%edx), %xmm2
854 mov $-62, %ebx
855 pxor %xmm1, %xmm2
856 ptest %xmm2, %xmm0
857 jnc L(less16bytes)
858L(46bytes):
859 mov $-46, %ebx
860 movdqu -46(%eax), %xmm1
861 movdqu -46(%edx), %xmm2
862 pxor %xmm1, %xmm2
863 ptest %xmm2, %xmm0
864 jnc L(less16bytes)
865L(30bytes):
866 mov $-30, %ebx
867 movdqu -30(%eax), %xmm1
868 movdqu -30(%edx), %xmm2
869 pxor %xmm1, %xmm2
870 ptest %xmm2, %xmm0
871 jnc L(less16bytes)
872 mov -14(%eax), %ecx
873 mov -14(%edx), %ebx
874 cmp %ebx, %ecx
875 jne L(find_diff)
876 mov -10(%eax), %ecx
877 mov -10(%edx), %ebx
878 cmp %ebx, %ecx
879 jne L(find_diff)
880 mov -6(%eax), %ecx
881 mov -6(%edx), %ebx
882 cmp %ebx, %ecx
883 jne L(find_diff)
884 movzwl -2(%eax), %ecx
885 movzwl -2(%edx), %ebx
886 cmp %bl, %cl
887 jne L(end)
888 cmp %bh, %ch
889 mov $0, %eax
890 jne L(end)
891 RETURN
892
be13f7bf 893 .p2align 4
904057bc
L
894L(63bytes):
895 movdqu -63(%eax), %xmm1
896 movdqu -63(%edx), %xmm2
897 mov $-63, %ebx
898 pxor %xmm1, %xmm2
899 ptest %xmm2, %xmm0
900 jnc L(less16bytes)
901L(47bytes):
902 mov $-47, %ebx
903 movdqu -47(%eax), %xmm1
904 movdqu -47(%edx), %xmm2
905 pxor %xmm1, %xmm2
906 ptest %xmm2, %xmm0
907 jnc L(less16bytes)
908L(31bytes):
909 mov $-31, %ebx
910 movdqu -31(%eax), %xmm1
911 movdqu -31(%edx), %xmm2
912 pxor %xmm1, %xmm2
913 ptest %xmm2, %xmm0
914 jnc L(less16bytes)
915
916 movl -15(%eax), %ecx
917 movl -15(%edx), %ebx
918 cmp %ebx, %ecx
919 jne L(find_diff)
920 movl -11(%eax), %ecx
921 movl -11(%edx), %ebx
922 cmp %ebx, %ecx
923 jne L(find_diff)
924 movl -7(%eax), %ecx
925 movl -7(%edx), %ebx
926 cmp %ebx, %ecx
927 jne L(find_diff)
928 movzwl -3(%eax), %ecx
929 movzwl -3(%edx), %ebx
930 cmpb %bl, %cl
931 jne L(end)
932 cmp %bx, %cx
933 jne L(end)
934 movzbl -1(%eax), %eax
935 cmpb -1(%edx), %al
936 mov $0, %eax
937 jne L(end)
938 RETURN
be13f7bf 939# endif
904057bc 940
be13f7bf 941 .p2align 4
904057bc
L
942L(64bytes):
943 movdqu -64(%eax), %xmm1
944 movdqu -64(%edx), %xmm2
945 mov $-64, %ebx
946 pxor %xmm1, %xmm2
947 ptest %xmm2, %xmm0
948 jnc L(less16bytes)
949L(48bytes):
950 movdqu -48(%eax), %xmm1
951 movdqu -48(%edx), %xmm2
952 mov $-48, %ebx
953 pxor %xmm1, %xmm2
954 ptest %xmm2, %xmm0
955 jnc L(less16bytes)
956L(32bytes):
957 movdqu -32(%eax), %xmm1
958 movdqu -32(%edx), %xmm2
959 mov $-32, %ebx
960 pxor %xmm1, %xmm2
961 ptest %xmm2, %xmm0
962 jnc L(less16bytes)
963
964 mov -16(%eax), %ecx
be13f7bf 965# ifndef USE_AS_WMEMCMP
904057bc
L
966 mov -16(%edx), %ebx
967 cmp %ebx, %ecx
be13f7bf
LD
968# else
969 cmp -16(%edx), %ecx
970# endif
904057bc
L
971 jne L(find_diff)
972
973 mov -12(%eax), %ecx
be13f7bf 974# ifndef USE_AS_WMEMCMP
904057bc
L
975 mov -12(%edx), %ebx
976 cmp %ebx, %ecx
be13f7bf
LD
977# else
978 cmp -12(%edx), %ecx
979# endif
904057bc
L
980 jne L(find_diff)
981
982 mov -8(%eax), %ecx
be13f7bf 983# ifndef USE_AS_WMEMCMP
904057bc
L
984 mov -8(%edx), %ebx
985 cmp %ebx, %ecx
be13f7bf
LD
986# else
987 cmp -8(%edx), %ecx
988# endif
904057bc
L
989 jne L(find_diff)
990
991 mov -4(%eax), %ecx
be13f7bf 992# ifndef USE_AS_WMEMCMP
904057bc
L
993 mov -4(%edx), %ebx
994 cmp %ebx, %ecx
be13f7bf
LD
995# else
996 cmp -4(%edx), %ecx
997# endif
904057bc
L
998 mov $0, %eax
999 jne L(find_diff)
1000 RETURN
1001
be13f7bf
LD
/* Locate the differing dword inside a 16-byte chunk that a ptest in one
   of the L(NNbytes) handlers reported as different.  On entry EBX holds
   the (negative) offset of that chunk relative to EAX/EDX, which point
   past the end of the buffers.  The four dwords are compared in address
   order and the first mismatch goes to L(find_diff).
   NOTE(review): since the chunk is known to differ, the final RETURN
   looks unreachable in practice -- confirm. */
1002# ifndef USE_AS_WMEMCMP
1003 .p2align 4
904057bc
L
1004L(less16bytes):
/* Rewind both pointers to the start of the differing chunk. */
1005 add %ebx, %eax
1006 add %ebx, %edx
6bb74d9f 1007
904057bc
L
/* memcmp: both dwords are loaded into registers because L(find_diff)
   inspects ECX and EBX byte by byte. */
1008 mov (%eax), %ecx
1009 mov (%edx), %ebx
1010 cmp %ebx, %ecx
1011 jne L(find_diff)
1012
1013 mov 4(%eax), %ecx
1014 mov 4(%edx), %ebx
1015 cmp %ebx, %ecx
1016 jne L(find_diff)
1017
1018 mov 8(%eax), %ecx
1019 mov 8(%edx), %ebx
1020 cmp %ebx, %ecx
1021 jne L(find_diff)
1022
1023 mov 12(%eax), %ecx
1024 mov 12(%edx), %ebx
1025 cmp %ebx, %ecx
/* mov leaves the flags alone; EAX = 0 is the result if RETURN is reached. */
1026 mov $0, %eax
1027 jne L(find_diff)
1028 RETURN
be13f7bf
LD
1029# else
1030 .p2align 4
1031L(less16bytes):
1032 add %ebx, %eax
1033 add %ebx, %edx
1034
/* wmemcmp: the dword is compared directly against memory; the wmemcmp
   L(find_diff) only needs the flags of this cmp. */
1035 mov (%eax), %ecx
1036 cmp (%edx), %ecx
1037 jne L(find_diff)
1038
1039 mov 4(%eax), %ecx
1040 cmp 4(%edx), %ecx
1041 jne L(find_diff)
1042
1043 mov 8(%eax), %ecx
1044 cmp 8(%edx), %ecx
1045 jne L(find_diff)
1046
1047 mov 12(%eax), %ecx
1048 cmp 12(%edx), %ecx
1049
1050 mov $0, %eax
1051 jne L(find_diff)
1052 RETURN
1053# endif
904057bc 1054
/* Common "difference found" exit; EBX is on the stack in both variants.
   memcmp:  ECX holds a dword from the first buffer and EBX the dword
            from the same position in the second buffer.  The first
            differing byte in address order decides the result, compared
            as unsigned.
   wmemcmp: the flags from the caller's dword cmp are still live and are
            tested with a signed condition. */
be13f7bf 1055 .p2align 4
904057bc 1056L(find_diff):
be13f7bf 1057# ifndef USE_AS_WMEMCMP
904057bc
L
/* Byte 0 (lowest address, low byte on little-endian x86). */
1058 cmpb %bl, %cl
1059 jne L(end)
/* Low bytes are equal here, so this 16-bit compare is decided by byte 1. */
1060 cmp %bx, %cx
1061 jne L(end)
/* Bring bytes 2 and 3 down and repeat. */
1062 shr $16,%ecx
1063 shr $16,%ebx
1064 cmp %bl, %cl
1065 jne L(end)
1066 cmp %bx, %cx
/* L(end) is also entered directly from the tail handlers with flags set
   by a byte/word compare.  popl and mov leave the flags intact, so ja
   tests "first buffer above second, unsigned": return 1, else -1. */
1067L(end):
be13f7bf 1068 POP (%ebx)
904057bc
L
1069 mov $1, %eax
1070 ja L(bigger)
1071 neg %eax
1072L(bigger):
1073 ret
be13f7bf
LD
1074# else
/* wmemcmp: jg tests "first element greater, signed": return 1, else -1. */
1075 POP (%ebx)
1076 mov $1, %eax
1077 jg L(bigger)
1078 neg %eax
1079 ret
1080
1081 .p2align 4
1082L(bigger):
1083 ret
1084# endif
020ecba7 1085END (MEMCMP)
904057bc 1086
020ecba7 1087 .section .rodata.sse4.2,"a",@progbits
be13f7bf 1088 .p2align 2
020ecba7 1089 .type L(table_64bytes), @object
be13f7bf 1090# ifndef USE_AS_WMEMCMP
904057bc
L
1091L(table_64bytes):
1092 .int JMPTBL (L(0bytes), L(table_64bytes))
1093 .int JMPTBL (L(1bytes), L(table_64bytes))
1094 .int JMPTBL (L(2bytes), L(table_64bytes))
1095 .int JMPTBL (L(3bytes), L(table_64bytes))
1096 .int JMPTBL (L(4bytes), L(table_64bytes))
1097 .int JMPTBL (L(5bytes), L(table_64bytes))
1098 .int JMPTBL (L(6bytes), L(table_64bytes))
1099 .int JMPTBL (L(7bytes), L(table_64bytes))
1100 .int JMPTBL (L(8bytes), L(table_64bytes))
1101 .int JMPTBL (L(9bytes), L(table_64bytes))
1102 .int JMPTBL (L(10bytes), L(table_64bytes))
1103 .int JMPTBL (L(11bytes), L(table_64bytes))
1104 .int JMPTBL (L(12bytes), L(table_64bytes))
1105 .int JMPTBL (L(13bytes), L(table_64bytes))
1106 .int JMPTBL (L(14bytes), L(table_64bytes))
1107 .int JMPTBL (L(15bytes), L(table_64bytes))
1108 .int JMPTBL (L(16bytes), L(table_64bytes))
1109 .int JMPTBL (L(17bytes), L(table_64bytes))
1110 .int JMPTBL (L(18bytes), L(table_64bytes))
1111 .int JMPTBL (L(19bytes), L(table_64bytes))
1112 .int JMPTBL (L(20bytes), L(table_64bytes))
1113 .int JMPTBL (L(21bytes), L(table_64bytes))
1114 .int JMPTBL (L(22bytes), L(table_64bytes))
1115 .int JMPTBL (L(23bytes), L(table_64bytes))
1116 .int JMPTBL (L(24bytes), L(table_64bytes))
1117 .int JMPTBL (L(25bytes), L(table_64bytes))
1118 .int JMPTBL (L(26bytes), L(table_64bytes))
1119 .int JMPTBL (L(27bytes), L(table_64bytes))
1120 .int JMPTBL (L(28bytes), L(table_64bytes))
1121 .int JMPTBL (L(29bytes), L(table_64bytes))
1122 .int JMPTBL (L(30bytes), L(table_64bytes))
1123 .int JMPTBL (L(31bytes), L(table_64bytes))
1124 .int JMPTBL (L(32bytes), L(table_64bytes))
1125 .int JMPTBL (L(33bytes), L(table_64bytes))
1126 .int JMPTBL (L(34bytes), L(table_64bytes))
1127 .int JMPTBL (L(35bytes), L(table_64bytes))
1128 .int JMPTBL (L(36bytes), L(table_64bytes))
1129 .int JMPTBL (L(37bytes), L(table_64bytes))
1130 .int JMPTBL (L(38bytes), L(table_64bytes))
1131 .int JMPTBL (L(39bytes), L(table_64bytes))
1132 .int JMPTBL (L(40bytes), L(table_64bytes))
1133 .int JMPTBL (L(41bytes), L(table_64bytes))
1134 .int JMPTBL (L(42bytes), L(table_64bytes))
1135 .int JMPTBL (L(43bytes), L(table_64bytes))
1136 .int JMPTBL (L(44bytes), L(table_64bytes))
1137 .int JMPTBL (L(45bytes), L(table_64bytes))
1138 .int JMPTBL (L(46bytes), L(table_64bytes))
1139 .int JMPTBL (L(47bytes), L(table_64bytes))
1140 .int JMPTBL (L(48bytes), L(table_64bytes))
1141 .int JMPTBL (L(49bytes), L(table_64bytes))
1142 .int JMPTBL (L(50bytes), L(table_64bytes))
1143 .int JMPTBL (L(51bytes), L(table_64bytes))
1144 .int JMPTBL (L(52bytes), L(table_64bytes))
1145 .int JMPTBL (L(53bytes), L(table_64bytes))
1146 .int JMPTBL (L(54bytes), L(table_64bytes))
1147 .int JMPTBL (L(55bytes), L(table_64bytes))
1148 .int JMPTBL (L(56bytes), L(table_64bytes))
1149 .int JMPTBL (L(57bytes), L(table_64bytes))
1150 .int JMPTBL (L(58bytes), L(table_64bytes))
1151 .int JMPTBL (L(59bytes), L(table_64bytes))
1152 .int JMPTBL (L(60bytes), L(table_64bytes))
1153 .int JMPTBL (L(61bytes), L(table_64bytes))
1154 .int JMPTBL (L(62bytes), L(table_64bytes))
1155 .int JMPTBL (L(63bytes), L(table_64bytes))
1156 .int JMPTBL (L(64bytes), L(table_64bytes))
be13f7bf
LD
1157# else
1158L(table_64bytes):
1159 .int JMPTBL (L(0bytes), L(table_64bytes))
1160 .int JMPTBL (L(unreal_case), L(table_64bytes))
1161 .int JMPTBL (L(unreal_case), L(table_64bytes))
1162 .int JMPTBL (L(unreal_case), L(table_64bytes))
1163 .int JMPTBL (L(4bytes), L(table_64bytes))
1164 .int JMPTBL (L(unreal_case), L(table_64bytes))
1165 .int JMPTBL (L(unreal_case), L(table_64bytes))
1166 .int JMPTBL (L(unreal_case), L(table_64bytes))
1167 .int JMPTBL (L(8bytes), L(table_64bytes))
1168 .int JMPTBL (L(unreal_case), L(table_64bytes))
1169 .int JMPTBL (L(unreal_case), L(table_64bytes))
1170 .int JMPTBL (L(unreal_case), L(table_64bytes))
1171 .int JMPTBL (L(12bytes), L(table_64bytes))
1172 .int JMPTBL (L(unreal_case), L(table_64bytes))
1173 .int JMPTBL (L(unreal_case), L(table_64bytes))
1174 .int JMPTBL (L(unreal_case), L(table_64bytes))
1175 .int JMPTBL (L(16bytes), L(table_64bytes))
1176 .int JMPTBL (L(unreal_case), L(table_64bytes))
1177 .int JMPTBL (L(unreal_case), L(table_64bytes))
1178 .int JMPTBL (L(unreal_case), L(table_64bytes))
1179 .int JMPTBL (L(20bytes), L(table_64bytes))
1180 .int JMPTBL (L(unreal_case), L(table_64bytes))
1181 .int JMPTBL (L(unreal_case), L(table_64bytes))
1182 .int JMPTBL (L(unreal_case), L(table_64bytes))
1183 .int JMPTBL (L(24bytes), L(table_64bytes))
1184 .int JMPTBL (L(unreal_case), L(table_64bytes))
1185 .int JMPTBL (L(unreal_case), L(table_64bytes))
1186 .int JMPTBL (L(unreal_case), L(table_64bytes))
1187 .int JMPTBL (L(28bytes), L(table_64bytes))
1188 .int JMPTBL (L(unreal_case), L(table_64bytes))
1189 .int JMPTBL (L(unreal_case), L(table_64bytes))
1190 .int JMPTBL (L(unreal_case), L(table_64bytes))
1191 .int JMPTBL (L(32bytes), L(table_64bytes))
1192 .int JMPTBL (L(unreal_case), L(table_64bytes))
1193 .int JMPTBL (L(unreal_case), L(table_64bytes))
1194 .int JMPTBL (L(unreal_case), L(table_64bytes))
1195 .int JMPTBL (L(36bytes), L(table_64bytes))
1196 .int JMPTBL (L(unreal_case), L(table_64bytes))
1197 .int JMPTBL (L(unreal_case), L(table_64bytes))
1198 .int JMPTBL (L(unreal_case), L(table_64bytes))
1199 .int JMPTBL (L(40bytes), L(table_64bytes))
1200 .int JMPTBL (L(unreal_case), L(table_64bytes))
1201 .int JMPTBL (L(unreal_case), L(table_64bytes))
1202 .int JMPTBL (L(unreal_case), L(table_64bytes))
1203 .int JMPTBL (L(44bytes), L(table_64bytes))
1204 .int JMPTBL (L(unreal_case), L(table_64bytes))
1205 .int JMPTBL (L(unreal_case), L(table_64bytes))
1206 .int JMPTBL (L(unreal_case), L(table_64bytes))
1207 .int JMPTBL (L(48bytes), L(table_64bytes))
1208 .int JMPTBL (L(unreal_case), L(table_64bytes))
1209 .int JMPTBL (L(unreal_case), L(table_64bytes))
1210 .int JMPTBL (L(unreal_case), L(table_64bytes))
1211 .int JMPTBL (L(52bytes), L(table_64bytes))
1212 .int JMPTBL (L(unreal_case), L(table_64bytes))
1213 .int JMPTBL (L(unreal_case), L(table_64bytes))
1214 .int JMPTBL (L(unreal_case), L(table_64bytes))
1215 .int JMPTBL (L(56bytes), L(table_64bytes))
1216 .int JMPTBL (L(unreal_case), L(table_64bytes))
1217 .int JMPTBL (L(unreal_case), L(table_64bytes))
1218 .int JMPTBL (L(unreal_case), L(table_64bytes))
1219 .int JMPTBL (L(60bytes), L(table_64bytes))
1220 .int JMPTBL (L(unreal_case), L(table_64bytes))
1221 .int JMPTBL (L(unreal_case), L(table_64bytes))
1222 .int JMPTBL (L(unreal_case), L(table_64bytes))
1223 .int JMPTBL (L(64bytes), L(table_64bytes))
1224# endif
904057bc 1225#endif