]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i686/multiarch/memcmp-sse4.S
Fix unwind info in x86 memcmp-ssse3.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / memcmp-sse4.S
CommitLineData
904057bc
L
1/* memcmp with SSE4.2
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#ifndef NOT_IN_libc
22
23#include <sysdep.h>
24#include "asm-syntax.h"
25
26#ifndef MEMCMP
27# define MEMCMP __memcmp_sse4_2
28#endif
29
/* Unwind annotation for a 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the stack pointer.
   Emits no instruction.  */
30#define CFI_PUSH(REG) \
31 cfi_adjust_cfa_offset (4); \
32 cfi_rel_offset (REG, 0)
33
/* Unwind annotation for the matching pop: the CFA offset shrinks by 4
   and REG is marked as restored.  Emits no instruction.  */
34#define CFI_POP(REG) \
35 cfi_adjust_cfa_offset (-4); \
36 cfi_restore (REG)
37
/* Push/pop REG and keep the unwind information in step.  */
38#define PUSH(REG) pushl REG; CFI_PUSH (REG)
39#define POP(REG) popl REG; CFI_POP (REG)
40
/* Offsets of the arguments from %esp as read at function entry, before
   anything is pushed: first block, second block, length.  */
41#define PARMS 4
42#define BLK1 PARMS
43#define BLK2 BLK1+4
44#define LEN BLK2+4
/* Return from a path that has %ebx saved on the stack.  The trailing
   CFI_PUSH emits no instruction; it re-records the saved %ebx for the
   code that textually follows the ret.  */
45#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
46
47
48#ifdef SHARED
/* Jump table entries are offsets relative to the table base B.  */
49# define JMPTBL(I, B) I - B
50
51/* Load an entry in a jump table into EBX and branch to it. TABLE is a
52 jump table with relative offsets. INDEX is a register that contains the
53 index into the jump table. SCALE is the scale of INDEX. EBX is
   clobbered. */
54# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
55 /* We first load PC into EBX. */ \
56 call __i686.get_pc_thunk.bx; \
57 /* Get the address of the jump table. */ \
58 addl $(TABLE - .), %ebx; \
59 /* Get the entry and convert the relative offset to the \
60 absolute address. */ \
61 addl (%ebx,INDEX,SCALE), %ebx; \
62 /* EBX now holds the absolute address of the target. Go. */ \
63 jmp *%ebx
64
65 .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
66 .globl __i686.get_pc_thunk.bx
67 .hidden __i686.get_pc_thunk.bx
68 ALIGN (4)
69 .type __i686.get_pc_thunk.bx,@function
/* Copy the caller's return address (the address of the instruction
   after the call) into EBX.  */
70__i686.get_pc_thunk.bx:
71 movl (%esp), %ebx
72 ret
73#else
/* Jump table entries are the target addresses themselves.  */
74# define JMPTBL(I, B) I
75
76/* Branch through an entry in a jump table. TABLE is a jump table
77 holding target addresses. INDEX is a register that contains the
78 index into the jump table. SCALE is the scale of INDEX. */
79# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
80 jmp *TABLE(,INDEX,SCALE)
81#endif
82
83 .section .text.sse4.2,"ax",@progbits
/* memcmp entry point.  Loads the arguments (EAX = first block,
   EDX = second block, ECX = length) and dispatches on the length:
     length <= 1   -> L(less1bytes), nothing pushed
     length > 64   -> L(64bytesormore)
     length < 8    -> L(less8bytes), %ebx pushed
     otherwise     -> jump table entry for the length, %ebx pushed,
                      with EAX/EDX advanced to one past the end of the
                      blocks so handlers use negative offsets.  */
84ENTRY (MEMCMP)
85 movl BLK1(%esp), %eax
86 movl BLK2(%esp), %edx
87 movl LEN(%esp), %ecx
88 cmp $1, %ecx
89 jbe L(less1bytes)
/* XMM0 stays zero for the rest of the function; it is the second
   operand of every ptest below.  */
90 pxor %xmm0, %xmm0
91 cmp $64, %ecx
92 ja L(64bytesormore)
93 cmp $8, %ecx
/* The push sits between the cmp and the jb; pushl leaves the flags
   untouched.  */
94 PUSH (%ebx)
95 jb L(less8bytes)
96 add %ecx, %edx
97 add %ecx, %eax
98 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
/* Byte-by-byte compare for lengths below 8 (lengths 0 and 1 were
   diverted at the entry).  %ebx is saved on the stack; BL is the
   scratch byte.  Bytes 0 and 1 are always compared; before each later
   byte the length in ECX is checked and L(0bytes) returns 0 when the
   data is exhausted.  */
99L(less8bytes):
100
101 mov (%eax), %bl
102 cmpb (%edx), %bl
103 jne L(nonzero)
104
105 mov 1(%eax), %bl
106 cmpb 1(%edx), %bl
107 jne L(nonzero)
6bb74d9f
UD
108
109 cmp $2, %ecx
904057bc
L
110 jz L(0bytes)
111
112 mov 2(%eax), %bl
113 cmpb 2(%edx), %bl
114 jne L(nonzero)
6bb74d9f
UD
115
116 cmp $3, %ecx
904057bc 117 jz L(0bytes)
6bb74d9f 118
904057bc
L
119 mov 3(%eax), %bl
120 cmpb 3(%edx), %bl
121 jne L(nonzero)
6bb74d9f
UD
122
123 cmp $4, %ecx
904057bc 124 jz L(0bytes)
6bb74d9f 125
904057bc
L
126 mov 4(%eax), %bl
127 cmpb 4(%edx), %bl
128 jne L(nonzero)
129
6bb74d9f 130 cmp $5, %ecx
904057bc 131 jz L(0bytes)
6bb74d9f 132
904057bc
L
133 mov 5(%eax), %bl
134 cmpb 5(%edx), %bl
135 jne L(nonzero)
136
6bb74d9f 137 cmp $6, %ecx
904057bc 138 jz L(0bytes)
6bb74d9f 139
904057bc
L
140 mov 6(%eax), %bl
141 cmpb 6(%edx), %bl
142 je L(0bytes)
/* A byte differed.  The flags of the failing cmpb are still live
   (popl and mov do not modify them): return 1 when the byte of the
   first block is above the byte of the second (unsigned), else -1.  */
143L(nonzero):
6bb74d9f 144 POP (%ebx)
904057bc
L
145 mov $1, %eax
146 ja L(above)
147 neg %eax
148L(above):
149 ret
/* L(0bytes), which follows, is entered with %ebx still on the stack,
   so re-record the push for the unwinder.  */
150 CFI_PUSH (%ebx)
151
152 ALIGN (4)
/* Equal-result exit for paths that have %ebx saved on the stack
   (L(less8bytes) and jump table entry 0): restore %ebx and return 0.

   Nothing is re-pushed in the unwind info after the ret.  The code
   that follows, L(less1bytes) and L(0bytesend), is entered straight
   from the function entry before any push, and L(64bytesormore)
   records its own PUSH.  A CFI_PUSH here would describe a saved %ebx
   that is not on the stack at those labels and would leave the CFA
   offset 4 bytes too large for the rest of the function.  */
153L(0bytes):
6bb74d9f 154 POP (%ebx)
904057bc
L
155 xor %eax, %eax
156 ret
6bb74d9f 158
904057bc
L
159 ALIGN (4)
/* Length 0 or 1, reached from the entry with the flags of
   "cmp $1, %ecx" still live and nothing pushed.  Below means length 0.
   For length 1 the result is the difference of the two bytes, each
   zero-extended to 32 bits.  */
160L(less1bytes):
161 jb L(0bytesend)
162 movzbl (%eax), %eax
163 movzbl (%edx), %edx
164 sub %edx, %eax
165 ret
166
167 ALIGN (4)
/* Length 0: the blocks compare equal.  */
168L(0bytesend):
169 xor %eax, %eax
170 ret
171
172 ALIGN (4)
/* Lengths above 64.  %ebx is saved, then EBX = length - 64 (bytes left
   after the first 64-byte block) and ECX = 64 (the stride).  */
173L(64bytesormore):
174 PUSH (%ebx)
175 mov %ecx, %ebx
176 mov $64, %ecx
177 sub $64, %ebx
/* Compare 64 bytes per iteration as four unaligned 16-byte chunks.
   Each chunk pair is XORed into XMM2; with XMM0 zero, ptest sets CF
   only when XMM2 is all zero, so jnc is taken on a mismatch.  */
178L(64bytesormore_loop):
179 movdqu (%eax), %xmm1
180 movdqu (%edx), %xmm2
181 pxor %xmm1, %xmm2
182 ptest %xmm2, %xmm0
183 jnc L(find_16diff)
184
185 movdqu 16(%eax), %xmm1
186 movdqu 16(%edx), %xmm2
187 pxor %xmm1, %xmm2
188 ptest %xmm2, %xmm0
189 jnc L(find_32diff)
190
191 movdqu 32(%eax), %xmm1
192 movdqu 32(%edx), %xmm2
193 pxor %xmm1, %xmm2
194 ptest %xmm2, %xmm0
195 jnc L(find_48diff)
196
197 movdqu 48(%eax), %xmm1
198 movdqu 48(%edx), %xmm2
199 pxor %xmm1, %xmm2
200 ptest %xmm2, %xmm0
201 jnc L(find_64diff)
202 add %ecx, %eax
203 add %ecx, %edx
/* No borrow means at least 64 more bytes remain: go round again.  */
204 sub %ecx, %ebx
205 jae L(64bytesormore_loop)
/* EBX is now (bytes left - 64); adding it to ECX gives the bytes left
   (0..63).  Advance both pointers past the end and dispatch on that
   count.  */
206 add %ebx, %ecx
207 add %ecx, %edx
208 add %ecx, %eax
209 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
/* A chunk of the current 64-byte block differed.  The fall-through
   subtractions leave ECX = 16, 32, 48 or 64, the offset of the end of
   that chunk.  Both pointers are moved there and L(16bytes) locates
   the differing dword.  */
210L(find_16diff):
211 sub $16, %ecx
212L(find_32diff):
213 sub $16, %ecx
214L(find_48diff):
215 sub $16, %ecx
216L(find_64diff):
217 add %ecx, %edx
218 add %ecx, %eax
219 jmp L(16bytes)
220 ALIGN (4)
221
/* Tails of 16, 12, 8 and 4 bytes.  EAX/EDX point one past the end of
   the data; each label compares one dword at a negative offset and
   falls through to the next shorter tail.  On a mismatch ECX/EBX hold
   the differing dwords for L(find_diff).  */
222L(16bytes):
223 mov -16(%eax), %ecx
224 mov -16(%edx), %ebx
225 cmp %ebx, %ecx
226 jne L(find_diff)
227L(12bytes):
228 mov -12(%eax), %ecx
229 mov -12(%edx), %ebx
230 cmp %ebx, %ecx
231 jne L(find_diff)
232L(8bytes):
233 mov -8(%eax), %ecx
234 mov -8(%edx), %ebx
235 cmp %ebx, %ecx
236 jne L(find_diff)
237L(4bytes):
238 mov -4(%eax), %ecx
239 mov -4(%edx), %ebx
240 cmp %ebx, %ecx
/* mov, unlike xor, leaves the flags of the cmp intact for the jne;
   EAX = 0 is the return value when the last dword matches.  */
241 mov $0, %eax
242 jne L(find_diff)
243 RETURN
244
245 ALIGN (4)
/* Tails of 49, 33, 17, 13, 9 and 5 bytes, each falling through to the
   next.  EAX/EDX point one past the end of the data.  The 16-byte
   chunks are XORed and tested with ptest against the zero XMM0; on a
   mismatch EBX carries the chunk's offset to L(less16bytes).  The
   remaining 17 bytes are compared as four dwords and a final byte.  */
246L(49bytes):
247 movdqu -49(%eax), %xmm1
248 movdqu -49(%edx), %xmm2
249 mov $-49, %ebx
250 pxor %xmm1, %xmm2
251 ptest %xmm2, %xmm0
252 jnc L(less16bytes)
253L(33bytes):
254 movdqu -33(%eax), %xmm1
255 movdqu -33(%edx), %xmm2
256 mov $-33, %ebx
257 pxor %xmm1, %xmm2
258 ptest %xmm2, %xmm0
259 jnc L(less16bytes)
260L(17bytes):
261 mov -17(%eax), %ecx
262 mov -17(%edx), %ebx
263 cmp %ebx, %ecx
264 jne L(find_diff)
265L(13bytes):
266 mov -13(%eax), %ecx
267 mov -13(%edx), %ebx
268 cmp %ebx, %ecx
269 jne L(find_diff)
270L(9bytes):
271 mov -9(%eax), %ecx
272 mov -9(%edx), %ebx
273 cmp %ebx, %ecx
274 jne L(find_diff)
275L(5bytes):
276 mov -5(%eax), %ecx
277 mov -5(%edx), %ebx
278 cmp %ebx, %ecx
279 jne L(find_diff)
/* Last byte.  mov keeps the flags of the cmp for the jne.  */
280 movzbl -1(%eax), %ecx
281 cmp -1(%edx), %cl
282 mov $0, %eax
283 jne L(end)
284 RETURN
285
286 ALIGN (4)
/* Tails of 50, 34, 18, 14, 10, 6 and 2 bytes, each falling through to
   the next.  EAX/EDX point one past the end of the data.  A mismatch
   in a 16-byte chunk goes to L(less16bytes) with the chunk's offset in
   EBX; the remaining 18 bytes are compared as four dwords and a final
   16-bit word.  */
287L(50bytes):
288 mov $-50, %ebx
289 movdqu -50(%eax), %xmm1
290 movdqu -50(%edx), %xmm2
291 pxor %xmm1, %xmm2
292 ptest %xmm2, %xmm0
293 jnc L(less16bytes)
294L(34bytes):
295 mov $-34, %ebx
296 movdqu -34(%eax), %xmm1
297 movdqu -34(%edx), %xmm2
298 pxor %xmm1, %xmm2
299 ptest %xmm2, %xmm0
300 jnc L(less16bytes)
301L(18bytes):
302 mov -18(%eax), %ecx
303 mov -18(%edx), %ebx
304 cmp %ebx, %ecx
305 jne L(find_diff)
306L(14bytes):
307 mov -14(%eax), %ecx
308 mov -14(%edx), %ebx
309 cmp %ebx, %ecx
310 jne L(find_diff)
311L(10bytes):
312 mov -10(%eax), %ecx
313 mov -10(%edx), %ebx
314 cmp %ebx, %ecx
315 jne L(find_diff)
316L(6bytes):
317 mov -6(%eax), %ecx
318 mov -6(%edx), %ebx
319 cmp %ebx, %ecx
320 jne L(find_diff)
/* Last two bytes: the lower-addressed byte (BL/CL) is compared first,
   then the higher one (BH/CH).  */
321L(2bytes):
322 movzwl -2(%eax), %ecx
323 movzwl -2(%edx), %ebx
324 cmp %bl, %cl
325 jne L(end)
326 cmp %bh, %ch
327 mov $0, %eax
328 jne L(end)
329 RETURN
330
331 ALIGN (4)
/* Tails of 51, 35, 19, 15, 11, 7, 3 and 1 bytes, each falling through
   to the next.  EAX/EDX point one past the end of the data.  A
   mismatch in a 16-byte chunk goes to L(less16bytes) with the chunk's
   offset in EBX; the remaining 19 bytes are compared as four dwords, a
   16-bit word and a final byte.  */
332L(51bytes):
333 mov $-51, %ebx
334 movdqu -51(%eax), %xmm1
335 movdqu -51(%edx), %xmm2
336 pxor %xmm1, %xmm2
337 ptest %xmm2, %xmm0
338 jnc L(less16bytes)
339L(35bytes):
340 mov $-35, %ebx
341 movdqu -35(%eax), %xmm1
342 movdqu -35(%edx), %xmm2
343 pxor %xmm1, %xmm2
344 ptest %xmm2, %xmm0
345 jnc L(less16bytes)
346L(19bytes):
347 movl -19(%eax), %ecx
348 movl -19(%edx), %ebx
349 cmp %ebx, %ecx
350 jne L(find_diff)
351L(15bytes):
352 movl -15(%eax), %ecx
353 movl -15(%edx), %ebx
354 cmp %ebx, %ecx
355 jne L(find_diff)
356L(11bytes):
357 movl -11(%eax), %ecx
358 movl -11(%edx), %ebx
359 cmp %ebx, %ecx
360 jne L(find_diff)
361L(7bytes):
362 movl -7(%eax), %ecx
363 movl -7(%edx), %ebx
364 cmp %ebx, %ecx
365 jne L(find_diff)
/* Low byte first; if it matches, the word compare is decided by the
   second byte.  */
366L(3bytes):
367 movzwl -3(%eax), %ecx
368 movzwl -3(%edx), %ebx
369 cmpb %bl, %cl
370 jne L(end)
371 cmp %bx, %cx
372 jne L(end)
/* The byte is loaded into AL, compared, and EAX is then zeroed with
   mov so the flags survive for the jne.  */
373L(1bytes):
374 movzbl -1(%eax), %eax
375 cmpb -1(%edx), %al
376 mov $0, %eax
377 jne L(end)
378 RETURN
379
380
/* Tails of 52, 36 and 20 bytes, each falling through to the next:
   16-byte chunks at -52, -36 and -20 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then the final dword at -4.  */
381L(52bytes):
382 movdqu -52(%eax), %xmm1
383 movdqu -52(%edx), %xmm2
384 mov $-52, %ebx
385 pxor %xmm1, %xmm2
386 ptest %xmm2, %xmm0
387 jnc L(less16bytes)
388L(36bytes):
389 movdqu -36(%eax), %xmm1
390 movdqu -36(%edx), %xmm2
391 mov $-36, %ebx
392 pxor %xmm1, %xmm2
393 ptest %xmm2, %xmm0
394 jnc L(less16bytes)
395L(20bytes):
396 movdqu -20(%eax), %xmm1
397 movdqu -20(%edx), %xmm2
398 mov $-20, %ebx
399 pxor %xmm1, %xmm2
400 ptest %xmm2, %xmm0
401 jnc L(less16bytes)
402 mov -4(%eax), %ecx
403 mov -4(%edx), %ebx
404 cmp %ebx, %ecx
405 mov $0, %eax
406 jne L(find_diff)
407 RETURN
408
/* Tails of 53, 37 and 21 bytes, each falling through to the next:
   16-byte chunks at -53, -37 and -21 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then a dword at -5 and the last byte.  */
409L(53bytes):
410 movdqu -53(%eax), %xmm1
411 movdqu -53(%edx), %xmm2
412 mov $-53, %ebx
413 pxor %xmm1, %xmm2
414 ptest %xmm2, %xmm0
415 jnc L(less16bytes)
416L(37bytes):
417 mov $-37, %ebx
418 movdqu -37(%eax), %xmm1
419 movdqu -37(%edx), %xmm2
420 pxor %xmm1, %xmm2
421 ptest %xmm2, %xmm0
422 jnc L(less16bytes)
423L(21bytes):
424 mov $-21, %ebx
425 movdqu -21(%eax), %xmm1
426 movdqu -21(%edx), %xmm2
427 pxor %xmm1, %xmm2
428 ptest %xmm2, %xmm0
429 jnc L(less16bytes)
430 mov -5(%eax), %ecx
431 mov -5(%edx), %ebx
432 cmp %ebx, %ecx
433 jne L(find_diff)
434 movzbl -1(%eax), %ecx
435 cmp -1(%edx), %cl
436 mov $0, %eax
437 jne L(end)
438 RETURN
439
/* Tails of 54, 38 and 22 bytes, each falling through to the next:
   16-byte chunks at -54, -38 and -22 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then a dword at -6 and the last two bytes,
   lower address first.  */
440L(54bytes):
441 movdqu -54(%eax), %xmm1
442 movdqu -54(%edx), %xmm2
443 mov $-54, %ebx
444 pxor %xmm1, %xmm2
445 ptest %xmm2, %xmm0
446 jnc L(less16bytes)
447L(38bytes):
448 mov $-38, %ebx
449 movdqu -38(%eax), %xmm1
450 movdqu -38(%edx), %xmm2
451 pxor %xmm1, %xmm2
452 ptest %xmm2, %xmm0
453 jnc L(less16bytes)
454L(22bytes):
455 mov $-22, %ebx
456 movdqu -22(%eax), %xmm1
457 movdqu -22(%edx), %xmm2
458 pxor %xmm1, %xmm2
459 ptest %xmm2, %xmm0
460 jnc L(less16bytes)
461
462 mov -6(%eax), %ecx
463 mov -6(%edx), %ebx
464 cmp %ebx, %ecx
465 jne L(find_diff)
466 movzwl -2(%eax), %ecx
467 movzwl -2(%edx), %ebx
468 cmp %bl, %cl
469 jne L(end)
470 cmp %bh, %ch
471 mov $0, %eax
472 jne L(end)
473 RETURN
474
/* Tails of 55, 39 and 23 bytes, each falling through to the next:
   16-byte chunks at -55, -39 and -23 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then a dword at -7, a word at -3 and the
   last byte.  */
475L(55bytes):
476 movdqu -55(%eax), %xmm1
477 movdqu -55(%edx), %xmm2
478 mov $-55, %ebx
479 pxor %xmm1, %xmm2
480 ptest %xmm2, %xmm0
481 jnc L(less16bytes)
482L(39bytes):
483 mov $-39, %ebx
484 movdqu -39(%eax), %xmm1
485 movdqu -39(%edx), %xmm2
486 pxor %xmm1, %xmm2
487 ptest %xmm2, %xmm0
488 jnc L(less16bytes)
489L(23bytes):
490 mov $-23, %ebx
491 movdqu -23(%eax), %xmm1
492 movdqu -23(%edx), %xmm2
493 pxor %xmm1, %xmm2
494 ptest %xmm2, %xmm0
495 jnc L(less16bytes)
496 movl -7(%eax), %ecx
497 movl -7(%edx), %ebx
498 cmp %ebx, %ecx
499 jne L(find_diff)
500 movzwl -3(%eax), %ecx
501 movzwl -3(%edx), %ebx
502 cmpb %bl, %cl
503 jne L(end)
504 cmp %bx, %cx
505 jne L(end)
506 movzbl -1(%eax), %eax
507 cmpb -1(%edx), %al
508 mov $0, %eax
509 jne L(end)
510 RETURN
511
/* Tails of 56, 40 and 24 bytes, each falling through to the next:
   16-byte chunks at -56, -40 and -24 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -8 and -4.  */
512L(56bytes):
513 movdqu -56(%eax), %xmm1
514 movdqu -56(%edx), %xmm2
515 mov $-56, %ebx
516 pxor %xmm1, %xmm2
517 ptest %xmm2, %xmm0
518 jnc L(less16bytes)
519L(40bytes):
520 mov $-40, %ebx
521 movdqu -40(%eax), %xmm1
522 movdqu -40(%edx), %xmm2
523 pxor %xmm1, %xmm2
524 ptest %xmm2, %xmm0
525 jnc L(less16bytes)
526L(24bytes):
527 mov $-24, %ebx
528 movdqu -24(%eax), %xmm1
529 movdqu -24(%edx), %xmm2
530 pxor %xmm1, %xmm2
531 ptest %xmm2, %xmm0
532 jnc L(less16bytes)
533
534 mov -8(%eax), %ecx
535 mov -8(%edx), %ebx
536 cmp %ebx, %ecx
537 jne L(find_diff)
538
539 mov -4(%eax), %ecx
540 mov -4(%edx), %ebx
541 cmp %ebx, %ecx
542 mov $0, %eax
543 jne L(find_diff)
544 RETURN
545
/* Tails of 57, 41 and 25 bytes, each falling through to the next:
   16-byte chunks at -57, -41 and -25 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -9 and -5 and the last
   byte.  */
546L(57bytes):
547 movdqu -57(%eax), %xmm1
548 movdqu -57(%edx), %xmm2
549 mov $-57, %ebx
550 pxor %xmm1, %xmm2
551 ptest %xmm2, %xmm0
552 jnc L(less16bytes)
553L(41bytes):
554 mov $-41, %ebx
555 movdqu -41(%eax), %xmm1
556 movdqu -41(%edx), %xmm2
557 pxor %xmm1, %xmm2
558 ptest %xmm2, %xmm0
559 jnc L(less16bytes)
560L(25bytes):
561 mov $-25, %ebx
562 movdqu -25(%eax), %xmm1
563 movdqu -25(%edx), %xmm2
564 pxor %xmm1, %xmm2
565 ptest %xmm2, %xmm0
566 jnc L(less16bytes)
567 mov -9(%eax), %ecx
568 mov -9(%edx), %ebx
569 cmp %ebx, %ecx
570 jne L(find_diff)
571 mov -5(%eax), %ecx
572 mov -5(%edx), %ebx
573 cmp %ebx, %ecx
574 jne L(find_diff)
575 movzbl -1(%eax), %ecx
576 cmp -1(%edx), %cl
577 mov $0, %eax
578 jne L(end)
579 RETURN
580
/* Tails of 58, 42 and 26 bytes, each falling through to the next:
   16-byte chunks at -58, -42 and -26 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -10 and -6 and the last two
   bytes, lower address first.  */
581L(58bytes):
582 movdqu -58(%eax), %xmm1
583 movdqu -58(%edx), %xmm2
584 mov $-58, %ebx
585 pxor %xmm1, %xmm2
586 ptest %xmm2, %xmm0
587 jnc L(less16bytes)
588L(42bytes):
589 mov $-42, %ebx
590 movdqu -42(%eax), %xmm1
591 movdqu -42(%edx), %xmm2
592 pxor %xmm1, %xmm2
593 ptest %xmm2, %xmm0
594 jnc L(less16bytes)
595L(26bytes):
596 mov $-26, %ebx
597 movdqu -26(%eax), %xmm1
598 movdqu -26(%edx), %xmm2
599 pxor %xmm1, %xmm2
600 ptest %xmm2, %xmm0
601 jnc L(less16bytes)
602
603 mov -10(%eax), %ecx
604 mov -10(%edx), %ebx
605 cmp %ebx, %ecx
606 jne L(find_diff)
607
608 mov -6(%eax), %ecx
609 mov -6(%edx), %ebx
610 cmp %ebx, %ecx
611 jne L(find_diff)
6bb74d9f 612
904057bc
L
613 movzwl -2(%eax), %ecx
614 movzwl -2(%edx), %ebx
615 cmp %bl, %cl
616 jne L(end)
617 cmp %bh, %ch
618 mov $0, %eax
619 jne L(end)
620 RETURN
621
/* Tails of 59, 43 and 27 bytes, each falling through to the next:
   16-byte chunks at -59, -43 and -27 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -11 and -7, a word at -3
   and the last byte.  */
622L(59bytes):
623 movdqu -59(%eax), %xmm1
624 movdqu -59(%edx), %xmm2
625 mov $-59, %ebx
626 pxor %xmm1, %xmm2
627 ptest %xmm2, %xmm0
628 jnc L(less16bytes)
629L(43bytes):
630 mov $-43, %ebx
631 movdqu -43(%eax), %xmm1
632 movdqu -43(%edx), %xmm2
633 pxor %xmm1, %xmm2
634 ptest %xmm2, %xmm0
635 jnc L(less16bytes)
636L(27bytes):
637 mov $-27, %ebx
638 movdqu -27(%eax), %xmm1
639 movdqu -27(%edx), %xmm2
640 pxor %xmm1, %xmm2
641 ptest %xmm2, %xmm0
642 jnc L(less16bytes)
643 movl -11(%eax), %ecx
644 movl -11(%edx), %ebx
645 cmp %ebx, %ecx
646 jne L(find_diff)
647 movl -7(%eax), %ecx
648 movl -7(%edx), %ebx
649 cmp %ebx, %ecx
650 jne L(find_diff)
651 movzwl -3(%eax), %ecx
652 movzwl -3(%edx), %ebx
653 cmpb %bl, %cl
654 jne L(end)
655 cmp %bx, %cx
656 jne L(end)
657 movzbl -1(%eax), %eax
658 cmpb -1(%edx), %al
659 mov $0, %eax
660 jne L(end)
661 RETURN
662
/* Tails of 60, 44 and 28 bytes, each falling through to the next:
   16-byte chunks at -60, -44 and -28 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -12, -8 and -4.  */
663L(60bytes):
664 movdqu -60(%eax), %xmm1
665 movdqu -60(%edx), %xmm2
666 mov $-60, %ebx
667 pxor %xmm1, %xmm2
668 ptest %xmm2, %xmm0
669 jnc L(less16bytes)
670L(44bytes):
671 mov $-44, %ebx
672 movdqu -44(%eax), %xmm1
673 movdqu -44(%edx), %xmm2
674 pxor %xmm1, %xmm2
675 ptest %xmm2, %xmm0
676 jnc L(less16bytes)
677L(28bytes):
678 mov $-28, %ebx
679 movdqu -28(%eax), %xmm1
680 movdqu -28(%edx), %xmm2
681 pxor %xmm1, %xmm2
682 ptest %xmm2, %xmm0
683 jnc L(less16bytes)
684 mov -12(%eax), %ecx
685 mov -12(%edx), %ebx
686 cmp %ebx, %ecx
687 jne L(find_diff)
688 mov -8(%eax), %ecx
689 mov -8(%edx), %ebx
690 cmp %ebx, %ecx
691 jne L(find_diff)
692 mov -4(%eax), %ecx
693 mov -4(%edx), %ebx
694 cmp %ebx, %ecx
695 mov $0, %eax
696 jne L(find_diff)
697 RETURN
698
/* Tails of 61, 45 and 29 bytes, each falling through to the next:
   16-byte chunks at -61, -45 and -29 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -13, -9 and -5 and the last
   byte.  */
699L(61bytes):
700 movdqu -61(%eax), %xmm1
701 movdqu -61(%edx), %xmm2
702 mov $-61, %ebx
703 pxor %xmm1, %xmm2
704 ptest %xmm2, %xmm0
705 jnc L(less16bytes)
706L(45bytes):
707 mov $-45, %ebx
708 movdqu -45(%eax), %xmm1
709 movdqu -45(%edx), %xmm2
710 pxor %xmm1, %xmm2
711 ptest %xmm2, %xmm0
712 jnc L(less16bytes)
713L(29bytes):
714 mov $-29, %ebx
715 movdqu -29(%eax), %xmm1
716 movdqu -29(%edx), %xmm2
717 pxor %xmm1, %xmm2
718 ptest %xmm2, %xmm0
719 jnc L(less16bytes)
720
721 mov -13(%eax), %ecx
722 mov -13(%edx), %ebx
723 cmp %ebx, %ecx
724 jne L(find_diff)
725
726 mov -9(%eax), %ecx
727 mov -9(%edx), %ebx
728 cmp %ebx, %ecx
729 jne L(find_diff)
730
731 mov -5(%eax), %ecx
732 mov -5(%edx), %ebx
733 cmp %ebx, %ecx
734 jne L(find_diff)
735 movzbl -1(%eax), %ecx
736 cmp -1(%edx), %cl
737 mov $0, %eax
738 jne L(end)
739 RETURN
740
/* Tails of 62, 46 and 30 bytes, each falling through to the next:
   16-byte chunks at -62, -46 and -30 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -14, -10 and -6 and the
   last two bytes, lower address first.  */
741L(62bytes):
742 movdqu -62(%eax), %xmm1
743 movdqu -62(%edx), %xmm2
744 mov $-62, %ebx
745 pxor %xmm1, %xmm2
746 ptest %xmm2, %xmm0
747 jnc L(less16bytes)
748L(46bytes):
749 mov $-46, %ebx
750 movdqu -46(%eax), %xmm1
751 movdqu -46(%edx), %xmm2
752 pxor %xmm1, %xmm2
753 ptest %xmm2, %xmm0
754 jnc L(less16bytes)
755L(30bytes):
756 mov $-30, %ebx
757 movdqu -30(%eax), %xmm1
758 movdqu -30(%edx), %xmm2
759 pxor %xmm1, %xmm2
760 ptest %xmm2, %xmm0
761 jnc L(less16bytes)
762 mov -14(%eax), %ecx
763 mov -14(%edx), %ebx
764 cmp %ebx, %ecx
765 jne L(find_diff)
766 mov -10(%eax), %ecx
767 mov -10(%edx), %ebx
768 cmp %ebx, %ecx
769 jne L(find_diff)
770 mov -6(%eax), %ecx
771 mov -6(%edx), %ebx
772 cmp %ebx, %ecx
773 jne L(find_diff)
774 movzwl -2(%eax), %ecx
775 movzwl -2(%edx), %ebx
776 cmp %bl, %cl
777 jne L(end)
778 cmp %bh, %ch
779 mov $0, %eax
780 jne L(end)
781 RETURN
782
/* Tails of 63, 47 and 31 bytes, each falling through to the next:
   16-byte chunks at -63, -47 and -31 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -15, -11 and -7, a word at
   -3 and the last byte.  */
783L(63bytes):
784 movdqu -63(%eax), %xmm1
785 movdqu -63(%edx), %xmm2
786 mov $-63, %ebx
787 pxor %xmm1, %xmm2
788 ptest %xmm2, %xmm0
789 jnc L(less16bytes)
790L(47bytes):
791 mov $-47, %ebx
792 movdqu -47(%eax), %xmm1
793 movdqu -47(%edx), %xmm2
794 pxor %xmm1, %xmm2
795 ptest %xmm2, %xmm0
796 jnc L(less16bytes)
797L(31bytes):
798 mov $-31, %ebx
799 movdqu -31(%eax), %xmm1
800 movdqu -31(%edx), %xmm2
801 pxor %xmm1, %xmm2
802 ptest %xmm2, %xmm0
803 jnc L(less16bytes)
804
805 movl -15(%eax), %ecx
806 movl -15(%edx), %ebx
807 cmp %ebx, %ecx
808 jne L(find_diff)
809 movl -11(%eax), %ecx
810 movl -11(%edx), %ebx
811 cmp %ebx, %ecx
812 jne L(find_diff)
813 movl -7(%eax), %ecx
814 movl -7(%edx), %ebx
815 cmp %ebx, %ecx
816 jne L(find_diff)
817 movzwl -3(%eax), %ecx
818 movzwl -3(%edx), %ebx
819 cmpb %bl, %cl
820 jne L(end)
821 cmp %bx, %cx
822 jne L(end)
823 movzbl -1(%eax), %eax
824 cmpb -1(%edx), %al
825 mov $0, %eax
826 jne L(end)
827 RETURN
828
/* Tails of 64, 48 and 32 bytes, each falling through to the next:
   16-byte chunks at -64, -48 and -32 (mismatch -> L(less16bytes) with
   the chunk offset in EBX), then dwords at -16, -12, -8 and -4.  */
829L(64bytes):
830 movdqu -64(%eax), %xmm1
831 movdqu -64(%edx), %xmm2
832 mov $-64, %ebx
833 pxor %xmm1, %xmm2
834 ptest %xmm2, %xmm0
835 jnc L(less16bytes)
836L(48bytes):
837 movdqu -48(%eax), %xmm1
838 movdqu -48(%edx), %xmm2
839 mov $-48, %ebx
840 pxor %xmm1, %xmm2
841 ptest %xmm2, %xmm0
842 jnc L(less16bytes)
843L(32bytes):
844 movdqu -32(%eax), %xmm1
845 movdqu -32(%edx), %xmm2
846 mov $-32, %ebx
847 pxor %xmm1, %xmm2
848 ptest %xmm2, %xmm0
849 jnc L(less16bytes)
850
851 mov -16(%eax), %ecx
852 mov -16(%edx), %ebx
853 cmp %ebx, %ecx
854 jne L(find_diff)
855
856 mov -12(%eax), %ecx
857 mov -12(%edx), %ebx
858 cmp %ebx, %ecx
859 jne L(find_diff)
860
861 mov -8(%eax), %ecx
862 mov -8(%edx), %ebx
863 cmp %ebx, %ecx
864 jne L(find_diff)
865
866 mov -4(%eax), %ecx
867 mov -4(%edx), %ebx
868 cmp %ebx, %ecx
869 mov $0, %eax
870 jne L(find_diff)
871 RETURN
872
/* A 16-byte chunk differed.  On entry EBX holds the (negative) offset
   of that chunk from EAX/EDX.  Both pointers are moved to the start of
   the chunk and its four dwords are compared in address order; the
   first differing pair is handed to L(find_diff) in ECX/EBX.  If all
   four pairs match, 0 is returned.  */
873L(less16bytes):
874 add %ebx, %eax
875 add %ebx, %edx
6bb74d9f 876
904057bc
L
877 mov (%eax), %ecx
878 mov (%edx), %ebx
879 cmp %ebx, %ecx
880 jne L(find_diff)
881
882 mov 4(%eax), %ecx
883 mov 4(%edx), %ebx
884 cmp %ebx, %ecx
885 jne L(find_diff)
886
887 mov 8(%eax), %ecx
888 mov 8(%edx), %ebx
889 cmp %ebx, %ecx
890 jne L(find_diff)
891
892 mov 12(%eax), %ecx
893 mov 12(%edx), %ebx
894 cmp %ebx, %ecx
/* mov keeps the flags of the cmp for the jne.  */
895 mov $0, %eax
896 jne L(find_diff)
897 RETURN
898
899 ALIGN (4)
/* ECX and EBX hold a dword from the first and second block that
   differ.  Find the lowest-addressed differing byte: compare the low
   byte, then the low word (decided by its second byte once the first
   matched), then shift the upper half down and repeat.  The final
   compare falls into L(end) with its flags live.  */
900L(find_diff):
901 cmpb %bl, %cl
902 jne L(end)
903 cmp %bx, %cx
904 jne L(end)
905 shr $16,%ecx
906 shr $16,%ebx
907 cmp %bl, %cl
908 jne L(end)
909 cmp %bx, %cx
/* Common mismatch exit for paths with %ebx saved.  The flags come from
   the compare that found the difference (popl and mov do not modify
   them): return 1 when the first block's value is above the second's
   (unsigned), else -1.  */
910L(end):
6bb74d9f 911 POP (%ebx)
904057bc
L
912 mov $1, %eax
913 ja L(bigger)
914 neg %eax
915L(bigger):
916 ret
917
918 ALIGN (2)
/* Jump table indexed by the number of bytes still to compare (0..64).
   Entry N is the handler L(Nbytes).  JMPTBL stores each entry relative
   to the table base when SHARED is defined and as the label itself
   otherwise.  */
919L(table_64bytes):
920 .int JMPTBL (L(0bytes), L(table_64bytes))
921 .int JMPTBL (L(1bytes), L(table_64bytes))
922 .int JMPTBL (L(2bytes), L(table_64bytes))
923 .int JMPTBL (L(3bytes), L(table_64bytes))
924 .int JMPTBL (L(4bytes), L(table_64bytes))
925 .int JMPTBL (L(5bytes), L(table_64bytes))
926 .int JMPTBL (L(6bytes), L(table_64bytes))
927 .int JMPTBL (L(7bytes), L(table_64bytes))
928 .int JMPTBL (L(8bytes), L(table_64bytes))
929 .int JMPTBL (L(9bytes), L(table_64bytes))
930 .int JMPTBL (L(10bytes), L(table_64bytes))
931 .int JMPTBL (L(11bytes), L(table_64bytes))
932 .int JMPTBL (L(12bytes), L(table_64bytes))
933 .int JMPTBL (L(13bytes), L(table_64bytes))
934 .int JMPTBL (L(14bytes), L(table_64bytes))
935 .int JMPTBL (L(15bytes), L(table_64bytes))
936 .int JMPTBL (L(16bytes), L(table_64bytes))
937 .int JMPTBL (L(17bytes), L(table_64bytes))
938 .int JMPTBL (L(18bytes), L(table_64bytes))
939 .int JMPTBL (L(19bytes), L(table_64bytes))
940 .int JMPTBL (L(20bytes), L(table_64bytes))
941 .int JMPTBL (L(21bytes), L(table_64bytes))
942 .int JMPTBL (L(22bytes), L(table_64bytes))
943 .int JMPTBL (L(23bytes), L(table_64bytes))
944 .int JMPTBL (L(24bytes), L(table_64bytes))
945 .int JMPTBL (L(25bytes), L(table_64bytes))
946 .int JMPTBL (L(26bytes), L(table_64bytes))
947 .int JMPTBL (L(27bytes), L(table_64bytes))
948 .int JMPTBL (L(28bytes), L(table_64bytes))
949 .int JMPTBL (L(29bytes), L(table_64bytes))
950 .int JMPTBL (L(30bytes), L(table_64bytes))
951 .int JMPTBL (L(31bytes), L(table_64bytes))
952 .int JMPTBL (L(32bytes), L(table_64bytes))
953 .int JMPTBL (L(33bytes), L(table_64bytes))
954 .int JMPTBL (L(34bytes), L(table_64bytes))
955 .int JMPTBL (L(35bytes), L(table_64bytes))
956 .int JMPTBL (L(36bytes), L(table_64bytes))
957 .int JMPTBL (L(37bytes), L(table_64bytes))
958 .int JMPTBL (L(38bytes), L(table_64bytes))
959 .int JMPTBL (L(39bytes), L(table_64bytes))
960 .int JMPTBL (L(40bytes), L(table_64bytes))
961 .int JMPTBL (L(41bytes), L(table_64bytes))
962 .int JMPTBL (L(42bytes), L(table_64bytes))
963 .int JMPTBL (L(43bytes), L(table_64bytes))
964 .int JMPTBL (L(44bytes), L(table_64bytes))
965 .int JMPTBL (L(45bytes), L(table_64bytes))
966 .int JMPTBL (L(46bytes), L(table_64bytes))
967 .int JMPTBL (L(47bytes), L(table_64bytes))
968 .int JMPTBL (L(48bytes), L(table_64bytes))
969 .int JMPTBL (L(49bytes), L(table_64bytes))
970 .int JMPTBL (L(50bytes), L(table_64bytes))
971 .int JMPTBL (L(51bytes), L(table_64bytes))
972 .int JMPTBL (L(52bytes), L(table_64bytes))
973 .int JMPTBL (L(53bytes), L(table_64bytes))
974 .int JMPTBL (L(54bytes), L(table_64bytes))
975 .int JMPTBL (L(55bytes), L(table_64bytes))
976 .int JMPTBL (L(56bytes), L(table_64bytes))
977 .int JMPTBL (L(57bytes), L(table_64bytes))
978 .int JMPTBL (L(58bytes), L(table_64bytes))
979 .int JMPTBL (L(59bytes), L(table_64bytes))
980 .int JMPTBL (L(60bytes), L(table_64bytes))
981 .int JMPTBL (L(61bytes), L(table_64bytes))
982 .int JMPTBL (L(62bytes), L(table_64bytes))
983 .int JMPTBL (L(63bytes), L(table_64bytes))
984 .int JMPTBL (L(64bytes), L(table_64bytes))
985
986END (MEMCMP)
987
988#endif