]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/memcmp-ssse3.S
Fix unwind info in x86 memcmp-ssse3.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / memcmp-ssse3.S
1 /* memcmp with SSSE3
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #ifndef NOT_IN_libc
22
23 #include <sysdep.h>
24 #include "asm-syntax.h"
25
26 #ifndef MEMCMP
27 # define MEMCMP __memcmp_ssse3
28 #endif
29
/* Unwind annotation matching a 4-byte push of REG: the CFA offset grows
   by 4 and REG is recorded as saved at offset 0.  The cfi_* helpers are
   not defined in this file; presumably they wrap the assembler's .cfi_*
   directives -- confirm in the included headers.  */
30 #define CFI_PUSH(REG) \
31 cfi_adjust_cfa_offset (4); \
32 cfi_rel_offset (REG, 0)
33
/* Unwind annotation matching a 4-byte pop of REG: the CFA offset shrinks
   by 4 and REG is marked as restored.  */
34 #define CFI_POP(REG) \
35 cfi_adjust_cfa_offset (-4); \
36 cfi_restore (REG)
37
/* Push / pop REG and emit the matching unwind annotation in one step.  */
38 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
39 #define POP(REG) popl REG; CFI_POP (REG)
40
/* Offsets of the three arguments relative to %esp at function entry
   (they are used as BLK1(%esp), BLK2(%esp), LEN(%esp) before any push).
   PARMS is 4, presumably to skip the return address.  */
41 #define PARMS 4
42 #define BLK1 PARMS
43 #define BLK2 BLK1+4
44 #define LEN BLK2+4
/* RETURN_END: pop %edi, %esi, %ebx (reverse of the push order used at
   L(48bytesormore)) and return.
   RETURN: same, then re-establish the remembered unwind state so the code
   that follows the ret is annotated with all three registers pushed.  */
45 #define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
46 #define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
47
48 .section .text.ssse3,"ax",@progbits
/* MEMCMP entry.  Loads the three stack arguments (%eax = first block,
   %edx = second block, %ecx = byte count) and dispatches on the count.
   The result is returned in %eax.  */
49 ENTRY (MEMCMP)
50 movl LEN(%esp), %ecx
51 movl BLK1(%esp), %eax
52 cmp $48, %ecx
53 movl BLK2(%esp), %edx /* mov does not disturb the flags of the cmp above */
54 jae L(48bytesormore) /* count >= 48 (unsigned) */
55 cmp $1, %ecx
56 jbe L(less1bytes) /* count is 0 or 1; the target reuses these flags */
57 PUSH (%ebx)
58 add %ecx, %edx /* both pointers now address one past the last byte */
59 add %ecx, %eax
60 jmp L(less48bytes)
61
62 ALIGN (4)
63 CFI_POP (%ebx) /* annotation only: the code below is reached before %ebx is pushed */
/* Count is 0 or 1.  On entry the flags still come from "cmp $1, %ecx".  */
64 L(less1bytes):
65 jb L(zero) /* count == 0: blocks compare equal */
66 movb (%eax), %cl
67 cmp (%edx), %cl /* flags from (first block byte) - (second block byte) */
68 je L(zero)
69 mov $1, %eax /* mov keeps the flags of the cmp above */
70 ja L(1bytesend) /* first byte is larger (unsigned): return 1 */
71 neg %eax /* otherwise return -1 */
72 L(1bytesend):
73 ret
74
75 ALIGN (4)
/* Return 0.  */
76 L(zero):
77 mov $0, %eax
78 ret
79
80 ALIGN (4)
/* Count >= 48.  Saves %ebx/%esi/%edi, compares the first 16 bytes with
   unaligned loads, then aligns the first-block pointer down to 16 bytes and
   dispatches on the resulting misalignment of the second-block pointer.
   From here on %edi walks the first block and %esi the second.  */
81 L(48bytesormore):
82 PUSH (%ebx)
83 PUSH (%esi)
84 PUSH (%edi)
85 cfi_remember_state /* unwind state "three registers pushed", restored after each early exit below */
86 movdqu (%eax), %xmm3
87 movdqu (%edx), %xmm0
88 movl %eax, %edi
89 movl %edx, %esi
90 pcmpeqb %xmm0, %xmm3 /* 0xff in every byte lane that is equal */
91 pmovmskb %xmm3, %edx /* 16-bit mask, one bit per equal byte */
92 lea 16(%edi), %edi
93
94 sub $0xffff, %edx /* zero iff all 16 bytes matched */
95 lea 16(%esi), %esi /* lea keeps the flags of the sub above */
96 jnz L(less16bytes) /* mismatch inside the first 16 bytes */
97 mov %edi, %edx
98 and $0xf, %edx /* %edx = low four bits of %edi */
99 xor %edx, %edi /* clear them: %edi is now 16-byte aligned */
100 sub %edx, %esi /* move %esi back by the same amount */
101 add %edx, %ecx /* and grow the count by the same amount */
102 mov %esi, %edx
103 and $0xf, %edx /* %edx = misalignment of %esi (0..15) */
104 jz L(shr_0) /* both pointers aligned */
105 xor %edx, %esi /* align %esi down; %edx keeps the offset for shr_N */
106
/* Dispatch to L(shr_N) with N = %edx.  */
107 cmp $8, %edx
108 jae L(next_unaligned_table)
109 cmp $0, %edx
110 je L(shr_0) /* never taken here: the zero case already left via jz above */
111 cmp $1, %edx
112 je L(shr_1)
113 cmp $2, %edx
114 je L(shr_2)
115 cmp $3, %edx
116 je L(shr_3)
117 cmp $4, %edx
118 je L(shr_4)
119 cmp $5, %edx
120 je L(shr_5)
121 cmp $6, %edx
122 je L(shr_6)
123 jmp L(shr_7)
124
125 ALIGN (4)
/* Second half of the dispatch: N in 8..15.  */
126 L(next_unaligned_table):
127 cmp $8, %edx
128 je L(shr_8)
129 cmp $9, %edx
130 je L(shr_9)
131 cmp $10, %edx
132 je L(shr_10)
133 cmp $11, %edx
134 je L(shr_11)
135 cmp $12, %edx
136 je L(shr_12)
137 cmp $13, %edx
138 je L(shr_13)
139 cmp $14, %edx
140 je L(shr_14)
141 jmp L(shr_15)
142
143 ALIGN (4)
/* Both pointers are 16-byte aligned.  If the adjusted count is below 80,
   compare one 32-byte group here and hand the rest to L(less48bytes);
   otherwise use the loop at L(shr_0_gobble).  */
144 L(shr_0):
145 cmp $80, %ecx
146 jae L(shr_0_gobble)
147 lea -48(%ecx), %ecx
148 xor %eax, %eax /* %eax = 0: the offset L(exit) adds to %esi */
149 movaps (%esi), %xmm1
150 pcmpeqb (%edi), %xmm1 /* %xmm1 = equality of bytes 0..15; L(exit) reads it */
151 movaps 16(%esi), %xmm2
152 pcmpeqb 16(%edi), %xmm2
153 pand %xmm1, %xmm2 /* lane is 0xff only if equal in both halves */
154 pmovmskb %xmm2, %edx
155 add $32, %edi
156 add $32, %esi
157 sub $0xffff, %edx /* zero iff all 32 bytes matched */
158 jnz L(exit)
159
160 lea (%ecx, %edi,1), %eax /* %eax = %edi + %ecx */
161 lea (%ecx, %esi,1), %edx /* %edx = %esi + %ecx */
162 POP (%edi)
163 POP (%esi)
164 jmp L(less48bytes) /* %ebx stays pushed, as on the direct path from the entry code */
165
/* Annotation only: back to "three registers pushed" for the code below.  */
166 cfi_restore_state
167 cfi_remember_state
168 ALIGN (4)
/* Aligned case, adjusted count >= 80: compare 32 bytes per iteration.
   Each iteration tests the result of the previous group while loading and
   comparing the next one.  */
169 L(shr_0_gobble):
170 lea -48(%ecx), %ecx
171 movdqa (%esi), %xmm0
172 xor %eax, %eax /* %eax = 0: the offset L(exit) adds to %esi */
173 pcmpeqb (%edi), %xmm0
174 sub $32, %ecx
175 movdqa 16(%esi), %xmm2
176 pcmpeqb 16(%edi), %xmm2
177 L(shr_0_gobble_loop):
178 pand %xmm0, %xmm2
179 sub $32, %ecx /* sets the carry flag when the count borrows */
180 pmovmskb %xmm2, %edx
181 movdqa %xmm0, %xmm1 /* keep the first-half result for L(exit) */
182 movdqa 32(%esi), %xmm0
183 movdqa 48(%esi), %xmm2
184 sbb $0xffff, %edx /* zero only if all 32 bytes matched and no borrow */
185 pcmpeqb 32(%edi), %xmm0
186 pcmpeqb 48(%edi), %xmm2
187 lea 32(%edi), %edi
188 lea 32(%esi), %esi
189 jz L(shr_0_gobble_loop) /* flags still from the sbb: SSE ops and lea do not write them */
190
191 pand %xmm0, %xmm2
192 cmp $0, %ecx
193 jge L(shr_0_gobble_loop_next)
194 inc %edx /* count went negative: add back the 1 that sbb subtracted */
195 add $32, %ecx /* and undo the last count decrement */
196 L(shr_0_gobble_loop_next):
197 test %edx, %edx
198 jnz L(exit) /* the group just tested contains a mismatch */
199
/* Test the group that was already loaded and compared inside the loop.  */
200 pmovmskb %xmm2, %edx
201 movdqa %xmm0, %xmm1
202 lea 32(%edi), %edi
203 lea 32(%esi), %esi
204 sub $0xffff, %edx
205 jnz L(exit)
206 lea (%ecx, %edi,1), %eax /* %eax = %edi + %ecx */
207 lea (%ecx, %esi,1), %edx /* %edx = %esi + %ecx */
208 POP (%edi)
209 POP (%esi)
210 jmp L(less48bytes)
211
/* Annotation only: back to "three registers pushed" for the code below.  */
212 cfi_restore_state
213 cfi_remember_state
214 ALIGN (4)
/* Second block is 1 byte past a 16-byte boundary (%esi was aligned down,
   %edx = 1).  Aligned loads from %esi are recombined with palignr $1 so
   that they line up with the aligned data at %edi.  */
215 L(shr_1):
216 cmp $80, %ecx
217 lea -48(%ecx), %ecx /* lea keeps the flags of the cmp above */
218 mov %edx, %eax /* %eax = misalignment; L(exit) adds it back to %esi */
219 jae L(shr_1_gobble) /* adjusted count was >= 80 */
220
221 movdqa 16(%esi), %xmm1
222 movdqa %xmm1, %xmm2 /* keep a copy as the low part of the next palignr */
223 palignr $1,(%esi), %xmm1 /* %xmm1 = the 16 bytes starting at 1(%esi) */
224 pcmpeqb (%edi), %xmm1
225
226 movdqa 32(%esi), %xmm3
227 palignr $1,%xmm2, %xmm3 /* %xmm3 = the 16 bytes starting at 17(%esi) */
228 pcmpeqb 16(%edi), %xmm3
229
230 pand %xmm1, %xmm3
231 pmovmskb %xmm3, %edx
232 lea 32(%edi), %edi
233 lea 32(%esi), %esi
234 sub $0xffff, %edx /* zero iff all 32 bytes matched */
235 jnz L(exit)
236 lea (%ecx, %edi,1), %eax /* %eax = %edi + %ecx */
237 lea 1(%ecx, %esi,1), %edx /* %edx = %esi + %ecx + 1: re-adds the 1-byte offset */
238 POP (%edi)
239 POP (%esi)
240 jmp L(less48bytes)
241
/* Annotation only: back to "three registers pushed" for the code below.  */
242 cfi_restore_state
243 cfi_remember_state
244 ALIGN (4)
/* 1-byte misalignment, adjusted count >= 80: 32 bytes per iteration.
   Same loop shape as L(shr_0_gobble), with every second-block vector built
   by palignr $1 from two neighbouring aligned loads.  */
245 L(shr_1_gobble):
246 sub $32, %ecx
247 movdqa 16(%esi), %xmm0
248 palignr $1,(%esi), %xmm0
249 pcmpeqb (%edi), %xmm0
250
251 movdqa 32(%esi), %xmm3
252 palignr $1,16(%esi), %xmm3
253 pcmpeqb 16(%edi), %xmm3
254
255 L(shr_1_gobble_loop):
256 pand %xmm0, %xmm3
257 sub $32, %ecx /* sets the carry flag when the count borrows */
258 pmovmskb %xmm3, %edx
259 movdqa %xmm0, %xmm1 /* keep the first-half result for L(exit) */
260
261 movdqa 64(%esi), %xmm3
262 palignr $1,48(%esi), %xmm3
263 sbb $0xffff, %edx /* zero only if all 32 bytes matched and no borrow */
264 movdqa 48(%esi), %xmm0
265 palignr $1,32(%esi), %xmm0
266 pcmpeqb 32(%edi), %xmm0
267 lea 32(%esi), %esi
268 pcmpeqb 48(%edi), %xmm3
269
270 lea 32(%edi), %edi
271 jz L(shr_1_gobble_loop) /* flags still from the sbb: SSE ops and lea do not write them */
272
273 cmp $0, %ecx
274 jge L(shr_1_gobble_next)
275 inc %edx /* count went negative: add back the 1 that sbb subtracted */
276 add $32, %ecx /* and undo the last count decrement */
277 L(shr_1_gobble_next):
278 test %edx, %edx
279 jnz L(exit) /* the group just tested contains a mismatch */
280
/* Test the group that was already loaded and compared inside the loop.  */
281 pmovmskb %xmm3, %edx
282 movdqa %xmm0, %xmm1
283 lea 32(%edi), %edi
284 lea 32(%esi), %esi
285 sub $0xffff, %edx
286 jnz L(exit)
287
288 lea (%ecx, %edi,1), %eax /* %eax = %edi + %ecx */
289 lea 1(%ecx, %esi,1), %edx /* %edx = %esi + %ecx + 1: re-adds the 1-byte offset */
290 POP (%edi)
291 POP (%esi)
292 jmp L(less48bytes)
293
294
/* Annotation only: back to "three registers pushed" for the code below.  */
295 cfi_restore_state
296 cfi_remember_state
297 ALIGN (4)
298 L(shr_2):
299 cmp $80, %ecx
300 lea -48(%ecx), %ecx
301 mov %edx, %eax
302 jae L(shr_2_gobble)
303
304 movdqa 16(%esi), %xmm1
305 movdqa %xmm1, %xmm2
306 palignr $2,(%esi), %xmm1
307 pcmpeqb (%edi), %xmm1
308
309 movdqa 32(%esi), %xmm3
310 palignr $2,%xmm2, %xmm3
311 pcmpeqb 16(%edi), %xmm3
312
313 pand %xmm1, %xmm3
314 pmovmskb %xmm3, %edx
315 lea 32(%edi), %edi
316 lea 32(%esi), %esi
317 sub $0xffff, %edx
318 jnz L(exit)
319 lea (%ecx, %edi,1), %eax
320 lea 2(%ecx, %esi,1), %edx
321 POP (%edi)
322 POP (%esi)
323 jmp L(less48bytes)
324
325 cfi_restore_state
326 cfi_remember_state
327 ALIGN (4)
328 L(shr_2_gobble):
329 sub $32, %ecx
330 movdqa 16(%esi), %xmm0
331 palignr $2,(%esi), %xmm0
332 pcmpeqb (%edi), %xmm0
333
334 movdqa 32(%esi), %xmm3
335 palignr $2,16(%esi), %xmm3
336 pcmpeqb 16(%edi), %xmm3
337
338 L(shr_2_gobble_loop):
339 pand %xmm0, %xmm3
340 sub $32, %ecx
341 pmovmskb %xmm3, %edx
342 movdqa %xmm0, %xmm1
343
344 movdqa 64(%esi), %xmm3
345 palignr $2,48(%esi), %xmm3
346 sbb $0xffff, %edx
347 movdqa 48(%esi), %xmm0
348 palignr $2,32(%esi), %xmm0
349 pcmpeqb 32(%edi), %xmm0
350 lea 32(%esi), %esi
351 pcmpeqb 48(%edi), %xmm3
352
353 lea 32(%edi), %edi
354 jz L(shr_2_gobble_loop)
355
356 cmp $0, %ecx
357 jge L(shr_2_gobble_next)
358 inc %edx
359 add $32, %ecx
360 L(shr_2_gobble_next):
361 test %edx, %edx
362 jnz L(exit)
363
364 pmovmskb %xmm3, %edx
365 movdqa %xmm0, %xmm1
366 lea 32(%edi), %edi
367 lea 32(%esi), %esi
368 sub $0xffff, %edx
369 jnz L(exit)
370
371 lea (%ecx, %edi,1), %eax
372 lea 2(%ecx, %esi,1), %edx
373 POP (%edi)
374 POP (%esi)
375 jmp L(less48bytes)
376
377 cfi_restore_state
378 cfi_remember_state
379 ALIGN (4)
380 L(shr_3):
381 cmp $80, %ecx
382 lea -48(%ecx), %ecx
383 mov %edx, %eax
384 jae L(shr_3_gobble)
385
386 movdqa 16(%esi), %xmm1
387 movdqa %xmm1, %xmm2
388 palignr $3,(%esi), %xmm1
389 pcmpeqb (%edi), %xmm1
390
391 movdqa 32(%esi), %xmm3
392 palignr $3,%xmm2, %xmm3
393 pcmpeqb 16(%edi), %xmm3
394
395 pand %xmm1, %xmm3
396 pmovmskb %xmm3, %edx
397 lea 32(%edi), %edi
398 lea 32(%esi), %esi
399 sub $0xffff, %edx
400 jnz L(exit)
401 lea (%ecx, %edi,1), %eax
402 lea 3(%ecx, %esi,1), %edx
403 POP (%edi)
404 POP (%esi)
405 jmp L(less48bytes)
406
407 cfi_restore_state
408 cfi_remember_state
409 ALIGN (4)
410 L(shr_3_gobble):
411 sub $32, %ecx
412 movdqa 16(%esi), %xmm0
413 palignr $3,(%esi), %xmm0
414 pcmpeqb (%edi), %xmm0
415
416 movdqa 32(%esi), %xmm3
417 palignr $3,16(%esi), %xmm3
418 pcmpeqb 16(%edi), %xmm3
419
420 L(shr_3_gobble_loop):
421 pand %xmm0, %xmm3
422 sub $32, %ecx
423 pmovmskb %xmm3, %edx
424 movdqa %xmm0, %xmm1
425
426 movdqa 64(%esi), %xmm3
427 palignr $3,48(%esi), %xmm3
428 sbb $0xffff, %edx
429 movdqa 48(%esi), %xmm0
430 palignr $3,32(%esi), %xmm0
431 pcmpeqb 32(%edi), %xmm0
432 lea 32(%esi), %esi
433 pcmpeqb 48(%edi), %xmm3
434
435 lea 32(%edi), %edi
436 jz L(shr_3_gobble_loop)
437
438 cmp $0, %ecx
439 jge L(shr_3_gobble_next)
440 inc %edx
441 add $32, %ecx
442 L(shr_3_gobble_next):
443 test %edx, %edx
444 jnz L(exit)
445
446 pmovmskb %xmm3, %edx
447 movdqa %xmm0, %xmm1
448 lea 32(%edi), %edi
449 lea 32(%esi), %esi
450 sub $0xffff, %edx
451 jnz L(exit)
452
453 lea (%ecx, %edi,1), %eax
454 lea 3(%ecx, %esi,1), %edx
455 POP (%edi)
456 POP (%esi)
457 jmp L(less48bytes)
458
459 cfi_restore_state
460 cfi_remember_state
461 ALIGN (4)
462 L(shr_4):
463 cmp $80, %ecx
464 lea -48(%ecx), %ecx
465 mov %edx, %eax
466 jae L(shr_4_gobble)
467
468 movdqa 16(%esi), %xmm1
469 movdqa %xmm1, %xmm2
470 palignr $4,(%esi), %xmm1
471 pcmpeqb (%edi), %xmm1
472
473 movdqa 32(%esi), %xmm3
474 palignr $4,%xmm2, %xmm3
475 pcmpeqb 16(%edi), %xmm3
476
477 pand %xmm1, %xmm3
478 pmovmskb %xmm3, %edx
479 lea 32(%edi), %edi
480 lea 32(%esi), %esi
481 sub $0xffff, %edx
482 jnz L(exit)
483 lea (%ecx, %edi,1), %eax
484 lea 4(%ecx, %esi,1), %edx
485 POP (%edi)
486 POP (%esi)
487 jmp L(less48bytes)
488
489 cfi_restore_state
490 cfi_remember_state
491 ALIGN (4)
492 L(shr_4_gobble):
493 sub $32, %ecx
494 movdqa 16(%esi), %xmm0
495 palignr $4,(%esi), %xmm0
496 pcmpeqb (%edi), %xmm0
497
498 movdqa 32(%esi), %xmm3
499 palignr $4,16(%esi), %xmm3
500 pcmpeqb 16(%edi), %xmm3
501
502 L(shr_4_gobble_loop):
503 pand %xmm0, %xmm3
504 sub $32, %ecx
505 pmovmskb %xmm3, %edx
506 movdqa %xmm0, %xmm1
507
508 movdqa 64(%esi), %xmm3
509 palignr $4,48(%esi), %xmm3
510 sbb $0xffff, %edx
511 movdqa 48(%esi), %xmm0
512 palignr $4,32(%esi), %xmm0
513 pcmpeqb 32(%edi), %xmm0
514 lea 32(%esi), %esi
515 pcmpeqb 48(%edi), %xmm3
516
517 lea 32(%edi), %edi
518 jz L(shr_4_gobble_loop)
519
520 cmp $0, %ecx
521 jge L(shr_4_gobble_next)
522 inc %edx
523 add $32, %ecx
524 L(shr_4_gobble_next):
525 test %edx, %edx
526 jnz L(exit)
527
528 pmovmskb %xmm3, %edx
529 movdqa %xmm0, %xmm1
530 lea 32(%edi), %edi
531 lea 32(%esi), %esi
532 sub $0xffff, %edx
533 jnz L(exit)
534
535 lea (%ecx, %edi,1), %eax
536 lea 4(%ecx, %esi,1), %edx
537 POP (%edi)
538 POP (%esi)
539 jmp L(less48bytes)
540
541 cfi_restore_state
542 cfi_remember_state
543 ALIGN (4)
544 L(shr_5):
545 cmp $80, %ecx
546 lea -48(%ecx), %ecx
547 mov %edx, %eax
548 jae L(shr_5_gobble)
549
550 movdqa 16(%esi), %xmm1
551 movdqa %xmm1, %xmm2
552 palignr $5,(%esi), %xmm1
553 pcmpeqb (%edi), %xmm1
554
555 movdqa 32(%esi), %xmm3
556 palignr $5,%xmm2, %xmm3
557 pcmpeqb 16(%edi), %xmm3
558
559 pand %xmm1, %xmm3
560 pmovmskb %xmm3, %edx
561 lea 32(%edi), %edi
562 lea 32(%esi), %esi
563 sub $0xffff, %edx
564 jnz L(exit)
565 lea (%ecx, %edi,1), %eax
566 lea 5(%ecx, %esi,1), %edx
567 POP (%edi)
568 POP (%esi)
569 jmp L(less48bytes)
570
571 cfi_restore_state
572 cfi_remember_state
573 ALIGN (4)
574 L(shr_5_gobble):
575 sub $32, %ecx
576 movdqa 16(%esi), %xmm0
577 palignr $5,(%esi), %xmm0
578 pcmpeqb (%edi), %xmm0
579
580 movdqa 32(%esi), %xmm3
581 palignr $5,16(%esi), %xmm3
582 pcmpeqb 16(%edi), %xmm3
583
584 L(shr_5_gobble_loop):
585 pand %xmm0, %xmm3
586 sub $32, %ecx
587 pmovmskb %xmm3, %edx
588 movdqa %xmm0, %xmm1
589
590 movdqa 64(%esi), %xmm3
591 palignr $5,48(%esi), %xmm3
592 sbb $0xffff, %edx
593 movdqa 48(%esi), %xmm0
594 palignr $5,32(%esi), %xmm0
595 pcmpeqb 32(%edi), %xmm0
596 lea 32(%esi), %esi
597 pcmpeqb 48(%edi), %xmm3
598
599 lea 32(%edi), %edi
600 jz L(shr_5_gobble_loop)
601
602 cmp $0, %ecx
603 jge L(shr_5_gobble_next)
604 inc %edx
605 add $32, %ecx
606 L(shr_5_gobble_next):
607 test %edx, %edx
608 jnz L(exit)
609
610 pmovmskb %xmm3, %edx
611 movdqa %xmm0, %xmm1
612 lea 32(%edi), %edi
613 lea 32(%esi), %esi
614 sub $0xffff, %edx
615 jnz L(exit)
616
617 lea (%ecx, %edi,1), %eax
618 lea 5(%ecx, %esi,1), %edx
619 POP (%edi)
620 POP (%esi)
621 jmp L(less48bytes)
622
623 cfi_restore_state
624 cfi_remember_state
625 ALIGN (4)
626 L(shr_6):
627 cmp $80, %ecx
628 lea -48(%ecx), %ecx
629 mov %edx, %eax
630 jae L(shr_6_gobble)
631
632 movdqa 16(%esi), %xmm1
633 movdqa %xmm1, %xmm2
634 palignr $6,(%esi), %xmm1
635 pcmpeqb (%edi), %xmm1
636
637 movdqa 32(%esi), %xmm3
638 palignr $6,%xmm2, %xmm3
639 pcmpeqb 16(%edi), %xmm3
640
641 pand %xmm1, %xmm3
642 pmovmskb %xmm3, %edx
643 lea 32(%edi), %edi
644 lea 32(%esi), %esi
645 sub $0xffff, %edx
646 jnz L(exit)
647 lea (%ecx, %edi,1), %eax
648 lea 6(%ecx, %esi,1), %edx
649 POP (%edi)
650 POP (%esi)
651 jmp L(less48bytes)
652
653 cfi_restore_state
654 cfi_remember_state
655 ALIGN (4)
656 L(shr_6_gobble):
657 sub $32, %ecx
658 movdqa 16(%esi), %xmm0
659 palignr $6,(%esi), %xmm0
660 pcmpeqb (%edi), %xmm0
661
662 movdqa 32(%esi), %xmm3
663 palignr $6,16(%esi), %xmm3
664 pcmpeqb 16(%edi), %xmm3
665
666 L(shr_6_gobble_loop):
667 pand %xmm0, %xmm3
668 sub $32, %ecx
669 pmovmskb %xmm3, %edx
670 movdqa %xmm0, %xmm1
671
672 movdqa 64(%esi), %xmm3
673 palignr $6,48(%esi), %xmm3
674 sbb $0xffff, %edx
675 movdqa 48(%esi), %xmm0
676 palignr $6,32(%esi), %xmm0
677 pcmpeqb 32(%edi), %xmm0
678 lea 32(%esi), %esi
679 pcmpeqb 48(%edi), %xmm3
680
681 lea 32(%edi), %edi
682 jz L(shr_6_gobble_loop)
683
684 cmp $0, %ecx
685 jge L(shr_6_gobble_next)
686 inc %edx
687 add $32, %ecx
688 L(shr_6_gobble_next):
689 test %edx, %edx
690 jnz L(exit)
691
692 pmovmskb %xmm3, %edx
693 movdqa %xmm0, %xmm1
694 lea 32(%edi), %edi
695 lea 32(%esi), %esi
696 sub $0xffff, %edx
697 jnz L(exit)
698
699 lea (%ecx, %edi,1), %eax
700 lea 6(%ecx, %esi,1), %edx
701 POP (%edi)
702 POP (%esi)
703 jmp L(less48bytes)
704
705 cfi_restore_state
706 cfi_remember_state
707 ALIGN (4)
708 L(shr_7):
709 cmp $80, %ecx
710 lea -48(%ecx), %ecx
711 mov %edx, %eax
712 jae L(shr_7_gobble)
713
714 movdqa 16(%esi), %xmm1
715 movdqa %xmm1, %xmm2
716 palignr $7,(%esi), %xmm1
717 pcmpeqb (%edi), %xmm1
718
719 movdqa 32(%esi), %xmm3
720 palignr $7,%xmm2, %xmm3
721 pcmpeqb 16(%edi), %xmm3
722
723 pand %xmm1, %xmm3
724 pmovmskb %xmm3, %edx
725 lea 32(%edi), %edi
726 lea 32(%esi), %esi
727 sub $0xffff, %edx
728 jnz L(exit)
729 lea (%ecx, %edi,1), %eax
730 lea 7(%ecx, %esi,1), %edx
731 POP (%edi)
732 POP (%esi)
733 jmp L(less48bytes)
734
735 cfi_restore_state
736 cfi_remember_state
737 ALIGN (4)
738 L(shr_7_gobble):
739 sub $32, %ecx
740 movdqa 16(%esi), %xmm0
741 palignr $7,(%esi), %xmm0
742 pcmpeqb (%edi), %xmm0
743
744 movdqa 32(%esi), %xmm3
745 palignr $7,16(%esi), %xmm3
746 pcmpeqb 16(%edi), %xmm3
747
748 L(shr_7_gobble_loop):
749 pand %xmm0, %xmm3
750 sub $32, %ecx
751 pmovmskb %xmm3, %edx
752 movdqa %xmm0, %xmm1
753
754 movdqa 64(%esi), %xmm3
755 palignr $7,48(%esi), %xmm3
756 sbb $0xffff, %edx
757 movdqa 48(%esi), %xmm0
758 palignr $7,32(%esi), %xmm0
759 pcmpeqb 32(%edi), %xmm0
760 lea 32(%esi), %esi
761 pcmpeqb 48(%edi), %xmm3
762
763 lea 32(%edi), %edi
764 jz L(shr_7_gobble_loop)
765
766 cmp $0, %ecx
767 jge L(shr_7_gobble_next)
768 inc %edx
769 add $32, %ecx
770 L(shr_7_gobble_next):
771 test %edx, %edx
772 jnz L(exit)
773
774 pmovmskb %xmm3, %edx
775 movdqa %xmm0, %xmm1
776 lea 32(%edi), %edi
777 lea 32(%esi), %esi
778 sub $0xffff, %edx
779 jnz L(exit)
780
781 lea (%ecx, %edi,1), %eax
782 lea 7(%ecx, %esi,1), %edx
783 POP (%edi)
784 POP (%esi)
785 jmp L(less48bytes)
786
787 cfi_restore_state
788 cfi_remember_state
789 ALIGN (4)
790 L(shr_8):
791 cmp $80, %ecx
792 lea -48(%ecx), %ecx
793 mov %edx, %eax
794 jae L(shr_8_gobble)
795
796 movdqa 16(%esi), %xmm1
797 movdqa %xmm1, %xmm2
798 palignr $8,(%esi), %xmm1
799 pcmpeqb (%edi), %xmm1
800
801 movdqa 32(%esi), %xmm3
802 palignr $8,%xmm2, %xmm3
803 pcmpeqb 16(%edi), %xmm3
804
805 pand %xmm1, %xmm3
806 pmovmskb %xmm3, %edx
807 lea 32(%edi), %edi
808 lea 32(%esi), %esi
809 sub $0xffff, %edx
810 jnz L(exit)
811 lea (%ecx, %edi,1), %eax
812 lea 8(%ecx, %esi,1), %edx
813 POP (%edi)
814 POP (%esi)
815 jmp L(less48bytes)
816
817 cfi_restore_state
818 cfi_remember_state
819 ALIGN (4)
820 L(shr_8_gobble):
821 sub $32, %ecx
822 movdqa 16(%esi), %xmm0
823 palignr $8,(%esi), %xmm0
824 pcmpeqb (%edi), %xmm0
825
826 movdqa 32(%esi), %xmm3
827 palignr $8,16(%esi), %xmm3
828 pcmpeqb 16(%edi), %xmm3
829
830 L(shr_8_gobble_loop):
831 pand %xmm0, %xmm3
832 sub $32, %ecx
833 pmovmskb %xmm3, %edx
834 movdqa %xmm0, %xmm1
835
836 movdqa 64(%esi), %xmm3
837 palignr $8,48(%esi), %xmm3
838 sbb $0xffff, %edx
839 movdqa 48(%esi), %xmm0
840 palignr $8,32(%esi), %xmm0
841 pcmpeqb 32(%edi), %xmm0
842 lea 32(%esi), %esi
843 pcmpeqb 48(%edi), %xmm3
844
845 lea 32(%edi), %edi
846 jz L(shr_8_gobble_loop)
847
848 cmp $0, %ecx
849 jge L(shr_8_gobble_next)
850 inc %edx
851 add $32, %ecx
852 L(shr_8_gobble_next):
853 test %edx, %edx
854 jnz L(exit)
855
856 pmovmskb %xmm3, %edx
857 movdqa %xmm0, %xmm1
858 lea 32(%edi), %edi
859 lea 32(%esi), %esi
860 sub $0xffff, %edx
861 jnz L(exit)
862
863 lea (%ecx, %edi,1), %eax
864 lea 8(%ecx, %esi,1), %edx
865 POP (%edi)
866 POP (%esi)
867 jmp L(less48bytes)
868
869 cfi_restore_state
870 cfi_remember_state
871 ALIGN (4)
872 L(shr_9):
873 cmp $80, %ecx
874 lea -48(%ecx), %ecx
875 mov %edx, %eax
876 jae L(shr_9_gobble)
877
878 movdqa 16(%esi), %xmm1
879 movdqa %xmm1, %xmm2
880 palignr $9,(%esi), %xmm1
881 pcmpeqb (%edi), %xmm1
882
883 movdqa 32(%esi), %xmm3
884 palignr $9,%xmm2, %xmm3
885 pcmpeqb 16(%edi), %xmm3
886
887 pand %xmm1, %xmm3
888 pmovmskb %xmm3, %edx
889 lea 32(%edi), %edi
890 lea 32(%esi), %esi
891 sub $0xffff, %edx
892 jnz L(exit)
893 lea (%ecx, %edi,1), %eax
894 lea 9(%ecx, %esi,1), %edx
895 POP (%edi)
896 POP (%esi)
897 jmp L(less48bytes)
898
899 cfi_restore_state
900 cfi_remember_state
901 ALIGN (4)
902 L(shr_9_gobble):
903 sub $32, %ecx
904 movdqa 16(%esi), %xmm0
905 palignr $9,(%esi), %xmm0
906 pcmpeqb (%edi), %xmm0
907
908 movdqa 32(%esi), %xmm3
909 palignr $9,16(%esi), %xmm3
910 pcmpeqb 16(%edi), %xmm3
911
912 L(shr_9_gobble_loop):
913 pand %xmm0, %xmm3
914 sub $32, %ecx
915 pmovmskb %xmm3, %edx
916 movdqa %xmm0, %xmm1
917
918 movdqa 64(%esi), %xmm3
919 palignr $9,48(%esi), %xmm3
920 sbb $0xffff, %edx
921 movdqa 48(%esi), %xmm0
922 palignr $9,32(%esi), %xmm0
923 pcmpeqb 32(%edi), %xmm0
924 lea 32(%esi), %esi
925 pcmpeqb 48(%edi), %xmm3
926
927 lea 32(%edi), %edi
928 jz L(shr_9_gobble_loop)
929
930 cmp $0, %ecx
931 jge L(shr_9_gobble_next)
932 inc %edx
933 add $32, %ecx
934 L(shr_9_gobble_next):
935 test %edx, %edx
936 jnz L(exit)
937
938 pmovmskb %xmm3, %edx
939 movdqa %xmm0, %xmm1
940 lea 32(%edi), %edi
941 lea 32(%esi), %esi
942 sub $0xffff, %edx
943 jnz L(exit)
944
945 lea (%ecx, %edi,1), %eax
946 lea 9(%ecx, %esi,1), %edx
947 POP (%edi)
948 POP (%esi)
949 jmp L(less48bytes)
950
951 cfi_restore_state
952 cfi_remember_state
953 ALIGN (4)
954 L(shr_10):
955 cmp $80, %ecx
956 lea -48(%ecx), %ecx
957 mov %edx, %eax
958 jae L(shr_10_gobble)
959
960 movdqa 16(%esi), %xmm1
961 movdqa %xmm1, %xmm2
962 palignr $10, (%esi), %xmm1
963 pcmpeqb (%edi), %xmm1
964
965 movdqa 32(%esi), %xmm3
966 palignr $10,%xmm2, %xmm3
967 pcmpeqb 16(%edi), %xmm3
968
969 pand %xmm1, %xmm3
970 pmovmskb %xmm3, %edx
971 lea 32(%edi), %edi
972 lea 32(%esi), %esi
973 sub $0xffff, %edx
974 jnz L(exit)
975 lea (%ecx, %edi,1), %eax
976 lea 10(%ecx, %esi,1), %edx
977 POP (%edi)
978 POP (%esi)
979 jmp L(less48bytes)
980
981 cfi_restore_state
982 cfi_remember_state
983 ALIGN (4)
984 L(shr_10_gobble):
985 sub $32, %ecx
986 movdqa 16(%esi), %xmm0
987 palignr $10, (%esi), %xmm0
988 pcmpeqb (%edi), %xmm0
989
990 movdqa 32(%esi), %xmm3
991 palignr $10, 16(%esi), %xmm3
992 pcmpeqb 16(%edi), %xmm3
993
994 L(shr_10_gobble_loop):
995 pand %xmm0, %xmm3
996 sub $32, %ecx
997 pmovmskb %xmm3, %edx
998 movdqa %xmm0, %xmm1
999
1000 movdqa 64(%esi), %xmm3
1001 palignr $10,48(%esi), %xmm3
1002 sbb $0xffff, %edx
1003 movdqa 48(%esi), %xmm0
1004 palignr $10,32(%esi), %xmm0
1005 pcmpeqb 32(%edi), %xmm0
1006 lea 32(%esi), %esi
1007 pcmpeqb 48(%edi), %xmm3
1008
1009 lea 32(%edi), %edi
1010 jz L(shr_10_gobble_loop)
1011
1012 cmp $0, %ecx
1013 jge L(shr_10_gobble_next)
1014 inc %edx
1015 add $32, %ecx
1016 L(shr_10_gobble_next):
1017 test %edx, %edx
1018 jnz L(exit)
1019
1020 pmovmskb %xmm3, %edx
1021 movdqa %xmm0, %xmm1
1022 lea 32(%edi), %edi
1023 lea 32(%esi), %esi
1024 sub $0xffff, %edx
1025 jnz L(exit)
1026
1027 lea (%ecx, %edi,1), %eax
1028 lea 10(%ecx, %esi,1), %edx
1029 POP (%edi)
1030 POP (%esi)
1031 jmp L(less48bytes)
1032
1033 cfi_restore_state
1034 cfi_remember_state
1035 ALIGN (4)
1036 L(shr_11):
1037 cmp $80, %ecx
1038 lea -48(%ecx), %ecx
1039 mov %edx, %eax
1040 jae L(shr_11_gobble)
1041
1042 movdqa 16(%esi), %xmm1
1043 movdqa %xmm1, %xmm2
1044 palignr $11, (%esi), %xmm1
1045 pcmpeqb (%edi), %xmm1
1046
1047 movdqa 32(%esi), %xmm3
1048 palignr $11, %xmm2, %xmm3
1049 pcmpeqb 16(%edi), %xmm3
1050
1051 pand %xmm1, %xmm3
1052 pmovmskb %xmm3, %edx
1053 lea 32(%edi), %edi
1054 lea 32(%esi), %esi
1055 sub $0xffff, %edx
1056 jnz L(exit)
1057 lea (%ecx, %edi,1), %eax
1058 lea 11(%ecx, %esi,1), %edx
1059 POP (%edi)
1060 POP (%esi)
1061 jmp L(less48bytes)
1062
1063 cfi_restore_state
1064 cfi_remember_state
1065 ALIGN (4)
1066 L(shr_11_gobble):
1067 sub $32, %ecx
1068 movdqa 16(%esi), %xmm0
1069 palignr $11, (%esi), %xmm0
1070 pcmpeqb (%edi), %xmm0
1071
1072 movdqa 32(%esi), %xmm3
1073 palignr $11, 16(%esi), %xmm3
1074 pcmpeqb 16(%edi), %xmm3
1075
1076 L(shr_11_gobble_loop):
1077 pand %xmm0, %xmm3
1078 sub $32, %ecx
1079 pmovmskb %xmm3, %edx
1080 movdqa %xmm0, %xmm1
1081
1082 movdqa 64(%esi), %xmm3
1083 palignr $11,48(%esi), %xmm3
1084 sbb $0xffff, %edx
1085 movdqa 48(%esi), %xmm0
1086 palignr $11,32(%esi), %xmm0
1087 pcmpeqb 32(%edi), %xmm0
1088 lea 32(%esi), %esi
1089 pcmpeqb 48(%edi), %xmm3
1090
1091 lea 32(%edi), %edi
1092 jz L(shr_11_gobble_loop)
1093
1094 cmp $0, %ecx
1095 jge L(shr_11_gobble_next)
1096 inc %edx
1097 add $32, %ecx
1098 L(shr_11_gobble_next):
1099 test %edx, %edx
1100 jnz L(exit)
1101
1102 pmovmskb %xmm3, %edx
1103 movdqa %xmm0, %xmm1
1104 lea 32(%edi), %edi
1105 lea 32(%esi), %esi
1106 sub $0xffff, %edx
1107 jnz L(exit)
1108
1109 lea (%ecx, %edi,1), %eax
1110 lea 11(%ecx, %esi,1), %edx
1111 POP (%edi)
1112 POP (%esi)
1113 jmp L(less48bytes)
1114
1115 cfi_restore_state
1116 cfi_remember_state
1117 ALIGN (4)
1118 L(shr_12):
1119 cmp $80, %ecx
1120 lea -48(%ecx), %ecx
1121 mov %edx, %eax
1122 jae L(shr_12_gobble)
1123
1124 movdqa 16(%esi), %xmm1
1125 movdqa %xmm1, %xmm2
1126 palignr $12, (%esi), %xmm1
1127 pcmpeqb (%edi), %xmm1
1128
1129 movdqa 32(%esi), %xmm3
1130 palignr $12, %xmm2, %xmm3
1131 pcmpeqb 16(%edi), %xmm3
1132
1133 pand %xmm1, %xmm3
1134 pmovmskb %xmm3, %edx
1135 lea 32(%edi), %edi
1136 lea 32(%esi), %esi
1137 sub $0xffff, %edx
1138 jnz L(exit)
1139 lea (%ecx, %edi,1), %eax
1140 lea 12(%ecx, %esi,1), %edx
1141 POP (%edi)
1142 POP (%esi)
1143 jmp L(less48bytes)
1144
1145 cfi_restore_state
1146 cfi_remember_state
1147 ALIGN (4)
1148 L(shr_12_gobble):
1149 sub $32, %ecx
1150 movdqa 16(%esi), %xmm0
1151 palignr $12, (%esi), %xmm0
1152 pcmpeqb (%edi), %xmm0
1153
1154 movdqa 32(%esi), %xmm3
1155 palignr $12, 16(%esi), %xmm3
1156 pcmpeqb 16(%edi), %xmm3
1157
1158 L(shr_12_gobble_loop):
1159 pand %xmm0, %xmm3
1160 sub $32, %ecx
1161 pmovmskb %xmm3, %edx
1162 movdqa %xmm0, %xmm1
1163
1164 movdqa 64(%esi), %xmm3
1165 palignr $12,48(%esi), %xmm3
1166 sbb $0xffff, %edx
1167 movdqa 48(%esi), %xmm0
1168 palignr $12,32(%esi), %xmm0
1169 pcmpeqb 32(%edi), %xmm0
1170 lea 32(%esi), %esi
1171 pcmpeqb 48(%edi), %xmm3
1172
1173 lea 32(%edi), %edi
1174 jz L(shr_12_gobble_loop)
1175
1176 cmp $0, %ecx
1177 jge L(shr_12_gobble_next)
1178 inc %edx
1179 add $32, %ecx
1180 L(shr_12_gobble_next):
1181 test %edx, %edx
1182 jnz L(exit)
1183
1184 pmovmskb %xmm3, %edx
1185 movdqa %xmm0, %xmm1
1186 lea 32(%edi), %edi
1187 lea 32(%esi), %esi
1188 sub $0xffff, %edx
1189 jnz L(exit)
1190
1191 lea (%ecx, %edi,1), %eax
1192 lea 12(%ecx, %esi,1), %edx
1193 POP (%edi)
1194 POP (%esi)
1195 jmp L(less48bytes)
1196
1197 cfi_restore_state
1198 cfi_remember_state
1199 ALIGN (4)
1200 L(shr_13):
1201 cmp $80, %ecx
1202 lea -48(%ecx), %ecx
1203 mov %edx, %eax
1204 jae L(shr_13_gobble)
1205
1206 movdqa 16(%esi), %xmm1
1207 movdqa %xmm1, %xmm2
1208 palignr $13, (%esi), %xmm1
1209 pcmpeqb (%edi), %xmm1
1210
1211 movdqa 32(%esi), %xmm3
1212 palignr $13, %xmm2, %xmm3
1213 pcmpeqb 16(%edi), %xmm3
1214
1215 pand %xmm1, %xmm3
1216 pmovmskb %xmm3, %edx
1217 lea 32(%edi), %edi
1218 lea 32(%esi), %esi
1219 sub $0xffff, %edx
1220 jnz L(exit)
1221 lea (%ecx, %edi,1), %eax
1222 lea 13(%ecx, %esi,1), %edx
1223 POP (%edi)
1224 POP (%esi)
1225 jmp L(less48bytes)
1226
1227 cfi_restore_state
1228 cfi_remember_state
1229 ALIGN (4)
1230 L(shr_13_gobble):
1231 sub $32, %ecx
1232 movdqa 16(%esi), %xmm0
1233 palignr $13, (%esi), %xmm0
1234 pcmpeqb (%edi), %xmm0
1235
1236 movdqa 32(%esi), %xmm3
1237 palignr $13, 16(%esi), %xmm3
1238 pcmpeqb 16(%edi), %xmm3
1239
1240 L(shr_13_gobble_loop):
1241 pand %xmm0, %xmm3
1242 sub $32, %ecx
1243 pmovmskb %xmm3, %edx
1244 movdqa %xmm0, %xmm1
1245
1246 movdqa 64(%esi), %xmm3
1247 palignr $13,48(%esi), %xmm3
1248 sbb $0xffff, %edx
1249 movdqa 48(%esi), %xmm0
1250 palignr $13,32(%esi), %xmm0
1251 pcmpeqb 32(%edi), %xmm0
1252 lea 32(%esi), %esi
1253 pcmpeqb 48(%edi), %xmm3
1254
1255 lea 32(%edi), %edi
1256 jz L(shr_13_gobble_loop)
1257
1258 cmp $0, %ecx
1259 jge L(shr_13_gobble_next)
1260 inc %edx
1261 add $32, %ecx
1262 L(shr_13_gobble_next):
1263 test %edx, %edx
1264 jnz L(exit)
1265
1266 pmovmskb %xmm3, %edx
1267 movdqa %xmm0, %xmm1
1268 lea 32(%edi), %edi
1269 lea 32(%esi), %esi
1270 sub $0xffff, %edx
1271 jnz L(exit)
1272
1273 lea (%ecx, %edi,1), %eax
1274 lea 13(%ecx, %esi,1), %edx
1275 POP (%edi)
1276 POP (%esi)
1277 jmp L(less48bytes)
1278
1279 cfi_restore_state
1280 cfi_remember_state
1281 ALIGN (4)
1282 L(shr_14):
1283 cmp $80, %ecx
1284 lea -48(%ecx), %ecx
1285 mov %edx, %eax
1286 jae L(shr_14_gobble)
1287
1288 movdqa 16(%esi), %xmm1
1289 movdqa %xmm1, %xmm2
1290 palignr $14, (%esi), %xmm1
1291 pcmpeqb (%edi), %xmm1
1292
1293 movdqa 32(%esi), %xmm3
1294 palignr $14, %xmm2, %xmm3
1295 pcmpeqb 16(%edi), %xmm3
1296
1297 pand %xmm1, %xmm3
1298 pmovmskb %xmm3, %edx
1299 lea 32(%edi), %edi
1300 lea 32(%esi), %esi
1301 sub $0xffff, %edx
1302 jnz L(exit)
1303 lea (%ecx, %edi,1), %eax
1304 lea 14(%ecx, %esi,1), %edx
1305 POP (%edi)
1306 POP (%esi)
1307 jmp L(less48bytes)
1308
1309 cfi_restore_state
1310 cfi_remember_state
1311 ALIGN (4)
1312 L(shr_14_gobble):
1313 sub $32, %ecx
1314 movdqa 16(%esi), %xmm0
1315 palignr $14, (%esi), %xmm0
1316 pcmpeqb (%edi), %xmm0
1317
1318 movdqa 32(%esi), %xmm3
1319 palignr $14, 16(%esi), %xmm3
1320 pcmpeqb 16(%edi), %xmm3
1321
1322 L(shr_14_gobble_loop):
1323 pand %xmm0, %xmm3
1324 sub $32, %ecx
1325 pmovmskb %xmm3, %edx
1326 movdqa %xmm0, %xmm1
1327
1328 movdqa 64(%esi), %xmm3
1329 palignr $14,48(%esi), %xmm3
1330 sbb $0xffff, %edx
1331 movdqa 48(%esi), %xmm0
1332 palignr $14,32(%esi), %xmm0
1333 pcmpeqb 32(%edi), %xmm0
1334 lea 32(%esi), %esi
1335 pcmpeqb 48(%edi), %xmm3
1336
1337 lea 32(%edi), %edi
1338 jz L(shr_14_gobble_loop)
1339
1340 cmp $0, %ecx
1341 jge L(shr_14_gobble_next)
1342 inc %edx
1343 add $32, %ecx
1344 L(shr_14_gobble_next):
1345 test %edx, %edx
1346 jnz L(exit)
1347
1348 pmovmskb %xmm3, %edx
1349 movdqa %xmm0, %xmm1
1350 lea 32(%edi), %edi
1351 lea 32(%esi), %esi
1352 sub $0xffff, %edx
1353 jnz L(exit)
1354
1355 lea (%ecx, %edi,1), %eax
1356 lea 14(%ecx, %esi,1), %edx
1357 POP (%edi)
1358 POP (%esi)
1359 jmp L(less48bytes)
1360
1361 cfi_restore_state
1362 cfi_remember_state
1363 ALIGN (4)
1364 L(shr_15):
1365 cmp $80, %ecx
1366 lea -48(%ecx), %ecx
1367 mov %edx, %eax
1368 jae L(shr_15_gobble)
1369
1370 movdqa 16(%esi), %xmm1
1371 movdqa %xmm1, %xmm2
1372 palignr $15, (%esi), %xmm1
1373 pcmpeqb (%edi), %xmm1
1374
1375 movdqa 32(%esi), %xmm3
1376 palignr $15, %xmm2, %xmm3
1377 pcmpeqb 16(%edi), %xmm3
1378
1379 pand %xmm1, %xmm3
1380 pmovmskb %xmm3, %edx
1381 lea 32(%edi), %edi
1382 lea 32(%esi), %esi
1383 sub $0xffff, %edx
1384 jnz L(exit)
1385 lea (%ecx, %edi,1), %eax
1386 lea 15(%ecx, %esi,1), %edx
1387 POP (%edi)
1388 POP (%esi)
1389 jmp L(less48bytes)
1390
1391 cfi_restore_state
1392 cfi_remember_state
1393 ALIGN (4)
1394 L(shr_15_gobble):
1395 sub $32, %ecx
1396 movdqa 16(%esi), %xmm0
1397 palignr $15, (%esi), %xmm0
1398 pcmpeqb (%edi), %xmm0
1399
1400 movdqa 32(%esi), %xmm3
1401 palignr $15, 16(%esi), %xmm3
1402 pcmpeqb 16(%edi), %xmm3
1403
1404 L(shr_15_gobble_loop):
1405 pand %xmm0, %xmm3
1406 sub $32, %ecx
1407 pmovmskb %xmm3, %edx
1408 movdqa %xmm0, %xmm1
1409
1410 movdqa 64(%esi), %xmm3
1411 palignr $15,48(%esi), %xmm3
1412 sbb $0xffff, %edx
1413 movdqa 48(%esi), %xmm0
1414 palignr $15,32(%esi), %xmm0
1415 pcmpeqb 32(%edi), %xmm0
1416 lea 32(%esi), %esi
1417 pcmpeqb 48(%edi), %xmm3
1418
1419 lea 32(%edi), %edi
1420 jz L(shr_15_gobble_loop)
1421
1422 cmp $0, %ecx
1423 jge L(shr_15_gobble_next)
1424 inc %edx
1425 add $32, %ecx
1426 L(shr_15_gobble_next):
1427 test %edx, %edx
1428 jnz L(exit)
1429
1430 pmovmskb %xmm3, %edx
1431 movdqa %xmm0, %xmm1
1432 lea 32(%edi), %edi
1433 lea 32(%esi), %esi
1434 sub $0xffff, %edx
1435 jnz L(exit)
1436
1437 lea (%ecx, %edi,1), %eax
1438 lea 15(%ecx, %esi,1), %edx
1439 POP (%edi)
1440 POP (%esi)
1441 jmp L(less48bytes)
1442
1443 cfi_restore_state
1444 cfi_remember_state
1445 ALIGN (4)
/* A 32-byte group contained a mismatch.  On entry:
     %edi, %esi  point 32 bytes past the start of that group,
     %xmm1       pcmpeqb result for the first 16 bytes of the group,
     %edx        (equality mask of the whole group) - 0xffff,
     %eax        offset to add back to %esi (0 for the aligned case).
   Because %edx is (mask - 0xffff), its lowest set bit sits at the position
   of the lowest clear bit of the mask, i.e. the first differing byte.  */
1446 L(exit):
1447 pmovmskb %xmm1, %ebx
1448 sub $0xffff, %ebx
1449 jz L(first16bytes) /* first half is equal: the mismatch is in the second half */
1450 lea -16(%esi), %esi /* mismatch in the first half: step both pointers back 16 */
1451 lea -16(%edi), %edi
1452 mov %ebx, %edx /* and use the first-half mask */
1453 L(first16bytes):
1454 add %eax, %esi /* re-apply the second block's misalignment */
/* %edi/%esi point 16 bytes past a 16-byte block that contains a mismatch;
   L(ByteNN) compares the byte at offset NN-32 from those pointers.  */
1455 L(less16bytes):
1456 test %dl, %dl
1457 jz L(next_24_bytes) /* bytes 0..7 are equal: look at bytes 8..15 */
1458
1459 test $0x01, %dl
1460 jnz L(Byte16)
1461
1462 test $0x02, %dl
1463 jnz L(Byte17)
1464
1465 test $0x04, %dl
1466 jnz L(Byte18)
1467
1468 test $0x08, %dl
1469 jnz L(Byte19)
1470
1471 test $0x10, %dl
1472 jnz L(Byte20)
1473
1474 test $0x20, %dl
1475 jnz L(Byte21)
1476
1477 test $0x40, %dl
1478 jnz L(Byte22)
/* Each L(ByteNN) loads one byte from each block zero-extended, returns
   (first block byte) - (second block byte) in %eax and restores the saved
   registers through RETURN.  */
1479 L(Byte23):
1480 movzbl -9(%edi), %eax
1481 movzbl -9(%esi), %edx
1482 sub %edx, %eax
1483 RETURN
1484
1485 ALIGN (4)
1486 L(Byte16):
1487 movzbl -16(%edi), %eax
1488 movzbl -16(%esi), %edx
1489 sub %edx, %eax
1490 RETURN
1491
1492 ALIGN (4)
1493 L(Byte17):
1494 movzbl -15(%edi), %eax
1495 movzbl -15(%esi), %edx
1496 sub %edx, %eax
1497 RETURN
1498
1499 ALIGN (4)
1500 L(Byte18):
1501 movzbl -14(%edi), %eax
1502 movzbl -14(%esi), %edx
1503 sub %edx, %eax
1504 RETURN
1505
1506 ALIGN (4)
1507 L(Byte19):
1508 movzbl -13(%edi), %eax
1509 movzbl -13(%esi), %edx
1510 sub %edx, %eax
1511 RETURN
1512
1513 ALIGN (4)
1514 L(Byte20):
1515 movzbl -12(%edi), %eax
1516 movzbl -12(%esi), %edx
1517 sub %edx, %eax
1518 RETURN
1519
1520 ALIGN (4)
1521 L(Byte21):
1522 movzbl -11(%edi), %eax
1523 movzbl -11(%esi), %edx
1524 sub %edx, %eax
1525 RETURN
1526
1527 ALIGN (4)
1528 L(Byte22):
1529 movzbl -10(%edi), %eax
1530 movzbl -10(%esi), %edx
1531 sub %edx, %eax
1532 RETURN
1533
/* Reached from L(less16bytes) when the low eight bits of %edx are all
   zero.  Both pointers are advanced by 8 so that the L(Byte16)..L(Byte22)
   stubs, whose offsets are -16..-10, now address the bytes selected by
   bits 0..6 of %dh.  */
1534 ALIGN (4)
1535 L(next_24_bytes):
1536 lea 8(%edi), %edi
1537 lea 8(%esi), %esi
1538 test $0x01, %dh
1539 jnz L(Byte16)
1540
1541 test $0x02, %dh
1542 jnz L(Byte17)
1543
1544 test $0x04, %dh
1545 jnz L(Byte18)
1546
1547 test $0x08, %dh
1548 jnz L(Byte19)
1549
1550 test $0x10, %dh
1551 jnz L(Byte20)
1552
1553 test $0x20, %dh
1554 jnz L(Byte21)
1555
1556 test $0x40, %dh
1557 jnz L(Byte22)
1558
/* None of bits 0..6 of %dh matched: execution falls through (across the
   ALIGN) into L(Byte31) without testing bit 7.  This stub uses RETURN_END
   rather than RETURN, i.e. the three pops and ret with no CFI
   restore/remember afterwards.  */
1559 ALIGN (4)
1560 L(Byte31):
1561 movzbl -9(%edi), %eax
1562 movzbl -9(%esi), %edx
1563 sub %edx, %eax
1564 RETURN_END
1565
/* CFI_PUSH emits unwind directives only, no instruction.  The RETURN_END
   above left the unwind description with nothing saved; the code from
   here on runs with only %ebx pushed (each path ends in POP (%ebx); ret),
   so describe that state.  */
1566 CFI_PUSH (%ebx)
1567 ALIGN (4)
/* Length dispatch, reached from L(less48bytes) when %ecx >= 8 (unsigned).
   %ecx >= 16 is handed on to L(more16bytes); otherwise jump to the
   L(Nbytes) label with N == %ecx (8..15).  */
1568 L(more8bytes):
1569 cmp $16, %ecx
1570 jae L(more16bytes)
1571 cmp $8, %ecx
1572 je L(8bytes)
1573 cmp $9, %ecx
1574 je L(9bytes)
1575 cmp $10, %ecx
1576 je L(10bytes)
1577 cmp $11, %ecx
1578 je L(11bytes)
1579 cmp $12, %ecx
1580 je L(12bytes)
1581 cmp $13, %ecx
1582 je L(13bytes)
1583 cmp $14, %ecx
1584 je L(14bytes)
1585 jmp L(15bytes)
1586
/* Length dispatch for %ecx >= 16 (from L(more8bytes)).  %ecx >= 24 is
   handed on to L(more24bytes); otherwise jump to L(Nbytes) with
   N == %ecx (16..23).  */
1587 ALIGN (4)
1588 L(more16bytes):
1589 cmp $24, %ecx
1590 jae L(more24bytes)
1591 cmp $16, %ecx
1592 je L(16bytes)
1593 cmp $17, %ecx
1594 je L(17bytes)
1595 cmp $18, %ecx
1596 je L(18bytes)
1597 cmp $19, %ecx
1598 je L(19bytes)
1599 cmp $20, %ecx
1600 je L(20bytes)
1601 cmp $21, %ecx
1602 je L(21bytes)
1603 cmp $22, %ecx
1604 je L(22bytes)
1605 jmp L(23bytes)
1606
/* Length dispatch for %ecx >= 24 (from L(more16bytes)).  %ecx >= 32 is
   handed on to L(more32bytes); otherwise jump to L(Nbytes) with
   N == %ecx (24..31).  */
1607 ALIGN (4)
1608 L(more24bytes):
1609 cmp $32, %ecx
1610 jae L(more32bytes)
1611 cmp $24, %ecx
1612 je L(24bytes)
1613 cmp $25, %ecx
1614 je L(25bytes)
1615 cmp $26, %ecx
1616 je L(26bytes)
1617 cmp $27, %ecx
1618 je L(27bytes)
1619 cmp $28, %ecx
1620 je L(28bytes)
1621 cmp $29, %ecx
1622 je L(29bytes)
1623 cmp $30, %ecx
1624 je L(30bytes)
1625 jmp L(31bytes)
1626
/* Length dispatch for %ecx >= 32 (from L(more24bytes)).  %ecx >= 40 is
   handed on to L(more40bytes); otherwise jump to L(Nbytes) with
   N == %ecx (32..39).  */
1627 ALIGN (4)
1628 L(more32bytes):
1629 cmp $40, %ecx
1630 jae L(more40bytes)
1631 cmp $32, %ecx
1632 je L(32bytes)
1633 cmp $33, %ecx
1634 je L(33bytes)
1635 cmp $34, %ecx
1636 je L(34bytes)
1637 cmp $35, %ecx
1638 je L(35bytes)
1639 cmp $36, %ecx
1640 je L(36bytes)
1641 cmp $37, %ecx
1642 je L(37bytes)
1643 cmp $38, %ecx
1644 je L(38bytes)
1645 jmp L(39bytes)
1646
/* Length dispatch for %ecx >= 40 (from L(more32bytes)): jump to L(Nbytes)
   with N == %ecx for 40..46.  There is no upper-bound check here; every
   other value ends up at L(47bytes), so this relies on %ecx < 48 (the
   function entry branches to L(48bytesormore) for larger lengths).  */
1647 ALIGN (4)
1648 L(more40bytes):
1649 cmp $40, %ecx
1650 je L(40bytes)
1651 cmp $41, %ecx
1652 je L(41bytes)
1653 cmp $42, %ecx
1654 je L(42bytes)
1655 cmp $43, %ecx
1656 je L(43bytes)
1657 cmp $44, %ecx
1658 je L(44bytes)
1659 cmp $45, %ecx
1660 je L(45bytes)
1661 cmp $46, %ecx
1662 je L(46bytes)
1663 jmp L(47bytes)
1664
/* Top of the exact-length dispatch for short remainders.
   In: %ecx = number of bytes left to compare; %eax and %edx are the two
   end pointers used by the L(Nbytes) code (it reads at negative offsets
   from them).  %ecx >= 8 (unsigned) goes to L(more8bytes); 2..6 jump to
   the matching L(Nbytes) label; everything else goes to L(7bytes).
   NOTE(review): that default also catches %ecx == 0 and %ecx == 1.  The
   function entry filters lengths <= 1 (jbe L(less1bytes)) before this
   point; confirm that the other jumps to this label cannot arrive with
   %ecx < 2.  */
1665 ALIGN (4)
1666 L(less48bytes):
1667 cmp $8, %ecx
1668 jae L(more8bytes)
1669 cmp $2, %ecx
1670 je L(2bytes)
1671 cmp $3, %ecx
1672 je L(3bytes)
1673 cmp $4, %ecx
1674 je L(4bytes)
1675 cmp $5, %ecx
1676 je L(5bytes)
1677 cmp $6, %ecx
1678 je L(6bytes)
1679 jmp L(7bytes)
1680
/* Tail compare for remaining lengths that are multiples of 4 (4..44).
   %eax and %edx point just past the last byte of each buffer, so entry
   label L(Nbytes) starts with the dword N bytes before the end and every
   step falls through to the next one, 4 bytes closer to the end.
   A differing dword jumps to L(find_diff) with %ecx = dword read via %eax
   and %ebx = dword read via %edx.  Only %ebx is on the stack here.  */
1681 ALIGN (4)
1682 L(44bytes):
1683 mov -44(%eax), %ecx
1684 mov -44(%edx), %ebx
1685 cmp %ebx, %ecx
1686 jne L(find_diff)
1687 L(40bytes):
1688 mov -40(%eax), %ecx
1689 mov -40(%edx), %ebx
1690 cmp %ebx, %ecx
1691 jne L(find_diff)
1692 L(36bytes):
1693 mov -36(%eax), %ecx
1694 mov -36(%edx), %ebx
1695 cmp %ebx, %ecx
1696 jne L(find_diff)
1697 L(32bytes):
1698 mov -32(%eax), %ecx
1699 mov -32(%edx), %ebx
1700 cmp %ebx, %ecx
1701 jne L(find_diff)
1702 L(28bytes):
1703 mov -28(%eax), %ecx
1704 mov -28(%edx), %ebx
1705 cmp %ebx, %ecx
1706 jne L(find_diff)
1707 L(24bytes):
1708 mov -24(%eax), %ecx
1709 mov -24(%edx), %ebx
1710 cmp %ebx, %ecx
1711 jne L(find_diff)
1712 L(20bytes):
1713 mov -20(%eax), %ecx
1714 mov -20(%edx), %ebx
1715 cmp %ebx, %ecx
1716 jne L(find_diff)
1717 L(16bytes):
1718 mov -16(%eax), %ecx
1719 mov -16(%edx), %ebx
1720 cmp %ebx, %ecx
1721 jne L(find_diff)
1722 L(12bytes):
1723 mov -12(%eax), %ecx
1724 mov -12(%edx), %ebx
1725 cmp %ebx, %ecx
1726 jne L(find_diff)
1727 L(8bytes):
1728 mov -8(%eax), %ecx
1729 mov -8(%edx), %ebx
1730 cmp %ebx, %ecx
1731 jne L(find_diff)
1732 L(4bytes):
1733 mov -4(%eax), %ecx
1734 mov -4(%edx), %ebx
1735 cmp %ebx, %ecx
/* Preload the "equal" result.  mov does not modify the flags, so the jne
   below still acts on the cmp above; it must not become a flag-writing
   zeroing idiom.  On the mismatch path %eax is overwritten again.  */
1736 mov $0, %eax
1737 jne L(find_diff)
1738 POP (%ebx)
1739 ret
/* Directives only: after the POP above the unwind description has nothing
   saved, but the code following in the file runs with %ebx pushed.  */
1740 CFI_PUSH (%ebx)
1741
/* Tail compare for remaining lengths of the form 4k+1 (5..45).
   %eax and %edx point just past the last byte of each buffer.  Entry
   label L(Nbytes) compares the dword N bytes before the end and falls
   through toward the end in 4-byte steps; the last byte is then compared
   on its own.  A differing dword jumps to L(find_diff) with %ecx = dword
   read via %eax and %ebx = dword read via %edx.  */
1742 ALIGN (4)
1743 L(45bytes):
1744 mov -45(%eax), %ecx
1745 mov -45(%edx), %ebx
1746 cmp %ebx, %ecx
1747 jne L(find_diff)
1748 L(41bytes):
1749 mov -41(%eax), %ecx
1750 mov -41(%edx), %ebx
1751 cmp %ebx, %ecx
1752 jne L(find_diff)
1753 L(37bytes):
1754 mov -37(%eax), %ecx
1755 mov -37(%edx), %ebx
1756 cmp %ebx, %ecx
1757 jne L(find_diff)
1758 L(33bytes):
1759 mov -33(%eax), %ecx
1760 mov -33(%edx), %ebx
1761 cmp %ebx, %ecx
1762 jne L(find_diff)
1763 L(29bytes):
1764 mov -29(%eax), %ecx
1765 mov -29(%edx), %ebx
1766 cmp %ebx, %ecx
1767 jne L(find_diff)
1768 L(25bytes):
1769 mov -25(%eax), %ecx
1770 mov -25(%edx), %ebx
1771 cmp %ebx, %ecx
1772 jne L(find_diff)
1773 L(21bytes):
1774 mov -21(%eax), %ecx
1775 mov -21(%edx), %ebx
1776 cmp %ebx, %ecx
1777 jne L(find_diff)
1778 L(17bytes):
1779 mov -17(%eax), %ecx
1780 mov -17(%edx), %ebx
1781 cmp %ebx, %ecx
1782 jne L(find_diff)
1783 L(13bytes):
1784 mov -13(%eax), %ecx
1785 mov -13(%edx), %ebx
1786 cmp %ebx, %ecx
1787 jne L(find_diff)
1788 L(9bytes):
1789 mov -9(%eax), %ecx
1790 mov -9(%edx), %ebx
1791 cmp %ebx, %ecx
1792 jne L(find_diff)
1793 L(5bytes):
1794 mov -5(%eax), %ecx
1795 mov -5(%edx), %ebx
1796 cmp %ebx, %ecx
1797 jne L(find_diff)
/* Final single byte.  The flags of this cmp are what L(end) turns into
   the sign of the result, so nothing between here and the ja in L(end)
   may modify them; the mov that preloads the "equal" result does not.  */
1798 movzbl -1(%eax), %ecx
1799 cmp -1(%edx), %cl
1800 mov $0, %eax
1801 jne L(end)
1802 POP (%ebx)
1803 ret
/* Directives only: describe %ebx as pushed again for the code that
   follows in the file.  */
1804 CFI_PUSH (%ebx)
1805
/* Tail compare for remaining lengths of the form 4k+2 (2..46).
   %eax and %edx point just past the last byte of each buffer.  Entry
   label L(Nbytes) compares the dword N bytes before the end and falls
   through toward the end in 4-byte steps; the last two bytes are handled
   in L(2bytes).  A differing dword jumps to L(find_diff) with %ecx =
   dword read via %eax and %ebx = dword read via %edx.  */
1806 ALIGN (4)
1807 L(46bytes):
1808 mov -46(%eax), %ecx
1809 mov -46(%edx), %ebx
1810 cmp %ebx, %ecx
1811 jne L(find_diff)
1812 L(42bytes):
1813 mov -42(%eax), %ecx
1814 mov -42(%edx), %ebx
1815 cmp %ebx, %ecx
1816 jne L(find_diff)
1817 L(38bytes):
1818 mov -38(%eax), %ecx
1819 mov -38(%edx), %ebx
1820 cmp %ebx, %ecx
1821 jne L(find_diff)
1822 L(34bytes):
1823 mov -34(%eax), %ecx
1824 mov -34(%edx), %ebx
1825 cmp %ebx, %ecx
1826 jne L(find_diff)
1827 L(30bytes):
1828 mov -30(%eax), %ecx
1829 mov -30(%edx), %ebx
1830 cmp %ebx, %ecx
1831 jne L(find_diff)
1832 L(26bytes):
1833 mov -26(%eax), %ecx
1834 mov -26(%edx), %ebx
1835 cmp %ebx, %ecx
1836 jne L(find_diff)
1837 L(22bytes):
1838 mov -22(%eax), %ecx
1839 mov -22(%edx), %ebx
1840 cmp %ebx, %ecx
1841 jne L(find_diff)
1842 L(18bytes):
1843 mov -18(%eax), %ecx
1844 mov -18(%edx), %ebx
1845 cmp %ebx, %ecx
1846 jne L(find_diff)
1847 L(14bytes):
1848 mov -14(%eax), %ecx
1849 mov -14(%edx), %ebx
1850 cmp %ebx, %ecx
1851 jne L(find_diff)
1852 L(10bytes):
1853 mov -10(%eax), %ecx
1854 mov -10(%edx), %ebx
1855 cmp %ebx, %ecx
1856 jne L(find_diff)
1857 L(6bytes):
1858 mov -6(%eax), %ecx
1859 mov -6(%edx), %ebx
1860 cmp %ebx, %ecx
1861 jne L(find_diff)
/* Last two bytes, loaded as one zero-extended word per buffer and then
   compared a byte at a time, lower address (low byte) first, so the
   first differing byte in memory order decides the result.  */
1862 L(2bytes):
1863 movzwl -2(%eax), %ecx
1864 movzwl -2(%edx), %ebx
1865 cmp %bl, %cl
1866 jne L(end)
1867 cmp %bh, %ch
1868 mov $0, %eax   /* "equal" result; mov keeps the cmp flags for the jne */
1869 jne L(end)
1870 POP (%ebx)
1871 ret
/* Directives only: describe %ebx as pushed again for the code that
   follows in the file.  */
1872 CFI_PUSH (%ebx)
1873
/* Tail compare for remaining lengths of the form 4k+3 (3..47).
   %eax and %edx point just past the last byte of each buffer.  Entry
   label L(Nbytes) compares the dword N bytes before the end and falls
   through toward the end in 4-byte steps; the last three bytes are
   handled in L(3bytes).  A differing dword jumps to L(find_diff) with
   %ecx = dword read via %eax and %ebx = dword read via %edx.  */
1874 ALIGN (4)
1875 L(47bytes):
1876 movl -47(%eax), %ecx
1877 movl -47(%edx), %ebx
1878 cmp %ebx, %ecx
1879 jne L(find_diff)
1880 L(43bytes):
1881 movl -43(%eax), %ecx
1882 movl -43(%edx), %ebx
1883 cmp %ebx, %ecx
1884 jne L(find_diff)
1885 L(39bytes):
1886 movl -39(%eax), %ecx
1887 movl -39(%edx), %ebx
1888 cmp %ebx, %ecx
1889 jne L(find_diff)
1890 L(35bytes):
1891 movl -35(%eax), %ecx
1892 movl -35(%edx), %ebx
1893 cmp %ebx, %ecx
1894 jne L(find_diff)
1895 L(31bytes):
1896 movl -31(%eax), %ecx
1897 movl -31(%edx), %ebx
1898 cmp %ebx, %ecx
1899 jne L(find_diff)
1900 L(27bytes):
1901 movl -27(%eax), %ecx
1902 movl -27(%edx), %ebx
1903 cmp %ebx, %ecx
1904 jne L(find_diff)
1905 L(23bytes):
1906 movl -23(%eax), %ecx
1907 movl -23(%edx), %ebx
1908 cmp %ebx, %ecx
1909 jne L(find_diff)
1910 L(19bytes):
1911 movl -19(%eax), %ecx
1912 movl -19(%edx), %ebx
1913 cmp %ebx, %ecx
1914 jne L(find_diff)
1915 L(15bytes):
1916 movl -15(%eax), %ecx
1917 movl -15(%edx), %ebx
1918 cmp %ebx, %ecx
1919 jne L(find_diff)
1920 L(11bytes):
1921 movl -11(%eax), %ecx
1922 movl -11(%edx), %ebx
1923 cmp %ebx, %ecx
1924 jne L(find_diff)
1925 L(7bytes):
1926 movl -7(%eax), %ecx
1927 movl -7(%edx), %ebx
1928 cmp %ebx, %ecx
1929 jne L(find_diff)
/* Last three bytes: the first two are loaded as one zero-extended word.
   The low byte (lower address) is compared first; once it is known to be
   equal, the 16-bit compare is decided by the second byte.  */
1930 L(3bytes):
1931 movzwl -3(%eax), %ecx
1932 movzwl -3(%edx), %ebx
1933 cmpb %bl, %cl
1934 jne L(end)
1935 cmp %bx, %cx
1936 jne L(end)
/* Final byte.  %eax stops being a pointer here: it receives the byte,
   and after the compare the "equal" result 0 (mov keeps the flags).  */
1937 movzbl -1(%eax), %eax
1938 cmpb -1(%edx), %al
1939 mov $0, %eax
1940 jne L(end)
1941 POP (%ebx)
1942 ret
/* Directives only: describe %ebx as pushed again for the code that
   follows in the file.  */
1943 CFI_PUSH (%ebx)
1944
/* L(find_diff): %ecx and %ebx hold two dwords already known to differ
   (every jump here is a jne after cmp %ebx, %ecx).  Locate the first
   differing byte in memory order, i.e. lowest byte first: compare byte 0,
   then the low word (decided by byte 1 once byte 0 is equal), then shift
   both down by 16 and repeat for bytes 2 and 3.  Whichever compare
   detects the difference leaves its flags for L(end).  */
1945 ALIGN (4)
1946 L(find_diff):
1947 cmpb %bl, %cl
1948 jne L(end)
1949 cmp %bx, %cx
1950 jne L(end)
1951 shr $16,%ecx
1952 shr $16,%ebx
1953 cmp %bl, %cl
1954 jne L(end)
1955 cmp %bx, %cx
/* L(end): turn the flags of the deciding compare into the return value.
   Neither the popl in POP nor the mov modifies the flags, so the ja still
   sees that compare: return 1 when the value from the %eax-side buffer is
   above (unsigned) the one from the %edx-side buffer, otherwise -1.
   Expects only %ebx on the stack.  */
1956 L(end):
1957 POP (%ebx)
1958 mov $1, %eax
1959 ja L(bigger)
1960 neg %eax
1961 L(bigger):
1962 ret
1963
1964 END (MEMCMP)
1965
1966 #endif