1 /* memcmp with SSSE3, wmemcmp with SSSE3
2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
/* The MEMCMP macro expands to __memcmp_ssse3.
   NOTE(review): the indented '#' suggests this sits inside a conditional
   that lets an includer pre-define MEMCMP -- the enclosing directive is
   not visible in this excerpt; confirm against the full file.  */
25 # define MEMCMP __memcmp_ssse3
29 wmemcmp has to use SIGNED comparison for elements.
30 memcmp has to use UNSIGNED comparison for elements.
/* Build-variant selection followed by the first dispatch branches.
   NOTE(review): this listing is an excerpt (the embedded original line
   numbers skip), so the comparisons that feed each branch are not
   visible and are not described here.  */
35 # ifdef USE_AS_WMEMCMP
39 # elif defined __ILP32__
40 /* Clear the upper 32 bits. */
/* jae is the unsigned "above or equal" branch, so it is taken for
   LEN >= 48 as the label name says.  */
46 jae L(48bytesormore) /* LEN >= 48 */
73 # ifndef USE_AS_WMEMCMP
75 jae L(next_unaligned_table)
93 L(next_unaligned_table):
/* Shift-0 path: no palignr is used here.  Each 16-byte chunk is loaded
   from %rsi with movdqa and compared bytewise with the chunk at the same
   offset of %rdi; pcmpeqb leaves 0xff in every byte that matches and
   0x00 in every byte that differs.  movdqa and the pcmpeqb memory
   operand both require 16-byte alignment, so %rsi and %rdi are 16-byte
   aligned on this path.
   NOTE(review): this listing is an excerpt -- the initial loads of
   %xmm1/%xmm0, the mask extraction, the length bookkeeping and most
   branches are on lines not shown.  */
126 pcmpeqb (%rdi), %xmm1
127 movdqa 16(%rsi), %xmm2
128 pcmpeqb 16(%rdi), %xmm2
143 pcmpeqb (%rdi), %xmm0
145 movdqa 16(%rsi), %xmm2
146 pcmpeqb 16(%rdi), %xmm2
/* Loop body: the same compare for the chunks at offsets 32 and 48.  The
   instruction that sets ZF for the jz back-edge is not shown.  */
147 L(shr_0_gobble_loop):
152 movdqa 32(%rsi), %xmm0
153 movdqa 48(%rsi), %xmm2
155 pcmpeqb 32(%rdi), %xmm0
156 pcmpeqb 48(%rdi), %xmm2
159 jz L(shr_0_gobble_loop)
/* Shift-1 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $1 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 1 byte into the lower one, so each pcmpeqb compares
   the source window at (k+1)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
/* The conditional opened on the next line excludes the code from the
   USE_AS_WMEMCMP build; its matching endif is not visible here.  */
180 # ifndef USE_AS_WMEMCMP
189 movdqa 16(%rsi), %xmm1
191 palignr $1, (%rsi), %xmm1
192 pcmpeqb (%rdi), %xmm1
194 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
195 palignr $1, %xmm2, %xmm3
196 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
212 movdqa 16(%rsi), %xmm0
213 palignr $1, (%rsi), %xmm0
214 pcmpeqb (%rdi), %xmm0
216 movdqa 32(%rsi), %xmm3
217 palignr $1, 16(%rsi), %xmm3
218 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
220 L(shr_1_gobble_loop):
226 movdqa 64(%rsi), %xmm3
227 palignr $1, 48(%rsi), %xmm3
229 movdqa 48(%rsi), %xmm0
230 palignr $1, 32(%rsi), %xmm0
231 pcmpeqb 32(%rdi), %xmm0
233 pcmpeqb 48(%rdi), %xmm3
236 jz L(shr_1_gobble_loop)
240 jge L(shr_1_gobble_next)
243 L(shr_1_gobble_next):
/* Shift-2 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $2 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 2 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+2)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
267 movdqa 16(%rsi), %xmm1
269 palignr $2, (%rsi), %xmm1
270 pcmpeqb (%rdi), %xmm1
272 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
273 palignr $2, %xmm2, %xmm3
274 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
290 movdqa 16(%rsi), %xmm0
291 palignr $2, (%rsi), %xmm0
292 pcmpeqb (%rdi), %xmm0
294 movdqa 32(%rsi), %xmm3
295 palignr $2, 16(%rsi), %xmm3
296 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
298 L(shr_2_gobble_loop):
304 movdqa 64(%rsi), %xmm3
305 palignr $2, 48(%rsi), %xmm3
307 movdqa 48(%rsi), %xmm0
308 palignr $2, 32(%rsi), %xmm0
309 pcmpeqb 32(%rdi), %xmm0
311 pcmpeqb 48(%rdi), %xmm3
314 jz L(shr_2_gobble_loop)
318 jge L(shr_2_gobble_next)
321 L(shr_2_gobble_next):
/* Shift-3 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $3 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 3 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+3)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
344 movdqa 16(%rsi), %xmm1
346 palignr $3, (%rsi), %xmm1
347 pcmpeqb (%rdi), %xmm1
349 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
350 palignr $3, %xmm2, %xmm3
351 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
367 movdqa 16(%rsi), %xmm0
368 palignr $3, (%rsi), %xmm0
369 pcmpeqb (%rdi), %xmm0
371 movdqa 32(%rsi), %xmm3
372 palignr $3, 16(%rsi), %xmm3
373 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
375 L(shr_3_gobble_loop):
381 movdqa 64(%rsi), %xmm3
382 palignr $3, 48(%rsi), %xmm3
384 movdqa 48(%rsi), %xmm0
385 palignr $3, 32(%rsi), %xmm0
386 pcmpeqb 32(%rdi), %xmm0
388 pcmpeqb 48(%rdi), %xmm3
391 jz L(shr_3_gobble_loop)
395 jge L(shr_3_gobble_next)
398 L(shr_3_gobble_next):
/* Shift-4 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $4 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 4 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+4)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
423 movdqa 16(%rsi), %xmm1
425 palignr $4, (%rsi), %xmm1
426 pcmpeqb (%rdi), %xmm1
428 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
429 palignr $4, %xmm2, %xmm3
430 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
446 movdqa 16(%rsi), %xmm0
447 palignr $4, (%rsi), %xmm0
448 pcmpeqb (%rdi), %xmm0
450 movdqa 32(%rsi), %xmm3
451 palignr $4, 16(%rsi), %xmm3
452 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
454 L(shr_4_gobble_loop):
460 movdqa 64(%rsi), %xmm3
461 palignr $4, 48(%rsi), %xmm3
463 movdqa 48(%rsi), %xmm0
464 palignr $4, 32(%rsi), %xmm0
465 pcmpeqb 32(%rdi), %xmm0
467 pcmpeqb 48(%rdi), %xmm3
470 jz L(shr_4_gobble_loop)
474 jge L(shr_4_gobble_next)
477 L(shr_4_gobble_next):
/* Shift-5 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $5 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 5 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+5)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
/* The conditional opened on the next line excludes the code from the
   USE_AS_WMEMCMP build; its matching endif is not visible here.  */
493 # ifndef USE_AS_WMEMCMP
502 movdqa 16(%rsi), %xmm1
504 palignr $5, (%rsi), %xmm1
505 pcmpeqb (%rdi), %xmm1
507 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
508 palignr $5, %xmm2, %xmm3
509 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
525 movdqa 16(%rsi), %xmm0
526 palignr $5, (%rsi), %xmm0
527 pcmpeqb (%rdi), %xmm0
529 movdqa 32(%rsi), %xmm3
530 palignr $5, 16(%rsi), %xmm3
531 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
533 L(shr_5_gobble_loop):
539 movdqa 64(%rsi), %xmm3
540 palignr $5, 48(%rsi), %xmm3
542 movdqa 48(%rsi), %xmm0
543 palignr $5, 32(%rsi), %xmm0
544 pcmpeqb 32(%rdi), %xmm0
546 pcmpeqb 48(%rdi), %xmm3
549 jz L(shr_5_gobble_loop)
553 jge L(shr_5_gobble_next)
556 L(shr_5_gobble_next):
/* Shift-6 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $6 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 6 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+6)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
579 movdqa 16(%rsi), %xmm1
581 palignr $6, (%rsi), %xmm1
582 pcmpeqb (%rdi), %xmm1
584 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
585 palignr $6, %xmm2, %xmm3
586 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
602 movdqa 16(%rsi), %xmm0
603 palignr $6, (%rsi), %xmm0
604 pcmpeqb (%rdi), %xmm0
606 movdqa 32(%rsi), %xmm3
607 palignr $6, 16(%rsi), %xmm3
608 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
610 L(shr_6_gobble_loop):
616 movdqa 64(%rsi), %xmm3
617 palignr $6, 48(%rsi), %xmm3
619 movdqa 48(%rsi), %xmm0
620 palignr $6, 32(%rsi), %xmm0
621 pcmpeqb 32(%rdi), %xmm0
623 pcmpeqb 48(%rdi), %xmm3
626 jz L(shr_6_gobble_loop)
630 jge L(shr_6_gobble_next)
633 L(shr_6_gobble_next):
/* Shift-7 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $7 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 7 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+7)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
656 movdqa 16(%rsi), %xmm1
658 palignr $7, (%rsi), %xmm1
659 pcmpeqb (%rdi), %xmm1
661 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
662 palignr $7, %xmm2, %xmm3
663 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
679 movdqa 16(%rsi), %xmm0
680 palignr $7, (%rsi), %xmm0
681 pcmpeqb (%rdi), %xmm0
683 movdqa 32(%rsi), %xmm3
684 palignr $7, 16(%rsi), %xmm3
685 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
687 L(shr_7_gobble_loop):
693 movdqa 64(%rsi), %xmm3
694 palignr $7, 48(%rsi), %xmm3
696 movdqa 48(%rsi), %xmm0
697 palignr $7, 32(%rsi), %xmm0
698 pcmpeqb 32(%rdi), %xmm0
700 pcmpeqb 48(%rdi), %xmm3
703 jz L(shr_7_gobble_loop)
707 jge L(shr_7_gobble_next)
710 L(shr_7_gobble_next):
/* Shift-8 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $8 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 8 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+8)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
735 movdqa 16(%rsi), %xmm1
737 palignr $8, (%rsi), %xmm1
738 pcmpeqb (%rdi), %xmm1
740 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
741 palignr $8, %xmm2, %xmm3
742 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
758 movdqa 16(%rsi), %xmm0
759 palignr $8, (%rsi), %xmm0
760 pcmpeqb (%rdi), %xmm0
762 movdqa 32(%rsi), %xmm3
763 palignr $8, 16(%rsi), %xmm3
764 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
766 L(shr_8_gobble_loop):
772 movdqa 64(%rsi), %xmm3
773 palignr $8, 48(%rsi), %xmm3
775 movdqa 48(%rsi), %xmm0
776 palignr $8, 32(%rsi), %xmm0
777 pcmpeqb 32(%rdi), %xmm0
779 pcmpeqb 48(%rdi), %xmm3
782 jz L(shr_8_gobble_loop)
786 jge L(shr_8_gobble_next)
789 L(shr_8_gobble_next):
/* Shift-9 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $9 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 9 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+9)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
/* The conditional opened on the next line excludes the code from the
   USE_AS_WMEMCMP build; its matching endif is not visible here.  */
805 # ifndef USE_AS_WMEMCMP
814 movdqa 16(%rsi), %xmm1
816 palignr $9, (%rsi), %xmm1
817 pcmpeqb (%rdi), %xmm1
819 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
820 palignr $9, %xmm2, %xmm3
821 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
837 movdqa 16(%rsi), %xmm0
838 palignr $9, (%rsi), %xmm0
839 pcmpeqb (%rdi), %xmm0
841 movdqa 32(%rsi), %xmm3
842 palignr $9, 16(%rsi), %xmm3
843 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
845 L(shr_9_gobble_loop):
851 movdqa 64(%rsi), %xmm3
852 palignr $9, 48(%rsi), %xmm3
854 movdqa 48(%rsi), %xmm0
855 palignr $9, 32(%rsi), %xmm0
856 pcmpeqb 32(%rdi), %xmm0
858 pcmpeqb 48(%rdi), %xmm3
861 jz L(shr_9_gobble_loop)
865 jge L(shr_9_gobble_next)
868 L(shr_9_gobble_next):
/* Shift-10 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $10 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 10 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+10)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- mask extraction, length
   bookkeeping and most branches are on lines not shown.  */
891 movdqa 16(%rsi), %xmm1
893 palignr $10, (%rsi), %xmm1
894 pcmpeqb (%rdi), %xmm1
896 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
897 palignr $10, %xmm2, %xmm3
898 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
914 movdqa 16(%rsi), %xmm0
915 palignr $10, (%rsi), %xmm0
916 pcmpeqb (%rdi), %xmm0
918 movdqa 32(%rsi), %xmm3
919 palignr $10, 16(%rsi), %xmm3
920 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the same compare for the windows matching 32(%rdi) and
   48(%rdi).  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
922 L(shr_10_gobble_loop):
928 movdqa 64(%rsi), %xmm3
929 palignr $10, 48(%rsi), %xmm3
931 movdqa 48(%rsi), %xmm0
932 palignr $10, 32(%rsi), %xmm0
933 pcmpeqb 32(%rdi), %xmm0
935 pcmpeqb 48(%rdi), %xmm3
938 jz L(shr_10_gobble_loop)
942 jge L(shr_10_gobble_next)
945 L(shr_10_gobble_next):
/* Shift-11 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $11 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 11 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+11)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- length bookkeeping and
   most branches are on lines not shown.  */
968 movdqa 16(%rsi), %xmm1
970 palignr $11, (%rsi), %xmm1
971 pcmpeqb (%rdi), %xmm1
973 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
974 palignr $11, %xmm2, %xmm3
975 pcmpeqb 16(%rdi), %xmm3
/* Second sequence: same two compares, with the first result in %xmm0.  */
991 movdqa 16(%rsi), %xmm0
992 palignr $11, (%rsi), %xmm0
993 pcmpeqb (%rdi), %xmm0
995 movdqa 32(%rsi), %xmm3
996 palignr $11, 16(%rsi), %xmm3
997 pcmpeqb 16(%rdi), %xmm3
/* Loop body.  pmovmskb packs the top bit of each byte of the previous
   %xmm3 compare result into a 16-bit mask in %edx before %xmm3 is
   reloaded.  The instructions that set the flags for jz and for the
   signed jge are not shown.  */
999 L(shr_11_gobble_loop):
1002 pmovmskb %xmm3, %edx
1005 movdqa 64(%rsi), %xmm3
1006 palignr $11, 48(%rsi), %xmm3
1008 movdqa 48(%rsi), %xmm0
1009 palignr $11, 32(%rsi), %xmm0
1010 pcmpeqb 32(%rdi), %xmm0
1012 pcmpeqb 48(%rdi), %xmm3
1015 jz L(shr_11_gobble_loop)
1019 jge L(shr_11_gobble_next)
1022 L(shr_11_gobble_next):
1026 pmovmskb %xmm3, %edx
/* Shift-12 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $12 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 12 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+12)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- length bookkeeping and
   most branches are on lines not shown.  */
/* Unsigned "above or equal" branch to the gobble variant; the
   comparison that feeds it is not shown.  */
1045 jae L(shr_12_gobble)
1047 movdqa 16(%rsi), %xmm1
1049 palignr $12, (%rsi), %xmm1
1050 pcmpeqb (%rdi), %xmm1
1052 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
1053 palignr $12, %xmm2, %xmm3
1054 pcmpeqb 16(%rdi), %xmm3
/* pmovmskb packs the top bit of each compare-result byte into a 16-bit
   mask in %edx.  */
1057 pmovmskb %xmm3, %edx
/* Second sequence: same two compares, with the first result in %xmm0.  */
1070 movdqa 16(%rsi), %xmm0
1071 palignr $12, (%rsi), %xmm0
1072 pcmpeqb (%rdi), %xmm0
1074 movdqa 32(%rsi), %xmm3
1075 palignr $12, 16(%rsi), %xmm3
1076 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the mask of the previous %xmm3 compare is taken before
   %xmm3 is reloaded.  The instructions that set the flags for jz and
   for the signed jge are not shown.  */
1078 L(shr_12_gobble_loop):
1081 pmovmskb %xmm3, %edx
1084 movdqa 64(%rsi), %xmm3
1085 palignr $12, 48(%rsi), %xmm3
1087 movdqa 48(%rsi), %xmm0
1088 palignr $12, 32(%rsi), %xmm0
1089 pcmpeqb 32(%rdi), %xmm0
1091 pcmpeqb 48(%rdi), %xmm3
1094 jz L(shr_12_gobble_loop)
1098 jge L(shr_12_gobble_next)
1101 L(shr_12_gobble_next):
1105 pmovmskb %xmm3, %edx
/* Shift-13 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $13 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 13 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+13)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- length bookkeeping and
   most branches are on lines not shown.  */
/* The conditional opened on the next line excludes the code from the
   USE_AS_WMEMCMP build; its matching endif is not visible here.  */
1117 # ifndef USE_AS_WMEMCMP
/* Unsigned "above or equal" branch to the gobble variant; the
   comparison that feeds it is not shown.  */
1124 jae L(shr_13_gobble)
1126 movdqa 16(%rsi), %xmm1
1128 palignr $13, (%rsi), %xmm1
1129 pcmpeqb (%rdi), %xmm1
1131 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
1132 palignr $13, %xmm2, %xmm3
1133 pcmpeqb 16(%rdi), %xmm3
/* pmovmskb packs the top bit of each compare-result byte into a 16-bit
   mask in %edx.  */
1136 pmovmskb %xmm3, %edx
/* Second sequence: same two compares, with the first result in %xmm0.  */
1149 movdqa 16(%rsi), %xmm0
1150 palignr $13, (%rsi), %xmm0
1151 pcmpeqb (%rdi), %xmm0
1153 movdqa 32(%rsi), %xmm3
1154 palignr $13, 16(%rsi), %xmm3
1155 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the mask of the previous %xmm3 compare is taken before
   %xmm3 is reloaded.  The instructions that set the flags for jz and
   for the signed jge are not shown.  */
1157 L(shr_13_gobble_loop):
1160 pmovmskb %xmm3, %edx
1163 movdqa 64(%rsi), %xmm3
1164 palignr $13, 48(%rsi), %xmm3
1166 movdqa 48(%rsi), %xmm0
1167 palignr $13, 32(%rsi), %xmm0
1168 pcmpeqb 32(%rdi), %xmm0
1170 pcmpeqb 48(%rdi), %xmm3
1173 jz L(shr_13_gobble_loop)
1177 jge L(shr_13_gobble_next)
1180 L(shr_13_gobble_next):
1184 pmovmskb %xmm3, %edx
/* Shift-14 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $14 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 14 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+14)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- length bookkeeping and
   most branches are on lines not shown.  */
/* Unsigned "above or equal" branch to the gobble variant; the
   comparison that feeds it is not shown.  */
1201 jae L(shr_14_gobble)
1203 movdqa 16(%rsi), %xmm1
1205 palignr $14, (%rsi), %xmm1
1206 pcmpeqb (%rdi), %xmm1
1208 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
1209 palignr $14, %xmm2, %xmm3
1210 pcmpeqb 16(%rdi), %xmm3
/* pmovmskb packs the top bit of each compare-result byte into a 16-bit
   mask in %edx.  */
1213 pmovmskb %xmm3, %edx
/* Second sequence: same two compares, with the first result in %xmm0.  */
1226 movdqa 16(%rsi), %xmm0
1227 palignr $14, (%rsi), %xmm0
1228 pcmpeqb (%rdi), %xmm0
1230 movdqa 32(%rsi), %xmm3
1231 palignr $14, 16(%rsi), %xmm3
1232 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the mask of the previous %xmm3 compare is taken before
   %xmm3 is reloaded.  The instructions that set the flags for jz and
   for the signed jge are not shown.  */
1234 L(shr_14_gobble_loop):
1237 pmovmskb %xmm3, %edx
1240 movdqa 64(%rsi), %xmm3
1241 palignr $14, 48(%rsi), %xmm3
1243 movdqa 48(%rsi), %xmm0
1244 palignr $14, 32(%rsi), %xmm0
1245 pcmpeqb 32(%rdi), %xmm0
1247 pcmpeqb 48(%rdi), %xmm3
1250 jz L(shr_14_gobble_loop)
1254 jge L(shr_14_gobble_next)
1257 L(shr_14_gobble_next):
1261 pmovmskb %xmm3, %edx
/* Shift-15 path.  movdqa and the memory operands of palignr/pcmpeqb all
   require 16-byte alignment, so %rsi and %rdi are 16-byte aligned here.
   palignr $15 joins two adjacent aligned source chunks and keeps the 16
   bytes that start 15 bytes into the lower one, so each pcmpeqb compares
   the source window at (k+15)(%rsi) with the chunk at k(%rdi); a result
   byte is 0xff where the inputs match and 0x00 where they differ.
   NOTE(review): this listing is an excerpt -- length bookkeeping and
   most branches are on lines not shown.  */
/* Unsigned "above or equal" branch to the gobble variant; the
   comparison that feeds it is not shown.  */
1278 jae L(shr_15_gobble)
1280 movdqa 16(%rsi), %xmm1
1282 palignr $15, (%rsi), %xmm1
1283 pcmpeqb (%rdi), %xmm1
1285 movdqa 32(%rsi), %xmm3
/* %xmm2 is written by a line not shown; presumably it holds the
   16(%rsi) chunk, since the second sequence below uses 16(%rsi)
   directly in the same position -- confirm.  */
1286 palignr $15, %xmm2, %xmm3
1287 pcmpeqb 16(%rdi), %xmm3
/* pmovmskb packs the top bit of each compare-result byte into a 16-bit
   mask in %edx.  */
1290 pmovmskb %xmm3, %edx
/* Second sequence: same two compares, with the first result in %xmm0.  */
1303 movdqa 16(%rsi), %xmm0
1304 palignr $15, (%rsi), %xmm0
1305 pcmpeqb (%rdi), %xmm0
1307 movdqa 32(%rsi), %xmm3
1308 palignr $15, 16(%rsi), %xmm3
1309 pcmpeqb 16(%rdi), %xmm3
/* Loop body: the mask of the previous %xmm3 compare is taken before
   %xmm3 is reloaded.  The instructions that set the flags for jz and
   for the signed jge are not shown.  */
1311 L(shr_15_gobble_loop):
1314 pmovmskb %xmm3, %edx
1317 movdqa 64(%rsi), %xmm3
1318 palignr $15, 48(%rsi), %xmm3
1320 movdqa 48(%rsi), %xmm0
1321 palignr $15, 32(%rsi), %xmm0
1322 pcmpeqb 32(%rdi), %xmm0
1324 pcmpeqb 48(%rdi), %xmm3
1327 jz L(shr_15_gobble_loop)
1331 jge L(shr_15_gobble_next)
1334 L(shr_15_gobble_next):
1338 pmovmskb %xmm3, %edx
/* Byte-granular loads used to report a difference.  Each pair
   zero-extends the byte at the same negative offset from %rdi into %eax
   and from %rsi into %edx.
   NOTE(review): this listing is an excerpt -- the labels, the tests that
   pick a pair and the instructions that consume %eax/%edx are not shown.
   The negative offsets suggest both pointers were advanced past the data
   under comparison; confirm against the full file.  */
/* 16-bit mask of the top bits of the bytes in %xmm1, placed in %r8d.  */
1352 pmovmskb %xmm1, %r8d
/* Opens a section left out of the USE_AS_WMEMCMP build; the matching
   endif is not visible here.  */
1361 # ifndef USE_AS_WMEMCMP
1386 movzbl -9(%rdi), %eax
1387 movzbl -9(%rsi), %edx
1393 movzbl -16(%rdi), %eax
1394 movzbl -16(%rsi), %edx
1400 movzbl -15(%rdi), %eax
1401 movzbl -15(%rsi), %edx
1407 movzbl -14(%rdi), %eax
1408 movzbl -14(%rsi), %edx
1414 movzbl -13(%rdi), %eax
1415 movzbl -13(%rsi), %edx
1421 movzbl -12(%rdi), %eax
1422 movzbl -12(%rsi), %edx
1428 movzbl -11(%rdi), %eax
1429 movzbl -11(%rsi), %edx
1435 movzbl -10(%rdi), %eax
1436 movzbl -10(%rsi), %edx
1465 movzbl -9(%rdi), %eax
1466 movzbl -9(%rsi), %edx
/* wmemcmp-specific dispatch.  Three jz branches route to the
   second_double_word, next_two_double_words and fourth_double_word
   handlers.
   NOTE(review): judging by the label names these select which 32-bit
   element of a 16-byte chunk holds the difference, but the tests that
   set ZF and the handler bodies are not shown in this excerpt --
   confirm against the full file.  */
1470 /* special for wmemcmp */
1473 jz L(next_two_double_words)
1475 jz L(second_double_word)
1482 L(second_double_word):
1489 L(next_two_double_words):
1491 jz L(fourth_double_word)
1498 L(fourth_double_word):
/* Six separate conditionals, each opening a section that is left out of
   the USE_AS_WMEMCMP build.
   NOTE(review): the guarded code and the matching endif lines are not
   shown in this excerpt.  */
1511 # ifndef USE_AS_WMEMCMP
1535 # ifndef USE_AS_WMEMCMP
1559 # ifndef USE_AS_WMEMCMP
1583 # ifndef USE_AS_WMEMCMP
1607 # ifndef USE_AS_WMEMCMP
1629 # ifndef USE_AS_WMEMCMP
/* Tail loads in 4-byte steps at offsets -44 down to -12.  The first run
   loads the 32-bit value at each offset from %rdi into %eax and from
   %rsi into %ecx.
   NOTE(review): this listing is an excerpt -- the entry labels and the
   compare/branch after each pair are not shown.  The negative offsets
   suggest both pointers were advanced past the data under comparison;
   confirm against the full file.  */
1646 movl -44(%rdi), %eax
1647 movl -44(%rsi), %ecx
1651 movl -40(%rdi), %eax
1652 movl -40(%rsi), %ecx
1656 movl -36(%rdi), %eax
1657 movl -36(%rsi), %ecx
1661 movl -32(%rdi), %eax
1662 movl -32(%rsi), %ecx
1666 movl -28(%rdi), %eax
1667 movl -28(%rsi), %ecx
1671 movl -24(%rdi), %eax
1672 movl -24(%rsi), %ecx
1676 movl -20(%rdi), %eax
1677 movl -20(%rsi), %ecx
1681 movl -16(%rdi), %eax
1682 movl -16(%rsi), %ecx
1686 movl -12(%rdi), %eax
1687 movl -12(%rsi), %ecx
/* Second run over the same offsets: only the %rdi side is loaded here;
   how it is compared with the %rsi side is on lines not shown.  */
1706 movl -44(%rdi), %eax
1710 movl -40(%rdi), %eax
1714 movl -36(%rdi), %eax
1718 movl -32(%rdi), %eax
1722 movl -28(%rdi), %eax
1726 movl -24(%rdi), %eax
1730 movl -20(%rdi), %eax
1734 movl -16(%rdi), %eax
1738 movl -12(%rdi), %eax
/* Tail loads in 4-byte steps at offsets -45 down to -13 (%rdi into %eax,
   %rsi into %ecx), ending with a zero-extended load of the byte at
   -1(%rdi).
   NOTE(review): this listing is an excerpt -- the entry labels, the
   compare/branch after each pair and the %rsi side of the final byte are
   not shown.  */
/* Opens a section left out of the USE_AS_WMEMCMP build; the matching
   endif is not visible here.  */
1754 # ifndef USE_AS_WMEMCMP
1757 movl -45(%rdi), %eax
1758 movl -45(%rsi), %ecx
1762 movl -41(%rdi), %eax
1763 movl -41(%rsi), %ecx
1767 movl -37(%rdi), %eax
1768 movl -37(%rsi), %ecx
1772 movl -33(%rdi), %eax
1773 movl -33(%rsi), %ecx
1777 movl -29(%rdi), %eax
1778 movl -29(%rsi), %ecx
1782 movl -25(%rdi), %eax
1783 movl -25(%rsi), %ecx
1787 movl -21(%rdi), %eax
1788 movl -21(%rsi), %ecx
1792 movl -17(%rdi), %eax
1793 movl -17(%rsi), %ecx
1797 movl -13(%rdi), %eax
1798 movl -13(%rsi), %ecx
1812 movzbl -1(%rdi), %eax
/* Tail loads in 4-byte steps at offsets -46 down to -10 (%rdi into %eax,
   %rsi into %ecx), ending with zero-extended 16-bit loads at offset -2
   from both pointers.
   NOTE(review): this listing is an excerpt -- the entry labels and the
   compare/branch after each pair are not shown.  */
1820 movl -46(%rdi), %eax
1821 movl -46(%rsi), %ecx
1825 movl -42(%rdi), %eax
1826 movl -42(%rsi), %ecx
1830 movl -38(%rdi), %eax
1831 movl -38(%rsi), %ecx
1835 movl -34(%rdi), %eax
1836 movl -34(%rsi), %ecx
1840 movl -30(%rdi), %eax
1841 movl -30(%rsi), %ecx
1845 movl -26(%rdi), %eax
1846 movl -26(%rsi), %ecx
1850 movl -22(%rdi), %eax
1851 movl -22(%rsi), %ecx
1855 movl -18(%rdi), %eax
1856 movl -18(%rsi), %ecx
1860 movl -14(%rdi), %eax
1861 movl -14(%rsi), %ecx
1865 movl -10(%rdi), %eax
1866 movl -10(%rsi), %ecx
1875 movzwl -2(%rdi), %eax
1876 movzwl -2(%rsi), %ecx
/* Tail loads in 4-byte steps at offsets -47 down to -11 (%rdi into %eax,
   %rsi into %ecx), then zero-extended 16-bit loads at offset -3 from
   both pointers and a zero-extended load of the byte at -1(%rdi).
   NOTE(review): this listing is an excerpt -- the entry labels, the
   compare/branch after each pair and the %rsi side of the final byte are
   not shown.  */
1886 movl -47(%rdi), %eax
1887 movl -47(%rsi), %ecx
1891 movl -43(%rdi), %eax
1892 movl -43(%rsi), %ecx
1896 movl -39(%rdi), %eax
1897 movl -39(%rsi), %ecx
1901 movl -35(%rdi), %eax
1902 movl -35(%rsi), %ecx
1906 movl -31(%rdi), %eax
1907 movl -31(%rsi), %ecx
1911 movl -27(%rdi), %eax
1912 movl -27(%rsi), %ecx
1916 movl -23(%rdi), %eax
1917 movl -23(%rsi), %ecx
1921 movl -19(%rdi), %eax
1922 movl -19(%rsi), %ecx
1926 movl -15(%rdi), %eax
1927 movl -15(%rsi), %ecx
1931 movl -11(%rdi), %eax
1932 movl -11(%rsi), %ecx
1941 movzwl -3(%rdi), %eax
1942 movzwl -3(%rsi), %ecx
1947 movzbl -1(%rdi), %eax
/* Difference-resolution tail.  jg is the signed "greater" branch to
   find_diff_bigger; the comparison that feeds it and the code at the
   label are not shown in this excerpt.
   NOTE(review): the comment on the following line is cut off in this
   listing (its closing line is missing).  */
1964 /* We get there only if we already know there is a
1978 jg L(find_diff_bigger)
1983 L(find_diff_bigger):