]>
Commit | Line | Data |
---|---|---|
e73015f2 | 1 | /* strlen with SSE2 and BSF |
04277e02 | 2 | Copyright (C) 2010-2019 Free Software Foundation, Inc. |
e73015f2 L |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
e73015f2 | 19 | |
4f41c682 | 20 | #if defined SHARED && IS_IN (libc) |
e73015f2 L |
21 | |
22 | #include <sysdep.h> | |
e73015f2 L |
23 | |
/* Unwind bookkeeping that accompanies a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0.  The cfi_* names are
   defined outside this file, presumably as wrappers for the assembler
   .cfi_* directives of the same name.  */
24 | #define CFI_PUSH(REG) \ | |
25 | cfi_adjust_cfa_offset (4); \ | |
26 | cfi_rel_offset (REG, 0) | |
27 | ||
/* Unwind bookkeeping that accompanies a 4-byte pop of REG: the CFA offset
   shrinks by 4 and REG is marked as restored.  */
28 | #define CFI_POP(REG) \ | |
29 | cfi_adjust_cfa_offset (-4); \ | |
30 | cfi_restore (REG) | |
31 | ||
/* PUSH and POP pair the stack instruction with the matching unwind
   bookkeeping so the two cannot get out of step.  */
32 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) | |
33 | #define POP(REG) popl REG; CFI_POP (REG) | |
34 | #define PARMS 4 + 8 /* Offset of the first argument from ESP after ENTRANCE: 4 (presumably the return address) plus 8 for the ESI and EDI pushed there.  */ | |
35 | #define STR PARMS /* Stack offset of the string pointer argument.  */ | |
/* ENTRANCE saves ESI and EDI and remembers the resulting unwind state.
   RETURN restores both registers and returns.  The cfi_restore_state that
   follows the ret reinstates the remembered state for the code placed after
   the RETURN, and cfi_remember_state saves it again for the next RETURN.  */
36 | #define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state | |
37 | #define RETURN POP (%edi); POP (%esi); ret; \ | |
38 | cfi_restore_state; cfi_remember_state | |
39 | ||
/* __strlen_sse2_bsf: string length using SSE2 byte compares and BSF.

   In:   stack argument at STR(%esp) = address of the string.
   Out:  EAX = number of bytes preceding the first zero byte.
   Uses: ECX, EDX, flags and XMM0-XMM3 as scratch.  ESI and EDI are saved
         by ENTRANCE and restored on every exit path.

   Register roles: EDI = start of the string, EAX = 16-byte aligned scan
   address (turned into the result on exit), EDX = PMOVMSKB mask in which
   bit i is set when byte i of the compared block is zero.  */
48882a1a | 40 | .text |
e73015f2 L |
41 | ENTRY ( __strlen_sse2_bsf) |
42 | ENTRANCE /* save ESI and EDI */ | |
43 | mov STR(%esp), %edi /* edi = start of the string */ | |
44 | xor %eax, %eax /* eax = 0: result base used by L(exit_less16) */ | |
45 | mov %edi, %ecx | |
46 | and $0x3f, %ecx /* ecx = offset of the start within its 64-byte block */ | |
47 | pxor %xmm0, %xmm0 /* xmm0 = 0, the byte value PCMPEQB searches for */ | |
48 | cmp $0x30, %ecx | |
49 | ja L(next) /* offset above 48: a 16-byte load at edi would run past the 64-byte block (presumably avoided so it cannot touch the next page) */ | |
50 | movdqu (%edi), %xmm1 /* unaligned load of the first 16 bytes */ | |
51 | pcmpeqb %xmm1, %xmm0 | |
52 | pmovmskb %xmm0, %edx /* edx bit i is set when byte i is zero */ | |
53 | test %edx, %edx | |
54 | jnz L(exit_less16) /* terminator within the first 16 bytes, eax is still 0 */ | |
55 | mov %edi, %eax | |
56 | and $-16, %eax /* eax = start rounded down to 16, the loop begins at eax + 16 */ | |
57 | jmp L(align16_start) | |
58 | L(next): | |
59 | ||
60 | mov %edi, %eax | |
61 | and $-16, %eax /* eax = aligned 16-byte block that contains the start */ | |
62 | pcmpeqb (%eax), %xmm0 /* aligned compare, also covers the bytes in front of the string */ | |
63 | mov $-1, %esi | |
64 | sub %eax, %ecx | |
65 | shl %cl, %esi /* SHL uses cl modulo 32, which here equals edi & 15, so esi = -1 << (edi & 15) */ | |
66 | pmovmskb %xmm0, %edx | |
67 | and %esi, %edx /* discard matches that lie before the start of the string */ | |
68 | jnz L(exit) /* eax - edi is zero or negative here, the bit index makes up for it */ | |
/* Main scan.  XMM0-XMM3 are zeroed once: a compare that finds no zero byte
   leaves its destination all zero, so each register is ready for reuse on
   the next iteration without being cleared again.  */
69 | L(align16_start): | |
70 | pxor %xmm0, %xmm0 | |
71 | pxor %xmm1, %xmm1 | |
72 | pxor %xmm2, %xmm2 | |
73 | pxor %xmm3, %xmm3 | |
74 | .p2align 4 | |
/* Each iteration checks the four aligned blocks at eax + 16, 32, 48 and 64.  */
75 | L(align16_loop): | |
76 | pcmpeqb 16(%eax), %xmm0 | |
77 | pmovmskb %xmm0, %edx | |
78 | test %edx, %edx | |
79 | jnz L(exit16) | |
80 | ||
81 | pcmpeqb 32(%eax), %xmm1 | |
82 | pmovmskb %xmm1, %edx | |
83 | test %edx, %edx | |
84 | jnz L(exit32) | |
85 | ||
86 | pcmpeqb 48(%eax), %xmm2 | |
87 | pmovmskb %xmm2, %edx | |
88 | test %edx, %edx | |
89 | jnz L(exit48) | |
90 | ||
91 | pcmpeqb 64(%eax), %xmm3 | |
92 | pmovmskb %xmm3, %edx | |
93 | lea 64(%eax), %eax /* advance, eax now addresses the block just compared */ | |
94 | test %edx, %edx | |
95 | jz L(align16_loop) | |
/* Zero byte found in the block at eax.  */
96 | L(exit): | |
97 | sub %edi, %eax /* eax = offset of that block from the start of the string */ | |
98 | L(exit_less16): | |
99 | bsf %edx, %edx /* edx = index of the lowest set bit, the first zero byte in the block */ | |
100 | add %edx, %eax /* length = block offset + index inside the block */ | |
101 | RETURN | |
/* Zero byte found in the block at eax + 16.  */
102 | L(exit16): | |
103 | sub %edi, %eax | |
104 | bsf %edx, %edx | |
105 | add %edx, %eax | |
106 | add $16, %eax | |
107 | RETURN | |
/* Zero byte found in the block at eax + 32.  */
108 | L(exit32): | |
109 | sub %edi, %eax | |
110 | bsf %edx, %edx | |
111 | add %edx, %eax | |
112 | add $32, %eax | |
113 | RETURN | |
/* Zero byte found in the block at eax + 48.  */
114 | L(exit48): | |
115 | sub %edi, %eax | |
116 | bsf %edx, %edx | |
117 | add %edx, %eax | |
118 | add $48, %eax | |
/* Same sequence as RETURN but without the trailing cfi_restore_state and
   cfi_remember_state, presumably because no further code follows.  */
119 | POP (%edi) | |
120 | POP (%esi) | |
121 | ret | |
122 | ||
123 | END ( __strlen_sse2_bsf) | |
124 | ||
125 | #endif |