/* Source: git.ipfire.org Git - thirdparty/glibc.git,
   blob 5a19ba26bc18ac102c781b45b1f7170e913fe7b0,
   path sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S.  */
/* strchr with SSE2 with bsf
   Copyright (C) 2011 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
20
21 #ifndef NOT_IN_libc
22
23 # include <sysdep.h>
24
/* Unwind-information bookkeeping for a 4-byte push of `reg': the CFA
   offset grows by 4 and `reg' is recorded at offset 0.  The cfi_*
   macros themselves are provided by <sysdep.h>.  */
# define CFI_PUSH(reg)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (reg, 0)

/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and `reg' is marked
   as restored.  */
# define CFI_POP(reg)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (reg)

/* Push/pop a 32-bit register together with the matching CFI update.  */
# define PUSH(reg)	pushl reg; CFI_PUSH (reg)
# define POP(reg)	popl reg; CFI_POP (reg)
35
/* Stack layout after ENTRANCE.  One register (%edi) is pushed, so the
   first argument lies 8 bytes above %esp: 4 for the return address
   plus 4 for the saved register.  */
# define PARMS		8
# define ENTRANCE	PUSH (%edi)
/* RETURN is used in the middle of the function, so after the `ret' the
   CFI state is switched back to "%edi pushed" for the code that is
   assembled after it.  */
# define RETURN		POP (%edi); ret; CFI_PUSH (%edi);

/* Offsets of the two arguments relative to %esp: the string pointer
   and the character to look for.  */
# define STR1		PARMS
# define STR2		STR1+4
42
/* __strchr_sse2_bsf -- strchr using SSE2 byte compares and bsf.

   In:   STR1(%esp)  pointer to the NUL-terminated string
         STR2(%esp)  character to look for (only the low byte is used)
   Out:  %eax        address of the first matching byte, or 0 when the
                     terminator is reached first.  When the character
                     is itself NUL the match and terminator positions
                     coincide and the terminator's address is returned.

   Register roles:
     %edi   address of the 16-byte aligned block being examined; saved
            by ENTRANCE and restored by RETURN
     %ecx   misalignment of the string start (0..15); reused as
            scratch once a terminator has to be located
     %xmm1  the searched character replicated into all 16 bytes
     %xmm2  all-zero on entry to every compare step; after the compare
            it holds 0xff in each lane whose byte is NUL
     %eax   match bitmask, then match index, then the result
     %edx   NUL bitmask

   Only aligned 16-byte loads (movdqa) are issued; for a misaligned
   string the bytes in front of it are read and their result bits are
   shifted out.  */
	.text
ENTRY (__strchr_sse2_bsf)

	ENTRANCE
	movl	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	movl	%ecx, %edi
	/* Two byte-unpacks copy the low byte over the low dword; the
	   pshufd below then broadcasts that dword to the full register.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* %ecx = offset of the string inside its 16-byte block.  The
	   flags produced here feed the jz below; pshufd does not modify
	   them.  */
	andl	$15, %ecx
	pshufd	$0, %xmm1, %xmm1
	jz	L(aligned_loop)

	/* Misaligned start: examine the whole aligned block that holds
	   the first byte.  */
	andl	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2		/* lanes equal to NUL */
	pcmpeqb	%xmm1, %xmm0		/* lanes equal to the character */
	pmovmskb %xmm2, %edx		/* %edx = NUL bitmask */
	pmovmskb %xmm0, %eax		/* %eax = match bitmask */
	/* Drop the bits that belong to bytes in front of the string.
	   Both masks are 16 bits wide, so the sign bit is clear and the
	   arithmetic shift just shifts in zeros.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	testl	%eax, %eax
	jz	L(head_no_match)
	/* %eax = index of the first match, counted from the string start.  */
	bsfl	%eax, %eax
	testl	%edx, %edx
	jz	L(head_match)		/* no terminator in this block */
	bsfl	%edx, %edx
	cmpl	%edx, %eax
	/* A match located after the terminator does not count.  */
	ja	L(not_found)
L(head_match):
	/* result = block base + misalignment + match index.  */
	addl	%edi, %eax
	addl	%ecx, %eax
	RETURN

	.p2align 4
L(head_no_match):
	/* No match in the first block; a terminator there ends the search.  */
	testl	%edx, %edx
	jnz	L(not_found)
	/* %xmm2 can still flag zero bytes that sit in front of the string
	   (their bits were only shifted out of %edx), so clear it before
	   entering the loop.  */
	pxor	%xmm2, %xmm2

	addl	$16, %edi

	.p2align 4
	/* Main loop over aligned blocks, unrolled four times.  A step
	   falls through only when both masks are zero, which leaves %xmm2
	   all-zero for the next step.  %edi is advanced before the
	   branch, so at L(found) it points one block past the hit.  */
L(aligned_loop):
	.rept	4
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	addl	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	orl	%eax, %edx
	jnz	L(found)
	.endr
	jmp	L(aligned_loop)

L(found):
	/* %edx was overwritten by the OR above; fetch the NUL mask again.  */
	pmovmskb %xmm2, %edx
	testl	%eax, %eax
	jz	L(not_found)		/* terminator only, no match */
	bsfl	%eax, %eax
	/* A match exists; see whether a terminator precedes it.  */
	testl	%edx, %edx
	jz	L(return_match)
	bsfl	%edx, %ecx
	cmpl	%ecx, %eax
	ja	L(not_found)
L(return_match):
	/* Undo the early advance of %edi, then add the match index.  */
	subl	$16, %edi
	addl	%edi, %eax
	RETURN

	/* Character not present before the terminator: return 0.  */
	.p2align 4
L(not_found):
	xorl	%eax, %eax
	RETURN

END (__strchr_sse2_bsf)
159 #endif