]>
Commit | Line | Data |
---|---|---|
83d1cc42 FX |
1 | /* memchr - find a character in a memory zone using base integer registers |
2 | ||
2b778ceb | 3 | Copyright (C) 2018-2021 Free Software Foundation, Inc. |
83d1cc42 FX |
4 | |
5 | This file is part of the GNU C Library. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU Lesser General Public | |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
11 | ||
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public | |
18 | License along with the GNU C Library. If not, see | |
5a82c748 | 19 | <https://www.gnu.org/licenses/>. */ |
83d1cc42 FX |
20 | |
21 | #include <sysdep.h> | |
22 | ||
23 | /* Assumptions: | |
24 | * | |
25 | * ARMv8-a, AArch64 | |
26 | * Use base integer registers. | |
27 | */ | |
28 | ||
29 | #ifndef MEMCHR | |
30 | # define MEMCHR __memchr_nosimd | |
31 | #endif | |
32 | ||
33 | /* Arguments and results. */ | |
34 | #define srcin x0 | |
35 | #define chrin x1 | |
36 | #define cntin x2 | |
37 | ||
38 | #define result x0 | |
39 | ||
40 | #define repchr x1 | |
41 | ||
42 | #define tmp1 x2 | |
43 | #define tmp2 x3 | |
44 | #define tmp3 x4 | |
45 | #define tmp4 x5 | |
46 | ||
47 | #define src x6 | |
48 | #define srcend x7 | |
49 | #define srcend16 x8 | |
50 | ||
51 | #define anymore x9 | |
52 | ||
53 | #define zeroones x10 | |
54 | ||
55 | #define data1 x11 | |
56 | #define data2 x12 | |
57 | ||
58 | #define has_chr1 x13 | |
59 | #define has_chr2 x14 | |
60 | ||
61 | #define REP8_01 0x0101010101010101 | |
62 | #define REP8_7f 0x7f7f7f7f7f7f7f7f | |
63 | ||
64 | ||
65 | ENTRY_ALIGN (MEMCHR, 6) | |
66 | ||
45b1e17e SN |
67 | PTR_ARG (0) |
68 | SIZE_ARG (2) | |
83d1cc42 FX |
69 | |
70 | /* Do not dereference srcin if no bytes to compare. */ | |
71 | cbz cntin, L(none_chr) | |
72 | ||
73 | /* Start address is 16-byte aligned or not? */ | |
74 | tst srcin, 15 | |
75 | bic src, srcin, 15 | |
76 | ||
77 | mov zeroones, REP8_01 | |
78 | and repchr, chrin, 255 | |
79 | /* Generate a qword integer as |c|c|c|c|c|c|c|c|. */ | |
80 | mul repchr, repchr, zeroones | |
81 | ||
82 | add srcend, srcin, cntin | |
83 | /* | |
84 | * srcend16 is address of the block following the last block. | |
85 | * | |
86 | * [A block is 16-byte aligned and sized.] | |
87 | */ | |
88 | add srcend16, srcend, 15 | |
89 | bic srcend16, srcend16, 15 | |
90 | ||
91 | b.eq L(loop) | |
92 | ||
93 | /* Load the first block containing start address. */ | |
94 | ldp data1, data2, [src], 16 | |
95 | ||
96 | lsl tmp1, srcin, 3 | |
97 | mov tmp2, ~0 | |
98 | #ifdef __AARCH64EB__ | |
99 | lsr tmp3, tmp2, tmp1 | |
100 | #else | |
101 | lsl tmp3, tmp2, tmp1 | |
102 | #endif | |
103 | /* Start address is in the first or the second qword? */ | |
104 | tst srcin, 8 | |
105 | ||
106 | /* | |
107 | * Transform any byte in the block to zero using XOR operation, | |
108 | * if that byte equals the char to search. In this way, searching | |
109 | * the char becomes detecting zero in the resulting two qwords. | |
110 | */ | |
111 | eor data1, data1, repchr | |
112 | eor data2, data2, repchr | |
113 | ||
114 | /* | |
115 | * Set those unused bytes(before start address) to 0xff, so | |
116 | * that they will not hit any zero detection. | |
117 | */ | |
118 | orn tmp1, data1, tmp3 | |
119 | orn tmp2, data2, tmp3 | |
120 | ||
121 | csinv data1, tmp1, xzr, eq | |
122 | csel data2, data2, tmp2, eq | |
123 | ||
124 | /* | |
125 | * When the first and last block are the same, there are two cases: | |
126 | * o. Memory range to search is just in one block. | |
127 | * ( start address - end address) < 0 | |
128 | * | |
129 | * o. Memory range is so large that end address wrap-around. | |
130 | * ( start address - end address) > 0 | |
131 | */ | |
132 | cmp srcin, srcend | |
133 | ccmp src, srcend16, 0, mi | |
134 | csetm anymore, ne | |
135 | b L(find_chr) | |
136 | ||
137 | .p2align 4 | |
138 | L(loop): | |
139 | ldp data1, data2, [src], 16 | |
140 | ||
141 | subs anymore, src, srcend16 | |
142 | ||
143 | /* | |
144 | * Transform any byte in the block to zero using XOR operation, | |
145 | * if that byte equals the char to search. | |
146 | */ | |
147 | eor data1, data1, repchr | |
148 | eor data2, data2, repchr | |
149 | ||
150 | L(find_chr): | |
151 | /* | |
152 | * Use the following integer test to find out if any byte in a | |
153 | * qword is zero. If do not contain zero-valued byte, test result | |
154 | * is zero. | |
155 | * | |
156 | * (qword - 0x0101010101010101) & ~(qword) & 0x8080808080808080 | |
157 | * = | |
158 | * (qword - 0x0101010101010101) & ~(qword | 0x7f7f7f7f7f7f7f7f) | |
159 | * | |
160 | */ | |
161 | sub tmp1, data1, zeroones | |
162 | sub tmp2, data2, zeroones | |
163 | ||
164 | orr tmp3, data1, REP8_7f | |
165 | orr tmp4, data2, REP8_7f | |
166 | ||
167 | bic has_chr1, tmp1, tmp3 | |
168 | bic has_chr2, tmp2, tmp4 | |
169 | ||
170 | orr tmp1, has_chr1, has_chr2 | |
171 | ccmp tmp1, 0, 0, ne | |
172 | ||
173 | b.eq L(loop) | |
174 | ||
175 | cbz has_chr1, 1f | |
176 | sub result, src, 16 | |
177 | #ifdef __AARCH64EB__ | |
178 | rev data1, data1 | |
179 | #else | |
180 | rev has_chr1, has_chr1 | |
181 | #endif | |
182 | b L(done) | |
183 | ||
184 | 1: cbz has_chr2, L(none_chr) | |
185 | sub result, src, 8 | |
186 | #ifdef __AARCH64EB__ | |
187 | rev data1, data2 | |
188 | #else | |
189 | rev has_chr1, has_chr2 | |
190 | #endif | |
191 | ||
192 | L(done): | |
193 | #ifdef __AARCH64EB__ | |
194 | /* | |
195 | * For big-endian, can not directly use has_chr1/has_chr2 because | |
196 | * two qwords has been reversed after loading from memory. | |
197 | * Thus, have to perform char detection on two qwords again, which | |
198 | * should be byte-swapped this time. | |
199 | */ | |
200 | sub tmp1, data1, zeroones | |
201 | orr tmp3, data1, REP8_7f | |
202 | bic has_chr1, tmp1, tmp3 | |
203 | rev has_chr1, has_chr1 | |
204 | #endif | |
205 | ||
206 | /* | |
207 | * If the specified char is found in a qword, the corresponding | |
208 | * byte of in has_chr has value of 1, while this is only true for | |
209 | * the first occurrence, not other occurrences. | |
210 | */ | |
211 | cmp anymore, 0 | |
212 | clz tmp1, has_chr1 | |
213 | add result, result, tmp1, lsr 3 | |
214 | ccmp result, srcend, 8, eq /* NZCV = 8000 */ | |
215 | csel result, result, xzr, mi | |
216 | ret | |
217 | ||
218 | L(none_chr): | |
219 | mov result, 0 | |
220 | ret | |
221 | ||
222 | END (MEMCHR) | |
223 | libc_hidden_builtin_def (MEMCHR) |