]>
Commit | Line | Data |
---|---|---|
f940b965 RE |
1 | /* strchr - find a character in a string |
2 | ||
f7a9f785 | 3 | Copyright (C) 2014-2016 Free Software Foundation, Inc. |
f940b965 RE |
4 | |
5 | This file is part of the GNU C Library. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU Lesser General Public | |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
11 | ||
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public | |
18 | License along with the GNU C Library. If not, see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include <sysdep.h> | |
22 | ||
23 | /* Assumptions: | |
24 | * | |
25 | * ARMv8-a, AArch64 | |
26 | */ | |
27 | ||
28 | /* Arguments and results. */ | |
29 | #define srcin x0 /* in: address of the string to search */ | |
30 | #define chrin w1 /* in: character to find; only its low byte is replicated and compared */ | |
31 | ||
32 | #define result x0 /* out: address of the matching byte, or 0; same register as srcin */ | |
33 | ||
34 | #define src x2 /* read pointer, always a multiple of 32 */ | |
35 | #define tmp1 x3 /* scratch: alignment offset, padding mask, then syndrome / bit index */ | |
36 | #define wtmp2 w4 /* scratch: holds the 0x40100401 mask constant */ | |
37 | #define tmp3 x5 /* scratch: all-ones value, then the unmasked first-hunk syndrome */ | |
38 | ||
39 | #define vrepchr v0 /* chrin replicated into every byte lane */ | |
40 | #define vdata1 v1 /* bytes 0-15 of the current 32-byte hunk */ | |
41 | #define vdata2 v2 /* bytes 16-31 of the current 32-byte hunk */ | |
42 | #define vhas_nul1 v3 /* per-byte "is NUL" result for vdata1 */ | |
43 | #define vhas_nul2 v4 /* per-byte "is NUL" result for vdata2 */ | |
44 | #define vhas_chr1 v5 /* per-byte "equals chrin" result for vdata1 */ | |
45 | #define vhas_chr2 v6 /* per-byte "equals chrin" result for vdata2 */ | |
46 | #define vrepmask_0 v7 /* selects the odd (NUL) bit of each byte's syndrome pair */ | |
47 | #define vrepmask_c v16 /* selects the even (character) bit of each byte's syndrome pair */ | |
48 | #define vend1 v17 /* combined NUL/character results; ends up holding the syndrome */ | |
49 | #define vend2 v18 /* combined NUL/character results for the second half of the hunk */ | |
50 | ||
51 | /* Core algorithm. | |
52 | For each 32-byte hunk we calculate a 64-bit syndrome value, with | |
53 | two bits per byte (LSB is always in bits 0 and 1, for both big | |
54 | and little-endian systems). Bit 0 is set iff the relevant byte | |
55 | matched the requested character. Bit 1 is set iff the | |
56 | relevant byte matched the NUL end of string (we trigger off bit0 | |
57 | for the special case of looking for NUL). Since the bits | |
58 | in the syndrome reflect exactly the order in which things occur | |
59 | in the original string a count_trailing_zeros() operation will | |
60 | identify exactly which byte is causing the termination, and why. */ | |
61 | ||
62 | /* Locals and temporaries. */ | |
63 | ||
64 | ENTRY (strchr) /* In: srcin = string address, chrin = character (low byte used). Out: result = address of the first matching byte, or 0 if the terminating NUL comes first. */ |
65 | mov wtmp2, #0x0401 /* wtmp2 becomes 0x40100401 once the movk below fills the top half */ | |
66 | movk wtmp2, #0x4010, lsl #16 | |
67 | dup vrepchr.16b, chrin /* replicate the low byte of chrin into all 16 lanes */ | |
68 | bic src, srcin, #31 /* src = srcin rounded down to a 32-byte boundary */ | |
69 | dup vrepmask_c.4s, wtmp2 /* every 32-bit lane = 0x40100401: one even (character) bit per byte */ | |
70 | ands tmp1, srcin, #31 /* tmp1 = offset of srcin inside its 32-byte hunk; Z set when aligned */ | |
71 | add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s // lsl #1: every lane = 0x80200802, the odd (NUL) bit per byte | |
72 | b.eq L(loop) /* uses the flags set by the ands above */ | |
73 | ||
74 | /* Input string is not 32-byte aligned. Rather than forcing | |
75 | the padding bytes to a safe value, we calculate the syndrome | |
76 | for all the bytes, but then mask off those bits of the | |
77 | syndrome that are related to the padding. */ | |
78 | ld1 {vdata1.16b, vdata2.16b}, [src], #32 /* load the whole aligned hunk; src post-increments by 32 */ | |
79 | neg tmp1, tmp1 | |
80 | cmeq vhas_nul1.16b, vdata1.16b, #0 /* 0xff in each lane that holds NUL */ | |
81 | cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b /* 0xff in each lane equal to the wanted character */ | |
82 | cmeq vhas_nul2.16b, vdata2.16b, #0 | |
83 | cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b | |
84 | and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b /* reduce each NUL hit to its odd syndrome bit */ | |
85 | and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | |
86 | and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b /* reduce each character hit to its even syndrome bit */ | |
87 | and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | |
88 | orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | |
89 | orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | |
90 | lsl tmp1, tmp1, #1 /* tmp1 = -2 * offset: two syndrome bits per padding byte */ | |
91 | addp vend1.16b, vend1.16b, vend2.16b // 256->128 | |
92 | mov tmp3, #~0 | |
93 | addp vend1.16b, vend1.16b, vend2.16b // 128->64 (only the low 64 bits are read below) | |
94 | lsr tmp1, tmp3, tmp1 /* shift count is taken modulo 64, so the low 2 * offset bits stay set */ | |
95 | ||
96 | mov tmp3, vend1.2d[0] | |
97 | bic tmp1, tmp3, tmp1 // Mask padding bits. | |
98 | cbnz tmp1, L(tail) /* a match or NUL at or after srcin inside the first hunk */ | |
99 | ||
100 | L(loop): |
101 | ld1 {vdata1.16b, vdata2.16b}, [src], #32 /* next 32 bytes; src post-increments by 32 */ | |
102 | cmeq vhas_nul1.16b, vdata1.16b, #0 | |
103 | cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b | |
104 | cmeq vhas_nul2.16b, vdata2.16b, #0 | |
105 | cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b | |
106 | /* Use a fast check for the termination condition. */ | |
107 | orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | |
108 | orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | |
109 | orr vend1.16b, vend1.16b, vend2.16b | |
110 | addp vend1.2d, vend1.2d, vend1.2d /* fold the two 64-bit halves into one value */ | |
111 | mov tmp1, vend1.2d[0] | |
112 | cbz tmp1, L(loop) /* zero: no NUL and no match anywhere in this hunk */ | |
113 | ||
114 | /* Termination condition found. Now need to establish exactly why | |
115 | we terminated. */ | |
116 | and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b | |
117 | and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | |
118 | and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b | |
119 | and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | |
120 | orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | |
121 | orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | |
122 | addp vend1.16b, vend1.16b, vend2.16b // 256->128 | |
123 | addp vend1.16b, vend1.16b, vend2.16b // 128->64 (only the low 64 bits are read below) | |
124 | ||
125 | mov tmp1, vend1.2d[0] | |
126 | L(tail): |
127 | sub src, src, #32 /* undo the post-increment: src = start of the hunk just examined */ | |
128 | rbit tmp1, tmp1 | |
129 | clz tmp1, tmp1 /* together with the rbit above: index of the lowest set syndrome bit */ | |
130 | /* Tmp1 is even if the target character was found first. Otherwise | |
131 | we've found the end of string and we weren't looking for NUL. */ | |
132 | tst tmp1, #1 | |
133 | add result, src, tmp1, lsr #1 /* byte index = bit index / 2; this add leaves the flags from tst intact */ | |
134 | csel result, result, xzr, eq /* odd bit index means NUL came first: return 0 */ | |
135 | ret | |
136 | END (strchr) |
137 | libc_hidden_builtin_def (strchr) |
138 | weak_alias (strchr, index) |