]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/strchr.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / aarch64 / strchr.S
CommitLineData
f940b965
RE
1/* strchr - find a character in a string
2
f7a9f785 3 Copyright (C) 2014-2016 Free Software Foundation, Inc.
f940b965
RE
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <http://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 */
27
/* char *strchr (const char *s, int c)

   In:   x0 (srcin) = pointer to the string.
	 w1 (chrin) = character to look for; only its low byte is used
	 (it is replicated into every byte lane of vrepchr).
   Out:  x0 (result) = address of the first byte equal to the character,
	 or 0 if the terminating NUL is reached first.  Searching for
	 NUL itself returns the address of the terminator.
   Uses: x2-x5, v0-v7, v16-v18 and the condition flags.  The stack is
	 not touched.

   The string is read in 32-byte hunks from a 32-byte aligned address,
   so bytes in the same hunk that lie before the start of the string or
   after its terminator are loaded as well.  */

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */
#define src		x2	/* Read cursor, always 32-byte aligned.  */
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0	/* chrin replicated into all 16 bytes.  */
#define vdata1		v1	/* First 16 bytes of the current hunk.  */
#define vdata2		v2	/* Second 16 bytes of the current hunk.  */
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7	/* Per-byte syndrome bits for "is NUL".  */
#define vrepmask_c	v16	/* Per-byte syndrome bits for "is chrin".  */
#define vend1		v17
#define vend2		v18

	/* Core algorithm.
	   For each 32-byte hunk we calculate a 64-bit syndrome value, with
	   two bits per byte (LSB is always in bits 0 and 1, for both big
	   and little-endian systems).  Bit 0 is set iff the relevant byte
	   matched the requested character.  Bit 1 is set iff the
	   relevant byte matched the NUL end of string (we trigger off bit0
	   for the special case of looking for NUL).  Since the bits
	   in the syndrome reflect exactly the order in which things occur
	   in the original string a count_trailing_zeros() operation will
	   identify exactly which byte is causing the termination, and why.  */

ENTRY (strchr)
	/* wtmp2 = 0x40100401: one distinct even bit (0, 2, 4, 6) for each
	   of four consecutive bytes.  Doubling it gives the matching odd
	   bits (1, 3, 5, 7) used for the NUL test.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31			// Round down to a 32-byte boundary.
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31		// tmp1 = misalignment in bytes.
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s	// lsl #1
	b.eq	L(loop)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	lsl	tmp1, tmp1, #1			// Two syndrome bits per byte.
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* The shift amount is taken modulo 64, so this leaves ones in
	   exactly the low 2 * misalignment bits: the padding bytes.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Use a fast check for the termination condition: fold all four
	   compare results together and test whether anything is set,
	   deferring the exact syndrome until we know we have to stop.  */
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	orr	vend1.16b, vend1.16b, vend2.16b
	addp	vend1.2d, vend1.2d, vend1.2d
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64

	mov	tmp1, vend1.d[0]
L(tail):
	sub	src, src, #32		// Undo the post-increment of the load.
	rbit	tmp1, tmp1
	clz	tmp1, tmp1		// rbit + clz = count trailing zeros.
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	// Two syndrome bits per byte.
	csel	result, result, xzr, eq		// Odd index: NUL came first.
	ret
END (strchr)
libc_hidden_builtin_def (strchr)
weak_alias (strchr, index)