]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/powerpc64/strchr.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / strchr.S
CommitLineData
cfc91acd 1/* Optimized strchr implementation for PowerPC64.
2b778ceb 2 Copyright (C) 1997-2021 Free Software Foundation, Inc.
cfc91acd
RM
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
cfc91acd
RM
18
19#include <sysdep.h>
cfc91acd
RM
20
21/* See strlen.S for comments on how this works. */
22
23/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
24
6d15a5c2
WSM
25#ifndef STRCHR
26# define STRCHR strchr
27#endif
28
d5b41185 29ENTRY_TOCLESS (STRCHR)
d7d06f79 30 CALL_MCOUNT 2
cfc91acd
RM
31
/* Overview: the string is scanned one aligned 8-byte doubleword at a time.
   For a doubleword v the expression
       (v - 0x0101010101010101) & ~v & 0x8080808080808080
   is non-zero when v contains a zero byte.  It is evaluated twice per
   doubleword x: with v = x (looking for the terminator) and with
   v = x ^ rCHR (looking for a byte equal to c).  The result returned in
   rRTN is the address of the first matching byte, or 0 when the
   terminator is reached first.  */
32#define rTMP1 r0 /* scratch: constant building, then "v - 0x01.." term */
33#define rRTN r3 /* outgoing result */
2d67d91a
JM
34#define rSTR r8 /* current word pointer */
35#define rCHR r4 /* byte we're looking for, spread over the whole word */
36#define rWORD r5 /* the current word */
cfc91acd 37#define rCLZB rCHR /* leading zero byte count */
beb03cee
RM
38#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
39#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
cfc91acd
RM
40#define rTMP2 r9 /* scratch: "~v & 0x80.." term */
41#define rIGN r10 /* number of bits we should ignore in the first word */
42#define rMASK r11 /* mask with the bits to ignore set to 0 */
43#define rTMP3 r12 /* current word XOR rCHR: zero bytes mark matches of c */
664318c3
AM
44#define rTMP4 rIGN /* shares r10 with rIGN; holds the zero-byte test result */
45#define rTMP5 rMASK /* shares r11 with rMASK; holds the match test result */
cfc91acd 46
beb03cee 47 dcbt 0,rRTN /* cache-touch hint for the start of s */
664318c3 48 insrdi rCHR, rCHR, 8, 48 /* low byte of c copied into the next byte up */
cfc91acd 49 li rMASK, -1 /* all ones; shifted below to form the ignore mask */
664318c3 50 insrdi rCHR, rCHR, 16, 32 /* low 16 bits copied up: c now fills the low 32 bits */
beb03cee
RM
51 rlwinm rIGN, rRTN, 3, 26, 28 /* rIGN = (s & 7) * 8: bits preceding s in its doubleword */
52 insrdi rCHR, rCHR, 32, 0 /* low 32 bits copied up: c now fills all 8 bytes */
cfc91acd
RM
53 lis rFEFE, -0x101 /* rFEFE = 0xfffffffffeff0000 */
54 lis r7F7F, 0x7f7f /* r7F7F = 0x000000007f7f0000 */
beb03cee 55 clrrdi rSTR, rRTN, 3 /* rSTR = s rounded down to an 8-byte boundary */
cfc91acd
RM
56 addi rFEFE, rFEFE, -0x101 /* rFEFE = 0xfffffffffefefeff */
57 addi r7F7F, r7F7F, 0x7f7f /* r7F7F = 0x000000007f7f7f7f */
beb03cee
RM
58 sldi rTMP1, rFEFE, 32 /* rTMP1 = 0xfefefeff00000000 */
59 insrdi r7F7F, r7F7F, 32, 0 /* r7F7F = 0x7f7f7f7f7f7f7f7f */
60 add rFEFE, rFEFE, rTMP1 /* rFEFE = 0xfefefefefefefeff (mod 2^64) */
cfc91acd 61/* Test the first (partial?) word. */
beb03cee 62 ld rWORD, 0(rSTR)
664318c3
AM
/* Clear the rIGN mask bits that cover the bytes located before s: these are
   the low-order bits on little-endian and the high-order bits on
   big-endian.  */
63#ifdef __LITTLE_ENDIAN__
64 sld rMASK, rMASK, rIGN
65#else
beb03cee 66 srd rMASK, rMASK, rIGN
664318c3 67#endif
cfc91acd
RM
68 orc rWORD, rWORD, rMASK /* force the bytes before s to 0xff so they cannot test as zero */
69 add rTMP1, rFEFE, rWORD /* x - 0x0101010101010101 */
70 nor rTMP2, r7F7F, rWORD /* ~x & 0x8080808080808080 */
664318c3 71 and. rTMP4, rTMP1, rTMP2 /* zero-byte test; CR0 is consumed by the beq after L(loopentry) */
cfc91acd
RM
72 xor rTMP3, rCHR, rWORD /* bytes equal to c become zero */
73 orc rTMP3, rTMP3, rMASK /* force the bytes before s to 0xff so they cannot match (matters when c is 0xff) */
74 b L(loopentry) /* enter the loop mid-way; none of xor/orc/b modify CR0 */
75
76/* The loop. */
77
664318c3
AM
78L(loop):
/* The loop is software-pipelined: the match test for the previous doubleword
   (rTMP1/rTMP2 were derived from rTMP3 at L(loopentry)) is completed here,
   after the load of the next doubleword has been issued.  This point is only
   reached when the previous doubleword held no zero byte.  */
79 ldu rWORD, 8(rSTR) /* load the next doubleword and advance rSTR by 8 */
80 and. rTMP5, rTMP1, rTMP2 /* non-zero => previous doubleword contains c */
cfc91acd 81/* Test for 0. */
664318c3
AM
82 add rTMP1, rFEFE, rWORD /* x - 0x0101010101010101. */
83 nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f7f7f7f7f) == ~x & 0x8080808080808080. */
cfc91acd 84 bne L(foundit)
664318c3 85 and. rTMP4, rTMP1, rTMP2 /* (x - 0x0101010101010101) & ~x & 0x8080808080808080. */
cfc91acd
RM
86/* Start test for the bytes we're looking for. */
87 xor rTMP3, rCHR, rWORD
88L(loopentry):
89 add rTMP1, rFEFE, rTMP3 /* first half of the match test; finished by the next and. */
90 nor rTMP2, r7F7F, rTMP3
91 beq L(loop) /* CR0 comes from the zero-byte test: no terminator here, keep going */
664318c3 92
cfc91acd
RM
93/* There is a zero byte in the word, but may also be a matching byte (either
94 before or after the zero byte). In fact, we may be looking for a
664318c3
AM
95 zero byte, in which case we return a match. */
96 and. rTMP5, rTMP1, rTMP2 /* match test for the doubleword holding the terminator */
cfc91acd 97 li rRTN, 0 /* preload the "not found" result */
cfc91acd 98 beqlr /* no byte equal to c in this doubleword: return 0 */
664318c3
AM
99/* At this point:
100 rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
101 rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
102 But there may be false matches in the next most significant byte from
103 a true match due to carries. This means we need to recalculate the
104 matches using a longer method for big-endian. */
105#ifdef __LITTLE_ENDIAN__
106 addi rTMP1, rTMP5, -1
107 andc rTMP1, rTMP1, rTMP5 /* ones in every bit below the lowest match flag */
108 cntlzd rCLZB, rTMP1 /* = 57 - 8*k, where k is the byte index of the first match */
109 addi rTMP2, rTMP4, -1
110 andc rTMP2, rTMP2, rTMP4 /* ones in every bit below the lowest zero-byte flag */
111 cmpld rTMP1, rTMP2
112 bgtlr /* first match lies after the terminator: return 0 */
113 subfic rCLZB, rCLZB, 64-7 /* 57 - clz = 8*k, turned into k by the shift below */
114#else
115/* I think we could reduce this by two instructions by keeping the "nor"
116 results from the loop for reuse here. See strlen.S tail. Similarly
117 one instruction could be pruned from L(foundit). */
/* Carry-free recomputation: adding 0x7f to the low 7 bits of each byte sets
   that byte's top bit exactly when those 7 bits are non-zero, and never
   carries into the neighbouring byte.  rFEFE and rWORD are used as scratch
   here; their loop values are no longer needed.  */
cfc91acd 118 and rFEFE, r7F7F, rWORD
664318c3 119 or rTMP5, r7F7F, rWORD
cfc91acd 120 and rTMP1, r7F7F, rTMP3
664318c3 121 or rTMP4, r7F7F, rTMP3
cfc91acd
RM
122 add rFEFE, rFEFE, r7F7F
123 add rTMP1, rTMP1, r7F7F
664318c3
AM
124 nor rWORD, rTMP5, rFEFE /* 0x80 in exactly the bytes of the word that are zero */
125 nor rTMP2, rTMP4, rTMP1 /* 0x80 in exactly the bytes of the word equal to c */
126 cntlzd rCLZB, rTMP2 /* = 8*k, where k is the byte index of the first match */
beb03cee 127 cmpld rWORD, rTMP2
cfc91acd 128 bgtlr /* terminator is more significant (earlier) than the first match: return 0 */
664318c3 129#endif
beb03cee 130 srdi rCLZB, rCLZB, 3 /* bit count -> byte index */
cfc91acd 131 add rRTN, rSTR, rCLZB /* address of the first matching byte */
cfc91acd
RM
132 blr
133
134L(foundit):
/* Reached from L(loop) when the previous doubleword contains c and no zero
   byte.  rSTR has already been advanced by the ldu, so the result address
   must be taken relative to rSTR - 8.  rTMP3 still holds the previous
   doubleword XOR rCHR, because the xor that refreshes it comes after the
   bne.  */
664318c3
AM
135#ifdef __LITTLE_ENDIAN__
136 addi rTMP1, rTMP5, -1
137 andc rTMP1, rTMP1, rTMP5 /* ones in every bit below the lowest match flag */
138 cntlzd rCLZB, rTMP1 /* = 57 - 8*k, where k is the byte index of the first match */
139 subfic rCLZB, rCLZB, 64-7-64 /* 8*k - 64: the -8 byte adjustment is folded in */
140 sradi rCLZB, rCLZB, 3 /* arithmetic shift keeps the sign: k - 8 */
141#else
/* Carry-free recomputation of the match flags (see the comment on false
   matches in the zero-byte tail).  */
cfc91acd 142 and rTMP1, r7F7F, rTMP3
664318c3 143 or rTMP4, r7F7F, rTMP3
cfc91acd 144 add rTMP1, rTMP1, r7F7F
664318c3 145 nor rTMP2, rTMP4, rTMP1 /* 0x80 in exactly the bytes equal to c */
beb03cee
RM
146 cntlzd rCLZB, rTMP2 /* = 8*k, where k is the byte index of the first match */
147 subi rSTR, rSTR, 8 /* step back to the doubleword that contains the match */
148 srdi rCLZB, rCLZB, 3 /* bit count -> byte index */
664318c3 149#endif
cfc91acd 150 add rRTN, rSTR, rCLZB /* address of the first matching byte */
cfc91acd 151 blr
6d15a5c2 152END (STRCHR)
cfc91acd 153
/* NOTE(review): both macros are defined outside this file.  Presumably the
   first publishes `index' as a weak alias of strchr and the second emits the
   library-internal hidden definition -- confirm against the macro
   definitions.  Both name `strchr' literally rather than STRCHR, so a build
   that overrides STRCHR presumably has to account for these two lines --
   TODO confirm.  */
2d67d91a 154weak_alias (strchr, index)
85dd1003 155libc_hidden_builtin_def (strchr)