]>
Commit | Line | Data |
---|---|---|
fe2f79db | 1 | /* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn. |
2b778ceb | 2 | Copyright (C) 2010-2021 Free Software Foundation, Inc. |
fe2f79db LM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
fe2f79db LM |
18 | |
19 | #include <sysdep.h> | |
fe2f79db LM |
20 | |
21 | /* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5]) */ | |
22 | .machine power7 | |
b5510883 | 23 | ENTRY (__memchr) |
fe2f79db LM |
24 | CALL_MCOUNT |
25 | dcbt 0,r3 | |
26 | clrrwi r8,r3,2 | |
d298c416 | 27 | insrwi r4,r4,8,16 /* Replicate byte to word. */ |
5e628dd1 TMQMF |
28 | |
29 | /* Calculate the last acceptable address and check for possible | |
30 | addition overflow by using saturating math: | |
31 | r7 = r3 + r5 | |
32 | r7 |= -(r7 < r3) */ | |
33 | add r7,r3,r5 | |
34 | subfc r6,r3,r7 | |
35 | subfe r9,r9,r9 | |
36 | or r7,r7,r9 | |
37 | ||
d298c416 | 38 | insrwi r4,r4,16,0 |
fe2f79db | 39 | cmplwi r5,16 |
466b0393 AM |
40 | li r9, -1 |
41 | rlwinm r6,r3,3,27,28 /* Calculate padding. */ | |
42 | addi r7,r7,-1 | |
43 | #ifdef __LITTLE_ENDIAN__ | |
44 | slw r9,r9,r6 | |
45 | #else | |
46 | srw r9,r9,r6 | |
47 | #endif | |
fe2f79db LM |
48 | ble L(small_range) |
49 | ||
fe2f79db | 50 | lwz r12,0(r8) /* Load word from memory. */ |
466b0393 AM |
51 | cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */ |
52 | and r3,r3,r9 | |
53 | clrlwi r5,r7,30 /* Byte count - 1 in last word. */ | |
54 | clrrwi r7,r7,2 /* Address of last word. */ | |
55 | cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ | |
fe2f79db LM |
56 | bne cr7,L(done) |
57 | ||
fe2f79db LM |
58 | mtcrf 0x01,r8 |
59 | /* Is the next word (r8 + 4) aligned to a doubleword boundary? If so, | |
60 | skip to the main loop. Otherwise, go through the alignment code. */ | |
fe2f79db LM |
61 | bt 29,L(loop_setup) |
62 | ||
63 | /* Handle WORD2 of pair. */ | |
64 | lwzu r12,4(r8) | |
466b0393 AM |
65 | cmpb r3,r12,r4 |
66 | cmplwi cr7,r3,0 | |
fe2f79db LM |
67 | bne cr7,L(done) |
68 | ||
fe2f79db | 69 | L(loop_setup): |
466b0393 AM |
70 | /* The last word we want to read in the loop below is the one |
71 | containing the last byte of the string, i.e. the word at | |
72 | (s + size - 1) & ~3, or r7. The first word read is at | |
73 | r8 + 4, we read 2 * cnt words, so the last word read will | |
74 | be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives | |
75 | cnt = (r7 - r8) / 8 */ | |
76 | sub r6,r7,r8 | |
77 | srwi r6,r6,3 /* Number of loop iterations. */ | |
14a50c9d | 78 | mtctr r6 /* Setup the counter. */ |
466b0393 AM |
79 | |
80 | /* Main loop to look for BYTE in the string. Since | |
81 | it's a small loop (8 instructions), align it to 32-bytes. */ | |
82 | .align 5 | |
fe2f79db LM |
83 | L(loop): |
84 | /* Load two words, compare and merge in a | |
85 | single register for speed. This is an attempt | |
86 | to speed up the byte-checking process for bigger strings. */ | |
fe2f79db LM |
87 | lwz r12,4(r8) |
88 | lwzu r11,8(r8) | |
466b0393 | 89 | cmpb r3,r12,r4 |
fe2f79db | 90 | cmpb r9,r11,r4 |
466b0393 AM |
91 | or r6,r9,r3 /* Merge everything in one word. */ |
92 | cmplwi cr7,r6,0 | |
fe2f79db LM |
93 | bne cr7,L(found) |
94 | bdnz L(loop) | |
14a50c9d | 95 | |
466b0393 AM |
96 | /* We may have one more word to read. */ |
97 | cmplw r8,r7 | |
98 | beqlr | |
99 | ||
100 | lwzu r12,4(r8) | |
101 | cmpb r3,r12,r4 | |
102 | cmplwi cr6,r3,0 | |
103 | bne cr6,L(done) | |
104 | blr | |
fe2f79db | 105 | |
466b0393 AM |
106 | .align 4 |
107 | L(found): | |
fe2f79db LM |
108 | /* OK, one (or both) of the words contains BYTE. Check |
109 | the first word and decrement the address in case the first | |
110 | word really contains BYTE. */ | |
466b0393 | 111 | cmplwi cr6,r3,0 |
fe2f79db LM |
112 | addi r8,r8,-4 |
113 | bne cr6,L(done) | |
114 | ||
115 | /* BYTE must be in the second word. Adjust the address | |
466b0393 | 116 | again and move the result of cmpb to r3 so we can calculate the |
fe2f79db LM |
117 | pointer. */ |
118 | ||
466b0393 | 119 | mr r3,r9 |
fe2f79db LM |
120 | addi r8,r8,4 |
121 | ||
466b0393 | 122 | /* r3 has the output of the cmpb instruction, that is, it contains |
fe2f79db LM |
123 | 0xff in the same position as BYTE in the original |
124 | word from the string. Use that to calculate the pointer. | |
14a50c9d | 125 | We need to make sure BYTE is *before* the end of the range. */ |
fe2f79db | 126 | L(done): |
466b0393 AM |
127 | #ifdef __LITTLE_ENDIAN__ |
128 | addi r0,r3,-1 | |
129 | andc r0,r0,r3 | |
130 | popcntw r0,r0 /* Count trailing zeros. */ | |
131 | #else | |
132 | cntlzw r0,r3 /* Count leading zeros before the match. */ | |
133 | #endif | |
134 | cmplw r8,r7 /* Are we on the last word? */ | |
135 | srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ | |
fe2f79db | 136 | add r3,r8,r0 |
466b0393 AM |
137 | cmplw cr7,r0,r5 /* If on the last word, check byte offset. */ |
138 | bnelr | |
139 | blelr cr7 | |
140 | li r3,0 | |
fe2f79db LM |
141 | blr |
142 | ||
143 | .align 4 | |
144 | L(null): | |
145 | li r3,0 | |
146 | blr | |
147 | ||
148 | /* Deals with size <= 16. */ | |
149 | .align 4 | |
150 | L(small_range): | |
151 | cmplwi r5,0 | |
466b0393 | 152 | beq L(null) |
fe2f79db | 153 | lwz r12,0(r8) /* Load word from memory. */ |
466b0393 AM |
154 | cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ |
155 | and r3,r3,r9 | |
156 | cmplwi cr7,r3,0 | |
157 | clrlwi r5,r7,30 /* Byte count - 1 in last word. */ | |
158 | clrrwi r7,r7,2 /* Address of last word. */ | |
159 | cmplw r8,r7 /* Are we done already? */ | |
fe2f79db | 160 | bne cr7,L(done) |
466b0393 | 161 | beqlr |
fe2f79db | 162 | |
fe2f79db | 163 | lwzu r12,4(r8) |
466b0393 AM |
164 | cmpb r3,r12,r4 |
165 | cmplwi cr6,r3,0 | |
166 | cmplw r8,r7 | |
fe2f79db | 167 | bne cr6,L(done) |
466b0393 | 168 | beqlr |
14a50c9d WS |
169 | |
170 | lwzu r12,4(r8) | |
466b0393 AM |
171 | cmpb r3,r12,r4 |
172 | cmplwi cr6,r3,0 | |
173 | cmplw r8,r7 | |
14a50c9d | 174 | bne cr6,L(done) |
466b0393 | 175 | beqlr |
14a50c9d WS |
176 | |
177 | lwzu r12,4(r8) | |
466b0393 AM |
178 | cmpb r3,r12,r4 |
179 | cmplwi cr6,r3,0 | |
180 | cmplw r8,r7 | |
14a50c9d | 181 | bne cr6,L(done) |
466b0393 | 182 | beqlr |
14a50c9d WS |
183 | |
184 | lwzu r12,4(r8) | |
466b0393 AM |
185 | cmpb r3,r12,r4 |
186 | cmplwi cr6,r3,0 | |
14a50c9d | 187 | bne cr6,L(done) |
14a50c9d | 188 | blr |
fe2f79db | 189 | |
b5510883 JM |
190 | END (__memchr) |
191 | weak_alias (__memchr, memchr) | |
fe2f79db | 192 | libc_hidden_builtin_def (memchr) |