]>
Commit | Line | Data |
---|---|---|
b42f8cad | 1 | /* Optimized strstr implementation for PowerPC64/POWER7. |
688903eb | 2 | Copyright (C) 2015-2018 Free Software Foundation, Inc. |
b42f8cad RS |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <sysdep.h> | |
20 | ||
21 | /* Char * [r3] strstr (char *s [r3], char * pat[r4]) */ | |
22 | ||
23 | /* The performance gain is obtained using aligned memory access, load | |
24 | * doubleword and usage of cmpb instruction for quicker comparison. */ | |
25 | ||
fe7faec3 RS |
26 | #define ITERATIONS 64 |
27 | ||
f0748b70 WSM |
28 | #ifndef STRSTR |
29 | # define STRSTR strstr | |
30 | #endif | |
31 | ||
b42f8cad RS |
32 | #ifndef STRLEN |
33 | /* For builds with no IFUNC support, local calls should be made to internal | |
34 | GLIBC symbol (created by libc_hidden_builtin_def). */ | |
35 | # ifdef SHARED | |
36 | # define STRLEN __GI_strlen | |
de7ee73d | 37 | # define STRLEN_is_local |
b42f8cad RS |
38 | # else |
39 | # define STRLEN strlen | |
40 | # endif | |
41 | #endif | |
42 | ||
43 | #ifndef STRNLEN | |
44 | /* For builds with no IFUNC support, local calls should be made to internal | |
45 | GLIBC symbol (created by libc_hidden_builtin_def). */ | |
46 | # ifdef SHARED | |
47 | # define STRNLEN __GI_strnlen | |
de7ee73d | 48 | # define STRNLEN_is_local |
b42f8cad | 49 | # else |
6f714aa4 | 50 | # define STRNLEN __strnlen |
b42f8cad RS |
51 | # endif |
52 | #endif | |
53 | ||
54 | #ifndef STRCHR | |
55 | # ifdef SHARED | |
56 | # define STRCHR __GI_strchr | |
de7ee73d | 57 | # define STRCHR_is_local |
b42f8cad RS |
58 | # else |
59 | # define STRCHR strchr | |
60 | # endif | |
61 | #endif | |
62 | ||
63 | #define FRAMESIZE (FRAME_MIN_SIZE+32) | |
64 | .machine power7 | |
d5b41185 AM |
65 | /* Can't be ENTRY_TOCLESS due to calling __strstr_ppc which uses r2. */ |
66 | ENTRY (STRSTR, 4) | |
b42f8cad RS |
67 | CALL_MCOUNT 2 |
68 | mflr r0 /* Load link register LR to r0. */ | |
69 | std r31, -8(r1) /* Save callers register r31. */ | |
b42f8cad | 70 | std r30, -16(r1) /* Save callers register r30. */ |
b42f8cad | 71 | std r29, -24(r1) /* Save callers register r29. */ |
fe7faec3 | 72 | std r28, -32(r1) /* Save callers register r28. */ |
b42f8cad | 73 | std r0, 16(r1) /* Store the link register. */ |
869d7180 RS |
74 | cfi_offset(r31, -8) |
75 | cfi_offset(r30, -16) | |
76 | cfi_offset(r28, -32) | |
77 | cfi_offset(r29, -24) | |
b42f8cad RS |
78 | cfi_offset(lr, 16) |
79 | stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ | |
80 | cfi_adjust_cfa_offset(FRAMESIZE) | |
81 | ||
82 | dcbt 0, r3 | |
83 | dcbt 0, r4 | |
b42f8cad RS |
84 | cmpdi cr7, r3, 0 |
85 | beq cr7, L(retnull) | |
86 | cmpdi cr7, r4, 0 | |
87 | beq cr7, L(retnull) | |
88 | ||
89 | mr r29, r3 | |
90 | mr r30, r4 | |
91 | mr r3, r4 | |
92 | bl STRLEN | |
de7ee73d | 93 | #ifndef STRLEN_is_local |
b42f8cad | 94 | nop |
de7ee73d | 95 | #endif |
b42f8cad RS |
96 | |
97 | cmpdi cr7, r3, 0 /* If search str is null. */ | |
98 | beq cr7, L(ret_r3) | |
99 | ||
b42f8cad RS |
100 | mr r31, r3 |
101 | mr r4, r3 | |
102 | mr r3, r29 | |
103 | bl STRNLEN | |
de7ee73d | 104 | #ifndef STRNLEN_is_local |
b42f8cad | 105 | nop |
de7ee73d | 106 | #endif |
b42f8cad RS |
107 | |
108 | cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ | |
109 | blt cr7, L(retnull) | |
110 | mr r3, r29 | |
111 | lbz r4, 0(r30) | |
112 | bl STRCHR | |
de7ee73d | 113 | #ifndef STRCHR_is_local |
b42f8cad | 114 | nop |
de7ee73d | 115 | #endif |
b42f8cad RS |
116 | |
117 | mr r11, r3 | |
118 | /* If first char of search str is not present. */ | |
119 | cmpdi cr7, r3, 0 | |
120 | ble cr7, L(end) | |
fe7faec3 RS |
121 | /* Reg r28 is used to count the number of iterations. */ |
122 | li r28, 0 | |
b42f8cad RS |
123 | rldicl r8, r3, 0, 52 /* Page cross check. */ |
124 | cmpldi cr7, r8, 4096-16 | |
125 | bgt cr7, L(bytebybyte) | |
126 | ||
127 | rldicl r8, r30, 0, 52 | |
128 | cmpldi cr7, r8, 4096-16 | |
129 | bgt cr7, L(bytebybyte) | |
130 | ||
131 | /* If len(r4) < 8 handle in a different way. */ | |
132 | /* Shift position based on null and use cmpb. */ | |
133 | cmpdi cr7, r31, 8 | |
134 | blt cr7, L(lessthan8) | |
135 | ||
136 | /* Len(r4) >= 8 reaches here. */ | |
137 | mr r8, r3 /* Save r3 for future use. */ | |
138 | mr r4, r30 /* Restore r4. */ | |
139 | li r0, 0 | |
140 | rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ | |
141 | clrrdi r4, r4, 3 /* Make r4 aligned to 8. */ | |
142 | ld r6, 0(r4) | |
143 | addi r4, r4, 8 | |
144 | cmpdi cr7, r10, 0 /* Check if its already aligned? */ | |
145 | beq cr7, L(begin1) | |
146 | #ifdef __LITTLE_ENDIAN__ | |
147 | srd r6, r6, r10 /* Discard unwanted bits. */ | |
148 | #else | |
149 | sld r6, r6, r10 | |
150 | #endif | |
151 | ld r9, 0(r4) | |
152 | subfic r10, r10, 64 | |
153 | #ifdef __LITTLE_ENDIAN__ | |
154 | sld r9, r9, r10 /* Discard unwanted bits. */ | |
155 | #else | |
156 | srd r9, r9, r10 | |
157 | #endif | |
158 | or r6, r6, r9 /* Form complete search str. */ | |
159 | L(begin1): | |
160 | mr r29, r6 | |
161 | rlwinm r10, r3, 3, 26, 28 | |
162 | clrrdi r3, r3, 3 | |
163 | ld r5, 0(r3) | |
164 | cmpb r9, r0, r6 /* Check if input has null. */ | |
165 | cmpdi cr7, r9, 0 | |
166 | bne cr7, L(return3) | |
167 | cmpb r9, r0, r5 /* Check if input has null. */ | |
168 | #ifdef __LITTLE_ENDIAN__ | |
169 | srd r9, r9, r10 | |
170 | #else | |
171 | sld r9, r9, r10 | |
172 | #endif | |
173 | cmpdi cr7, r9, 0 | |
174 | bne cr7, L(retnull) | |
175 | ||
176 | li r12, -8 /* Shift values. */ | |
177 | li r11, 72 /* Shift values. */ | |
178 | cmpdi cr7, r10, 0 | |
179 | beq cr7, L(nextbyte1) | |
180 | mr r12, r10 | |
181 | addi r12, r12, -8 | |
182 | subfic r11, r12, 64 | |
183 | ||
184 | L(nextbyte1): | |
185 | ldu r7, 8(r3) /* Load next dw. */ | |
186 | addi r12, r12, 8 /* Shift one byte and compare. */ | |
187 | addi r11, r11, -8 | |
188 | #ifdef __LITTLE_ENDIAN__ | |
189 | srd r9, r5, r12 /* Rotate based on mask. */ | |
190 | sld r10, r7, r11 | |
191 | #else | |
192 | sld r9, r5, r12 | |
193 | srd r10, r7, r11 | |
194 | #endif | |
195 | /* Form single dw from few bytes on first load and second load. */ | |
196 | or r10, r9, r10 | |
197 | /* Check for null in the formed dw. */ | |
198 | cmpb r9, r0, r10 | |
199 | cmpdi cr7, r9, 0 | |
200 | bne cr7, L(retnull) | |
201 | /* Cmpb search str and input str. */ | |
202 | cmpb r9, r10, r6 | |
203 | cmpdi cr7, r9, -1 | |
204 | beq cr7, L(match) | |
205 | addi r8, r8, 1 | |
206 | b L(begin) | |
207 | ||
208 | .align 4 | |
209 | L(match): | |
210 | /* There is a match of 8 bytes, check next bytes. */ | |
211 | cmpdi cr7, r31, 8 | |
212 | beq cr7, L(return) | |
213 | /* Update next starting point r8. */ | |
214 | srdi r9, r11, 3 | |
215 | subf r9, r9, r3 | |
216 | mr r8, r9 | |
217 | ||
218 | L(secondmatch): | |
219 | mr r5, r7 | |
220 | rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ | |
221 | ld r6, 0(r4) | |
222 | addi r4, r4, 8 | |
223 | cmpdi cr7, r10, 0 /* Check if its already aligned? */ | |
224 | beq cr7, L(proceed3) | |
225 | #ifdef __LITTLE_ENDIAN__ | |
226 | srd r6, r6, r10 /* Discard unwanted bits. */ | |
227 | cmpb r9, r0, r6 | |
228 | sld r9, r9, r10 | |
229 | #else | |
230 | sld r6, r6, r10 | |
231 | cmpb r9, r0, r6 | |
232 | srd r9, r9, r10 | |
233 | #endif | |
234 | cmpdi cr7, r9, 0 | |
235 | bne cr7, L(proceed3) | |
236 | ld r9, 0(r4) | |
237 | subfic r10, r10, 64 | |
238 | #ifdef __LITTLE_ENDIAN__ | |
239 | sld r9, r9, r10 /* Discard unwanted bits. */ | |
240 | #else | |
241 | srd r9, r9, r10 | |
242 | #endif | |
243 | or r6, r6, r9 /* Form complete search str. */ | |
244 | ||
245 | L(proceed3): | |
246 | li r7, 0 | |
247 | addi r3, r3, 8 | |
248 | cmpb r9, r0, r5 | |
249 | cmpdi cr7, r9, 0 | |
250 | bne cr7, L(proceed4) | |
251 | ld r7, 0(r3) | |
252 | L(proceed4): | |
253 | #ifdef __LITTLE_ENDIAN__ | |
254 | srd r9, r5, r12 | |
255 | sld r10, r7, r11 | |
256 | #else | |
257 | sld r9, r5, r12 | |
258 | srd r10, r7, r11 | |
259 | #endif | |
260 | /* Form single dw with few bytes from first and second load. */ | |
261 | or r10, r9, r10 | |
262 | cmpb r9, r0, r6 | |
263 | cmpdi cr7, r9, 0 | |
264 | bne cr7, L(return4) | |
265 | /* Check for null in the formed dw. */ | |
266 | cmpb r9, r0, r10 | |
267 | cmpdi cr7, r9, 0 | |
268 | bne cr7, L(retnull) | |
269 | /* If the next 8 bytes dont match, start search again. */ | |
270 | cmpb r9, r10, r6 | |
271 | cmpdi cr7, r9, -1 | |
272 | bne cr7, L(reset) | |
273 | /* If the next 8 bytes match, load and compare next 8. */ | |
274 | b L(secondmatch) | |
275 | ||
276 | .align 4 | |
277 | L(reset): | |
278 | /* Start the search again. */ | |
279 | addi r8, r8, 1 | |
280 | b L(begin) | |
281 | ||
282 | .align 4 | |
283 | L(return3): | |
284 | /* Count leading zeros and compare partial dw. */ | |
285 | #ifdef __LITTLE_ENDIAN__ | |
286 | addi r7, r9, -1 | |
287 | andc r7, r7, r9 | |
288 | popcntd r7, r7 | |
289 | subfic r7, r7, 64 | |
290 | sld r10, r5, r7 | |
291 | sld r6, r6, r7 | |
292 | #else | |
293 | cntlzd r7, r9 | |
294 | subfic r7, r7, 64 | |
295 | srd r10, r5, r7 | |
296 | srd r6, r6, r7 | |
297 | #endif | |
298 | cmpb r9, r10, r6 | |
299 | cmpdi cr7, r9, -1 | |
300 | addi r8, r8, 1 | |
301 | /* Start search again if there is no match. */ | |
302 | bne cr7, L(begin) | |
303 | /* If the words match, update return values. */ | |
304 | subfic r7, r7, 64 | |
305 | srdi r7, r7, 3 | |
306 | add r3, r3, r7 | |
307 | subf r3, r31, r3 | |
308 | b L(end) | |
309 | ||
310 | .align 4 | |
311 | L(return4): | |
312 | /* Count leading zeros and compare partial dw. */ | |
313 | #ifdef __LITTLE_ENDIAN__ | |
314 | addi r7, r9, -1 | |
315 | andc r7, r7, r9 | |
316 | popcntd r7, r7 | |
317 | subfic r7, r7, 64 | |
318 | sld r10, r10, r7 | |
319 | sld r6, r6, r7 | |
320 | #else | |
321 | cntlzd r7, r9 | |
322 | subfic r7, r7, 64 | |
323 | srd r10, r10, r7 | |
324 | srd r6, r6, r7 | |
325 | #endif | |
326 | cmpb r9, r10, r6 | |
327 | cmpdi cr7, r9, -1 | |
328 | addi r8, r8, 1 | |
329 | bne cr7, L(begin) | |
330 | subfic r7, r7, 64 | |
331 | srdi r11, r11, 3 | |
332 | subf r3, r11, r3 | |
333 | srdi r7, r7, 3 | |
334 | add r3, r3, r7 | |
335 | subf r3, r31, r3 | |
336 | b L(end) | |
337 | ||
338 | .align 4 | |
339 | L(begin): | |
340 | mr r3, r8 | |
fe7faec3 RS |
341 | /* When our iterations exceed ITERATIONS,fall back to default. */ |
342 | addi r28, r28, 1 | |
343 | cmpdi cr7, r28, ITERATIONS | |
344 | beq cr7, L(default) | |
b42f8cad RS |
345 | lbz r4, 0(r30) |
346 | bl STRCHR | |
de7ee73d | 347 | #ifndef STRCHR_is_local |
b42f8cad | 348 | nop |
de7ee73d | 349 | #endif |
b42f8cad RS |
350 | /* If first char of search str is not present. */ |
351 | cmpdi cr7, r3, 0 | |
352 | ble cr7, L(end) | |
353 | mr r8, r3 | |
354 | mr r4, r30 /* Restore r4. */ | |
355 | li r0, 0 | |
356 | mr r6, r29 | |
357 | clrrdi r4, r4, 3 | |
358 | addi r4, r4, 8 | |
359 | b L(begin1) | |
360 | ||
361 | /* Handle less than 8 search string. */ | |
362 | .align 4 | |
363 | L(lessthan8): | |
364 | mr r4, r3 | |
365 | mr r9, r30 | |
366 | li r0, 0 | |
367 | ||
368 | rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */ | |
369 | srdi r8, r10, 3 /* Padding in bytes. */ | |
370 | clrrdi r9, r9, 3 /* Make r4 aligned to 8. */ | |
371 | ld r6, 0(r9) | |
372 | cmpdi cr7, r10, 0 /* Check if its already aligned? */ | |
373 | beq cr7, L(proceed2) | |
374 | #ifdef __LITTLE_ENDIAN__ | |
375 | srd r6, r6, r10 /* Discard unwanted bits. */ | |
376 | #else | |
377 | sld r6, r6, r10 | |
378 | #endif | |
379 | subfic r8, r8, 8 | |
380 | cmpd cr7, r8, r31 /* Next load needed? */ | |
381 | bge cr7, L(proceed2) | |
382 | ld r7, 8(r9) | |
383 | subfic r10, r10, 64 | |
384 | #ifdef __LITTLE_ENDIAN__ | |
385 | sld r7, r7, r10 /* Discard unwanted bits. */ | |
386 | #else | |
387 | srd r7, r7, r10 | |
388 | #endif | |
389 | or r6, r6, r7 /* Form complete search str. */ | |
390 | L(proceed2): | |
391 | mr r29, r6 | |
392 | rlwinm r10, r3, 3, 26, 28 | |
393 | clrrdi r7, r3, 3 /* Make r3 aligned. */ | |
394 | ld r5, 0(r7) | |
395 | sldi r8, r31, 3 | |
396 | subfic r8, r8, 64 | |
397 | #ifdef __LITTLE_ENDIAN__ | |
398 | sld r6, r6, r8 | |
399 | cmpb r9, r0, r5 | |
400 | srd r9, r9, r10 | |
401 | #else | |
402 | srd r6, r6, r8 | |
403 | cmpb r9, r0, r5 | |
404 | sld r9, r9, r10 | |
405 | #endif | |
406 | cmpdi cr7, r9, 0 | |
407 | bne cr7, L(noload) | |
408 | cmpdi cr7, r10, 0 | |
409 | beq cr7, L(continue) | |
410 | ld r7, 8(r7) | |
411 | L(continue1): | |
412 | mr r12, r10 | |
413 | addi r12, r12, -8 | |
414 | subfic r11, r12, 64 | |
415 | b L(nextbyte) | |
416 | ||
417 | .align 4 | |
418 | L(continue): | |
419 | ld r7, 8(r7) | |
420 | li r12, -8 /* Shift values. */ | |
421 | li r11, 72 /* Shift values. */ | |
422 | L(nextbyte): | |
423 | addi r12, r12, 8 /* Mask for rotation. */ | |
424 | addi r11, r11, -8 | |
425 | #ifdef __LITTLE_ENDIAN__ | |
426 | srd r9, r5, r12 | |
427 | sld r10, r7, r11 | |
428 | or r10, r9, r10 | |
429 | sld r10, r10, r8 | |
430 | cmpb r9, r0, r10 | |
431 | srd r9, r9, r8 | |
432 | #else | |
433 | sld r9, r5, r12 | |
434 | srd r10, r7, r11 | |
435 | or r10, r9, r10 | |
436 | srd r10, r10, r8 | |
437 | cmpb r9, r0, r10 | |
438 | sld r9, r9, r8 | |
439 | #endif | |
440 | cmpdi cr7, r9, 0 | |
441 | bne cr7, L(retnull) | |
442 | cmpb r9, r10, r6 | |
443 | cmpdi cr7, r9, -1 | |
444 | beq cr7, L(end) | |
445 | addi r3, r4, 1 | |
fe7faec3 RS |
446 | /* When our iterations exceed ITERATIONS,fall back to default. */ |
447 | addi r28, r28, 1 | |
448 | cmpdi cr7, r28, ITERATIONS | |
449 | beq cr7, L(default) | |
b42f8cad RS |
450 | lbz r4, 0(r30) |
451 | bl STRCHR | |
de7ee73d | 452 | #ifndef STRCHR_is_local |
b42f8cad | 453 | nop |
de7ee73d | 454 | #endif |
b42f8cad RS |
455 | /* If first char of search str is not present. */ |
456 | cmpdi cr7, r3, 0 | |
457 | ble cr7, L(end) | |
458 | mr r4, r3 | |
459 | mr r6, r29 | |
460 | li r0, 0 | |
461 | b L(proceed2) | |
462 | ||
463 | .align 4 | |
464 | L(noload): | |
465 | /* Reached null in r3, so skip next load. */ | |
466 | li r7, 0 | |
467 | b L(continue1) | |
468 | ||
469 | .align 4 | |
470 | L(return): | |
471 | /* Update return values. */ | |
472 | srdi r9, r11, 3 | |
473 | subf r3, r9, r3 | |
474 | b L(end) | |
475 | ||
476 | /* Handling byte by byte. */ | |
477 | .align 4 | |
478 | L(bytebybyte): | |
479 | mr r8, r3 | |
480 | addi r8, r8, -1 | |
481 | L(loop1): | |
482 | addi r8, r8, 1 | |
483 | mr r3, r8 | |
484 | mr r4, r30 | |
485 | lbz r6, 0(r4) | |
486 | cmpdi cr7, r6, 0 | |
487 | beq cr7, L(updater3) | |
488 | L(loop): | |
489 | lbz r5, 0(r3) | |
490 | cmpdi cr7, r5, 0 | |
491 | beq cr7, L(retnull) | |
492 | cmpld cr7, r6, r5 | |
493 | bne cr7, L(loop1) | |
494 | addi r3, r3, 1 | |
495 | addi r4, r4, 1 | |
496 | lbz r6, 0(r4) | |
497 | cmpdi cr7, r6, 0 | |
498 | beq cr7, L(updater3) | |
499 | b L(loop) | |
500 | ||
501 | /* Handling return values. */ | |
502 | .align 4 | |
503 | L(updater3): | |
504 | subf r3, r31, r3 /* Reduce len of r4 from r3. */ | |
505 | b L(end) | |
506 | ||
507 | .align 4 | |
508 | L(ret_r3): | |
509 | mr r3, r29 /* Return r3. */ | |
510 | b L(end) | |
511 | ||
512 | .align 4 | |
513 | L(retnull): | |
514 | li r3, 0 /* Return NULL. */ | |
515 | b L(end) | |
516 | ||
517 | .align 4 | |
518 | L(default): | |
b42f8cad RS |
519 | mr r4, r30 |
520 | bl __strstr_ppc | |
521 | nop | |
522 | ||
523 | .align 4 | |
524 | L(end): | |
525 | addi r1, r1, FRAMESIZE /* Restore stack pointer. */ | |
526 | cfi_adjust_cfa_offset(-FRAMESIZE) | |
527 | ld r0, 16(r1) /* Restore the saved link register. */ | |
fe7faec3 | 528 | ld r28, -32(r1) /* Restore callers save register r28. */ |
b42f8cad RS |
529 | ld r29, -24(r1) /* Restore callers save register r29. */ |
530 | ld r30, -16(r1) /* Restore callers save register r30. */ | |
531 | ld r31, -8(r1) /* Restore callers save register r31. */ | |
532 | mtlr r0 /* Branch to link register. */ | |
533 | blr | |
f0748b70 | 534 | END (STRSTR) |
b42f8cad | 535 | libc_hidden_builtin_def (strstr) |