1 /* Optimized strcasecmp implementation for PowerPC64.
2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
20 #include <locale-defines.h>
22 /* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
24 #ifndef USE_AS_STRNCASECMP
25 # define __STRCASECMP __strcasecmp
26 # define STRCASECMP strcasecmp
28 # define __STRCASECMP __strncasecmp
29 # define STRCASECMP strncasecmp
31 /* Convert 16 bytes to lowercase and compare */
35 vcmpgtub v8, v8, v2; \
36 vsel v4, v7, v4, v8; \
39 vcmpgtub v8, v8, v2; \
40 vsel v5, v7, v5, v8; \
44 * Get 16 bytes for unaligned case.
45 * reg1: Vector to hold next 16 bytes.
46 * reg2: Address to read from.
47 * reg3: Permute control vector.
48 * v8: Tmp vector used to mask unwanted bytes.
49 v9: Tmp vector, 0 when null is found in the first 16 bytes
51 #ifdef __LITTLE_ENDIAN__
52 #define GET16BYTES(reg1, reg2, reg3) \
55 vperm v8, v8, reg1, reg3; \
56 vcmpequb. v8, v0, v8; \
65 vperm reg1, v9, reg1, reg3;
67 #define GET16BYTES(reg1, reg2, reg3) \
70 vperm v8, reg1, v8, reg3; \
71 vcmpequb. v8, v0, v8; \
80 vperm reg1, reg1, v9, reg3;
83 /* Check null in v4, v5 and convert to lower. */
84 #define CHECKNULLANDCONVERT() \
85 vcmpequb. v7, v0, v5; \
87 vcmpequb. v7, v0, v4; \
/* Wrappers for five vector/VSX instructions, spelled as mnemonics.
   Operand notes: VSUBUDM_V9_V8 computes v9 = v9 - v8, and VADDUQM_V7_V8
   adds v7 and v8 but writes the sum to v9 (the macro name lists only the
   sources).  */
95 # define VCLZD_V8_v7 vclzd v8, v7;
96 # define MFVRD_R3_V1 mfvrd r3, v1;
97 # define VSUBUDM_V9_V8 vsubudm v9, v9, v8;
98 # define VPOPCNTD_V8_V8 vpopcntd v8, v8;
99 # define VADDUQM_V7_V8 vadduqm v9, v7, v8;
/* The same five instructions given as raw 32-bit encodings (for example
   0x11003fc2 decodes to vclzd v8, v7).  Presumably used when the
   assembler does not know the mnemonics; the preprocessor condition that
   selects between the two sets is not visible here - confirm.  */
101 # define VCLZD_V8_v7 .long 0x11003fc2
102 # define MFVRD_R3_V1 .long 0x7c230067
103 # define VSUBUDM_V9_V8 .long 0x112944c0
104 # define VPOPCNTD_V8_V8 .long 0x110047c3
105 # define VADDUQM_V7_V8 .long 0x11274100
111 #ifdef USE_AS_STRNCASECMP
/* Symbolic names for the general-purpose registers used by this routine.
   rADDR1/rLWR1 both name r8 and rADDR2/rLWR2 both name r12: the table
   offset held in the register is overwritten by the word loaded from the
   table.  rRTN is r3, the register the prototype comment gives for both
   s1 and the return value, while rSTR1 is a separate register (r10).  */
116 #define rRTN r3 /* Return value */
117 #define rSTR1 r10 /* 1st string */
118 #define rSTR2 r4 /* 2nd string */
119 #define rCHAR1 r6 /* Byte read from 1st string */
120 #define rCHAR2 r7 /* Byte read from 2nd string */
121 #define rADDR1 r8 /* Address of tolower(rCHAR1) */
122 #define rADDR2 r12 /* Address of tolower(rCHAR2) */
123 #define rLWR1 r8 /* Word tolower(rCHAR1) */
124 #define rLWR2 r12 /* Word tolower(rCHAR2) */
126 #define rLOC r11 /* Default locale address */
/* Compare r3 (rRTN, the register the prototype comment names for s1)
   with the s2 pointer as 64-bit values; the result is left in cr7.
   NOTE(review): the branch that consumes cr7 is not visible here.  */
128 cmpd cr7, rRTN, rSTR2
130 /* Get locale address: load the thread-pointer-relative offset of
   __libc_tsd_LOCALE from the GOT (addressed through r2), then add it to
   the thread pointer via the @tls operand, leaving the address of the
   thread-local variable in rLOC.  */
131 ld rTMP, __libc_tsd_LOCALE@got@tprel(r2)
132 add rLOC, rTMP, __libc_tsd_LOCALE@tls
138 #ifdef USE_AS_STRNCASECMP
142 blt cr7, L(bytebybyte)
146 /* Check for null in initial characters.
147 Check at most 16 characters, depending on the alignment.
148 If null is present, proceed byte by byte. */
150 #ifdef __LITTLE_ENDIAN__
151 lvsr v10, 0, rSTR1 /* Compute mask. */
152 vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */
155 vperm v9, v4, v8, v10
157 vcmpequb. v9, v0, v9 /* Check for null bytes. */
158 bne cr6, L(bytebybyte)
160 /* Calculate alignment. */
161 #ifdef __LITTLE_ENDIAN__
163 vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */
168 vcmpequb. v9, v0, v9 /* Check for null bytes. */
169 bne cr6, L(bytebybyte)
170 /* Check if locale has non ascii characters. */
172 addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
175 beq cr7, L(bytebybyte)
177 /* Load vector registers with values used for TOLOWER. */
178 /* Load v1 = 0xbf, v2 = 0x19, v3 = 0x20 in each byte. */
187 andi. rADDR1, rSTR1, 0xF
191 /* Compute 16 bytes from previous two loads. */
192 #ifdef __LITTLE_ENDIAN__
193 vperm v4, v9, v4, v10
195 vperm v4, v4, v9, v10
198 andi. rADDR2, rSTR2, 0xF
202 /* Compute 16 bytes from previous two loads. */
203 #ifdef __LITTLE_ENDIAN__
209 CHECKNULLANDCONVERT()
216 #ifdef USE_AS_STRNCASECMP
221 andi. rADDR2, rSTR2, 0xF
222 addi rSTR1, rSTR1, -16
223 addi rSTR2, rSTR2, -16
225 #ifdef __LITTLE_ENDIAN__
230 /* There are 2 loops depending on the input alignment.
231 Each loop gets 16 bytes from s1 and s2, checks for null,
232 converts to lowercase and compares. Loop till difference
235 addi rSTR1, rSTR1, 16
236 addi rSTR2, rSTR2, 16
237 #ifdef USE_AS_STRNCASECMP
239 blt cr7, L(bytebybyte)
243 GET16BYTES(v5, rSTR2, v6)
244 CHECKNULLANDCONVERT()
249 addi rSTR1, rSTR1, 16
250 addi rSTR2, rSTR2, 16
251 #ifdef USE_AS_STRNCASECMP
253 blt cr7, L(bytebybyte)
258 CHECKNULLANDCONVERT()
261 /* Calculate and return the difference. */
265 #ifdef __LITTLE_ENDIAN__
266 /* Count trailing zero. */
275 /* Count leading zero. */
287 #ifdef __LITTLE_ENDIAN__
288 /* Shift registers based on leading zero count. */
291 /* Merge and move to GPR. */
295 /* Place the characters that are different in first position. */
297 srdi rSTR2, rSTR2, 56 /* Keep only the most significant byte, zero-extended. */
299 srdi rSTR1, rSTR1, 56 /* Likewise for the other operand. */
306 sldi rSTR2, rSTR2, 56 /* Shift left then right by 56 ... */
307 srdi rSTR2, rSTR2, 56 /* ... keeps only the least significant byte. */
310 subf rRTN, rSTR1, rSTR2 /* rRTN = rSTR2 - rSTR1 (subf subtracts its first source from its second). */
315 /* OK. We've hit the end of the string. We need to be careful that
316 we don't compare two strings as different because of junk beyond
317 the end of the strings... */
320 #ifdef __LITTLE_ENDIAN__
321 /* Count trailing zero. */
327 vcmpequb. v6, v6, v10
330 /* Count leading zero. */
333 vcmpequb. v6, v6, v10
342 /* Calculate shift count based on count of zero. */
345 vsldoi v9, v0, v10, 1
350 /* Shift and remove junk after null character. */
351 #ifdef __LITTLE_ENDIAN__
358 /* Convert and compare 16 bytes. */
368 /* Unrolling loop for POWER: loads are done with 'lbz' plus
369 offset and string descriptors are only updated at the end
370 of loop unrolling.
   Each step below lowercases one character from each string by indexing
   a table of 32-bit entries (character value * 4) based at rLOC, and
   compares the two results.  */
/* Replace rLOC with the pointer stored at offset LOCALE_CTYPE_TOLOWER
   from it; it is the base register of every table lookup below.  */
371 ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
372 lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
373 lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
374 #ifdef USE_AS_STRNCASECMP
375 rldicl rTMP, r5, 62, 2 /* rTMP = r5 >> 2 (logical); r5 is presumably the length argument n - confirm. */
377 beq cr7, L(lessthan4)
381 cmpdi rCHAR1, 0 /* *s1 == '\0' ?  Result goes to cr0. */
382 sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */
383 sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */
384 lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */
385 lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */
386 cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */
387 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, i.e. (*s1 == '\0') || (r == 0) */
392 sldi rADDR1, rCHAR1, 2 /* Table offset for the next char of s1 */
393 sldi rADDR2, rCHAR2, 2 /* Table offset for the next char of s2 */
394 lwzx rLWR1, rLOC, rADDR1 /* Load lowercased s1 char */
395 lwzx rLWR2, rLOC, rADDR2 /* Load lowercased s2 char */
396 cmpw cr1, rLWR1, rLWR2 /* Lowercased chars equal ? */
397 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, as above */
402 sldi rADDR1, rCHAR1, 2
403 sldi rADDR2, rCHAR2, 2
404 lwzx rLWR1, rLOC, rADDR1
405 lwzx rLWR2, rLOC, rADDR2
406 cmpw cr1, rLWR1, rLWR2
407 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, as above */
412 /* Increment both string descriptors */
415 sldi rADDR1, rCHAR1, 2
416 sldi rADDR2, rCHAR2, 2
417 lwzx rLWR1, rLOC, rADDR1
418 lwzx rLWR2, rLOC, rADDR2
419 cmpw cr1, rLWR1, rLWR2
420 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, as above */
422 lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
423 lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
424 #ifdef USE_AS_STRNCASECMP
429 #ifdef USE_AS_STRNCASECMP
437 sldi rADDR1, rCHAR1, 2
438 sldi rADDR2, rCHAR2, 2
439 lwzx rLWR1, rLOC, rADDR1
440 lwzx rLWR2, rLOC, rADDR2
441 cmpw cr1, rLWR1, rLWR2
442 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, as above */
451 subf r0, rLWR2, rLWR1 /* r0 = rLWR1 - rLWR2: difference of the lowercased chars */
456 weak_alias (__STRCASECMP, STRCASECMP)
457 libc_hidden_builtin_def (__STRCASECMP)