]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/powerpc64/strncmp.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / strncmp.S
CommitLineData
7df11052 1/* Optimized strncmp implementation for PowerPC64.
f7a9f785 2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
7df11052
UD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
7df11052
UD
18
19#include <sysdep.h>
7df11052
UD
20
21/* See strlen.s for comments on how the end-of-string testing works. */
22
23/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
24
2d67d91a 25EALIGN (strncmp, 4, 0)
d7d06f79 26 CALL_MCOUNT 3
/* Overview: compares at most rN bytes of the strings at rSTR1 and rSTR2.
   When both pointers are doubleword aligned and rN is at least 8, whole
   doublewords are compared first in the L(g0)/L(g1) loop.  Any residual
   bytes, and every other case, go through the byte loop at L(uz).
   rRTN is 0 when no difference is found.  The doubleword paths return
   -1 or 1; the byte loop returns the difference of the two bytes. */
7df11052 27
8a7413f9 28#define rTMP2 r0 /* scratch, holds the mask in the little-endian L(endstring) */
7df11052
UD
29#define rRTN r3 /* return value, shares r3 with rSTR1 */
30#define rSTR1 r3 /* first string arg */
31#define rSTR2 r4 /* second string arg */
32#define rN r5 /* max string length */
7df11052
UD
33#define rWORD1 r6 /* current word in s1 */
34#define rWORD2 r7 /* current word in s2 */
35#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
36#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
37#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
38#define rBITDIF r11 /* bits that differ in s1 & s2 words */
8a7413f9 39#define rTMP r12
7df11052
UD
40
/* Entry: cr0 answers "are both pointers doubleword aligned?" and cr1
   answers "is rN zero?".  The two lis instructions start building the
   0x7f7f... and 0xfefe... constants. */
41 dcbt 0,rSTR1
42 or rTMP, rSTR2, rSTR1
43 lis r7F7F, 0x7f7f
44 dcbt 0,rSTR2
45 clrldi. rTMP, rTMP, 61 /* Keep only the low 3 bits of (s1 | s2). */
8ce9ea74 46 cmpldi cr1, rN, 0
7df11052
UD
47 lis rFEFE, -0x101
48 bne L(unaligned) /* A low address bit is set: use the byte loop. */
2ccdea26 49/* We are doubleword aligned so set up for two loops. first a double word
7df11052
UD
50 loop, then fall into the byte loop if any residual. */
51 srdi. rTMP, rN, 3 /* rTMP = number of whole doublewords in rN. */
52 clrldi rN, rN, 61 /* rN = residual byte count (low 3 bits of rN). */
53 addi rFEFE, rFEFE, -0x101
8ce9ea74 54 addi r7F7F, r7F7F, 0x7f7f
9c84384c 55 cmpldi cr1, rN, 0 /* cr1 now reflects the residual count. */
7df11052
UD
56 beq L(unaligned) /* cr0 from srdi.: no whole doubleword to compare. */
57
58 mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
59 ld rWORD1, 0(rSTR1)
60 ld rWORD2, 0(rSTR2)
/* Replicate the 32-bit constants into both halves of their registers. */
61 sldi rTMP, rFEFE, 32
62 insrdi r7F7F, r7F7F, 32, 0
63 add rFEFE, rFEFE, rTMP
64 b L(g1)
65
/* Doubleword loop.  cr1 carries the comparison of the previous pair of
   doublewords into the next iteration, and cr0 carries the zero-byte test. */
9c84384c 66L(g0):
7df11052
UD
67 ldu rWORD1, 8(rSTR1) /* Advances rSTR1 before cr1 is checked. */
68 bne- cr1, L(different) /* The previous doublewords differed. */
69 ldu rWORD2, 8(rSTR2)
70L(g1): add rTMP, rFEFE, rWORD1
71 nor rNEG, r7F7F, rWORD1
72 bdz L(tail) /* Last whole doubleword: finish the checks at L(tail). */
73 and. rTMP, rTMP, rNEG /* Nonzero when the s1 doubleword contains a zero byte. */
74 cmpd cr1, rWORD1, rWORD2
75 beq+ L(g0)
9c84384c 76
7df11052
UD
77/* OK. We've hit the end of the string. We need to be careful that
78 we don't compare two strings as different because of gunk beyond
79 the end of the strings... */
9c84384c 80
8a7413f9
AM
81#ifdef __LITTLE_ENDIAN__
82L(endstring):
83 addi rTMP2, rTMP, -1
84 beq cr1, L(equal) /* The doublewords match, so the strings are equal. */
85 andc rTMP2, rTMP2, rTMP /* Bits below the lowest set bit of rTMP. */
86 rldimi rTMP2, rTMP2, 1, 0
87 and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
88 and rWORD1, rWORD1, rTMP2
89 cmpd cr1, rWORD1, rWORD2
90 beq cr1, L(equal)
91 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
92 neg rNEG, rBITDIF
93 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
94 cntlzd rNEG, rNEG /* bitcount of the bit. */
95 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
96 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
97 sld rWORD2, rWORD2, rNEG
98 xor. rBITDIF, rWORD1, rWORD2
99 sub rRTN, rWORD1, rWORD2
100 blt- L(highbit) /* Top bits differ, so the sign of the subtraction is not usable. */
101 sradi rRTN, rRTN, 63 /* must return an int. */
102 ori rRTN, rRTN, 1 /* Result is -1 or 1. */
103 blr
104L(equal):
105 li rRTN, 0
106 blr
107
108L(different):
109 ld rWORD1, -8(rSTR1) /* Reload the s1 doubleword that pairs with rWORD2. */
110 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
111 neg rNEG, rBITDIF
112 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
113 cntlzd rNEG, rNEG /* bitcount of the bit. */
114 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
115 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
116 sld rWORD2, rWORD2, rNEG
117 xor. rBITDIF, rWORD1, rWORD2
118 sub rRTN, rWORD1, rWORD2
119 blt- L(highbit)
120 sradi rRTN, rRTN, 63
121 ori rRTN, rRTN, 1
122 blr
123L(highbit):
124 sradi rRTN, rWORD2, 63 /* Top bits differ: take the sign from rWORD2. */
125 ori rRTN, rRTN, 1
126 blr
127
128#else
7df11052
UD
129L(endstring):
130 and rTMP, r7F7F, rWORD1
131 beq cr1, L(equal) /* The doublewords match, so the strings are equal. */
132 add rTMP, rTMP, r7F7F
133 xor. rBITDIF, rWORD1, rWORD2
7df11052
UD
134 andc rNEG, rNEG, rTMP
135 blt- L(highbit) /* The very first bit differs. */
136 cntlzd rBITDIF, rBITDIF /* Position of the first differing bit. */
137 cntlzd rNEG, rNEG
138 addi rNEG, rNEG, 7
139 cmpd cr1, rNEG, rBITDIF
140 sub rRTN, rWORD1, rWORD2
141 blt- cr1, L(equal) /* The difference lies past the terminating zero byte. */
8a7413f9 142 sradi rRTN, rRTN, 63 /* must return an int. */
7df11052
UD
143 ori rRTN, rRTN, 1 /* Result is -1 or 1. */
144 blr
145L(equal):
146 li rRTN, 0
147 blr
148
149L(different):
8a7413f9 150 ld rWORD1, -8(rSTR1) /* Reload the s1 doubleword that pairs with rWORD2. */
7df11052
UD
151 xor. rBITDIF, rWORD1, rWORD2
152 sub rRTN, rWORD1, rWORD2
153 blt- L(highbit) /* Top bits differ, so the sign of the subtraction is not usable. */
154 sradi rRTN, rRTN, 63
155 ori rRTN, rRTN, 1
156 blr
157L(highbit):
8a7413f9
AM
158 sradi rRTN, rWORD2, 63 /* Top bits differ: take the sign from rWORD2. */
159 ori rRTN, rRTN, 1
7df11052 160 blr
8a7413f9 161#endif
7df11052
UD
162
163/* Oh well. In this case, we just do a byte-by-byte comparison. */
164 .align 4
/* L(tail) is reached from the bdz in the doubleword loop: it performs the
   zero-byte and equality checks for the last whole doubleword, then falls
   into the byte loop with the residual count in rN. */
165L(tail):
166 and. rTMP, rTMP, rNEG
167 cmpd cr1, rWORD1, rWORD2
168 bne- L(endstring)
169 addi rSTR1, rSTR1, 8
170 bne- cr1, L(different)
171 addi rSTR2, rSTR2, 8
8ce9ea74 172 cmpldi cr1, rN, 0 /* Any residual bytes left? */
7df11052
UD
173L(unaligned):
174 mtctr rN /* Power4 wants mtctr 1st in dispatch group */
8ce9ea74 175 bgt cr1, L(uz) /* Bytes remain to be compared. */
7df11052
UD
176L(ux):
177 li rRTN, 0 /* Nothing left to compare: return 0. */
178 blr
8ce9ea74
UD
179 .align 4
/* Byte loop, unrolled twice.  ctr counts the bytes still allowed, cr1 tests
   the s1 byte against zero and cr0 compares the two bytes. */
180L(uz):
181 lbz rWORD1, 0(rSTR1)
182 lbz rWORD2, 0(rSTR2)
183 nop
184 b L(u1)
7df11052
UD
185L(u0):
186 lbzu rWORD2, 1(rSTR2)
187L(u1):
188 bdz L(u3) /* Count exhausted: return the difference of the current bytes. */
189 cmpdi cr1, rWORD1, 0
190 cmpd rWORD1, rWORD2
191 beq- cr1, L(u3) /* The s1 byte is zero: end of string. */
192 lbzu rWORD1, 1(rSTR1) /* Next s1 byte is loaded before cr0 is checked. */
193 bne- L(u2) /* Bytes differed: L(u2) steps rWORD1 back one byte. */
194 lbzu rWORD2, 1(rSTR2)
195 bdz L(u3)
196 cmpdi cr1, rWORD1, 0
197 cmpd rWORD1, rWORD2
198 bne- L(u3)
199 lbzu rWORD1, 1(rSTR1)
200 bne+ cr1, L(u0) /* Continue while the previous s1 byte was not zero. */
201
9c84384c 202L(u2): lbzu rWORD1, -1(rSTR1) /* Step back to the s1 byte that was compared. */
7df11052
UD
203L(u3): sub rRTN, rWORD1, rWORD2
204 blr
2d67d91a 205END (strncmp)
7df11052 206libc_hidden_builtin_def (strncmp)