]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/strncmp.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / strncmp.S
1 /* Optimized strncmp implementation for PowerPC64.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20
21 /* See strlen.s for comments on how the end-of-string testing works. */
22
23 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
24
/* strncmp: compare at most rN bytes of the strings at rSTR1 and rSTR2.
   Returns 0 in rRTN when no difference is found before a NUL byte or
   before the count runs out.  The doubleword paths return -1 or +1,
   the byte loop returns the difference of the two bytes.

   Structure:
   - If both pointers are 8-byte aligned, whole doublewords are compared
     in the L(g0)/L(g1) loop, then any residual (size mod 8) bytes are
     compared in the byte loop.
   - Otherwise everything is done in the byte loop at L(unaligned).  */
25 EALIGN (strncmp, 4, 0)
26 CALL_MCOUNT 3
27
28 #define rTMP2 r0 /* scratch; holds the byte mask in the little-endian L(endstring) */
29 #define rRTN r3 /* return value (same register as rSTR1) */
30 #define rSTR1 r3 /* first string arg */
31 #define rSTR2 r4 /* second string arg */
32 #define rN r5 /* max string length */
33 #define rWORD1 r6 /* current word in s1 */
34 #define rWORD2 r7 /* current word in s2 */
35 #define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
36 #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
37 #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
38 #define rBITDIF r11 /* bits that differ in s1 & s2 words */
39 #define rTMP r12 /* scratch */
40
/* Entry: touch both strings with dcbt, test the alignment of both
   pointers with a single OR, and start building the two constants
   (upper halfwords first via lis).  */
41 dcbt 0,rSTR1
42 or rTMP, rSTR2, rSTR1 /* combine the pointers so one test covers both. */
43 lis r7F7F, 0x7f7f
44 dcbt 0,rSTR2
45 clrldi. rTMP, rTMP, 61 /* keep the low 3 bits; cr0 is EQ only if both are 8-byte aligned. */
46 cmpldi cr1, rN, 0 /* cr1 = size compared with 0, consumed at L(unaligned). */
47 lis rFEFE, -0x101
48 bne L(unaligned) /* some low address bit is set: byte loop for everything. */
49 /* We are doubleword aligned so set up for two loops. first a double word
50 loop, then fall into the byte loop if any residual. */
51 srdi. rTMP, rN, 3 /* rTMP = number of whole doublewords in size; sets cr0. */
52 clrldi rN, rN, 61 /* rN = residual byte count (size mod 8). */
53 addi rFEFE, rFEFE, -0x101
54 addi r7F7F, r7F7F, 0x7f7f
55 cmpldi cr1, rN, 0 /* cr1 now describes the residual count. */
56 beq L(unaligned) /* cr0 from srdi.: size is below 8, so byte loop only. */
57
/* Load the first pair of doublewords and extend both constants from
   their 32-bit form to the full 64-bit values listed at the #defines.  */
58 mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
59 ld rWORD1, 0(rSTR1)
60 ld rWORD2, 0(rSTR2)
61 sldi rTMP, rFEFE, 32
62 insrdi r7F7F, r7F7F, 32, 0
63 add rFEFE, rFEFE, rTMP
64 b L(g1)
65
/* Doubleword loop.  CTR counts the doublewords still to be examined.
   L(g0) starts the next s1 load before acting on cr1, which still holds
   the compare of the previous pair; L(different) therefore reloads the
   s1 doubleword from -8(rSTR1).  */
66 L(g0):
67 ldu rWORD1, 8(rSTR1)
68 bne- cr1, L(different)
69 ldu rWORD2, 8(rSTR2)
70 L(g1): add rTMP, rFEFE, rWORD1
71 nor rNEG, r7F7F, rWORD1
72 bdz L(tail) /* this is the last counted doubleword: finish it at L(tail). */
73 and. rTMP, rTMP, rNEG /* nonzero result signals a NUL byte in rWORD1 (see strlen.s). */
74 cmpd cr1, rWORD1, rWORD2
75 beq+ L(g0) /* no NUL seen: keep going. */
76
77 /* OK. We've hit the end of the string. We need to be careful that
78 we don't compare two strings as different because of gunk beyond
79 the end of the strings... */
80
81 #ifdef __LITTLE_ENDIAN__
/* Little-endian: rTMP holds the NUL-test result and cr1 the compare of
   the two doublewords.  A mask is built from the bits below the lowest
   set bit of rTMP, widened by one bit, and both words are masked with
   it before they are compared again.  */
82 L(endstring):
83 addi rTMP2, rTMP, -1
84 beq cr1, L(equal) /* doublewords identical and a NUL is present. */
85 andc rTMP2, rTMP2, rTMP /* (rTMP - 1) & ~rTMP: bits below the lowest set bit. */
86 rldimi rTMP2, rTMP2, 1, 0 /* widen the mask by one bit. */
87 and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
88 and rWORD1, rWORD1, rTMP2
89 cmpd cr1, rWORD1, rWORD2
90 beq cr1, L(equal)
91 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
92 neg rNEG, rBITDIF
93 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
94 cntlzd rNEG, rNEG /* bitcount of the bit. */
95 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
96 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
97 sld rWORD2, rWORD2, rNEG
98 xor. rBITDIF, rWORD1, rWORD2
99 sub rRTN, rWORD1, rWORD2
100 blt- L(highbit) /* top bits differ: take the sign from rWORD2 instead. */
101 sradi rRTN, rRTN, 63 /* must return an int. */
102 ori rRTN, rRTN, 1 /* rRTN is now -1 or +1. */
103 blr
104 L(equal):
105 li rRTN, 0
106 blr
107
/* The doublewords differ and no NUL was seen in them.  Both branch
   sites have already advanced rSTR1 by 8, so the s1 doubleword is
   reloaded from -8(rSTR1).  The lowest differing byte is then shifted
   to the most significant position to decide the result.  */
108 L(different):
109 ld rWORD1, -8(rSTR1)
110 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
111 neg rNEG, rBITDIF
112 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
113 cntlzd rNEG, rNEG /* bitcount of the bit. */
114 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
115 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
116 sld rWORD2, rWORD2, rNEG
117 xor. rBITDIF, rWORD1, rWORD2
118 sub rRTN, rWORD1, rWORD2
119 blt- L(highbit)
120 sradi rRTN, rRTN, 63
121 ori rRTN, rRTN, 1 /* rRTN is now -1 or +1. */
122 blr
/* The top bits of the two words differ, so the result is taken from
   the sign of rWORD2: -1 when its top bit is set, +1 otherwise.  */
123 L(highbit):
124 sradi rRTN, rWORD2, 63
125 ori rRTN, rRTN, 1
126 blr
127
128 #else
/* Big-endian: rNEG is refined so that 0x80 stays set only in bytes of
   rWORD1 that are zero.  The bit position of the first NUL is then
   compared with the position of the first differing bit; a NUL that
   comes first means the strings are equal.  */
129 L(endstring):
130 and rTMP, r7F7F, rWORD1
131 beq cr1, L(equal) /* doublewords identical and a NUL is present. */
132 add rTMP, rTMP, r7F7F
133 xor. rBITDIF, rWORD1, rWORD2
134 andc rNEG, rNEG, rTMP
135 blt- L(highbit) /* top bits differ: take the sign from rWORD2 instead. */
136 cntlzd rBITDIF, rBITDIF /* position of the first differing bit. */
137 cntlzd rNEG, rNEG /* position of the 0x80 marker of the first NUL byte. */
138 addi rNEG, rNEG, 7 /* move to the last bit of that NUL byte. */
139 cmpd cr1, rNEG, rBITDIF
140 sub rRTN, rWORD1, rWORD2
141 blt- cr1, L(equal) /* the NUL precedes the first difference. */
142 sradi rRTN, rRTN, 63 /* must return an int. */
143 ori rRTN, rRTN, 1 /* rRTN is now -1 or +1. */
144 blr
145 L(equal):
146 li rRTN, 0
147 blr
148
/* The doublewords differ and no NUL was seen in them.  Both branch
   sites have already advanced rSTR1 by 8, so the s1 doubleword is
   reloaded from -8(rSTR1) before the result is computed.  */
149 L(different):
150 ld rWORD1, -8(rSTR1)
151 xor. rBITDIF, rWORD1, rWORD2
152 sub rRTN, rWORD1, rWORD2
153 blt- L(highbit)
154 sradi rRTN, rRTN, 63
155 ori rRTN, rRTN, 1 /* rRTN is now -1 or +1. */
156 blr
/* The top bits of the two words differ, so the result is taken from
   the sign of rWORD2: -1 when its top bit is set, +1 otherwise.  */
157 L(highbit):
158 sradi rRTN, rWORD2, 63
159 ori rRTN, rRTN, 1
160 blr
161 #endif
162
163 /* Oh well. In this case, we just do a byte-by-byte comparison. */
164 .align 4
/* L(tail) is reached from the bdz in the doubleword loop with the last
   counted pair in rWORD1/rWORD2 and rTMP/rNEG computed but not yet
   tested.  It finishes that test, advances both pointers past the
   doubleword and falls into the byte loop for the residual rN bytes.  */
165 L(tail):
166 and. rTMP, rTMP, rNEG
167 cmpd cr1, rWORD1, rWORD2
168 bne- L(endstring)
169 addi rSTR1, rSTR1, 8
170 bne- cr1, L(different)
171 addi rSTR2, rSTR2, 8
172 cmpldi cr1, rN, 0 /* is there any residual byte to compare?  */
/* Byte loop entry.  rN is the number of bytes still to compare and cr1
   holds its comparison with 0.  */
173 L(unaligned):
174 mtctr rN /* Power4 wants mtctr 1st in dispatch group */
175 bgt cr1, L(uz)
176 L(ux):
177 li rRTN, 0 /* nothing left to compare: equal. */
178 blr
179 .align 4
/* The byte loop is unrolled twice.  In each half the next s1 byte is
   loaded before the branch on the current compare, so L(u2) steps
   rSTR1 back by one to reload the byte that was actually compared.  */
180 L(uz):
181 lbz rWORD1, 0(rSTR1)
182 lbz rWORD2, 0(rSTR2)
183 nop
184 b L(u1)
185 L(u0):
186 lbzu rWORD2, 1(rSTR2)
187 L(u1):
188 bdz L(u3) /* count exhausted: result is the difference of this pair. */
189 cmpdi cr1, rWORD1, 0 /* cr1: is the s1 byte a NUL?  */
190 cmpd rWORD1, rWORD2 /* cr0: do the bytes match?  */
191 beq- cr1, L(u3)
192 lbzu rWORD1, 1(rSTR1)
193 bne- L(u2)
194 lbzu rWORD2, 1(rSTR2)
195 bdz L(u3)
196 cmpdi cr1, rWORD1, 0
197 cmpd rWORD1, rWORD2
198 bne- L(u3)
199 lbzu rWORD1, 1(rSTR1)
200 bne+ cr1, L(u0)
201
202 L(u2): lbzu rWORD1, -1(rSTR1) /* reload the s1 byte that was compared. */
203 L(u3): sub rRTN, rWORD1, rWORD2 /* return the byte difference. */
204 blr
205 END (strncmp)
206 libc_hidden_builtin_def (strncmp)