]>
Commit | Line | Data |
---|---|---|
158db122 | 1 | /* Optimized strncmp implementation for POWER7/PowerPC64. |
581c785b | 2 | Copyright (C) 2010-2022 Free Software Foundation, Inc. |
158db122 LM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
158db122 LM |
18 | |
19 | #include <sysdep.h> | |
158db122 | 20 | |
3bc426e1 WSM |
21 | #ifndef STRNCMP |
22 | # define STRNCMP strncmp | |
23 | #endif | |
24 | ||
158db122 LM |
25 | /* See strlen.s for comments on how the end-of-string testing works. */ |
26 | ||
27 | /* int [r3] strncmp (const char *s1 [r3], | |
28 | const char *s2 [r4], | |
29 | size_t size [r5]) */ | |
30 | ||
9250e661 | 31 | .machine power7 |
d5b41185 | 32 | ENTRY_TOCLESS (STRNCMP, 5) /* Compare at most rN bytes of rSTR1 and rSTR2; result is returned in rRTN. */ |
158db122 LM |
33 | CALL_MCOUNT 3 |
34 | ||
8a7413f9 | 35 | #define rTMP2 r0 /* second scratch register */ |
158db122 LM |
36 | #define rRTN r3 /* return value; same register as rSTR1 */ |
37 | #define rSTR1 r3 /* first string arg */ | |
38 | #define rSTR2 r4 /* second string arg */ | |
39 | #define rN r5 /* max string length */ | |
158db122 LM |
40 | #define rWORD1 r6 /* current word in s1 */ |
41 | #define rWORD2 r7 /* current word in s2 */ | |
42 | #define rWORD3 r10 /* s1 byte for the alternate byte-loop step; same register as rNEG */ | |
43 | #define rWORD4 r11 /* s2 byte for the alternate byte-loop step; same register as rBITDIF */ | |
44 | #define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ | |
45 | #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ | |
46 | #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ | |
47 | #define rBITDIF r11 /* bits that differ in s1 & s2 words */ | |
8a7413f9 | 48 | #define rTMP r12 /* scratch register */ |
158db122 LM |
49 | |
50 | dcbt 0,rSTR1 /* touch the cache block holding the start of s1 */ | |
22700377 | 51 | nop |
158db122 LM |
52 | or rTMP,rSTR2,rSTR1 /* combine both pointers so one test covers both alignments */ |
53 | lis r7F7F,0x7f7f /* r7F7F = 0x7f7f0000, first step of building the constant */ | |
54 | dcbt 0,rSTR2 /* touch the cache block holding the start of s2 */ | |
22700377 | 55 | nop |
158db122 LM |
56 | clrldi. rTMP,rTMP,61 /* keep the low 3 bits; cr0 is non-zero if either pointer is not 8-byte aligned */ |
57 | cmpldi cr1,rN,0 /* cr1 = (n vs 0), consumed at L(unaligned) */ | |
58 | lis rFEFE,-0x101 /* rFEFE = 0xfffffffffeff0000, first step of building the constant */ | |
59 | bne L(unaligned) /* not both 8-byte aligned: compare byte by byte */ | |
2ccdea26 | 60 | /* We are doubleword aligned so set up for two loops. First a double word |
158db122 LM |
61 | loop, then fall into the byte loop if any residual. */ |
62 | srdi. rTMP,rN,3 /* rTMP = n / 8 = number of full doublewords; sets cr0 */ | |
63 | clrldi rN,rN,61 /* rN = n % 8 = residual bytes after the doublewords */ | |
64 | addi rFEFE,rFEFE,-0x101 /* rFEFE = 0xfffffffffefefeff */ | |
65 | addi r7F7F,r7F7F,0x7f7f /* r7F7F = 0x000000007f7f7f7f */ | |
66 | cmpldi cr1,rN,0 /* cr1 = (residual vs 0), consumed at L(unaligned) */ | |
67 | beq L(unaligned) /* cr0 from srdi.: no full doubleword, the byte loop handles everything */ | |
68 | ||
69 | mtctr rTMP /* CTR = number of doublewords to compare */ | |
70 | ld rWORD1,0(rSTR1) /* first doubleword of s1 */ | |
71 | ld rWORD2,0(rSTR2) /* first doubleword of s2 */ | |
72 | sldi rTMP,rFEFE,32 /* rTMP = 0xfefefeff00000000 */ | |
73 | insrdi r7F7F,r7F7F,32,0 /* copy the low 32 bits into the high 32: r7F7F = 0x7f7f7f7f7f7f7f7f */ | |
74 | add rFEFE,rFEFE,rTMP /* rFEFE = 0xfefefefefefefeff */ | |
75 | b L(g1) /* enter the loop at its test; the first words are already loaded */ | |
76 | ||
77 | L(g0): /* doubleword loop: reached when the previous s1 word held no zero byte */ | |
78 | ldu rWORD1,8(rSTR1) /* next s1 doubleword; rSTR1 += 8 */ | |
79 | bne cr1,L(different) /* the previous pair of words differed (cr1 from the cmpd below) */ | |
80 | ldu rWORD2,8(rSTR2) /* next s2 doubleword; rSTR2 += 8 */ | |
81 | L(g1): add rTMP,rFEFE,rWORD1 /* rTMP = word1 - 0x0101010101010101 */ | |
82 | nor rNEG,r7F7F,rWORD1 /* rNEG = ~(word1 | 0x7f7f7f7f7f7f7f7f) */ | |
83 | bdz L(tail) /* CTR--; the last full doubleword is finished at L(tail) */ | |
84 | and. rTMP,rTMP,rNEG /* cr0 is non-zero iff word1 contains a zero byte */ | |
85 | cmpd cr1,rWORD1,rWORD2 /* cr1 = (word1 vs word2) */ | |
86 | beq L(g0) /* no zero byte: keep looping; otherwise fall through to L(endstring) */ | |
87 | ||
88 | /* OK. We've hit the end of the string. We need to be careful that | |
89 | we don't compare two strings as different because of gunk beyond | |
90 | the end of the strings... */ | |
91 | ||
8a7413f9 AM |
92 | #ifdef __LITTLE_ENDIAN__ |
93 | L(endstring): /* rWORD1 contains a zero byte; rTMP is the non-zero result of the zero-byte test */ | |
94 | addi rTMP2, rTMP, -1 /* rTMP2 = rTMP - 1 */ | |
95 | beq cr1, L(equal) /* words identical, terminator included: strings are equal */ | |
96 | andc rTMP2, rTMP2, rTMP /* ones strictly below the lowest set bit of rTMP */ | |
97 | rldimi rTMP2, rTMP2, 1, 0 /* shift the mask left by one, keeping the old bit 0: mask grows by one bit */ | |
98 | and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ | |
99 | and rWORD1, rWORD1, rTMP2 /* same mask applied to the s1 word */ | |
100 | cmpd cr1, rWORD1, rWORD2 /* compare again with the gunk removed */ | |
101 | beq cr1, L(equal) /* the words only differed in the masked-off bytes */ | |
102 | cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ | |
103 | addi rNEG, rBITDIF, 1 | |
104 | orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ | |
105 | sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ | |
106 | andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ | |
107 | andc rWORD2, rWORD2, rNEG /* same for the s2 word */ | |
108 | xor. rBITDIF, rWORD1, rWORD2 /* cr0 is negative when the top bits of the two words differ */ | |
109 | sub rRTN, rWORD1, rWORD2 /* signed difference; only its sign is used below */ | |
110 | blt L(highbit) /* top bits differ: decide from the sign of rWORD2 instead */ | |
111 | sradi rRTN, rRTN, 63 /* must return an int. */ | |
112 | ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */ | |
113 | blr | |
114 | L(equal): | |
115 | li rRTN, 0 /* strings compare equal: return 0 */ | |
116 | blr | |
117 | ||
118 | L(different): /* the words differ and no zero byte was seen in the s1 word */ | |
119 | ld rWORD1, -8(rSTR1) /* re-read the s1 doubleword that mismatched; rSTR1 was already advanced past it */ | |
120 | cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ | |
121 | addi rNEG, rBITDIF, 1 | |
122 | orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ | |
123 | sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ | |
124 | andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ | |
125 | andc rWORD2, rWORD2, rNEG /* same for the s2 word */ | |
126 | xor. rBITDIF, rWORD1, rWORD2 /* cr0 is negative when the top bits of the two words differ */ | |
127 | sub rRTN, rWORD1, rWORD2 /* signed difference; only its sign is used below */ | |
128 | blt L(highbit) /* top bits differ: decide from the sign of rWORD2 instead */ | |
129 | sradi rRTN, rRTN, 63 /* 0 or -1 according to the sign of the difference */ | |
130 | ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */ | |
131 | blr | |
132 | L(highbit): /* top bits of the two words differ */ | |
133 | sradi rRTN, rWORD2, 63 /* -1 if the top bit of rWORD2 is set, else 0 */ | |
134 | ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */ | |
135 | blr | |
136 | ||
137 | #else |
158db122 LM |
138 | L(endstring): /* rWORD1 contains a zero byte; rNEG still holds ~(word1 | 0x7f7f7f7f7f7f7f7f) */ |
139 | and rTMP,r7F7F,rWORD1 /* rTMP = low 7 bits of every byte of word1 */ | |
140 | beq cr1,L(equal) /* words identical, terminator included: strings are equal */ | |
141 | add rTMP,rTMP,r7F7F /* top bit of each byte set iff that byte of word1 has any low-7 bit set */ | |
142 | xor. rBITDIF,rWORD1,rWORD2 /* rBITDIF = differing bits; cr0 is negative when the top bits differ */ | |
158db122 LM |
143 | andc rNEG,rNEG,rTMP /* rNEG = 0x80 in exactly the bytes of word1 that are zero */ |
144 | blt L(highbit) /* top bits differ: decide from the sign of rWORD2 */ | |
145 | cntlzd rBITDIF,rBITDIF /* bit position (from the MSB) of the first difference */ | |
146 | cntlzd rNEG,rNEG /* bit position of the marker for the first zero byte */ | |
147 | addi rNEG,rNEG,7 /* bit position of the last bit of that zero byte */ | |
148 | cmpd cr1,rNEG,rBITDIF /* does the zero byte end before the first difference? */ | |
149 | sub rRTN,rWORD1,rWORD2 /* signed difference; only its sign is used below */ | |
150 | blt cr1,L(equal) /* the difference lies beyond the terminator: equal */ | |
8a7413f9 | 151 | sradi rRTN,rRTN,63 /* must return an int. */ |
158db122 LM |
152 | ori rRTN,rRTN,1 /* rRTN = -1 or 1 */ |
153 | blr | |
154 | L(equal): | |
155 | li rRTN,0 /* strings compare equal: return 0 */ | |
156 | blr | |
157 | ||
158 | L(different): /* the words differ and no zero byte was seen in the s1 word */ | |
8a7413f9 | 159 | ld rWORD1,-8(rSTR1) /* re-read the s1 doubleword that mismatched; rSTR1 was already advanced past it */ |
158db122 LM |
160 | xor. rBITDIF,rWORD1,rWORD2 /* cr0 is negative when the top bits of the two words differ */ |
161 | sub rRTN,rWORD1,rWORD2 /* signed difference; only its sign is used below */ | |
162 | blt L(highbit) /* top bits differ: decide from the sign of rWORD2 instead */ | |
163 | sradi rRTN,rRTN,63 /* 0 or -1 according to the sign of the difference */ | |
164 | ori rRTN,rRTN,1 /* rRTN = -1 or 1 */ | |
165 | blr | |
166 | L(highbit): /* top bits of the two words differ */ | |
8a7413f9 AM |
167 | sradi rRTN,rWORD2,63 /* -1 if the top bit of rWORD2 is set, else 0 */ |
168 | ori rRTN,rRTN,1 /* rRTN = -1 or 1 */ | |
158db122 | 169 | blr |
8a7413f9 | 170 | #endif |
158db122 LM |
171 | |
172 | /* Oh well. In this case, we just do a byte-by-byte comparison. */ | |
173 | .align 4 | |
174 | L(tail): /* last full doubleword: finish the test that the bdz in the loop skipped */ | |
175 | and. rTMP,rTMP,rNEG /* cr0 is non-zero iff word1 contains a zero byte */ | |
176 | cmpd cr1,rWORD1,rWORD2 /* cr1 = (word1 vs word2) */ | |
177 | bne L(endstring) /* terminator inside this doubleword */ | |
178 | addi rSTR1,rSTR1,8 /* step s1 past the doubleword just tested */ | |
179 | bne cr1,L(different) /* no terminator, but the words differ */ | |
180 | addi rSTR2,rSTR2,8 /* step s2 past the doubleword just tested */ | |
181 | cmpldi cr1,rN,0 /* cr1 = (residual byte count vs 0) */ | |
182 | L(unaligned): /* byte loop entry; cr1 holds (rN vs 0) on every path here */ | |
183 | mtctr rN /* CTR = number of bytes still to compare */ | |
184 | ble cr1,L(ux) /* nothing left to compare: return 0 */ | |
185 | L(uz): | |
186 | lbz rWORD1,0(rSTR1) /* first byte of s1 (zero-extended) */ | |
187 | lbz rWORD2,0(rSTR2) /* first byte of s2 (zero-extended) */ | |
188 | .align 4 | |
189 | L(u1): /* unrolled four times, alternating rWORD1/rWORD2 and rWORD3/rWORD4 */ | |
190 | cmpdi cr1,rWORD1,0 /* cr1 = (s1 byte vs 0) */ | |
191 | bdz L(u4) /* CTR--; this was the last byte allowed by the count */ | |
192 | cmpd rWORD1,rWORD2 /* cr0 = (s1 byte vs s2 byte) */ | |
193 | beq cr1,L(u4) /* s1 byte is the terminator: stop */ | |
4420675c | 194 | bne L(u4) /* bytes differ: stop */ |
158db122 LM |
195 | lbzu rWORD3,1(rSTR1) /* next s1 byte; rSTR1 += 1 */ |
196 | lbzu rWORD4,1(rSTR2) /* next s2 byte; rSTR2 += 1 */ | |
158db122 LM |
197 | cmpdi cr1,rWORD3,0 /* cr1 = (s1 byte vs 0) */ |
198 | bdz L(u3) /* CTR--; this was the last byte allowed by the count */ | |
199 | cmpd rWORD3,rWORD4 /* cr0 = (s1 byte vs s2 byte) */ | |
200 | beq cr1,L(u3) /* s1 byte is the terminator: stop */ | |
4420675c | 201 | bne L(u3) /* bytes differ: stop */ |
158db122 LM |
202 | lbzu rWORD1,1(rSTR1) /* next s1 byte; rSTR1 += 1 */ |
203 | lbzu rWORD2,1(rSTR2) /* next s2 byte; rSTR2 += 1 */ | |
158db122 LM |
204 | cmpdi cr1,rWORD1,0 /* cr1 = (s1 byte vs 0) */ |
205 | bdz L(u4) /* CTR--; this was the last byte allowed by the count */ | |
206 | cmpd rWORD1,rWORD2 /* cr0 = (s1 byte vs s2 byte) */ | |
207 | beq cr1,L(u4) /* s1 byte is the terminator: stop */ | |
4420675c | 208 | bne L(u4) /* bytes differ: stop */ |
158db122 LM |
209 | lbzu rWORD3,1(rSTR1) /* next s1 byte; rSTR1 += 1 */ |
210 | lbzu rWORD4,1(rSTR2) /* next s2 byte; rSTR2 += 1 */ | |
158db122 LM |
211 | cmpdi cr1,rWORD3,0 /* cr1 = (s1 byte vs 0) */ |
212 | bdz L(u3) /* CTR--; this was the last byte allowed by the count */ | |
213 | cmpd rWORD3,rWORD4 /* cr0 = (s1 byte vs s2 byte) */ | |
214 | beq cr1,L(u3) /* s1 byte is the terminator: stop */ | |
4420675c | 215 | bne L(u3) /* bytes differ: stop */ |
158db122 LM |
216 | lbzu rWORD1,1(rSTR1) /* next s1 byte; rSTR1 += 1 */ |
217 | lbzu rWORD2,1(rSTR2) /* next s2 byte; rSTR2 += 1 */ | |
4420675c | 218 | b L(u1) /* back to the top of the unrolled loop */ |
158db122 LM |
219 | |
220 | L(u3): sub rRTN,rWORD3,rWORD4 /* return the difference of the last rWORD3/rWORD4 byte pair */ | |
221 | blr | |
222 | L(u4): sub rRTN,rWORD1,rWORD2 /* return the difference of the last rWORD1/rWORD2 byte pair */ | |
223 | blr | |
224 | L(ux): /* no bytes to compare */ | |
225 | li rRTN,0 /* return 0 */ | |
226 | blr | |
3bc426e1 | 227 | END (STRNCMP) |
158db122 | 228 | libc_hidden_builtin_def (strncmp) |