]>
Commit | Line | Data |
---|---|---|
158db122 | 1 | /* Optimized strncmp implementation for POWER7/PowerPC32. |
04277e02 | 2 | Copyright (C) 2010-2019 Free Software Foundation, Inc. |
158db122 LM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
158db122 LM |
18 | |
19 | #include <sysdep.h> | |
158db122 LM |
20 | |
21 | /* See strlen.s for comments on how the end-of-string testing works. */ | |
22 | ||
23 | /* int [r3] strncmp (const char *s1 [r3], | |
24 | const char *s2 [r4], | |
25 | size_t size [r5]) */ | |
26 | ||
b5510883 | 27 | EALIGN (strncmp,5,0) |
158db122 | 28 | /* Strategy: when both pointers are word aligned, compare rN/4 whole words and then the rN%4 residual bytes; otherwise compare byte by byte. rRTN is 0 when the strings match up to a null byte or up to rN bytes. */ |
8a7413f9 | 29 | #define rTMP2 r0 /* second scratch register */ |
158db122 LM |
30 | #define rRTN r3 /* return value (same register as rSTR1) */ |
31 | #define rSTR1 r3 /* first string arg */ | |
32 | #define rSTR2 r4 /* second string arg */ | |
33 | #define rN r5 /* max string length */ | |
158db122 LM |
34 | #define rWORD1 r6 /* current word in s1 */ |
35 | #define rWORD2 r7 /* current word in s2 */ | |
36 | #define rWORD3 r10 /* s1 byte in the byte loop (same register as rNEG) */ | |
37 | #define rWORD4 r11 /* s2 byte in the byte loop (same register as rBITDIF) */ | |
38 | #define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ | |
39 | #define r7F7F r9 /* constant 0x7f7f7f7f */ | |
40 | #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ | |
41 | #define rBITDIF r11 /* bits that differ in s1 & s2 words */ | |
8a7413f9 | 42 | #define rTMP r12 /* scratch register */ |
158db122 LM |
43 | |
44 | dcbt 0,rSTR1 /* cache-touch hint for s1 */ | |
22700377 | 45 | nop |
158db122 LM |
46 | or rTMP,rSTR2,rSTR1 |
47 | lis r7F7F,0x7f7f /* upper half of 0x7f7f7f7f */ | |
48 | dcbt 0,rSTR2 /* cache-touch hint for s2 */ | |
22700377 | 49 | nop |
158db122 LM |
50 | clrlwi. rTMP,rTMP,30 /* keep the low two bits of (s1 OR s2): nonzero if either pointer is not word aligned */ |
51 | cmplwi cr1,rN,0 /* cr1: rN compared with 0, consumed at L(unaligned) */ | |
52 | lis rFEFE,-0x101 /* upper half of 0xfefefeff */ | |
53 | bne L(unaligned) /* not both word aligned: byte loop over all rN bytes */ | |
2ccdea26 | 54 | /* We are word aligned so set up for two loops: first a word
158db122 LM |
55 | loop, then fall into the byte loop if any residual. */ |
56 | srwi. rTMP,rN,2 /* rTMP = number of whole words (rN / 4) */ | |
57 | clrlwi rN,rN,30 /* rN = residual byte count (rN % 4) */ | |
58 | addi rFEFE,rFEFE,-0x101 /* complete 0xfefefeff */ | |
59 | addi r7F7F,r7F7F,0x7f7f /* complete 0x7f7f7f7f */ | |
60 | cmplwi cr1,rN,0 /* cr1: residual compared with 0 */ | |
61 | beq L(unaligned) /* no whole word to compare: byte loop only */ | |
62 | ||
63 | mtctr rTMP /* CTR = word count */ | |
64 | lwz rWORD1,0(rSTR1) | |
65 | lwz rWORD2,0(rSTR2) | |
66 | b L(g1) | |
67 | ||
68 | L(g0): | |
69 | lwzu rWORD1,4(rSTR1) /* advance s1 and load its next word */ | |
70 | bne cr1,L(different) /* previous words differed; rWORD1 is already overwritten, so L(different) reloads it */ | |
71 | lwzu rWORD2,4(rSTR2) | |
72 | L(g1): add rTMP,rFEFE,rWORD1 | |
73 | nor rNEG,r7F7F,rWORD1 | |
74 | bdz L(tail) /* CTR reached zero: this is the last whole word, finish in L(tail) */ | |
75 | and. rTMP,rTMP,rNEG /* nonzero if the s1 word holds a null byte */ | |
76 | cmpw cr1,rWORD1,rWORD2 /* cr1: whole-word comparison, tested after the next s1 load */ | |
77 | beq L(g0) /* no null byte seen: keep looping */ | |
78 | ||
79 | /* OK. We've hit the end of the string. We need to be careful that | |
80 | we don't compare two strings as different because of gunk beyond | |
81 | the end of the strings... */ | |
8a7413f9 AM |
82 | #ifdef __LITTLE_ENDIAN__ |
83 | L(endstring): | |
84 | slwi rTMP, rTMP, 1 | |
85 | addi rTMP2, rTMP, -1 | |
86 | andc rTMP2, rTMP2, rTMP /* rTMP2 = all bits below the lowest set bit of rTMP */ | |
87 | and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ | |
88 | and rWORD1, rWORD1, rTMP2 | |
89 | rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ | |
90 | rlwinm rTMP, rWORD1, 8, 0xffffffff | |
91 | rldimi rTMP2, rWORD2, 24, 32 /* NOTE(review): rldimi is a 64-bit rotate in a PowerPC32 file; confirm this is intended */ | |
92 | rldimi rTMP, rWORD1, 24, 32 | |
93 | rlwimi rTMP2, rWORD2, 24, 16, 23 | |
94 | rlwimi rTMP, rWORD1, 24, 16, 23 | |
95 | xor. rBITDIF, rTMP, rTMP2 /* cr0: negative when the top bits differ */ | |
96 | sub rRTN, rTMP, rTMP2 | |
97 | bgelr /* top bits agree: the difference is the result */ | |
98 | ori rRTN, rTMP2, 1 /* top bits differ: return the reversed s2 word with bit 0 forced on, so it is nonzero */ | |
99 | blr | |
100 | ||
101 | L(different): | |
102 | lwz rWORD1, -4(rSTR1) /* reload the s1 word; rSTR1 has already been advanced past it */ | |
103 | rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ | |
104 | rlwinm rTMP, rWORD1, 8, 0xffffffff | |
105 | rldimi rTMP2, rWORD2, 24, 32 | |
106 | rldimi rTMP, rWORD1, 24, 32 | |
107 | rlwimi rTMP2, rWORD2, 24, 16, 23 | |
108 | rlwimi rTMP, rWORD1, 24, 16, 23 | |
109 | xor. rBITDIF, rTMP, rTMP2 /* cr0: negative when the top bits differ */ | |
110 | sub rRTN, rTMP, rTMP2 | |
111 | bgelr /* top bits agree: the difference is the result */ | |
112 | ori rRTN, rTMP2, 1 /* top bits differ: return the reversed s2 word with bit 0 forced on */ | |
113 | blr | |
158db122 | 114 | |
8a7413f9 | 115 | #else |
158db122 LM |
116 | L(endstring): |
117 | and rTMP,r7F7F,rWORD1 | |
118 | beq cr1,L(equal) /* the two words are identical: return 0 */ | |
119 | add rTMP,rTMP,r7F7F | |
120 | xor. rBITDIF,rWORD1,rWORD2 | |
158db122 LM |
121 | andc rNEG,rNEG,rTMP /* rNEG = 0x80 in each null byte of the s1 word */ |
122 | blt L(highbit) /* top bits of the two words differ */ | |
123 | cntlzw rBITDIF,rBITDIF /* index of the first differing bit */ | |
124 | cntlzw rNEG,rNEG /* index of the top bit of the first null byte */ | |
125 | addi rNEG,rNEG,7 /* index of the last bit of that null byte */ | |
126 | cmpw cr1,rNEG,rBITDIF | |
127 | sub rRTN,rWORD1,rWORD2 | |
8a7413f9 | 128 | bgelr cr1 /* first difference is at or before the null byte: return the difference */ |
158db122 LM |
129 | L(equal): |
130 | li rRTN,0 /* any difference lies past the null byte: report equal */ | |
131 | blr | |
132 | ||
133 | L(different): | |
8a7413f9 | 134 | lwz rWORD1,-4(rSTR1) /* reload the s1 word; rSTR1 has already been advanced past it */ |
158db122 LM |
135 | xor. rBITDIF,rWORD1,rWORD2 |
136 | sub rRTN,rWORD1,rWORD2 | |
8a7413f9 | 137 | bgelr /* top bits agree: the difference is the result */ |
158db122 | 138 | L(highbit): |
8a7413f9 | 139 | ori rRTN, rWORD2, 1 /* top bits differ: return the s2 word with bit 0 forced on, so it is nonzero */ |
158db122 | 140 | blr |
8a7413f9 | 141 | #endif |
158db122 LM |
142 | |
143 | /* Oh well. In this case, we just do a byte-by-byte comparison. */ | |
144 | .align 4 | |
145 | L(tail): | |
146 | and. rTMP,rTMP,rNEG /* same null-byte test, applied to the last whole word */ | |
147 | cmpw cr1,rWORD1,rWORD2 | |
148 | bne L(endstring) /* null byte in the last whole word */ | |
149 | addi rSTR1,rSTR1,4 /* step s1 past the last whole word */ | |
150 | bne cr1,L(different) | |
151 | addi rSTR2,rSTR2,4 | |
152 | cmplwi cr1,rN,0 /* cr1: residual byte count compared with 0 */ | |
153 | L(unaligned): | |
154 | mtctr rN /* CTR = number of bytes left to compare */ | |
155 | ble cr1,L(ux) /* nothing to compare: return 0 */ | |
156 | L(uz): | |
157 | lbz rWORD1,0(rSTR1) | |
158 | lbz rWORD2,0(rSTR2) | |
159 | .align 4 | |
160 | L(u1): /* byte loop, unrolled four times, alternating rWORD1/rWORD2 with rWORD3/rWORD4 */ | |
161 | cmpwi cr1,rWORD1,0 /* cr1: is the s1 byte null? */ | |
162 | bdz L(u4) /* byte count exhausted: this pair decides the result */ | |
163 | cmpw rWORD1,rWORD2 | |
164 | beq cr1,L(u4) /* s1 byte is null */ | |
4420675c | 165 | bne L(u4) |
158db122 LM |
166 | lbzu rWORD3,1(rSTR1) |
167 | lbzu rWORD4,1(rSTR2) | |
158db122 LM |
168 | cmpwi cr1,rWORD3,0 |
169 | bdz L(u3) | |
170 | cmpw rWORD3,rWORD4 | |
171 | beq cr1,L(u3) | |
4420675c | 172 | bne L(u3) |
158db122 LM |
173 | lbzu rWORD1,1(rSTR1) |
174 | lbzu rWORD2,1(rSTR2) | |
158db122 LM |
175 | cmpwi cr1,rWORD1,0 |
176 | bdz L(u4) | |
177 | cmpw rWORD1,rWORD2 | |
178 | beq cr1,L(u4) | |
4420675c | 179 | bne L(u4) |
158db122 LM |
180 | lbzu rWORD3,1(rSTR1) |
181 | lbzu rWORD4,1(rSTR2) | |
158db122 LM |
182 | cmpwi cr1,rWORD3,0 |
183 | bdz L(u3) | |
184 | cmpw rWORD3,rWORD4 | |
185 | beq cr1,L(u3) | |
4420675c | 186 | bne L(u3) |
158db122 LM |
187 | lbzu rWORD1,1(rSTR1) |
188 | lbzu rWORD2,1(rSTR2) | |
4420675c | 189 | b L(u1) |
158db122 LM |
190 | |
191 | L(u3): sub rRTN,rWORD3,rWORD4 /* result = difference of the rWORD3/rWORD4 byte pair */ | |
192 | blr | |
193 | L(u4): sub rRTN,rWORD1,rWORD2 /* result = difference of the rWORD1/rWORD2 byte pair */ | |
194 | blr | |
195 | L(ux): | |
196 | li rRTN,0 /* no bytes to compare: return 0 */ | |
197 | blr | |
b5510883 | 198 | END (strncmp) |
158db122 | 199 | libc_hidden_builtin_def (strncmp) |