]>
Commit | Line | Data |
---|---|---|
ae6b8730 RH |
1 | /* Compare two strings for differences. |
2 | For SPARC v9. | |
04277e02 | 3 | Copyright (C) 2011-2019 Free Software Foundation, Inc. |
ae6b8730 | 4 | This file is part of the GNU C Library. |
ad69cc26 | 5 | Contributed by David S. Miller <davem@davemloft.net> |
ae6b8730 RH |
6 | |
7 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
ae6b8730 RH |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 15 | Lesser General Public License for more details. |
ae6b8730 | 16 | |
41bdb6e2 | 17 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 PE |
18 | License along with the GNU C Library; if not, see |
19 | <http://www.gnu.org/licenses/>. */ | |
ae6b8730 RH |
20 | |
21 | #include <sysdep.h> | |
22 | #include <asm/asi.h> | |
ad69cc26 | 23 | |
8cb079d4 UD |
24 | #ifndef XCC /* NOTE(review): XCC is not defined in this file; presumably it is supplied by an included header or the build -- confirm. */ |
25 | .register %g2, #scratch /* declare %g2 as scratch to the assembler; used below as rTMP2 */ | |
26 | .register %g3, #scratch /* declare %g3 as scratch; used below as rWORD2 */ | |
62f29da7 | 27 | .register %g6, #scratch /* declare %g6 as scratch; used below as rBARREL */ |
8cb079d4 | 28 | #endif |
ae6b8730 | 29 | |
ad69cc26 DM |
30 | #define rSTR1 %o0 /* first string pointer; advanced by 8 per doubleword; %o0 also carries the result */ |
31 | #define rSTR2 %o1 /* second string pointer on entry; rewritten to (rSTR2 - rSTR1) so [rSTR1 + rSTR2] addresses the second string */ | |
32 | #define r0101 %o2 /* 0x0101010101010101 */ | |
33 | #define r8080 %o3 /* 0x8080808080808080 */ | |
34 | #define rSTRXOR %o4 /* rWORD1 ^ rWORD2 for the current pair; also scratch during unaligned setup */ | |
35 | #define rWORD1 %o5 /* current doubleword of the first string */ | |
36 | #define rTMP1 %g1 /* scratch; in the loops holds ~rWORD1 & r8080 */ | |
37 | #define rTMP2 %g2 /* scratch; in the loops holds rWORD1 - r0101 */ | |
38 | #define rWORD2 %g3 /* current doubleword of the second string */ | |
39 | #define rSLL %g4 /* barrel path: left-shift count in bits */ | |
40 | #define rSRL %g5 /* barrel path: right-shift count, 64 - rSLL */ | |
41 | #define rBARREL %g6 /* most recently loaded aligned doubleword of the second string */ | |
42 | ||
43 | /* There are two cases, either the two pointers are aligned | |
44 | * identically or they are not. If they have the same | |
45 | * alignment we can use the normal full speed loop. Otherwise | |
46 | * we have to use the barrel-shifter version. | |
ae6b8730 RH |
47 | */ |
48 | ||
49 | .text | |
ad69cc26 | 50 | .align 32 |
ae6b8730 | 51 | ENTRY(strcmp) /* In: rSTR1 (%o0), rSTR2 (%o1) = the two strings. Out: %o0 = 0, 1 or -1. */ |
ad69cc26 DM |
52 | or rSTR2, rSTR1, rTMP1 /* merge both pointers so one test checks both alignments */ |
53 | sethi %hi(0x80808080), r8080 /* start building the 0x80 byte pattern */ | |
54 | ||
55 | andcc rTMP1, 0x7, %g0 /* any of the low three address bits set in either pointer? */ | |
56 | bne,pn %icc, .Lmaybe_barrel_shift /* at least one pointer is not 8-byte aligned */ | |
57 | or r8080, %lo(0x80808080), r8080 /* delay slot, runs on both paths: low 32 bits of the pattern */ | |
58 | ldx [rSTR1], rWORD1 /* both pointers aligned: first doubleword of string 1 */ | |
59 | ||
60 | sub rSTR2, rSTR1, rSTR2 /* keep string 2 as an offset so only rSTR1 is advanced */ | |
61 | sllx r8080, 32, rTMP1 | |
62 | ||
63 | ldx [rSTR1 + rSTR2], rWORD2 /* first doubleword of string 2 */ | |
64 | or r8080, rTMP1, r8080 /* r8080 = 0x8080808080808080 */ | |
65 | ||
66 | ba,pt %xcc, .Laligned_loop_entry | |
67 | srlx r8080, 7, r0101 /* delay slot: r0101 = r8080 >> 7 = 0x0101010101010101 */ | |
68 | ||
69 | .align 32 | |
70 | .Laligned_loop_entry: | |
71 | .Laligned_loop: | |
72 | add rSTR1, 8, rSTR1 /* step to the next doubleword; rWORD1/rWORD2 already hold the current pair */ | |
73 | ||
74 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test on the current rWORD1 */ | |
75 | xorcc rWORD1, rWORD2, rSTRXOR /* non-zero result: the doublewords differ */ | |
76 | bne,pn %xcc, .Lcommon_endstring | |
77 | ||
78 | andn r8080, rWORD1, rTMP1 /* delay slot: second half of the zero-byte test, ~rWORD1 & r8080 */ | |
79 | ||
80 | ldxa [rSTR1] ASI_PNF, rWORD1 /* next doubleword of string 1. NOTE(review): ASI_PNF is not defined in this file; presumably a no-fault ASI so reading past the terminator cannot trap -- confirm */ | |
81 | andcc rTMP1, rTMP2, %g0 /* tests the previous rWORD1 (both halves were computed before the reload): non-zero when it holds a zero byte */ | |
82 | be,a,pt %xcc, .Laligned_loop /* no terminator yet: keep going; the annulled delay slot runs only when the branch is taken */ | |
83 | ||
84 | ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2 /* delay slot: next doubleword of string 2 */ | |
85 | ||
86 | .Lcommon_equal: | |
87 | retl /* the doublewords were equal and held a terminator */ | |
88 | mov 0, %o0 /* delay slot: return 0 */ | |
89 | ||
90 | /* All loops terminate here once they find an unequal word. | |
91 | * If a zero byte appears in the word before the first unequal | |
92 | * byte, we must report zero. Otherwise we report '1' or '-1' | |
93 | * depending upon whether the first mis-matching byte is larger | |
94 | * in the first string or the second, respectively. | |
95 | * | |
96 | * First we compute a 64-bit mask value that has "0x01" in | |
97 | * each byte where a zero exists in rWORD1. rSTRXOR holds the | |
98 | * value (rWORD1 ^ rWORD2). Therefore if, considered as an | |
99 | * unsigned quantity, our "0x01" mask value is "greater than" | |
100 | * rSTRXOR, then a zero terminating byte comes first and | |
101 | * therefore we report '0'. | |
102 | * | |
103 | * The formula for this mask is: | |
104 | * | |
105 | * mask_tmp1 = ~rWORD1 & 0x8080808080808080; | |
106 | * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) + | |
107 | * 0x7f7f7f7f7f7f7f7f); | |
108 | * | |
109 | * mask = ((mask_tmp1 & ~mask_tmp2) >> 7); | |
110 | */ | |
111 | .Lcommon_endstring: | |
112 | andn rWORD1, r8080, rTMP2 /* rTMP2 = rWORD1 & 0x7f7f7f7f7f7f7f7f */ | |
113 | or r8080, 1, %o1 /* %o1 = 0x8080808080808081; %o1 (rSTR2) is reusable because no loads follow */ | |
114 | ||
115 | mov 1, %o0 /* provisional result: 1 */ | |
116 | sub rTMP2, %o1, rTMP2 /* subtracting 0x80...81 equals adding 0x7f...7f modulo 2^64: mask_tmp2 */ | |
117 | ||
118 | cmp rWORD1, rWORD2 /* condition codes stay live until movleu; the andn between does not set them */ | |
119 | andn rTMP1, rTMP2, rTMP1 /* rTMP1 arrives holding mask_tmp1 from the loop; now mask_tmp1 & ~mask_tmp2 */ | |
120 | ||
121 | movleu %xcc, -1, %o0 /* the words differ here, so this picks -1 when rWORD1 is the smaller unsigned value */ | |
122 | srlx rTMP1, 7, rTMP1 /* move each 0x80 flag down to 0x01 */ | |
123 | ||
5331255b DM |
124 | /* In order not to be influenced by bytes after the zero byte, we |
125 | * have to retain only the highest bit in the mask for the comparison | |
126 | * with rSTRXOR to work properly. | |
127 | */ | |
128 | mov 0, rTMP2 /* rTMP2 = bit position of the highest flag found so far, default 0 */ | |
129 | andcc rTMP1, 0x0100, %g0 | |
130 | ||
131 | movne %xcc, 8, rTMP2 /* flag at bit 8 is set */ | |
132 | sllx rTMP1, 63 - 16, %o1 /* bring the bit-16 flag into the sign bit; the same pattern repeats for bits 24..56 */ | |
133 | ||
134 | movrlz %o1, 16, rTMP2 /* sign bit set: record 16; later (higher) positions overwrite earlier ones */ | |
135 | sllx rTMP1, 63 - 24, %o1 | |
136 | ||
137 | movrlz %o1, 24, rTMP2 | |
138 | sllx rTMP1, 63 - 32, %o1 | |
139 | ||
140 | movrlz %o1, 32, rTMP2 | |
141 | sllx rTMP1, 63 - 40, %o1 | |
142 | ||
143 | movrlz %o1, 40, rTMP2 | |
144 | sllx rTMP1, 63 - 48, %o1 | |
145 | ||
146 | movrlz %o1, 48, rTMP2 | |
147 | sllx rTMP1, 63 - 56, %o1 | |
148 | ||
149 | movrlz %o1, 56, rTMP2 | |
150 | ||
151 | srlx rTMP1, rTMP2, rTMP1 /* shift right then left by the same count: clears every mask bit below the recorded position */ | |
152 | ||
153 | sllx rTMP1, rTMP2, rTMP1 | |
154 | ||
ad69cc26 DM |
155 | cmp rTMP1, rSTRXOR /* mask versus (rWORD1 ^ rWORD2), unsigned */ |
156 | retl | |
157 | movgu %xcc, 0, %o0 /* delay slot: mask is greater, so the terminator precedes the first difference: return 0 */ | |
158 | ||
159 | .Lmaybe_barrel_shift: | |
160 | sub rSTR2, rSTR1, rSTR2 /* keep string 2 as an offset from string 1 */ | |
161 | sllx r8080, 32, rTMP1 | |
162 | ||
163 | or r8080, rTMP1, r8080 /* r8080 = 0x8080808080808080 */ | |
164 | and rSTR1, 0x7, rTMP2 /* byte misalignment of string 1 */ | |
165 | ||
166 | srlx r8080, 7, r0101 /* r0101 = 0x0101010101010101 */ | |
167 | andn rSTR1, 0x7, rSTR1 /* round string 1 down to an 8-byte boundary */ | |
168 | ||
169 | ldxa [rSTR1] ASI_PNF, rWORD1 /* aligned doubleword containing the start of string 1 */ | |
170 | andcc rSTR2, 0x7, rSLL /* low three bits of the pointer difference; zero means both strings share one misalignment */ | |
171 | sll rTMP2, 3, rSTRXOR /* misalignment of string 1 in bits (rSTRXOR used as scratch here) */ | |
172 | ||
173 | bne,pn %icc, .Lneed_barrel_shift /* misalignments differ: string 2 must be reassembled by shifting */ | |
174 | mov -1, rTMP1 /* delay slot, runs on both paths: all-ones seed for the lead-in mask */ | |
175 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* aligned doubleword containing the start of string 2 */ | |
176 | ||
177 | srlx rTMP1, rSTRXOR, rTMP2 /* zeros in the high-order bits that come from before the string start, ones elsewhere */ | |
178 | ||
179 | orn rWORD1, rTMP2, rWORD1 /* force the bits before the string start to ones so they compare equal and are never zero bytes */ | |
180 | ba,pt %xcc, .Laligned_loop_entry | |
181 | orn rBARREL, rTMP2, rWORD2 /* delay slot: same treatment for string 2 */ | |
182 | ||
183 | .Lneed_barrel_shift: | |
184 | sllx rSLL, 3, rSLL /* left-shift count in bits */ | |
185 | andn rSTR2, 0x7, rSTR2 /* round the offset down so rSTR1 + rSTR2 is 8-byte aligned */ | |
186 | ||
187 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* first aligned doubleword of string 2 */ | |
188 | mov 64, rTMP2 | |
189 | sub rTMP2, rSLL, rSRL /* rSRL = 64 - rSLL */ | |
190 | ||
191 | srlx rTMP1, rSTRXOR, rTMP1 /* lead-in mask: rTMP1 is -1 from the delay slot, rSTRXOR the bit misalignment of string 1 */ | |
192 | add rSTR2, 8, rSTR2 /* the offset now points at the following doubleword of string 2 */ | |
193 | ||
194 | orn rWORD1, rTMP1, rWORD1 /* force the bits before the start of string 1 to ones */ | |
195 | sllx rBARREL, rSLL, rWORD2 /* high part of the reassembled doubleword */ | |
196 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* next aligned doubleword of string 2 */ | |
197 | ||
198 | add rSTR1, 8, rSTR1 | |
199 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test */ | |
200 | ||
201 | srlx rBARREL, rSRL, rSTRXOR /* low part of the reassembled doubleword */ | |
202 | ||
203 | or rWORD2, rSTRXOR, rWORD2 | |
204 | ||
205 | orn rWORD2, rTMP1, rWORD2 /* force the same lead-in bits to ones in string 2 */ | |
206 | ba,pt %xcc, .Lbarrel_shift_loop_entry | |
207 | andn r8080, rWORD1, rTMP1 /* delay slot: second half of the zero-byte test */ | |
208 | ||
209 | .Lbarrel_shift_loop: | |
210 | sllx rBARREL, rSLL, rWORD2 /* bytes left over from the previously loaded doubleword */ | |
211 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* next aligned doubleword of string 2 */ | |
212 | ||
213 | add rSTR1, 8, rSTR1 | |
214 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test */ | |
215 | ||
216 | srlx rBARREL, rSRL, rSTRXOR /* leading bytes of the new doubleword */ | |
217 | andn r8080, rWORD1, rTMP1 /* second half of the zero-byte test */ | |
218 | ||
219 | or rWORD2, rSTRXOR, rWORD2 /* rWORD2 = string 2 doubleword lined up with rWORD1 */ | |
220 | ||
221 | .Lbarrel_shift_loop_entry: | |
222 | xorcc rWORD1, rWORD2, rSTRXOR /* non-zero result: the doublewords differ */ | |
223 | bne,pn %xcc, .Lcommon_endstring | |
224 | ||
225 | andcc rTMP1, rTMP2, %g0 /* delay slot: zero-byte test on rWORD1, non-zero when it holds a zero byte */ | |
226 | be,a,pt %xcc, .Lbarrel_shift_loop /* no terminator yet: keep going */ | |
227 | ldxa [rSTR1] ASI_PNF, rWORD1 /* annulled delay slot, runs only when the branch is taken: next doubleword of string 1 */ | |
228 | ||
229 | retl /* equal doublewords containing a terminator */ | |
230 | mov 0, %o0 /* delay slot: return 0 */ | |
ae6b8730 | 231 | END(strcmp) |
85dd1003 | 232 | libc_hidden_builtin_def (strcmp) |