]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/sparc/sparc64/strcmp.S
a36992e4377b2693f2ce71776012d280948b10e4
[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / strcmp.S
1 /* Compare two strings for differences.
2 For SPARC v9.
3 Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by David S. Miller <davem@davemloft.net>
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21 #include <sysdep.h>
22 #include <asm/asi.h>
23
24 #ifndef XCC /* NOTE(review): XCC is not defined in this file; presumably supplied by an including file or header -- confirm */
25 .register %g2, #scratch /* declare %g2 (rTMP2) as a scratch register to the assembler */
26 .register %g3, #scratch /* declare %g3 (rWORD2) as a scratch register to the assembler */
27 .register %g6, #scratch /* declare %g6 (rBARREL) as a scratch register to the assembler */
28 #endif
29
30 #define rSTR1 %o0 /* first string pointer; advanced 8 bytes per word compared */
31 #define rSTR2 %o1 /* second string pointer on entry; rewritten to the offset (s2 - s1) so [rSTR1 + rSTR2] addresses string 2 */
32 #define r0101 %o2 /* 0x0101010101010101, derived as r8080 >> 7 */
33 #define r8080 %o3 /* 0x8080808080808080, built from sethi/or/sllx/or */
34 #define rSTRXOR %o4 /* rWORD1 ^ rWORD2; also reused as a shift count / temporary on the unaligned paths */
35 #define rWORD1 %o5 /* current 8-byte word of string 1 */
36 #define rTMP1 %g1 /* temporary; holds (~rWORD1 & r8080) when a compare loop exits */
37 #define rTMP2 %g2 /* temporary; holds (rWORD1 - r0101) inside the compare loops */
38 #define rWORD2 %g3 /* current 8-byte word of string 2 */
39 #define rSLL %g4 /* barrel shifter: left shift count in bits, ((s2 - s1) & 7) * 8 */
40 #define rSRL %g5 /* barrel shifter: right shift count in bits, 64 - rSLL */
41 #define rBARREL %g6 /* barrel shifter: most recently loaded aligned word of string 2 */
42
/* int strcmp (const char *s1, const char *s2)
 *
 * In:       %o0 (rSTR1) = s1, %o1 (rSTR2) = s2.
 * Out:      %o0 = 0 when the strings are equal, 1 when the first
 *           mismatching byte is larger in s1, -1 when it is larger
 *           in s2.
 * Clobbers: %o1-%o5, %g1-%g6 and the integer condition codes.
 *
 * Leaf routine: it returns with retl and never executes save.
 *
 * The strings are compared one 64-bit word at a time.  There are
 * two cases, either the two pointers are aligned identically or
 * they are not.  If they have the same alignment we can use the
 * normal full speed loop.  Otherwise we have to use the
 * barrel-shifter version, which rebuilds each word of the second
 * string from two neighbouring aligned words.
 *
 * Layout convention: an instruction indented by one extra space sits
 * in the delay slot of the branch directly above it.
 */

	.text
	.align	32
ENTRY(strcmp)
	or	rSTR2, rSTR1, rTMP1
	sethi	%hi(0x80808080), r8080

	/* Any low bit set in (s1 | s2) means at least one pointer is not
	 * 8-byte aligned.
	 */
	andcc	rTMP1, 0x7, %g0
	bne,pn	%icc, .Lmaybe_barrel_shift
	 or	r8080, %lo(0x80808080), r8080
	ldx	[rSTR1], rWORD1

	/* From here on rSTR2 is the offset (s2 - s1), so a single pointer
	 * increment advances both strings.
	 */
	sub	rSTR2, rSTR1, rSTR2
	sllx	r8080, 32, rTMP1

	ldx	[rSTR1 + rSTR2], rWORD2
	or	r8080, rTMP1, r8080		/* 0x8080808080808080  */

	ba,pt	%xcc, .Laligned_loop_entry
	 srlx	r8080, 7, r0101			/* 0x0101010101010101  */

	/* Full speed loop.  On entry rWORD1/rWORD2 hold the words to be
	 * compared and rSTR1 still points at rWORD1's address.
	 *
	 * (rWORD1 - r0101) & ~rWORD1 & r8080 is non-zero exactly when
	 * rWORD1 contains a zero byte.
	 */
	.align	32
.Laligned_loop_entry:
.Laligned_loop:
	add	rSTR1, 8, rSTR1

	sub	rWORD1, r0101, rTMP2
	xorcc	rWORD1, rWORD2, rSTRXOR
	bne,pn	%xcc, .Lcommon_endstring

	 andn	r8080, rWORD1, rTMP1		/* needed by .Lcommon_endstring too  */

	/* The next word of string 1 is fetched before we know whether the
	 * current one held the terminator, hence the ASI_PNF load.
	 */
	ldxa	[rSTR1] ASI_PNF, rWORD1
	andcc	rTMP1, rTMP2, %g0
	/* Annulled branch: the delay-slot load only executes when the
	 * loop continues, i.e. when no zero byte was found.
	 */
	be,a,pt	%xcc, .Laligned_loop

	 ldxa	[rSTR1 + rSTR2] ASI_PNF, rWORD2

	/* Equal words containing a zero byte: the strings are equal.  */
.Lcommon_equal:
	retl
	 mov	0, %o0

	/* All loops terminate here once they find an unequal word.
	 * On entry:
	 *
	 *    rWORD1, rWORD2 = the unequal words
	 *    rSTRXOR        = rWORD1 ^ rWORD2
	 *    rTMP1          = ~rWORD1 & 0x8080808080808080
	 *
	 * If a zero byte appears in the word before the first unequal
	 * byte, we must report zero.  Otherwise we report '1' or '-1'
	 * depending upon whether the first mis-matching byte is larger
	 * in the first string or the second, respectively.  Earlier
	 * string bytes are the more significant bytes of a word, so the
	 * unsigned comparison of rWORD1 with rWORD2 decides the sign.
	 *
	 * First we compute a 64-bit mask value that has "0x01" in
	 * each byte where a zero exists in rWORD1.  The formula for
	 * this mask is:
	 *
	 *    mask_tmp1 = ~rWORD1 & 0x8080808080808080;
	 *    mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
	 *                 0x7f7f7f7f7f7f7f7f);
	 *
	 *    mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
	 *
	 * The addition is performed as a subtraction of
	 * 0x8080808080808081, which is the same value modulo 2^64.
	 *
	 * The mask is then reduced to its most significant set bit, i.e.
	 * to the first zero byte only.  If that reduced mask, considered
	 * as an unsigned quantity, is "greater than" rSTRXOR then the
	 * terminating zero comes before the first difference and we
	 * report '0'.
	 */
.Lcommon_endstring:
	andn	rWORD1, r8080, rTMP2		/* rWORD1 & 0x7f7f7f7f7f7f7f7f  */
	or	r8080, 1, %o1			/* 0x8080808080808081  */

	mov	1, %o0				/* provisional result: s1 > s2  */
	sub	rTMP2, %o1, rTMP2		/* mask_tmp2  */

	cmp	rWORD1, rWORD2
	andn	rTMP1, rTMP2, rTMP1		/* mask_tmp1 & ~mask_tmp2  */

	movleu	%xcc, -1, %o0			/* s1 < s2 (words are never equal here)  */
	srlx	rTMP1, 7, rTMP1			/* mask  */

	/* In order not to be influenced by bytes after the zero byte, we
	 * have to retain only the highest bit in the mask for the
	 * comparison with rSTRXOR to work properly.  Without this, a word
	 * such as 00 00 .. 00 compared against 01 00 .. 00 yields a mask
	 * of 0x0101..01 which is greater than the XOR value 0x0100..00,
	 * and two different strings would be reported as equal.
	 *
	 * rTMP2 is set to the bit position (0, 8, ..., 56) of the highest
	 * set mask bit.  Each candidate bit is shifted into the sign bit
	 * of %o1 and tested with movrlz; later (higher) candidates
	 * override earlier ones.  The condition codes of the cmp above
	 * have already been consumed by movleu, so andcc may clobber them.
	 */
	mov	0, rTMP2
	andcc	rTMP1, 0x0100, %g0

	movne	%xcc, 8, rTMP2
	sllx	rTMP1, 63 - 16, %o1

	movrlz	%o1, 16, rTMP2
	sllx	rTMP1, 63 - 24, %o1

	movrlz	%o1, 24, rTMP2
	sllx	rTMP1, 63 - 32, %o1

	movrlz	%o1, 32, rTMP2
	sllx	rTMP1, 63 - 40, %o1

	movrlz	%o1, 40, rTMP2
	sllx	rTMP1, 63 - 48, %o1

	movrlz	%o1, 48, rTMP2
	sllx	rTMP1, 63 - 56, %o1

	movrlz	%o1, 56, rTMP2

	/* Shift out and back in to clear every mask bit below rTMP2.  */
	srlx	rTMP1, rTMP2, rTMP1

	sllx	rTMP1, rTMP2, rTMP1

	cmp	rTMP1, rSTRXOR
	retl
	 movgu	%xcc, 0, %o0			/* zero byte precedes the difference  */

	/* At least one pointer is not 8-byte aligned.  Round rSTR1 down
	 * to an aligned address and compute the head mask shift
	 * rSTRXOR = (s1 & 7) * 8 used to neutralise the bytes that precede
	 * the start of the string.  rSLL = (s2 - s1) & 7 tells whether
	 * both strings share the same misalignment.
	 */
.Lmaybe_barrel_shift:
	sub	rSTR2, rSTR1, rSTR2		/* rSTR2 = s2 - s1  */
	sllx	r8080, 32, rTMP1

	or	r8080, rTMP1, r8080		/* 0x8080808080808080  */
	and	rSTR1, 0x7, rTMP2

	srlx	r8080, 7, r0101			/* 0x0101010101010101  */
	andn	rSTR1, 0x7, rSTR1

	ldxa	[rSTR1] ASI_PNF, rWORD1
	andcc	rSTR2, 0x7, rSLL
	sll	rTMP2, 3, rSTRXOR

	bne,pn	%icc, .Lneed_barrel_shift
	 mov	-1, rTMP1
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	/* Same misalignment: force the bytes in front of the strings to
	 * 0xff in both words, so they compare equal and are never taken
	 * for a terminator, then use the full speed loop.
	 */
	srlx	rTMP1, rSTRXOR, rTMP2

	orn	rWORD1, rTMP2, rWORD1
	ba,pt	%xcc, .Laligned_loop_entry
	 orn	rBARREL, rTMP2, rWORD2

	/* Different misalignment.  Each word of string 2 is assembled as
	 *
	 *    rWORD2 = (previous aligned word << rSLL)
	 *             | (next aligned word >> rSRL)
	 *
	 * with rSLL = ((s2 - s1) & 7) * 8 and rSRL = 64 - rSLL.  rSTR2 is
	 * rounded down to a multiple of 8 and then advanced by 8 so that
	 * [rSTR1 + rSTR2] addresses the "next" aligned word of string 2.
	 */
.Lneed_barrel_shift:
	sllx	rSLL, 3, rSLL
	andn	rSTR2, 0x7, rSTR2

	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
	mov	64, rTMP2
	sub	rTMP2, rSLL, rSRL

	srlx	rTMP1, rSTRXOR, rTMP1		/* head mask (rTMP1 was -1)  */
	add	rSTR2, 8, rSTR2

	orn	rWORD1, rTMP1, rWORD1		/* bytes before s1 become 0xff  */
	sllx	rBARREL, rSLL, rWORD2
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	add	rSTR1, 8, rSTR1
	sub	rWORD1, r0101, rTMP2

	srlx	rBARREL, rSRL, rSTRXOR

	or	rWORD2, rSTRXOR, rWORD2

	orn	rWORD2, rTMP1, rWORD2		/* same head mask for string 2  */
	ba,pt	%xcc, .Lbarrel_shift_loop_entry
	 andn	r8080, rWORD1, rTMP1

	/* Barrel-shifter loop.  rWORD1 has just been loaded by the
	 * annulled delay slot at the bottom; rBARREL still holds the
	 * aligned word of string 2 loaded in the previous iteration.
	 */
.Lbarrel_shift_loop:
	sllx	rBARREL, rSLL, rWORD2
	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL

	add	rSTR1, 8, rSTR1
	sub	rWORD1, r0101, rTMP2

	srlx	rBARREL, rSRL, rSTRXOR
	andn	r8080, rWORD1, rTMP1

	or	rWORD2, rSTRXOR, rWORD2

.Lbarrel_shift_loop_entry:
	xorcc	rWORD1, rWORD2, rSTRXOR
	bne,pn	%xcc, .Lcommon_endstring

	/* Delay slot of the bne above; it only writes the condition
	 * codes, so rTMP1 reaches .Lcommon_endstring intact.
	 */
	 andcc	rTMP1, rTMP2, %g0
	be,a,pt	%xcc, .Lbarrel_shift_loop
	 ldxa	[rSTR1] ASI_PNF, rWORD1

	/* Equal words containing a zero byte: the strings are equal.  */
	retl
	 mov	0, %o0
END(strcmp)
libc_hidden_builtin_def (strcmp)