]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/alpha/strncmp.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / alpha / strncmp.S
1 /* Copyright (C) 1996-2019 Free Software Foundation, Inc.
2 Contributed by Richard Henderson (rth@tamu.edu)
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
18
19 /* Compare two null-terminated strings bytewise, examining at most N bytes of each. */
20
21 #include <sysdep.h>
22
/* Assembler mode for the whole file.  NOTE(review): per the usual GNU as
   meaning of these directives, noat lets the code name the assembler
   temporary AT itself (the PROF prologue of strncmp does) and noreorder
   keeps the instructions in the hand-scheduled order written here --
   confirm against the assembler documentation.  */
23 .set noat
24 .set noreorder
25
26 /* EV6 only predicts one branch per octaword. We'll use these to push
27 subsequent branches back to the next bundle. This will generally add
28 a fetch+decode cycle to older machines, so skip in that case. */
/* "These" are the ev6_unop padding slots: ev6_unop expands to a unop
   instruction when __alpha_fix__ is defined and to nothing otherwise.
   NOTE(review): __alpha_fix__ is presumably used as the compile-time
   indicator of an EV6-class target -- confirm that this is the intended
   macro.  */
29 #ifdef __alpha_fix__
30 # define ev6_unop unop
31 #else
32 # define ev6_unop
33 #endif
34
35 .text
36
/* strncmp: compare the strings at a0 (s1) and a1 (s2) over at most a2
   bytes, working one 8-byte word at a time.

   In:   a0 = s1, a1 = s2, a2 = count.
   Out:  v0 = 0 if no differing byte is found up to the first null byte
         or the end of the count; otherwise 1 if the least significant
         differing byte of the word pair is larger in s1 (unsigned byte
         compare), else -1.  NOTE(review): "least significant" equals
         "earliest in memory" assuming little-endian loads -- confirm.
   Uses: t0-t10 and a3 as scratch; a0, a1 and a2 are modified.  AT, gp
         and pv are referenced only in the PROF prologue.

   A zero count returns 0 before either string is read.  A count that
   is negative when viewed as signed is clamped to LONG_MAX.

   Structure: a shared setup, then either the co-aligned path ($aligned,
   $a_loop, $ant_loop) or the path for differently aligned strings
   ($unaligned, $u_head, $u_loop, $u_final); all of them finish in the
   common tails $eoc, $eos, $wordcmp and $zerolength.  */
37 ENTRY(strncmp)
38 #ifdef PROF
/* Profiling build: load gp from pv and call _mcount through AT.  */
39 ldgp gp, 0(pv)
40 lda AT, _mcount
41 jsr AT, (AT), _mcount
42 .prologue 1
43 #else
44 .prologue 0
45 #endif
46
47 xor a0, a1, t2 # are s1 and s2 co-aligned?
48 beq a2, $zerolength # count == 0: return 0 before any load
49 ldq_u t0, 0(a0) # load asap to give cache time to catch up
50 ldq_u t1, 0(a1)
51 lda t3, -1 # t3 = all ones, source for the byte masks below
52 and t2, 7, t2 # t2 != 0 iff the low three address bits differ
53 srl t3, 1, t6 # t6 = LONG_MAX
54 and a0, 7, t4 # find s1 misalignment
55 and a1, 7, t5 # find s2 misalignment
56 cmovlt a2, t6, a2 # bound neg count to LONG_MAX
57 addq a1, a2, a3 # s2+count
58 addq a2, t4, a2 # bias count by s1 misalignment
59 and a2, 7, t10 # ofs of last byte in s1 last word
60 srl a2, 3, a2 # remaining full words in s1 count
61 bne t2, $unaligned
62
63 /* On entry to this basic block:
64 t0 == the first word of s1.
65 t1 == the first word of s2.
66 t3 == -1. */
/* The strings are co-aligned here, so the low bits of a1 describe the
   misalignment of both and one mask serves for t0 and t1.  */
67 $aligned:
68 mskqh t3, a1, t8 # mask off leading garbage
69 ornot t1, t8, t1 # force the bytes before the string start to 0xff ...
70 ornot t0, t8, t0 # ... in both words: they compare equal and are never null
71 cmpbge zero, t1, t7 # bits set iff null found
72 beq a2, $eoc # check end of count
73 bne t7, $eos
74 beq t10, $ant_loop # count ends on a word boundary: use the no-tail loop
75
76 /* Aligned compare main loop.
77 On entry to this basic block:
78 t0 == an s1 word.
79 t1 == an s2 word not containing a null. */
80
81 .align 4
82 $a_loop:
83 xor t0, t1, t2 # e0 : do the current words differ?
84 bne t2, $wordcmp # .. e1 (zdb)
85 ldq_u t1, 8(a1) # e0 : next s2 word
86 ldq_u t0, 8(a0) # .. e1 : next s1 word
87
88 subq a2, 1, a2 # e0 : one fewer full word in the count
89 addq a1, 8, a1 # .. e1 :
90 addq a0, 8, a0 # e0 :
91 beq a2, $eoc # .. e1 : the word just loaded holds the end of count
92
93 cmpbge zero, t1, t7 # e0 : null in the new s2 word?
94 beq t7, $a_loop # .. e1 :
95
96 br $eos
97
98 /* Alternate aligned compare loop, for when there's no trailing
99 bytes on the count. We have to avoid reading too much data. */
/* Unlike $a_loop, the count is tested before the next pair of words
   is loaded, so no word beyond the counted bytes is read.  */
100 .align 4
101 $ant_loop:
102 xor t0, t1, t2 # e0 : do the current words differ?
103 ev6_unop
104 ev6_unop
105 bne t2, $wordcmp # .. e1 (zdb)
106
107 subq a2, 1, a2 # e0 :
108 beq a2, $zerolength # .. e1 : count used up with every word equal: return 0
109 ldq_u t1, 8(a1) # e0 :
110 ldq_u t0, 8(a0) # .. e1 :
111
112 addq a1, 8, a1 # e0 :
113 addq a0, 8, a0 # .. e1 :
114 cmpbge zero, t1, t7 # e0 : null in the new s2 word?
115 beq t7, $ant_loop # .. e1 :
116
117 br $eos
118
119 /* The two strings are not co-aligned. Align s1 and cope. */
120 /* On entry to this basic block:
121 t0 == the first word of s1.
122 t1 == the first word of s2.
123 t3 == -1.
124 t4 == misalignment of s1.
125 t5 == misalignment of s2.
126 t10 == misalignment of s1 end. */
127 .align 4
128 $unaligned:
129 /* If s2 misalignment is larger than s1 misalignment, we need
130 extra startup checks to avoid SEGV. */
/* That is the t4 < t5 case computed into t9 below: the first s1 word
   then also needs bytes from the second s2 word, which must not be
   loaded when the string or the count ends inside the first one.  */
131 subq a1, t4, a1 # adjust s2 for s1 misalignment
132 cmpult t4, t5, t9 # t9 = 1 iff s1 misalignment < s2 misalignment
133 subq a3, 1, a3 # last byte of s2
134 bic a1, 7, t8 # t8 = adjusted s2 pointer rounded down to a word
135 mskqh t3, t5, t7 # mask garbage in s2
136 subq a3, t8, a3 # byte distance from that word to the last s2 byte
137 ornot t1, t7, t7 # t7 = first s2 word with its leading garbage set to 0xff
138 srl a3, 3, a3 # remaining full words in s2 count
139 beq t9, $u_head # no extra checks needed
140
141 /* Failing that, we need to look for both eos and eoc within the
142 first word of s2. If we find either, we can continue by
143 pretending that the next word of s2 is all zeros. */
144 lda t2, 0 # next = zero
145 cmpeq a3, 0, t8 # eoc in the first word of s2?
146 cmpbge zero, t7, t7 # eos in the first word of s2?
147 or t7, t8, t8
148 bne t8, $u_head_nl # either one: skip the load of the second s2 word
149
150 /* We know just enough now to be able to assemble the first
151 full word of s2. We can still find a zero at the end of it.
152
153 On entry to this basic block:
154 t0 == first word of s1
155 t1 == first partial word of s2.
156 t3 == -1.
157 t10 == ofs of last byte in s1 last word.
158 a3 == remaining full words in s2 count. */
/* NOTE(review): this list used to name t11, a register this routine
   never reads or writes; the s2 bookkeeping is carried in a3.  */
159 $u_head:
160 ldq_u t2, 8(a1) # load second partial s2 word
161 subq a3, 1, a3
162 $u_head_nl:
163 extql t1, a1, t1 # create first s2 word
164 mskqh t3, a0, t8 # t8 = mask of the bytes at or after the start of s1
165 extqh t2, a1, t4 # bytes contributed by the second s2 word
166 ornot t0, t8, t0 # kill s1 garbage
167 or t1, t4, t1 # s2 word now complete
168 cmpbge zero, t0, t7 # find eos in first s1 word
169 ornot t1, t8, t1 # kill s2 garbage
170 beq a2, $eoc # the s1 count ends inside this first word
171 subq a2, 1, a2
172 bne t7, $eos
173 mskql t3, a1, t8 # mask out s2[1] bits we have seen
174 xor t0, t1, t4 # compare aligned words
175 or t2, t8, t8 # s2[1] with the already compared bytes forced non-null
176 bne t4, $wordcmp
177 cmpbge zero, t8, t7 # eos in high bits of s2[1]?
178 cmpeq a3, 0, t8 # eoc in s2[1]?
179 or t7, t8, t7
180 bne t7, $u_final
181
182 /* Unaligned compare main loop. In order to avoid reading too much,
183 the loop is structured to detect zeros in aligned words from s2.
184 This has, unfortunately, effectively pulled half of a loop
185 iteration out into the head and half into the tail, but it does
186 prevent nastiness from accumulating in the very thing we want
187 to run as fast as possible.
188
189 On entry to this basic block:
190 t2 == the unshifted low-bits from the next s2 word.
191 t10 == ofs of last byte in s1 last word.
192 a2 == remaining full words in s1 count; a3 == the same for s2. */
193 .align 4
194 $u_loop:
195 extql t2, a1, t3 # e0 : low part of the next s2 word, from the old t2
196 ldq_u t2, 16(a1) # .. e1 : load next s2 high bits
197 ldq_u t0, 8(a0) # e0 : load next s1 word
198 addq a1, 8, a1 # .. e1 :
199
200 addq a0, 8, a0 # e0 :
201 subq a3, 1, a3 # .. e1 :
202 extqh t2, a1, t1 # e0 : high part of the next s2 word, from the new t2
203 cmpbge zero, t0, t7 # .. e1 : eos in current s1 word
204
205 or t1, t3, t1 # e0 : s2 word now complete
206 beq a2, $eoc # .. e1 : eoc in current s1 word
207 subq a2, 1, a2 # e0 :
208 cmpbge zero, t2, t4 # .. e1 : eos in s2[1]
209
210 xor t0, t1, t3 # e0 : compare the words
211 ev6_unop
212 ev6_unop
213 bne t7, $eos # .. e1 :
214
215 cmpeq a3, 0, t5 # e0 : eoc in s2[1]
216 ev6_unop
217 ev6_unop
218 bne t3, $wordcmp # .. e1 :
219
220 or t4, t5, t4 # e0 : eos or eoc in s2[1].
221 beq t4, $u_loop # .. e1 (zdb)
222
223 /* We've found a zero in the low bits of the last s2 word. Get
224 the next s1 word and align them. */
225 .align 3
226 $u_final:
227 ldq_u t0, 8(a0) # next s1 word
228 extql t2, a1, t1 # remaining bytes of the last s2 word, shifted into place
229 cmpbge zero, t1, t7 # null bytes in that s2 remainder
230 bne a2, $eos # s1 count goes past this word; otherwise fall into $eoc
231
232 /* We've hit end of count. Zero everything after the count
233 and compare what's left. */
234 .align 3
235 $eoc:
236 mskql t0, t10, t0 # keep only the bytes below offset t10
237 mskql t1, t10, t1
238 cmpbge zero, t1, t7 # the zeroed tail is then found like a terminator
239
240 /* We've found a zero somewhere in a word we just read.
241 On entry to this basic block:
242 t0 == s1 word
243 t1 == s2 word
244 t7 == cmpbge mask containing the zero. */
245 .align 3
246 $eos:
247 negq t7, t6 # create bytemask of valid data
248 and t6, t7, t8 # t8 = lowest set bit of t7, the first zero byte
249 subq t8, 1, t6 # t6 = the bits of all bytes below it
250 or t6, t8, t7 # t7 = every byte up to and including that zero
251 zapnot t0, t7, t0 # kill the garbage
252 zapnot t1, t7, t1
253 xor t0, t1, v0 # ... and compare
254 beq v0, $done # identical: v0 already holds the 0 to return
255
256 /* Here we have two differing co-aligned words in t0 & t1.
257 Bytewise compare them and return (t0 > t1 ? 1 : -1). */
258 .align 3
259 $wordcmp:
260 cmpbge t0, t1, t2 # comparison yields bit mask of ge
261 cmpbge t1, t0, t3
262 xor t2, t3, t0 # bits set iff t0/t1 bytes differ
263 negq t0, t1 # clear all but least bit
264 and t0, t1, t0
265 lda v0, -1 # default result: s1 byte is the smaller
266 and t0, t2, t1 # was bit set in t0 > t1?
267 cmovne t1, 1, v0 # yes: return 1 instead
268 $done:
269 ret
270
271 .align 3
272 $zerolength:
273 clr v0 # nothing left to compare: return 0
274 ret
275
276 END(strncmp)
277 libc_hidden_builtin_def (strncmp)