]>
Commit | Line | Data |
---|---|---|
c8a89e7d | 1 | /* Copyright (C) 1991,1993,1995,1997,1998,2003,2004,2012 |
758b2153 | 2 | Free Software Foundation, Inc. |
478b92f0 | 3 | This file is part of the GNU C Library. |
28f540f4 RM |
4 | Contributed by Torbjorn Granlund (tege@sics.se). |
5 | ||
478b92f0 | 6 | The GNU C Library is free software; you can redistribute it and/or |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
28f540f4 | 10 | |
478b92f0 UD |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
28f540f4 | 15 | |
41bdb6e2 AJ |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
28f540f4 RM |
20 | |
21 | #ifdef HAVE_CONFIG_H | |
af6f3906 | 22 | # include "config.h" |
28f540f4 RM |
23 | #endif |
24 | ||
25 | #undef __ptr_t | |
c8a89e7d | 26 | #define __ptr_t void * |
28f540f4 | 27 | |
af6f3906 UD |
28 | #if defined HAVE_STRING_H || defined _LIBC |
29 | # include <string.h> | |
28f540f4 RM |
30 | #endif |
31 | ||
9a0a462c UD |
32 | #undef memcmp |
33 | ||
28f540f4 RM |
34 | #ifdef _LIBC |
35 | ||
af6f3906 | 36 | # include <memcopy.h> |
789b13c4 UD |
37 | # include <endian.h> |
38 | ||
39 | # if __BYTE_ORDER == __BIG_ENDIAN | |
40 | # define WORDS_BIGENDIAN | |
41 | # endif | |
28f540f4 RM |
42 | |
43 | #else /* Not in the GNU C library. */ | |
44 | ||
af6f3906 | 45 | # include <sys/types.h> |
28f540f4 RM |
46 | |
47 | /* Type to use for aligned memory operations. | |
48 | This should normally be the biggest type supported by a single load | |
49 | and store. Must be an unsigned type. */ | |
af6f3906 UD |
50 | # define op_t unsigned long int |
51 | # define OPSIZ (sizeof(op_t)) | |
28f540f4 RM |
52 | |
53 | /* Threshold value for when to enter the unrolled loops. */ | |
af6f3906 | 54 | # define OP_T_THRES 16 |
28f540f4 RM |
55 | |
56 | /* Type to use for unaligned operations. */ | |
57 | typedef unsigned char byte; | |
58 | ||
af6f3906 UD |
59 | # ifndef WORDS_BIGENDIAN |
60 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) | |
61 | # else | |
62 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) | |
63 | # endif | |
28f540f4 RM |
64 | |
65 | #endif /* In the GNU C library. */ | |
66 | ||
67 | #ifdef WORDS_BIGENDIAN | |
af6f3906 | 68 | # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) |
28f540f4 | 69 | #else |
af6f3906 | 70 | # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) |
28f540f4 RM |
71 | #endif |
72 | ||
73 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ | |
74 | ||
75 | /* The strategy of this memcmp is: | |
76 | ||
77 | 1. Compare bytes until one of the block pointers is aligned. | |
78 | ||
79 | 2. Compare using memcmp_common_alignment or | |
80 | memcmp_not_common_alignment, regarding the alignment of the other | |
81 | block after the initial byte operations. The maximum number of | |
82 | full words (of type op_t) are compared in this way. | |
83 | ||
84 | 3. Compare the few remaining bytes. */ | |
85 | ||
86 | #ifndef WORDS_BIGENDIAN | |
87 | /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. | |
88 | A and B are known to be different. | |
89 | This is needed only on little-endian machines. */ | |
a1470b6f | 90 | |
79937577 | 91 | static int memcmp_bytes (op_t, op_t) __THROW; |
a1470b6f | 92 | |
af6f3906 | 93 | # ifdef __GNUC__ |
28f540f4 | 94 | __inline |
af6f3906 | 95 | # endif |
28f540f4 RM |
96 | static int |
97 | memcmp_bytes (a, b) | |
98 | op_t a, b; | |
99 | { | |
100 | long int srcp1 = (long int) &a; | |
101 | long int srcp2 = (long int) &b; | |
102 | op_t a0, b0; | |
103 | ||
104 | do | |
105 | { | |
106 | a0 = ((byte *) srcp1)[0]; | |
107 | b0 = ((byte *) srcp2)[0]; | |
108 | srcp1 += 1; | |
109 | srcp2 += 1; | |
110 | } | |
111 | while (a0 == b0); | |
112 | return a0 - b0; | |
113 | } | |
114 | #endif | |
115 | ||
79937577 | 116 | static int memcmp_common_alignment (long, long, size_t) __THROW; |
a1470b6f | 117 | |
28f540f4 RM |
118 | /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' |
119 | objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for | |
120 | memory operations on `op_t's. */ | |
28f540f4 RM |
121 | static int |
122 | memcmp_common_alignment (srcp1, srcp2, len) | |
123 | long int srcp1; | |
124 | long int srcp2; | |
125 | size_t len; | |
126 | { | |
127 | op_t a0, a1; | |
128 | op_t b0, b1; | |
129 | ||
130 | switch (len % 4) | |
131 | { | |
dfd2257a | 132 | default: /* Avoid warning about uninitialized local variables. */ |
28f540f4 RM |
133 | case 2: |
134 | a0 = ((op_t *) srcp1)[0]; | |
135 | b0 = ((op_t *) srcp2)[0]; | |
136 | srcp1 -= 2 * OPSIZ; | |
137 | srcp2 -= 2 * OPSIZ; | |
138 | len += 2; | |
139 | goto do1; | |
140 | case 3: | |
141 | a1 = ((op_t *) srcp1)[0]; | |
142 | b1 = ((op_t *) srcp2)[0]; | |
143 | srcp1 -= OPSIZ; | |
144 | srcp2 -= OPSIZ; | |
145 | len += 1; | |
146 | goto do2; | |
147 | case 0: | |
148 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
149 | return 0; | |
150 | a0 = ((op_t *) srcp1)[0]; | |
151 | b0 = ((op_t *) srcp2)[0]; | |
152 | goto do3; | |
153 | case 1: | |
154 | a1 = ((op_t *) srcp1)[0]; | |
155 | b1 = ((op_t *) srcp2)[0]; | |
156 | srcp1 += OPSIZ; | |
157 | srcp2 += OPSIZ; | |
158 | len -= 1; | |
159 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
160 | goto do0; | |
161 | /* Fall through. */ | |
162 | } | |
163 | ||
164 | do | |
165 | { | |
166 | a0 = ((op_t *) srcp1)[0]; | |
167 | b0 = ((op_t *) srcp2)[0]; | |
168 | if (a1 != b1) | |
169 | return CMP_LT_OR_GT (a1, b1); | |
170 | ||
171 | do3: | |
172 | a1 = ((op_t *) srcp1)[1]; | |
173 | b1 = ((op_t *) srcp2)[1]; | |
174 | if (a0 != b0) | |
175 | return CMP_LT_OR_GT (a0, b0); | |
176 | ||
177 | do2: | |
178 | a0 = ((op_t *) srcp1)[2]; | |
179 | b0 = ((op_t *) srcp2)[2]; | |
180 | if (a1 != b1) | |
181 | return CMP_LT_OR_GT (a1, b1); | |
182 | ||
183 | do1: | |
184 | a1 = ((op_t *) srcp1)[3]; | |
185 | b1 = ((op_t *) srcp2)[3]; | |
186 | if (a0 != b0) | |
187 | return CMP_LT_OR_GT (a0, b0); | |
188 | ||
189 | srcp1 += 4 * OPSIZ; | |
190 | srcp2 += 4 * OPSIZ; | |
191 | len -= 4; | |
192 | } | |
193 | while (len != 0); | |
194 | ||
195 | /* This is the right position for do0. Please don't move | |
196 | it into the loop. */ | |
197 | do0: | |
198 | if (a1 != b1) | |
199 | return CMP_LT_OR_GT (a1, b1); | |
200 | return 0; | |
201 | } | |
202 | ||
79937577 | 203 | static int memcmp_not_common_alignment (long, long, size_t) __THROW; |
a1470b6f | 204 | |
28f540f4 RM |
205 | /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN |
206 | `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory | |
207 | operations on `op_t', but SRCP1 *should be unaligned*. */ | |
28f540f4 RM |
208 | static int |
209 | memcmp_not_common_alignment (srcp1, srcp2, len) | |
210 | long int srcp1; | |
211 | long int srcp2; | |
212 | size_t len; | |
213 | { | |
214 | op_t a0, a1, a2, a3; | |
215 | op_t b0, b1, b2, b3; | |
216 | op_t x; | |
217 | int shl, shr; | |
218 | ||
219 | /* Calculate how to shift a word read at the memory operation | |
220 | aligned srcp1 to make it aligned for comparison. */ | |
221 | ||
222 | shl = 8 * (srcp1 % OPSIZ); | |
223 | shr = 8 * OPSIZ - shl; | |
224 | ||
225 | /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' | |
226 | it points in the middle of. */ | |
227 | srcp1 &= -OPSIZ; | |
228 | ||
229 | switch (len % 4) | |
230 | { | |
dfd2257a | 231 | default: /* Avoid warning about uninitialized local variables. */ |
28f540f4 RM |
232 | case 2: |
233 | a1 = ((op_t *) srcp1)[0]; | |
234 | a2 = ((op_t *) srcp1)[1]; | |
235 | b2 = ((op_t *) srcp2)[0]; | |
236 | srcp1 -= 1 * OPSIZ; | |
237 | srcp2 -= 2 * OPSIZ; | |
238 | len += 2; | |
239 | goto do1; | |
240 | case 3: | |
241 | a0 = ((op_t *) srcp1)[0]; | |
242 | a1 = ((op_t *) srcp1)[1]; | |
243 | b1 = ((op_t *) srcp2)[0]; | |
244 | srcp2 -= 1 * OPSIZ; | |
245 | len += 1; | |
246 | goto do2; | |
247 | case 0: | |
248 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
249 | return 0; | |
250 | a3 = ((op_t *) srcp1)[0]; | |
251 | a0 = ((op_t *) srcp1)[1]; | |
252 | b0 = ((op_t *) srcp2)[0]; | |
253 | srcp1 += 1 * OPSIZ; | |
254 | goto do3; | |
255 | case 1: | |
256 | a2 = ((op_t *) srcp1)[0]; | |
257 | a3 = ((op_t *) srcp1)[1]; | |
258 | b3 = ((op_t *) srcp2)[0]; | |
259 | srcp1 += 2 * OPSIZ; | |
260 | srcp2 += 1 * OPSIZ; | |
261 | len -= 1; | |
262 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
263 | goto do0; | |
264 | /* Fall through. */ | |
265 | } | |
266 | ||
267 | do | |
268 | { | |
269 | a0 = ((op_t *) srcp1)[0]; | |
270 | b0 = ((op_t *) srcp2)[0]; | |
271 | x = MERGE(a2, shl, a3, shr); | |
272 | if (x != b3) | |
273 | return CMP_LT_OR_GT (x, b3); | |
274 | ||
275 | do3: | |
276 | a1 = ((op_t *) srcp1)[1]; | |
277 | b1 = ((op_t *) srcp2)[1]; | |
278 | x = MERGE(a3, shl, a0, shr); | |
279 | if (x != b0) | |
280 | return CMP_LT_OR_GT (x, b0); | |
281 | ||
282 | do2: | |
283 | a2 = ((op_t *) srcp1)[2]; | |
284 | b2 = ((op_t *) srcp2)[2]; | |
285 | x = MERGE(a0, shl, a1, shr); | |
286 | if (x != b1) | |
287 | return CMP_LT_OR_GT (x, b1); | |
288 | ||
289 | do1: | |
290 | a3 = ((op_t *) srcp1)[3]; | |
291 | b3 = ((op_t *) srcp2)[3]; | |
292 | x = MERGE(a1, shl, a2, shr); | |
293 | if (x != b2) | |
294 | return CMP_LT_OR_GT (x, b2); | |
295 | ||
296 | srcp1 += 4 * OPSIZ; | |
297 | srcp2 += 4 * OPSIZ; | |
298 | len -= 4; | |
299 | } | |
300 | while (len != 0); | |
301 | ||
302 | /* This is the right position for do0. Please don't move | |
303 | it into the loop. */ | |
304 | do0: | |
305 | x = MERGE(a2, shl, a3, shr); | |
306 | if (x != b3) | |
307 | return CMP_LT_OR_GT (x, b3); | |
308 | return 0; | |
309 | } | |
310 | ||
311 | int | |
312 | memcmp (s1, s2, len) | |
313 | const __ptr_t s1; | |
314 | const __ptr_t s2; | |
315 | size_t len; | |
316 | { | |
317 | op_t a0; | |
318 | op_t b0; | |
319 | long int srcp1 = (long int) s1; | |
320 | long int srcp2 = (long int) s2; | |
321 | op_t res; | |
322 | ||
323 | if (len >= OP_T_THRES) | |
324 | { | |
325 | /* There are at least some bytes to compare. No need to test | |
326 | for LEN == 0 in this alignment loop. */ | |
327 | while (srcp2 % OPSIZ != 0) | |
328 | { | |
329 | a0 = ((byte *) srcp1)[0]; | |
330 | b0 = ((byte *) srcp2)[0]; | |
331 | srcp1 += 1; | |
332 | srcp2 += 1; | |
333 | res = a0 - b0; | |
334 | if (res != 0) | |
335 | return res; | |
336 | len -= 1; | |
337 | } | |
338 | ||
339 | /* SRCP2 is now aligned for memory operations on `op_t'. | |
340 | SRCP1 alignment determines if we can do a simple, | |
341 | aligned compare or need to shuffle bits. */ | |
342 | ||
343 | if (srcp1 % OPSIZ == 0) | |
344 | res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); | |
345 | else | |
346 | res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); | |
347 | if (res != 0) | |
348 | return res; | |
349 | ||
350 | /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ | |
351 | srcp1 += len & -OPSIZ; | |
352 | srcp2 += len & -OPSIZ; | |
353 | len %= OPSIZ; | |
354 | } | |
355 | ||
356 | /* There are just a few bytes to compare. Use byte memory operations. */ | |
357 | while (len != 0) | |
358 | { | |
359 | a0 = ((byte *) srcp1)[0]; | |
360 | b0 = ((byte *) srcp2)[0]; | |
361 | srcp1 += 1; | |
362 | srcp2 += 1; | |
363 | res = a0 - b0; | |
364 | if (res != 0) | |
365 | return res; | |
366 | len -= 1; | |
367 | } | |
368 | ||
369 | return 0; | |
370 | } | |
758b2153 | 371 | libc_hidden_builtin_def(memcmp) |
28f540f4 | 372 | #ifdef weak_alias |
af6f3906 | 373 | # undef bcmp |
28f540f4 RM |
374 | weak_alias (memcmp, bcmp) |
375 | #endif |