]>
Commit | Line | Data |
---|---|---|
14d941e4 MS |
/* memcmp - compare memory

   Copyright (C) 2013-2014 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/* Parameters and result (AAPCS64 argument/return registers).  */
#define src1		x0
#define src2		x1
#define limit		x2	/* Byte count to compare.  */
#define result		x0	/* Aliases src1: the return value overwrites it.  */

/* Internal variables.  All are caller-saved under AAPCS64, so no
   registers need to be preserved and no stack is used.  */
#define data1		x3
#define data1w		w3	/* 32-bit view of data1, for byte loads.  */
#define data2		x4
#define data2w		w4	/* 32-bit view of data2, for byte loads.  */
#define has_nul		x5	/* Not referenced in this implementation.  */
#define diff		x6	/* XOR of data1/data2: non-zero marks a mismatch.  */
#define endloop		x7	/* Loop-exit condition (diff or limit reached).  */
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10	/* Not referenced in this implementation.  */
#define pos		x11	/* Bit position of the first difference.  */
#define limit_wd	x12	/* Limit in Dwords (64-bit words), rounded up.  */
#define mask		x13	/* Masks bytes beyond the limit in the last Dword.  */
/* int memcmp (const void *src1, const void *src2, size_t limit)

   In:  src1 (x0), src2 (x1) -- buffers to compare
        limit (x2)           -- number of bytes to compare
   Out: result (x0) -- signed difference of the first mismatching bytes
        (compared as unsigned chars) within LIMIT bytes, or 0 if equal.
   Clobbers x3-x13 and the condition flags; leaf routine, no stack use.  */
ENTRY_ALIGN (memcmp, 6)
	cbz	limit, L(ret0)		/* Zero length compares equal.  */
	eor	tmp1, src1, src2
	tst	tmp1, #7		/* Same offset within a Dword?  */
	b.ne	L(misaligned8)		/* No: byte-at-a-time fallback.  */
	ands	tmp1, src1, #7
	b.ne	L(mutual_align)		/* Same non-zero offset: realign first.  */
	add	limit_wd, limit, #7	/* limit_wd = Dword count, rounded up.  */
	lsr	limit_wd, limit_wd, #3
	/* Start of performance-critical section  -- one 64B cache line.  */
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	subs	limit_wd, limit_wd, #1
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
	cbz	endloop, L(loop_aligned)
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Not reached the limit, must have found a diff.  */
	cbnz	limit_wd, L(not_limit)

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	L(not_limit)

	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit	/* Mask covers the late (insignificant) bytes.  */
#else
	lsl	mask, mask, limit
#endif
	/* Zero the bytes beyond LIMIT so they cannot produce a spurious
	   difference, and set them in DIFF to mark the end of the data.  */
	bic	data1, data1, mask
	bic	data2, data2, mask

	orr	diff, diff, mask
L(not_limit):

#ifndef __AARCH64EB__
	rev	diff, diff		/* Byte-reverse so CLZ finds the FIRST
					   differing byte on little-endian.  */
	rev	data1, data1
	rev	data2, data2
#endif
	/* The MS-non-zero bit of DIFF marks either the first bit
	   that is different, or the end of the significant data.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56	/* Isolate the first differing byte.  */
	sub	result, data1, data2, lsr #56
	RET

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	add	limit_wd, limit, #7
	orr	data1, data1, tmp2	/* Force the pre-start bytes equal in
					   both words so they cannot compare
					   as different.  */
	orr	data2, data2, tmp2
	lsr	limit_wd, limit_wd, #3
	b	L(start_realigned)

L(ret0):
	mov	result, #0
	RET

	.p2align 6
L(misaligned8):
	/* Buffers differ in alignment modulo 8: compare byte by byte.
	   Pre-decrement LIMIT so the SUBS below leaves the carry flag set
	   (CS) exactly while bytes remain after the current one.  */
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  On the last byte
					   (carry clear) force NE to exit.  */
	b.eq	1b			/* Loop while bytes equal and remaining.  */
	sub	result, data1, data2	/* LDRB zero-extended, so this is the
					   unsigned-char difference.  */
	RET
END (memcmp)
/* Undefine any macro definition of bcmp before providing it as a weak
   alias of memcmp, and export the hidden in-libc definition.  */
#undef bcmp
weak_alias (memcmp, bcmp)
libc_hidden_builtin_def (memcmp)