]> git.ipfire.org Git - thirdparty/glibc.git/blame - ports/sysdeps/aarch64/memcmp.S
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / ports / sysdeps / aarch64 / memcmp.S
CommitLineData
14d941e4
MS
1/* memcmp - compare memory
2
d4697bc9 3 Copyright (C) 2013-2014 Free Software Foundation, Inc.
14d941e4
MS
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <http://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 */
27
28/* Parameters and result. */
29#define src1 x0 /* First buffer pointer; advanced as data is consumed. */
30#define src2 x1 /* Second buffer pointer; advanced as data is consumed. */
31#define limit x2 /* Number of bytes to compare. */
32#define result x0 /* Return value; shares x0 with src1. */
33
34/* Internal variables. */
35#define data1 x3 /* Data loaded from src1. */
36#define data1w w3 /* 32-bit view of data1 (byte loads). */
37#define data2 x4 /* Data loaded from src2. */
38#define data2w w4 /* 32-bit view of data2 (byte loads). */
39#define has_nul x5 /* Not referenced by the code visible in this file. */
40#define diff x6 /* data1 ^ data2; non-zero bits mark differences. */
41#define endloop x7 /* Non-zero when the Dword loop must stop. */
42#define tmp1 x8 /* Scratch. */
43#define tmp2 x9 /* Scratch. */
44#define tmp3 x10 /* Not referenced by the code visible in this file. */
45#define pos x11 /* Leading-zero count of diff. */
46#define limit_wd x12 /* Dwords still to be compared. */
47#define mask x13 /* Bits of the final Dword that lie beyond the limit. */
48
/* memcmp: compare the first LIMIT bytes at SRC1 and SRC2.
   In:   x0 = src1, x1 = src2, x2 = limit (byte count).
   Out:  x0 = 0 if limit is 0 or no byte differs; otherwise non-zero with
         the sign of (first differing byte of src1 - that of src2), the
         bytes being taken as unsigned.
   Uses: x1 and x2 are modified; x3, x4, x6-x9 and x11-x13 are scratch;
         the condition flags are modified.
   Paths: (1) both pointers 8-byte aligned: compare one Dword per iteration;
          (2) same offset within a Dword: round both pointers down,
              neutralise the leading bytes, then join path (1);
          (3) different offsets within a Dword: byte-at-a-time loop.
   Paths (1) and (2) load whole aligned Dwords, so they may read bytes of
   the first or last Dword that lie outside the requested range.  */
49ENTRY_ALIGN (memcmp, 6)
50 cbz limit, L(ret0) /* Zero length: report equality. */
51 eor tmp1, src1, src2
52 tst tmp1, #7 /* Do the low three address bits of src1 and src2 differ? */
53 b.ne L(misaligned8) /* Yes: they can never be Dword aligned together. */
54 ands tmp1, src1, #7 /* tmp1 = byte offset of src1 (and src2) within a Dword. */
55 b.ne L(mutual_align)
56 add limit_wd, limit, #7
57 lsr limit_wd, limit_wd, #3 /* limit_wd = number of Dwords covering limit bytes. */
58 /* Start of performance-critical section -- one 64B cache line. */
59L(loop_aligned):
60 ldr data1, [src1], #8 /* Load one Dword from each source, post-incrementing. */
61 ldr data2, [src2], #8
62L(start_realigned):
63 subs limit_wd, limit_wd, #1 /* Z is set when this is the last Dword. */
64 eor diff, data1, data2 /* Non-zero if differences found. */
65 csinv endloop, diff, xzr, ne /* Last Dword or differences. */
66 cbz endloop, L(loop_aligned)
67 /* End of performance-critical section -- one 64B cache line. */
68
69 /* Not reached the limit, must have found a diff. */
70 cbnz limit_wd, L(not_limit)
71
72 /* Limit % 8 == 0 => all bytes significant. */
73 ands limit, limit, #7
74 b.eq L(not_limit)
75
76 lsl limit, limit, #3 /* Bytes -> bits. */
77 mov mask, #~0
78#ifdef __AARCH64EB__
79 lsr mask, mask, limit
80#else
81 lsl mask, mask, limit
82#endif
83 bic data1, data1, mask /* Clear the bytes beyond the limit in both words. */
84 bic data2, data2, mask
85
86 orr diff, diff, mask /* Mark the end of the significant data in DIFF. */
87L(not_limit):
88
89#ifndef __AARCH64EB__
90 rev diff, diff /* Byte-reverse so the first byte in memory is most significant. */
91 rev data1, data1
92 rev data2, data2
93#endif
94 /* The MS-non-zero bit of DIFF marks either the first bit
95 that is different, or the end of the significant data.
96 Shifting left now will bring the critical information into the
97 top bits. */
98 clz pos, diff
99 lsl data1, data1, pos
100 lsl data2, data2, pos
101 /* But we need to zero-extend (char is unsigned) the value and then
102 perform a signed 32-bit subtraction. The shift above is by a bit
103 count, so the top bits of the two words differ at the first
 differing bit: the sign of the result is right, but its magnitude
 need not equal the difference of the two bytes. */
103 lsr data1, data1, #56
104 sub result, data1, data2, lsr #56
105 RET
106
107L(mutual_align):
108 /* Sources are mutually aligned, but are not currently at an
109 alignment boundary. Round down the addresses and then mask off
110 the bytes that precede the start point. */
111 bic src1, src1, #7
112 bic src2, src2, #7
113 add limit, limit, tmp1 /* Adjust the limit for the extra. */
114 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
115 ldr data1, [src1], #8
116 neg tmp1, tmp1 /* Bits to alignment -64; the shifts below use only the low 6 bits. */
117 ldr data2, [src2], #8
118 mov tmp2, #~0
119#ifdef __AARCH64EB__
120 /* Big-endian. Early bytes are at MSB. */
121 lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
122#else
123 /* Little-endian. Early bytes are at LSB. */
124 lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
125#endif
126 add limit_wd, limit, #7
127 orr data1, data1, tmp2 /* Force the bytes before the start point to all-ones ... */
128 orr data2, data2, tmp2 /* ... in both words, so that they compare equal. */
129 lsr limit_wd, limit_wd, #3 /* Dword count for the adjusted limit. */
130 b L(start_realigned)
131
132L(ret0):
133 mov result, #0
134 RET
135
136 .p2align 6
137L(misaligned8):
138 sub limit, limit, #1 /* Pre-decrement so the subs below borrows on the last byte. */
1391:
140 /* Perhaps we can do better than this. */
141 ldrb data1w, [src1], #1
142 ldrb data2w, [src2], #1
143 subs limit, limit, #1 /* C stays set while further bytes remain. */
144 ccmp data1w, data2w, #0, cs /* Bytes remain: compare them. Otherwise NZCV = 0b0000, which reads as "ne" and ends the loop. */
145 b.eq 1b
146 sub result, data1, data2 /* ldrb zero-extended both bytes: unsigned byte difference. */
147 RET
148END (memcmp)
149#undef bcmp /* Drop any macro definition of bcmp before the name is used below. */
150weak_alias (memcmp, bcmp) /* Presumably exports bcmp as a weak alias of memcmp; the macro is defined outside this file -- confirm. */
151libc_hidden_builtin_def (memcmp) /* Macro defined outside this file; NOTE(review): looks like it declares the libc-internal hidden definition of memcmp -- confirm. */