1 /* Copyright (C) 2012-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
26 #define REP8_01 0x0101010101010101
27 #define REP8_7f 0x7f7f7f7f7f7f7f7f
28 #define REP8_80 0x8080808080808080
30 /* Parameters and result. */
35 /* Internal variables. */
49 /* Start of performance-critical section -- one 64B cache line. */
/* strcmp entry point.  The visible code compares the two input strings
   eight bytes at a time where it can and one byte at a time otherwise,
   and derives the result from the first bytes that differ or that
   terminate a string.  The register names used below (src1, src2,
   data1, data2, result, ...) are aliases defined outside this excerpt.  */
50 ENTRY_ALIGN(strcmp, 6)
55 mov zeroones, #REP8_01 /* 0x01 replicated into all eight bytes.  */
60 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
61 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
62 can be done in parallel across the entire word. */
67 sub tmp1, data1, zeroones /* The (X - 1) term of the NUL test.  */
68 orr tmp2, data1, #REP8_7f /* The (X | 0x7f) term of the NUL test.  */
69 eor diff, data1, data2 /* Non-zero if differences found. */
70 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
71 orr syndrome, diff, has_nul /* Non-zero if either condition was seen.  */
72 cbz syndrome, L(loop_aligned) /* Neither seen: branch to L(loop_aligned).  */
73 /* End of performance-critical section -- one 64B cache line. */
77 rev syndrome, syndrome /* Reverse the byte order of the syndrome.  */
79 /* The MS-non-zero bit of the syndrome marks either the first bit
80 that is different, or the top bit of the first zero byte.
81 Shifting left now will bring the critical information into the top bits.
87 /* But we need to zero-extend (char is unsigned) the value and then
88 perform a signed 32-bit subtraction. */
90 sub result, data1, data2, lsr #56 /* result = data1 - (data2 >> 56).  */
93 /* For big-endian we cannot use the trick with the syndrome value
94 as carry-propagation can corrupt the upper bits if the trailing
95 bytes in the string contain 0x01. */
96 /* However, if there is no NUL byte in the dword, we can generate
97 the result directly. We can't just subtract the bytes as the
98 MSB might be significant. */
102 cneg result, result, lo /* Negate result when condition "lo" (unsigned lower) holds; the instruction that sets the flags is not visible here.  */
105 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
107 sub tmp1, tmp3, zeroones /* Same NUL test as above, applied to tmp3.  */
108 orr tmp2, tmp3, #REP8_7f
109 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
111 orr syndrome, diff, has_nul /* Combine difference and NUL indicators.  */
/* NOTE(review): the comment that follows is not closed until after the
   two lsl instructions, so as written they sit inside a comment.
   Confirm against the intended source.  */
113 /* The MS-non-zero bit of the syndrome marks either the first bit
114 that is different, or the top bit of the first zero byte.
115 Shifting left now will bring the critical information into the top bits.
117 lsl data1, data1, pos
118 lsl data2, data2, pos
119 /* But we need to zero-extend (char is unsigned) the value and then
120 perform a signed 32-bit subtraction. */
121 lsr data1, data1, #56 /* Keep only the top byte, zero-extended.  */
122 sub result, data1, data2, lsr #56 /* Subtract the top byte of data2.  */
127 /* Sources are mutually aligned, but are not currently at an
128 alignment boundary. Round down the addresses and then mask off
129 the bytes that precede the start point. */
132 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
133 ldr data1, [src1], #8 /* Load eight bytes, then advance src1 by 8.  */
134 neg tmp1, tmp1 /* Bits to alignment -64. */
135 ldr data2, [src2], #8 /* Load eight bytes, then advance src2 by 8.  */
138 /* Big-endian. Early bytes are at MSB. */
139 lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
141 /* Little-endian. Early bytes are at LSB. */
142 lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
144 orr data1, data1, tmp2 /* The same mask is ORed into both words, so every bit set in tmp2 is 1 in both ...  */
145 orr data2, data2, tmp2 /* ... and those positions cannot show up as a difference.  */
/* NOTE(review): the comment that follows is not closed until the end of
   the ccmp line, so as written the b.eq, both ldrb and the ccmp sit
   inside a comment.  Confirm against the intended source.  */
149 /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
150 checking to make sure that we don't access beyond page boundary in SRC2.
153 b.eq L(loop_misaligned)
155 ldrb data1w, [src1], #1
156 ldrb data2w, [src2], #1
158 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
161 b.ne L(do_misaligned)
164 /* Test if we are within the last dword of the end of a 4K page. If
165 yes then jump back to the misaligned loop to compare a byte at a time. */
166 and tmp1, src2, #0xff8 /* Bits 3..11 of src2: its 8-byte slot within a 4K page.  */
167 eor tmp1, tmp1, #0xff8 /* Zero iff src2 lies in the last 8 bytes of the page.  */
168 cbz tmp1, L(do_misaligned) /* If so, fall back to the byte-wise loop.  */
169 ldr data1, [src1], #8 /* Load eight bytes, then advance src1 by 8.  */
170 ldr data2, [src2], #8 /* Load eight bytes, then advance src2 by 8.  */
172 sub tmp1, data1, zeroones /* The (X - 1) term of the NUL test.  */
173 orr tmp2, data1, #REP8_7f /* The (X | 0x7f) term of the NUL test.  */
174 eor diff, data1, data2 /* Non-zero if differences found. */
175 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
176 orr syndrome, diff, has_nul /* Non-zero if either condition was seen.  */
177 cbz syndrome, L(loop_misaligned) /* Neither seen: branch to L(loop_misaligned).  */
181 sub result, data1, data2 /* result = data1 - data2.  */
184 libc_hidden_builtin_def (strcmp)