1 /* Placeholder function, not used by any processor at the moment.
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* UNUSED. Exists purely as reference implementation. */
21 #include <isa-level.h>
23 #if ISA_SHOULD_BUILD (4)
29 # define VPTESTN vptestnmd
30 # define VPMINU vpminud
34 # define VPTESTN vptestnmb
35 # define VPMINU vpminub
40 # define PAGE_SIZE 4096
41 # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
45 # define KORTEST kortestq
50 # define TEXTSUFFIX evex512
56 # define VMOVA vmovdqa64
58 /* Currently Unused. */
60 # define KORTEST kortestd
65 # define TEXTSUFFIX evex256
71 # define VMOVA vmovdqa32
74 .section .text.TEXTSUFFIX, "ax", @progbits
75 /* Aligning the entry point to a 64-byte boundary provides better
76    performance for strings of one vector length. */
77 ENTRY_P2ALIGN (STRLEN, 6)
78 # ifdef USE_AS_STRNLEN
79 /* Check zero length. */
83 /* Clear the upper 32 bits. */
89 vpxorq %XMM0, %XMM0, %XMM0
90 andl $(PAGE_SIZE - 1), %eax
91 cmpl $(PAGE_SIZE - VEC_SIZE), %eax
94 /* Compare [w]char for null, mask bit will be set for match. */
95 VPCMP $0, (%rdi), %VMM0, %k0
101 # ifdef USE_AS_STRNLEN
107 /* At this point the maximum vector length has been reached. */
108 # ifdef USE_AS_STRNLEN
116 leaq VEC_SIZE(%rdi), %rax
117 /* Align rax to VEC_SIZE. */
118 andq $-VEC_SIZE, %rax
119 # ifdef USE_AS_STRNLEN
122 # ifdef USE_AS_WCSLEN
125 /* At this point rdx contains the number of [w]chars already compared. */
129 /* At this point rdx contains the number of [w]chars still to be
130    compared.  From here on, rdx is decremented with each compare. */
133 /* Loop unroll 4 times for 4 vector loop. */
134 VPCMP $0, (%rax), %VMM0, %k0
139 # ifdef USE_AS_STRNLEN
140 subq $CHAR_PER_VEC, %rdx
144 VPCMP $0, VEC_SIZE(%rax), %VMM0, %k0
149 # ifdef USE_AS_STRNLEN
150 subq $CHAR_PER_VEC, %rdx
154 VPCMP $0, (VEC_SIZE * 2)(%rax), %VMM0, %k0
159 # ifdef USE_AS_STRNLEN
160 subq $CHAR_PER_VEC, %rdx
164 VPCMP $0, (VEC_SIZE * 3)(%rax), %VMM0, %k0
169 # ifdef USE_AS_STRNLEN
170 subq $CHAR_PER_VEC, %rdx
172 /* Save pointer before 4 x VEC_SIZE alignment. */
176 /* Align address to VEC_SIZE * 4 for loop. */
177 andq $-(VEC_SIZE * 4), %rax
179 # ifdef USE_AS_STRNLEN
181 # ifdef USE_AS_WCSLEN
184 /* rcx contains the number of [w]chars that will be recompared due to
185    the alignment fix-up.  rdx must be incremented by rcx to offset the
186    alignment adjustment. */
188 /* Need jump as we don't want to add/subtract rdx for first
189 iteration of 4 x VEC_SIZE aligned loop. */
195 # ifdef USE_AS_STRNLEN
196 subq $(CHAR_PER_VEC * 4), %rdx
200 /* VPMINU and VPCMP combination provide better performance as
201 compared to alternative combinations. */
202 VMOVA (VEC_SIZE * 4)(%rax), %VMM1
203 VPMINU (VEC_SIZE * 5)(%rax), %VMM1, %VMM2
204 VMOVA (VEC_SIZE * 6)(%rax), %VMM3
205 VPMINU (VEC_SIZE * 7)(%rax), %VMM3, %VMM4
207 VPTESTN %VMM2, %VMM2, %k0
208 VPTESTN %VMM4, %VMM4, %k1
210 subq $-(VEC_SIZE * 4), %rax
214 VPTESTN %VMM1, %VMM1, %k2
220 /* At this point, if k0 is non zero, null char must be in the
225 VPTESTN %VMM3, %VMM3, %k3
229 /* At this point null [w]char must be in the fourth vector so no
233 /* Fourth, third, second vector terminating are pretty much
234 same, implemented this way to avoid branching and reuse code
235 from pre loop exit condition. */
239 # ifdef USE_AS_WCSLEN
240 subq $-(VEC_SIZE * 3), %rax
244 leaq (VEC_SIZE * 3)(%rcx, %rax), %rax
246 # ifdef USE_AS_STRNLEN
255 # ifdef USE_AS_WCSLEN
256 subq $-(VEC_SIZE * 2), %rax
260 leaq (VEC_SIZE * 2)(%rcx, %rax), %rax
262 # ifdef USE_AS_STRNLEN
269 subq $-VEC_SIZE, %rax
273 # ifdef USE_AS_WCSLEN
277 # ifdef USE_AS_STRNLEN
285 # ifdef USE_AS_WCSLEN
286 andl $(VEC_SIZE - 1), %ecx
289 /* ecx contains the number of [w]chars to be skipped as a result
290    of address alignment. */
292 VPCMP $0, (PAGE_SIZE - VEC_SIZE)(%rax), %VMM0, %k0
294 /* Ignore the number of characters covered by the alignment adjustment. */
299 # ifdef USE_AS_STRNLEN