1 /* strchr optimized with SSE2.
2 Copyright (C) 2009-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
21 /* Build when MINIMUM_X86_ISA_LEVEL <= 2: there is no V2 implementation,
22 so this SSE2 version is also needed for ISA V2 builds. */
23 #if ISA_SHOULD_BUILD (2)
26 # define STRCHR __strchr_sse2
36 punpcklbw %xmm1, %xmm1
38 punpcklwd %xmm1, %xmm1
39 pshufd $0, %xmm1, %xmm1
52 leaq (%rdi,%rax), %rax
55 leaq (%rdi,%rax), %rax
63 movdqu 16(%rdi), %xmm0
69 movdqu 32(%rdi), %xmm0
76 movdqu 48(%rdi), %xmm0
88 /* We use this alignment to force the loop to be aligned to 8 but not
89 16 bytes. This gives better scheduling on AMD processors. */
97 movdqa 16(%rdi), %xmm2
98 movdqa 32(%rdi), %xmm3
100 movdqa 48(%rdi), %xmm4
105 pminub 16(%rdi), %xmm2
106 pminub 32(%rdi), %xmm3
108 pminub 48(%rdi), %xmm4
140 leaq (%rdi,%rax), %rax
143 leaq (%rdi,%rax), %rax
161 movdqa 16(%rdx), %xmm3
167 movdqa 32(%rdx), %xmm3
174 movdqa 48(%rdx), %xmm3