]>
git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/strspn-sse4.c
1 /* strspn with SSE4.2 intrinsics
2 Copyright (C) 2009-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <nmmintrin.h>
26 | _SIDD_NEGATIVE_POLARITY
27 | _SIDD_LEAST_SIGNIFICANT
28 on pcmpistri to compare xmm/mem128
30 0 1 2 3 4 5 6 7 8 9 A B C D E F
31 X X X X X X X X X X X X X X X X
35 0 1 2 3 4 5 6 7 8 9 A B C D E F
36 A A A A A A A A A A A A A A A A
38 to find out if the first 16byte data element has any non-A byte and
39 the offset of the first byte. There are 2 cases:
41 1. The first 16byte data element has the non-A byte, including
43 2. The first 16byte data element is valid and doesn't have the non-A
46 Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
48 case ECX CFlag ZFlag SFlag
52 We exit from the loop for case 1. */
/* Non-SSE4.2 strspn implementation, defined outside this file.  STRSPN
   below returns its result directly on the path where SSE4.2 is not
   used for the accept set A.  */
54 extern size_t __strspn_generic (const char *, const char *) attribute_hidden
;
57 # define STRSPN __strspn_sse42
61 __attribute__ ((section (".text.sse4.2")))
62 STRSPN (const char *s
, const char *a
)
68 __m128i mask
, maskz
, zero
;
69 unsigned int maskz_bits
;
70 unsigned int offset
= (int) ((size_t) a
& 15);
71 zero
= _mm_set1_epi8 (0);
75 aligned
= (const char *) ((size_t) a
& -16L);
76 __m128i mask0
= _mm_load_si128 ((__m128i
*) aligned
);
77 maskz
= _mm_cmpeq_epi8 (mask0
, zero
);
79 /* Find where the NULL terminator is. */
80 maskz_bits
= _mm_movemask_epi8 (maskz
) >> offset
;
83 mask
= __m128i_shift_right (mask0
, offset
);
84 offset
= (unsigned int) ((size_t) s
& 15);
94 mask
= _mm_loadu_si128 ((__m128i
*) a
);
96 /* Find where the NULL terminator is. */
97 maskz
= _mm_cmpeq_epi8 (mask
, zero
);
98 maskz_bits
= _mm_movemask_epi8 (maskz
);
101 /* There is no NULL terminator. Don't use SSE4.2 if the length of A > 16. */
104 return __strspn_generic (s
, a
);
107 offset
= (unsigned int) ((size_t) s
& 15);
112 /* Check partial string. */
113 aligned
= (const char *) ((size_t) s
& -16L);
114 __m128i value
= _mm_load_si128 ((__m128i
*) aligned
);
115 __m128i adj_value
= __m128i_shift_right (value
, offset
);
117 unsigned int length
= _mm_cmpistri (mask
, adj_value
, 0x12);
118 /* No need to check CFlag since it is always 1. */
119 if (length
< 16 - offset
)
121 /* Find where the NULL terminator is. */
122 maskz
= _mm_cmpeq_epi8 (value
, zero
);
123 maskz_bits
= _mm_movemask_epi8 (maskz
) >> offset
;
132 __m128i value
= _mm_load_si128 ((__m128i
*) aligned
);
133 unsigned int index
= _mm_cmpistri (mask
, value
, 0x12);
134 unsigned int cflag
= _mm_cmpistrc (mask
, value
, 0x12);
136 return (size_t) (aligned
+ index
- s
);