]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 2010-2024 Free Software Foundation, Inc. |
926cf114 RH |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff | 15 | License along with the GNU C Library. If not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
926cf114 RH |
17 | |
18 | #include <string.h> | |
19 | ||
20 | typedef unsigned long word; | |
21 | ||
22 | static inline word | |
23 | ldq_u(const void *s) | |
24 | { | |
25 | return *(const word *)((word)s & -8); | |
26 | } | |
27 | ||
28 | #define unlikely(X) __builtin_expect ((X), 0) | |
29 | #define prefetch(X) __builtin_prefetch ((void *)(X), 0) | |
30 | ||
31 | #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X)) | |
32 | #define find(X, Y) cmpbeq0 ((X) ^ (Y)) | |
33 | ||
34 | /* Search no more than N bytes of S for C. */ | |
35 | ||
36 | void * | |
37 | __memchr (const void *s, int xc, size_t n) | |
38 | { | |
39 | const word *s_align; | |
40 | word t, current, found, mask, offset; | |
41 | ||
42 | if (unlikely (n == 0)) | |
43 | return 0; | |
44 | ||
45 | current = ldq_u (s); | |
46 | ||
47 | /* Replicate low byte of XC into all bytes of C. */ | |
48 | t = xc & 0xff; /* 0000000c */ | |
49 | t = (t << 8) | t; /* 000000cc */ | |
50 | t = (t << 16) | t; /* 0000cccc */ | |
51 | const word c = (t << 32) | t; /* cccccccc */ | |
52 | ||
53 | /* Align the source, and decrement the count by the number | |
54 | of bytes searched in the first word. */ | |
b54f998d | 55 | s_align = (const word *)((word)s & -8); |
9c8e6448 RH |
56 | { |
57 | size_t inc = n + ((word)s & 7); | |
58 | n = inc | -(inc < n); | |
59 | } | |
926cf114 RH |
60 | |
61 | /* Deal with misalignment in the first word for the comparison. */ | |
b54f998d | 62 | mask = (1ul << ((word)s & 7)) - 1; |
926cf114 RH |
63 | |
64 | /* If the entire string fits within one word, we may need masking | |
65 | at both the front and the back of the string. */ | |
66 | if (unlikely (n <= 8)) | |
67 | { | |
68 | mask |= -1ul << n; | |
69 | goto last_quad; | |
70 | } | |
71 | ||
72 | found = find (current, c) & ~mask; | |
73 | if (unlikely (found)) | |
74 | goto found_it; | |
75 | ||
76 | s_align++; | |
77 | n -= 8; | |
78 | ||
79 | /* If the block is sufficiently large, align to cacheline and prefetch. */ | |
80 | if (unlikely (n >= 256)) | |
81 | { | |
82 | /* Prefetch 3 cache lines beyond the one we're working on. */ | |
83 | prefetch (s_align + 8); | |
84 | prefetch (s_align + 16); | |
85 | prefetch (s_align + 24); | |
86 | ||
87 | while ((word)s_align & 63) | |
88 | { | |
89 | current = *s_align; | |
90 | found = find (current, c); | |
91 | if (found) | |
92 | goto found_it; | |
93 | s_align++; | |
94 | n -= 8; | |
95 | } | |
96 | ||
97 | /* Within each cacheline, advance the load for the next word | |
98 | before the test for the previous word is complete. This | |
99 | allows us to hide the 3 cycle L1 cache load latency. We | |
100 | only perform this advance load within a cacheline to prevent | |
101 | reading across page boundary. */ | |
102 | #define CACHELINE_LOOP \ | |
103 | do { \ | |
104 | word i, next = s_align[0]; \ | |
105 | for (i = 0; i < 7; ++i) \ | |
106 | { \ | |
107 | current = next; \ | |
108 | next = s_align[1]; \ | |
109 | found = find (current, c); \ | |
110 | if (unlikely (found)) \ | |
111 | goto found_it; \ | |
112 | s_align++; \ | |
113 | } \ | |
114 | current = next; \ | |
115 | found = find (current, c); \ | |
116 | if (unlikely (found)) \ | |
117 | goto found_it; \ | |
118 | s_align++; \ | |
119 | n -= 64; \ | |
120 | } while (0) | |
5556231d | 121 | |
926cf114 RH |
122 | /* While there's still lots more data to potentially be read, |
123 | continue issuing prefetches for the 4th cacheline out. */ | |
124 | while (n >= 256) | |
125 | { | |
126 | prefetch (s_align + 24); | |
127 | CACHELINE_LOOP; | |
128 | } | |
129 | ||
130 | /* Up to 3 cache lines remaining. Continue issuing advanced | |
131 | loads, but stop prefetching. */ | |
132 | while (n >= 64) | |
133 | CACHELINE_LOOP; | |
134 | ||
135 | /* We may have exhausted the buffer. */ | |
136 | if (n == 0) | |
137 | return NULL; | |
138 | } | |
139 | ||
140 | /* Quadword aligned loop. */ | |
141 | current = *s_align; | |
142 | while (n > 8) | |
143 | { | |
144 | found = find (current, c); | |
145 | if (unlikely (found)) | |
146 | goto found_it; | |
147 | current = *++s_align; | |
148 | n -= 8; | |
149 | } | |
150 | ||
151 | /* The last word may need masking at the tail of the compare. */ | |
152 | mask = -1ul << n; | |
153 | last_quad: | |
154 | found = find (current, c) & ~mask; | |
155 | if (found == 0) | |
156 | return NULL; | |
157 | ||
158 | found_it: | |
159 | #ifdef __alpha_cix__ | |
160 | offset = __builtin_alpha_cttz (found); | |
161 | #else | |
162 | /* Extract LSB. */ | |
163 | found &= -found; | |
164 | ||
165 | /* Binary search for the LSB. */ | |
166 | offset = (found & 0x0f ? 0 : 4); | |
167 | offset += (found & 0x33 ? 0 : 2); | |
168 | offset += (found & 0x55 ? 0 : 1); | |
169 | #endif | |
170 | ||
171 | return (void *)((word)s_align + offset); | |
172 | } | |
173 | ||
174 | #ifdef weak_alias | |
e97ed6dd | 175 | weak_alias (__memchr, memchr) |
926cf114 RH |
176 | #endif |
177 | libc_hidden_builtin_def (memchr) |