]>
Commit | Line | Data |
---|---|---|
568035b7 | 1 | /* Copyright (C) 2010-2013 Free Software Foundation, Inc. |
926cf114 RH |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff PE |
15 | License along with the GNU C Library. If not, see |
16 | <http://www.gnu.org/licenses/>. */ | |
926cf114 RH |
17 | |
18 | #include <string.h> | |
b54f998d | 19 | #include <bp-sym.h> |
926cf114 RH |
20 | |
21 | typedef unsigned long word; | |
22 | ||
23 | static inline word | |
24 | ldq_u(const void *s) | |
25 | { | |
26 | return *(const word *)((word)s & -8); | |
27 | } | |
28 | ||
29 | #define unlikely(X) __builtin_expect ((X), 0) | |
30 | #define prefetch(X) __builtin_prefetch ((void *)(X), 0) | |
31 | ||
32 | #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X)) | |
33 | #define find(X, Y) cmpbeq0 ((X) ^ (Y)) | |
34 | ||
35 | /* Search no more than N bytes of S for C. */ | |
36 | ||
37 | void * | |
38 | __memchr (const void *s, int xc, size_t n) | |
39 | { | |
40 | const word *s_align; | |
41 | word t, current, found, mask, offset; | |
42 | ||
43 | if (unlikely (n == 0)) | |
44 | return 0; | |
45 | ||
46 | current = ldq_u (s); | |
47 | ||
48 | /* Replicate low byte of XC into all bytes of C. */ | |
49 | t = xc & 0xff; /* 0000000c */ | |
50 | t = (t << 8) | t; /* 000000cc */ | |
51 | t = (t << 16) | t; /* 0000cccc */ | |
52 | const word c = (t << 32) | t; /* cccccccc */ | |
53 | ||
54 | /* Align the source, and decrement the count by the number | |
55 | of bytes searched in the first word. */ | |
b54f998d MC |
56 | s_align = (const word *)((word)s & -8); |
57 | n += ((word)s & 7); | |
926cf114 RH |
58 | |
59 | /* Deal with misalignment in the first word for the comparison. */ | |
b54f998d | 60 | mask = (1ul << ((word)s & 7)) - 1; |
926cf114 RH |
61 | |
62 | /* If the entire string fits within one word, we may need masking | |
63 | at both the front and the back of the string. */ | |
64 | if (unlikely (n <= 8)) | |
65 | { | |
66 | mask |= -1ul << n; | |
67 | goto last_quad; | |
68 | } | |
69 | ||
70 | found = find (current, c) & ~mask; | |
71 | if (unlikely (found)) | |
72 | goto found_it; | |
73 | ||
74 | s_align++; | |
75 | n -= 8; | |
76 | ||
77 | /* If the block is sufficiently large, align to cacheline and prefetch. */ | |
78 | if (unlikely (n >= 256)) | |
79 | { | |
80 | /* Prefetch 3 cache lines beyond the one we're working on. */ | |
81 | prefetch (s_align + 8); | |
82 | prefetch (s_align + 16); | |
83 | prefetch (s_align + 24); | |
84 | ||
85 | while ((word)s_align & 63) | |
86 | { | |
87 | current = *s_align; | |
88 | found = find (current, c); | |
89 | if (found) | |
90 | goto found_it; | |
91 | s_align++; | |
92 | n -= 8; | |
93 | } | |
94 | ||
95 | /* Within each cacheline, advance the load for the next word | |
96 | before the test for the previous word is complete. This | |
97 | allows us to hide the 3 cycle L1 cache load latency. We | |
98 | only perform this advance load within a cacheline to prevent | |
99 | reading across page boundary. */ | |
100 | #define CACHELINE_LOOP \ | |
101 | do { \ | |
102 | word i, next = s_align[0]; \ | |
103 | for (i = 0; i < 7; ++i) \ | |
104 | { \ | |
105 | current = next; \ | |
106 | next = s_align[1]; \ | |
107 | found = find (current, c); \ | |
108 | if (unlikely (found)) \ | |
109 | goto found_it; \ | |
110 | s_align++; \ | |
111 | } \ | |
112 | current = next; \ | |
113 | found = find (current, c); \ | |
114 | if (unlikely (found)) \ | |
115 | goto found_it; \ | |
116 | s_align++; \ | |
117 | n -= 64; \ | |
118 | } while (0) | |
119 | ||
120 | /* While there's still lots more data to potentially be read, | |
121 | continue issuing prefetches for the 4th cacheline out. */ | |
122 | while (n >= 256) | |
123 | { | |
124 | prefetch (s_align + 24); | |
125 | CACHELINE_LOOP; | |
126 | } | |
127 | ||
128 | /* Up to 3 cache lines remaining. Continue issuing advanced | |
129 | loads, but stop prefetching. */ | |
130 | while (n >= 64) | |
131 | CACHELINE_LOOP; | |
132 | ||
133 | /* We may have exhausted the buffer. */ | |
134 | if (n == 0) | |
135 | return NULL; | |
136 | } | |
137 | ||
138 | /* Quadword aligned loop. */ | |
139 | current = *s_align; | |
140 | while (n > 8) | |
141 | { | |
142 | found = find (current, c); | |
143 | if (unlikely (found)) | |
144 | goto found_it; | |
145 | current = *++s_align; | |
146 | n -= 8; | |
147 | } | |
148 | ||
149 | /* The last word may need masking at the tail of the compare. */ | |
150 | mask = -1ul << n; | |
151 | last_quad: | |
152 | found = find (current, c) & ~mask; | |
153 | if (found == 0) | |
154 | return NULL; | |
155 | ||
156 | found_it: | |
157 | #ifdef __alpha_cix__ | |
158 | offset = __builtin_alpha_cttz (found); | |
159 | #else | |
160 | /* Extract LSB. */ | |
161 | found &= -found; | |
162 | ||
163 | /* Binary search for the LSB. */ | |
164 | offset = (found & 0x0f ? 0 : 4); | |
165 | offset += (found & 0x33 ? 0 : 2); | |
166 | offset += (found & 0x55 ? 0 : 1); | |
167 | #endif | |
168 | ||
169 | return (void *)((word)s_align + offset); | |
170 | } | |
171 | ||
172 | #ifdef weak_alias | |
173 | weak_alias (__memchr, BP_SYM (memchr)) | |
174 | #endif | |
175 | libc_hidden_builtin_def (memchr) |