]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/alpha/memchr.c
alpha: rewrite memchr.
[thirdparty/glibc.git] / sysdeps / alpha / memchr.c
1 /* Copyright (C) 2010 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19 #include <string.h>
20
21 typedef unsigned long word;
22
23 static inline word
24 ldq_u(const void *s)
25 {
26 return *(const word *)((word)s & -8);
27 }
28
29 #define unlikely(X) __builtin_expect ((X), 0)
30 #define prefetch(X) __builtin_prefetch ((void *)(X), 0)
31
32 #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X))
33 #define find(X, Y) cmpbeq0 ((X) ^ (Y))
34
35 /* Search no more than N bytes of S for C. */
36
37 void *
38 __memchr (const void *s, int xc, size_t n)
39 {
40 const word *s_align;
41 word t, current, found, mask, offset;
42
43 if (unlikely (n == 0))
44 return 0;
45
46 current = ldq_u (s);
47
48 /* Replicate low byte of XC into all bytes of C. */
49 t = xc & 0xff; /* 0000000c */
50 t = (t << 8) | t; /* 000000cc */
51 t = (t << 16) | t; /* 0000cccc */
52 const word c = (t << 32) | t; /* cccccccc */
53
54 /* Align the source, and decrement the count by the number
55 of bytes searched in the first word. */
56 s_align = (const word *)(s & -8);
57 n += (s & 7);
58
59 /* Deal with misalignment in the first word for the comparison. */
60 mask = (1ul << (s & 7)) - 1;
61
62 /* If the entire string fits within one word, we may need masking
63 at both the front and the back of the string. */
64 if (unlikely (n <= 8))
65 {
66 mask |= -1ul << n;
67 goto last_quad;
68 }
69
70 found = find (current, c) & ~mask;
71 if (unlikely (found))
72 goto found_it;
73
74 s_align++;
75 n -= 8;
76
77 /* If the block is sufficiently large, align to cacheline and prefetch. */
78 if (unlikely (n >= 256))
79 {
80 /* Prefetch 3 cache lines beyond the one we're working on. */
81 prefetch (s_align + 8);
82 prefetch (s_align + 16);
83 prefetch (s_align + 24);
84
85 while ((word)s_align & 63)
86 {
87 current = *s_align;
88 found = find (current, c);
89 if (found)
90 goto found_it;
91 s_align++;
92 n -= 8;
93 }
94
95 /* Within each cacheline, advance the load for the next word
96 before the test for the previous word is complete. This
97 allows us to hide the 3 cycle L1 cache load latency. We
98 only perform this advance load within a cacheline to prevent
99 reading across page boundary. */
100 #define CACHELINE_LOOP \
101 do { \
102 word i, next = s_align[0]; \
103 for (i = 0; i < 7; ++i) \
104 { \
105 current = next; \
106 next = s_align[1]; \
107 found = find (current, c); \
108 if (unlikely (found)) \
109 goto found_it; \
110 s_align++; \
111 } \
112 current = next; \
113 found = find (current, c); \
114 if (unlikely (found)) \
115 goto found_it; \
116 s_align++; \
117 n -= 64; \
118 } while (0)
119
120 /* While there's still lots more data to potentially be read,
121 continue issuing prefetches for the 4th cacheline out. */
122 while (n >= 256)
123 {
124 prefetch (s_align + 24);
125 CACHELINE_LOOP;
126 }
127
128 /* Up to 3 cache lines remaining. Continue issuing advanced
129 loads, but stop prefetching. */
130 while (n >= 64)
131 CACHELINE_LOOP;
132
133 /* We may have exhausted the buffer. */
134 if (n == 0)
135 return NULL;
136 }
137
138 /* Quadword aligned loop. */
139 current = *s_align;
140 while (n > 8)
141 {
142 found = find (current, c);
143 if (unlikely (found))
144 goto found_it;
145 current = *++s_align;
146 n -= 8;
147 }
148
149 /* The last word may need masking at the tail of the compare. */
150 mask = -1ul << n;
151 last_quad:
152 found = find (current, c) & ~mask;
153 if (found == 0)
154 return NULL;
155
156 found_it:
157 #ifdef __alpha_cix__
158 offset = __builtin_alpha_cttz (found);
159 #else
160 /* Extract LSB. */
161 found &= -found;
162
163 /* Binary search for the LSB. */
164 offset = (found & 0x0f ? 0 : 4);
165 offset += (found & 0x33 ? 0 : 2);
166 offset += (found & 0x55 ? 0 : 1);
167 #endif
168
169 return (void *)((word)s_align + offset);
170 }
171
172 #ifdef weak_alias
173 weak_alias (__memchr, BP_SYM (memchr))
174 #endif
175 libc_hidden_builtin_def (memchr)