]>
Commit | Line | Data |
---|---|---|
734914b6 | 1 | /* A class for building vector constant patterns. |
a5544970 | 2 | Copyright (C) 2017-2019 Free Software Foundation, Inc. |
734914b6 RS |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #ifndef GCC_VECTOR_BUILDER_H | |
21 | #define GCC_VECTOR_BUILDER_H | |
22 | ||
23 | /* This class is a wrapper around auto_vec<T> for building vectors of T. | |
24 | It aims to encode each vector as npatterns interleaved patterns, | |
25 | where each pattern represents a sequence: | |
26 | ||
27 | { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... } | |
28 | ||
29 | The first three elements in each pattern provide enough information | |
30 | to derive the other elements. If all patterns have a STEP of zero, | |
31 | we only need to encode the first two elements in each pattern. | |
32 | If BASE1 is also equal to BASE0 for all patterns, we only need to | |
33 | encode the first element in each pattern. The number of encoded | |
34 | elements per pattern is given by nelts_per_pattern. | |
35 | ||
36 | The class can be used in two ways: | |
37 | ||
38 | 1. It can be used to build a full image of the vector, which is then | |
39 | canonicalized by finalize (). In this case npatterns is initially | |
40 | the number of elements in the vector and nelts_per_pattern is | |
41 | initially 1. | |
42 | ||
43 | 2. It can be used to build a vector that already has a known encoding. | |
44 | This is preferred since it is more efficient and copes with | |
45 | variable-length vectors. finalize () then canonicalizes the encoding | |
46 | to a simpler form if possible. | |
47 | ||
48 | The derived class Derived provides this functionality for specific Ts. | |
49 | Derived needs to provide the following interface: | |
50 | ||
51 | bool equal_p (T elt1, T elt2) const; | |
52 | ||
53 | Return true if elements ELT1 and ELT2 are equal. | |
54 | ||
55 | bool allow_steps_p () const; | |
56 | ||
57 | Return true if a stepped representation is OK. We don't allow | |
58 | linear series for anything other than integers, to avoid problems | |
59 | with rounding. | |
60 | ||
61 | bool integral_p (T elt) const; | |
62 | ||
63 | Return true if element ELT can be interpreted as an integer. | |
64 | ||
65 | StepType step (T elt1, T elt2) const; | |
66 | ||
67 | Return the value of element ELT2 minus the value of element ELT1, | |
68 | given integral_p (ELT1) && integral_p (ELT2). There is no fixed | |
69 | choice of StepType. | |
70 | ||
abe73c3d RS |
71 | T apply_step (T base, unsigned int factor, StepType step) const; |
72 | ||
73 | Return a vector element with the value BASE + FACTOR * STEP. | |
74 | ||
734914b6 RS |
75 | bool can_elide_p (T elt) const; |
76 | ||
77 | Return true if we can drop element ELT, even if the retained | |
78 | elements are different. This is provided for TREE_OVERFLOW | |
79 | handling. | |
80 | ||
81 | void note_representative (T *elt1_ptr, T elt2); | |
82 | ||
83 | Record that ELT2 is being elided, given that ELT1_PTR points to | |
84 | the last encoded element for the containing pattern. This is | |
85 | again provided for TREE_OVERFLOW handling. */ | |
86 | ||
87 | template<typename T, typename Derived> | |
88 | class vector_builder : public auto_vec<T, 32> | |
89 | { | |
90 | public: | |
91 | vector_builder (); | |
92 | ||
0ecc2b7d | 93 | poly_uint64 full_nelts () const { return m_full_nelts; } |
734914b6 RS |
94 | unsigned int npatterns () const { return m_npatterns; } |
95 | unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; } | |
96 | unsigned int encoded_nelts () const; | |
97 | bool encoded_full_vector_p () const; | |
abe73c3d | 98 | T elt (unsigned int) const; |
3a0afad0 | 99 | unsigned int count_dups (int, int, int) const; |
734914b6 | 100 | |
1a1c441d RS |
101 | bool operator == (const Derived &) const; |
102 | bool operator != (const Derived &x) const { return !operator == (x); } | |
103 | ||
734914b6 RS |
104 | void finalize (); |
105 | ||
106 | protected: | |
0ecc2b7d | 107 | void new_vector (poly_uint64, unsigned int, unsigned int); |
734914b6 RS |
108 | void reshape (unsigned int, unsigned int); |
109 | bool repeating_sequence_p (unsigned int, unsigned int, unsigned int); | |
110 | bool stepped_sequence_p (unsigned int, unsigned int, unsigned int); | |
111 | bool try_npatterns (unsigned int); | |
112 | ||
113 | private: | |
114 | vector_builder (const vector_builder &); | |
115 | vector_builder &operator= (const vector_builder &); | |
116 | Derived *derived () { return static_cast<Derived *> (this); } | |
117 | const Derived *derived () const; | |
118 | ||
0ecc2b7d | 119 | poly_uint64 m_full_nelts; |
734914b6 RS |
120 | unsigned int m_npatterns; |
121 | unsigned int m_nelts_per_pattern; | |
122 | }; | |
123 | ||
124 | template<typename T, typename Derived> | |
125 | inline const Derived * | |
126 | vector_builder<T, Derived>::derived () const | |
127 | { | |
128 | return static_cast<const Derived *> (this); | |
129 | } | |
130 | ||
131 | template<typename T, typename Derived> | |
132 | inline | |
133 | vector_builder<T, Derived>::vector_builder () | |
134 | : m_full_nelts (0), | |
135 | m_npatterns (0), | |
136 | m_nelts_per_pattern (0) | |
137 | {} | |
138 | ||
139 | /* Return the number of elements that are explicitly encoded. The vec | |
140 | starts with these explicitly-encoded elements and may contain additional | |
141 | elided elements. */ | |
142 | ||
143 | template<typename T, typename Derived> | |
144 | inline unsigned int | |
145 | vector_builder<T, Derived>::encoded_nelts () const | |
146 | { | |
147 | return m_npatterns * m_nelts_per_pattern; | |
148 | } | |
149 | ||
150 | /* Return true if every element of the vector is explicitly encoded. */ | |
151 | ||
152 | template<typename T, typename Derived> | |
153 | inline bool | |
154 | vector_builder<T, Derived>::encoded_full_vector_p () const | |
155 | { | |
0ecc2b7d | 156 | return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); |
734914b6 RS |
157 | } |
158 | ||
159 | /* Start building a vector that has FULL_NELTS elements. Initially | |
160 | encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ | |
161 | ||
162 | template<typename T, typename Derived> | |
163 | void | |
0ecc2b7d | 164 | vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts, |
734914b6 RS |
165 | unsigned int npatterns, |
166 | unsigned int nelts_per_pattern) | |
167 | { | |
168 | m_full_nelts = full_nelts; | |
169 | m_npatterns = npatterns; | |
170 | m_nelts_per_pattern = nelts_per_pattern; | |
171 | this->reserve (encoded_nelts ()); | |
172 | this->truncate (0); | |
173 | } | |
174 | ||
1a1c441d RS |
175 | /* Return true if this vector and OTHER have the same elements and |
176 | are encoded in the same way. */ | |
177 | ||
178 | template<typename T, typename Derived> | |
179 | bool | |
180 | vector_builder<T, Derived>::operator == (const Derived &other) const | |
181 | { | |
0ecc2b7d | 182 | if (maybe_ne (m_full_nelts, other.m_full_nelts) |
1a1c441d RS |
183 | || m_npatterns != other.m_npatterns |
184 | || m_nelts_per_pattern != other.m_nelts_per_pattern) | |
185 | return false; | |
186 | ||
187 | unsigned int nelts = encoded_nelts (); | |
188 | for (unsigned int i = 0; i < nelts; ++i) | |
189 | if (!derived ()->equal_p ((*this)[i], other[i])) | |
190 | return false; | |
191 | ||
192 | return true; | |
193 | } | |
194 | ||
abe73c3d RS |
195 | /* Return the value of vector element I, which might or might not be |
196 | encoded explicitly. */ | |
197 | ||
198 | template<typename T, typename Derived> | |
199 | T | |
200 | vector_builder<T, Derived>::elt (unsigned int i) const | |
201 | { | |
202 | /* This only makes sense if the encoding has been fully populated. */ | |
203 | gcc_checking_assert (encoded_nelts () <= this->length ()); | |
204 | ||
205 | /* First handle elements that are already present in the underlying | |
206 | vector, regardless of whether they're part of the encoding or not. */ | |
207 | if (i < this->length ()) | |
208 | return (*this)[i]; | |
209 | ||
210 | /* Identify the pattern that contains element I and work out the index of | |
211 | the last encoded element for that pattern. */ | |
212 | unsigned int pattern = i % m_npatterns; | |
213 | unsigned int count = i / m_npatterns; | |
214 | unsigned int final_i = encoded_nelts () - m_npatterns + pattern; | |
215 | T final = (*this)[final_i]; | |
216 | ||
217 | /* If there are no steps, the final encoded value is the right one. */ | |
218 | if (m_nelts_per_pattern <= 2) | |
219 | return final; | |
220 | ||
221 | /* Otherwise work out the value from the last two encoded elements. */ | |
222 | T prev = (*this)[final_i - m_npatterns]; | |
223 | return derived ()->apply_step (final, count - 2, | |
224 | derived ()->step (prev, final)); | |
225 | } | |
226 | ||
3a0afad0 PK |
227 | /* Return the number of leading duplicate elements in the range |
228 | [START:END:STEP]. The value is always at least 1. */ | |
229 | ||
230 | template<typename T, typename Derived> | |
231 | unsigned int | |
232 | vector_builder<T, Derived>::count_dups (int start, int end, int step) const | |
233 | { | |
234 | gcc_assert ((end - start) % step == 0); | |
235 | ||
236 | unsigned int ndups = 1; | |
237 | for (int i = start + step; | |
238 | i != end && derived ()->equal_p (elt (i), elt (start)); | |
239 | i += step) | |
240 | ndups++; | |
241 | return ndups; | |
242 | } | |
243 | ||
734914b6 RS |
244 | /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, |
245 | but without changing the underlying vector. */ | |
246 | ||
247 | template<typename T, typename Derived> | |
248 | void | |
249 | vector_builder<T, Derived>::reshape (unsigned int npatterns, | |
250 | unsigned int nelts_per_pattern) | |
251 | { | |
252 | unsigned int old_encoded_nelts = encoded_nelts (); | |
253 | unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; | |
254 | gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts); | |
255 | unsigned int next = new_encoded_nelts - npatterns; | |
256 | for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i) | |
257 | { | |
258 | derived ()->note_representative (&(*this)[next], (*this)[i]); | |
259 | next += 1; | |
260 | if (next == new_encoded_nelts) | |
261 | next -= npatterns; | |
262 | } | |
263 | m_npatterns = npatterns; | |
264 | m_nelts_per_pattern = nelts_per_pattern; | |
265 | } | |
266 | ||
267 | /* Return true if elements [START, END) contain a repeating sequence of | |
268 | STEP elements. */ | |
269 | ||
270 | template<typename T, typename Derived> | |
271 | bool | |
272 | vector_builder<T, Derived>::repeating_sequence_p (unsigned int start, | |
273 | unsigned int end, | |
274 | unsigned int step) | |
275 | { | |
276 | for (unsigned int i = start; i < end - step; ++i) | |
277 | if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) | |
278 | return false; | |
279 | return true; | |
280 | } | |
281 | ||
282 | /* Return true if elements [START, END) contain STEP interleaved linear | |
283 | series. */ | |
284 | ||
285 | template<typename T, typename Derived> | |
286 | bool | |
287 | vector_builder<T, Derived>::stepped_sequence_p (unsigned int start, | |
288 | unsigned int end, | |
289 | unsigned int step) | |
290 | { | |
291 | if (!derived ()->allow_steps_p ()) | |
292 | return false; | |
293 | ||
294 | for (unsigned int i = start + step * 2; i < end; ++i) | |
295 | { | |
296 | T elt1 = (*this)[i - step * 2]; | |
297 | T elt2 = (*this)[i - step]; | |
298 | T elt3 = (*this)[i]; | |
299 | ||
300 | if (!derived ()->integral_p (elt1) | |
301 | || !derived ()->integral_p (elt2) | |
302 | || !derived ()->integral_p (elt3)) | |
303 | return false; | |
304 | ||
6b0630fb RS |
305 | if (maybe_ne (derived ()->step (elt1, elt2), |
306 | derived ()->step (elt2, elt3))) | |
734914b6 RS |
307 | return false; |
308 | ||
309 | if (!derived ()->can_elide_p (elt3)) | |
310 | return false; | |
311 | } | |
312 | return true; | |
313 | } | |
314 | ||
315 | /* Try to change the number of encoded patterns to NPATTERNS, returning | |
316 | true on success. */ | |
317 | ||
318 | template<typename T, typename Derived> | |
319 | bool | |
320 | vector_builder<T, Derived>::try_npatterns (unsigned int npatterns) | |
321 | { | |
322 | if (m_nelts_per_pattern == 1) | |
323 | { | |
324 | /* See whether NPATTERNS is valid with the current 1-element-per-pattern | |
325 | encoding. */ | |
326 | if (repeating_sequence_p (0, encoded_nelts (), npatterns)) | |
327 | { | |
328 | reshape (npatterns, 1); | |
329 | return true; | |
330 | } | |
331 | ||
332 | /* We can only increase the number of elements per pattern if all | |
333 | elements are still encoded explicitly. */ | |
334 | if (!encoded_full_vector_p ()) | |
335 | return false; | |
336 | } | |
337 | ||
338 | if (m_nelts_per_pattern <= 2) | |
339 | { | |
340 | /* See whether NPATTERNS is valid with a 2-element-per-pattern | |
341 | encoding. */ | |
342 | if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns)) | |
343 | { | |
344 | reshape (npatterns, 2); | |
345 | return true; | |
346 | } | |
347 | ||
348 | /* We can only increase the number of elements per pattern if all | |
349 | elements are still encoded explicitly. */ | |
350 | if (!encoded_full_vector_p ()) | |
351 | return false; | |
352 | } | |
353 | ||
354 | if (m_nelts_per_pattern <= 3) | |
355 | { | |
356 | /* See whether we have NPATTERNS interleaved linear series, | |
357 | giving a 3-element-per-pattern encoding. */ | |
358 | if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns)) | |
359 | { | |
360 | reshape (npatterns, 3); | |
361 | return true; | |
362 | } | |
363 | return false; | |
364 | } | |
365 | ||
366 | gcc_unreachable (); | |
367 | } | |
368 | ||
369 | /* Replace the current encoding with the canonical form. */ | |
370 | ||
371 | template<typename T, typename Derived> | |
372 | void | |
373 | vector_builder<T, Derived>::finalize () | |
374 | { | |
375 | /* The encoding requires the same number of elements to come from each | |
376 | pattern. */ | |
0ecc2b7d | 377 | gcc_assert (multiple_p (m_full_nelts, m_npatterns)); |
734914b6 RS |
378 | |
379 | /* Allow the caller to build more elements than necessary. For example, | |
380 | it's often convenient to build a stepped vector from the natural | |
381 | encoding of three elements even if the vector itself only has two. */ | |
0ecc2b7d RS |
382 | unsigned HOST_WIDE_INT const_full_nelts; |
383 | if (m_full_nelts.is_constant (&const_full_nelts) | |
384 | && const_full_nelts <= encoded_nelts ()) | |
734914b6 | 385 | { |
0ecc2b7d | 386 | m_npatterns = const_full_nelts; |
734914b6 RS |
387 | m_nelts_per_pattern = 1; |
388 | } | |
389 | ||
390 | /* Try to whittle down the number of elements per pattern. That is: | |
391 | ||
392 | 1. If we have stepped patterns whose steps are all 0, reduce the | |
393 | number of elements per pattern from 3 to 2. | |
394 | ||
395 | 2. If we have background fill values that are the same as the | |
396 | foreground values, reduce the number of elements per pattern | |
397 | from 2 to 1. */ | |
398 | while (m_nelts_per_pattern > 1 | |
399 | && repeating_sequence_p (encoded_nelts () - m_npatterns * 2, | |
400 | encoded_nelts (), m_npatterns)) | |
401 | /* The last two sequences of M_NPATTERNS elements are equal, | |
402 | so remove the last one. */ | |
403 | reshape (m_npatterns, m_nelts_per_pattern - 1); | |
404 | ||
405 | if (pow2p_hwi (m_npatterns)) | |
406 | { | |
407 | /* Try to halve the number of patterns while doing so gives a | |
408 | valid pattern. This approach is linear in the number of | |
409 | elements, whereas searcing from 1 up would be O(n*log(n)). | |
410 | ||
411 | Each halving step tries to keep the number of elements per pattern | |
412 | the same. If that isn't possible, and if all elements are still | |
413 | explicitly encoded, the halving step can instead increase the number | |
414 | of elements per pattern. | |
415 | ||
416 | E.g. for: | |
417 | ||
418 | { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8 | |
419 | ||
420 | we first realize that the second half of the sequence is not | |
421 | equal to the first, so we cannot maintain 1 element per pattern | |
422 | for npatterns == 4. Instead we halve the number of patterns | |
423 | and double the number of elements per pattern, treating this | |
424 | as a "foreground" { 0, 2, 3, 4 } against a "background" of | |
425 | { 5, 6, 7, 8 | 5, 6, 7, 8 ... }: | |
426 | ||
427 | { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4 | |
428 | ||
429 | Next we realize that this is *not* a foreround of { 0, 2 } | |
430 | against a background of { 3, 4 | 3, 4 ... }, so the only | |
431 | remaining option for reducing the number of patterns is | |
432 | to use a foreground of { 0, 2 } against a stepped background | |
433 | of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still | |
434 | haven't elided any elements: | |
435 | ||
436 | { 0, 2 | 3, 4 | 5, 6 } npatterns == 2 | |
437 | ||
438 | This in turn can be reduced to a foreground of { 0 } against a | |
439 | stepped background of { 1 | 2 | 3 ... }: | |
440 | ||
441 | { 0 | 2 | 3 } npatterns == 1 | |
442 | ||
443 | This last step would not have been possible for: | |
444 | ||
445 | { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */ | |
446 | while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) | |
447 | continue; | |
448 | ||
449 | /* Builders of arbitrary fixed-length vectors can use: | |
450 | ||
451 | new_vector (x, x, 1) | |
452 | ||
453 | so that every element is specified explicitly. Handle cases | |
454 | that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 } | |
455 | would be for 2-bit elements. We'll have treated them as | |
456 | duplicates in the loop above. */ | |
457 | if (m_nelts_per_pattern == 1 | |
0ecc2b7d RS |
458 | && m_full_nelts.is_constant (&const_full_nelts) |
459 | && this->length () >= const_full_nelts | |
734914b6 | 460 | && (m_npatterns & 3) == 0 |
0ecc2b7d | 461 | && stepped_sequence_p (m_npatterns / 4, const_full_nelts, |
734914b6 RS |
462 | m_npatterns / 4)) |
463 | { | |
464 | reshape (m_npatterns / 4, 3); | |
465 | while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) | |
466 | continue; | |
467 | } | |
468 | } | |
469 | else | |
470 | /* For the non-power-of-2 case, do a simple search up from 1. */ | |
471 | for (unsigned int i = 1; i <= m_npatterns / 2; ++i) | |
472 | if (m_npatterns % i == 0 && try_npatterns (i)) | |
473 | break; | |
474 | } | |
475 | ||
476 | #endif |