]>
Commit | Line | Data |
---|---|---|
734914b6 | 1 | /* A class for building vector constant patterns. |
8d9254fc | 2 | Copyright (C) 2017-2020 Free Software Foundation, Inc. |
734914b6 RS |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #ifndef GCC_VECTOR_BUILDER_H | |
21 | #define GCC_VECTOR_BUILDER_H | |
22 | ||
23 | /* This class is a wrapper around auto_vec<T> for building vectors of T. | |
24 | It aims to encode each vector as npatterns interleaved patterns, | |
25 | where each pattern represents a sequence: | |
26 | ||
27 | { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... } | |
28 | ||
29 | The first three elements in each pattern provide enough information | |
30 | to derive the other elements. If all patterns have a STEP of zero, | |
31 | we only need to encode the first two elements in each pattern. | |
32 | If BASE1 is also equal to BASE0 for all patterns, we only need to | |
33 | encode the first element in each pattern. The number of encoded | |
34 | elements per pattern is given by nelts_per_pattern. | |
35 | ||
36 | The class can be used in two ways: | |
37 | ||
38 | 1. It can be used to build a full image of the vector, which is then | |
39 | canonicalized by finalize (). In this case npatterns is initially | |
40 | the number of elements in the vector and nelts_per_pattern is | |
41 | initially 1. | |
42 | ||
43 | 2. It can be used to build a vector that already has a known encoding. | |
44 | This is preferred since it is more efficient and copes with | |
45 | variable-length vectors. finalize () then canonicalizes the encoding | |
46 | to a simpler form if possible. | |
47 | ||
4ce6ab68 RS |
48 | Shape is the type that specifies the number of elements in the vector |
49 | and (where relevant) the type of each element. | |
50 | ||
51 | The derived class Derived provides the functionality of this class | |
52 | for specific Ts. Derived needs to provide the following interface: | |
734914b6 RS |
53 | |
54 | bool equal_p (T elt1, T elt2) const; | |
55 | ||
56 | Return true if elements ELT1 and ELT2 are equal. | |
57 | ||
58 | bool allow_steps_p () const; | |
59 | ||
60 | Return true if a stepped representation is OK. We don't allow | |
61 | linear series for anything other than integers, to avoid problems | |
62 | with rounding. | |
63 | ||
64 | bool integral_p (T elt) const; | |
65 | ||
66 | Return true if element ELT can be interpreted as an integer. | |
67 | ||
68 | StepType step (T elt1, T elt2) const; | |
69 | ||
70 | Return the value of element ELT2 minus the value of element ELT1, | |
71 | given integral_p (ELT1) && integral_p (ELT2). There is no fixed | |
72 | choice of StepType. | |
73 | ||
abe73c3d RS |
74 | T apply_step (T base, unsigned int factor, StepType step) const; |
75 | ||
76 | Return a vector element with the value BASE + FACTOR * STEP. | |
77 | ||
734914b6 RS |
78 | bool can_elide_p (T elt) const; |
79 | ||
80 | Return true if we can drop element ELT, even if the retained | |
81 | elements are different. This is provided for TREE_OVERFLOW | |
82 | handling. | |
83 | ||
84 | void note_representative (T *elt1_ptr, T elt2); | |
85 | ||
86 | Record that ELT2 is being elided, given that ELT1_PTR points to | |
87 | the last encoded element for the containing pattern. This is | |
4ce6ab68 RS |
88 | again provided for TREE_OVERFLOW handling. |
89 | ||
90 | static poly_uint64 shape_nelts (Shape shape); | |
91 | ||
92 | Return the number of elements in SHAPE. | |
93 | ||
94 | The class provides additional functionality for the case in which | |
95 | T can describe a vector constant as well as an individual element. | |
96 | This functionality requires: | |
97 | ||
98 | static poly_uint64 nelts_of (T x); | |
99 | ||
100 | Return the number of elements in vector constant X. | |
734914b6 | 101 | |
4ce6ab68 RS |
102 | static unsigned int npatterns_of (T x); |
103 | ||
104 | Return the number of patterns used to encode vector constant X. | |
105 | ||
106 | static unsigned int nelts_per_pattern_of (T x); | |
107 | ||
108 | Return the number of elements used to encode each pattern | |
109 | in vector constant X. */ | |
110 | ||
111 | template<typename T, typename Shape, typename Derived> | |
734914b6 RS |
112 | class vector_builder : public auto_vec<T, 32> |
113 | { | |
114 | public: | |
115 | vector_builder (); | |
116 | ||
0ecc2b7d | 117 | poly_uint64 full_nelts () const { return m_full_nelts; } |
734914b6 RS |
118 | unsigned int npatterns () const { return m_npatterns; } |
119 | unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; } | |
120 | unsigned int encoded_nelts () const; | |
121 | bool encoded_full_vector_p () const; | |
abe73c3d | 122 | T elt (unsigned int) const; |
3a0afad0 | 123 | unsigned int count_dups (int, int, int) const; |
734914b6 | 124 | |
1a1c441d RS |
125 | bool operator == (const Derived &) const; |
126 | bool operator != (const Derived &x) const { return !operator == (x); } | |
127 | ||
4ce6ab68 RS |
128 | bool new_unary_operation (Shape, T, bool); |
129 | bool new_binary_operation (Shape, T, T, bool); | |
130 | ||
734914b6 RS |
131 | void finalize (); |
132 | ||
4ce6ab68 RS |
133 | static unsigned int binary_encoded_nelts (T, T); |
134 | ||
734914b6 | 135 | protected: |
0ecc2b7d | 136 | void new_vector (poly_uint64, unsigned int, unsigned int); |
734914b6 RS |
137 | void reshape (unsigned int, unsigned int); |
138 | bool repeating_sequence_p (unsigned int, unsigned int, unsigned int); | |
139 | bool stepped_sequence_p (unsigned int, unsigned int, unsigned int); | |
140 | bool try_npatterns (unsigned int); | |
141 | ||
142 | private: | |
143 | vector_builder (const vector_builder &); | |
144 | vector_builder &operator= (const vector_builder &); | |
145 | Derived *derived () { return static_cast<Derived *> (this); } | |
146 | const Derived *derived () const; | |
147 | ||
0ecc2b7d | 148 | poly_uint64 m_full_nelts; |
734914b6 RS |
149 | unsigned int m_npatterns; |
150 | unsigned int m_nelts_per_pattern; | |
151 | }; | |
152 | ||
4ce6ab68 | 153 | template<typename T, typename Shape, typename Derived> |
734914b6 | 154 | inline const Derived * |
4ce6ab68 | 155 | vector_builder<T, Shape, Derived>::derived () const |
734914b6 RS |
156 | { |
157 | return static_cast<const Derived *> (this); | |
158 | } | |
159 | ||
4ce6ab68 | 160 | template<typename T, typename Shape, typename Derived> |
734914b6 | 161 | inline |
4ce6ab68 | 162 | vector_builder<T, Shape, Derived>::vector_builder () |
734914b6 RS |
163 | : m_full_nelts (0), |
164 | m_npatterns (0), | |
165 | m_nelts_per_pattern (0) | |
166 | {} | |
167 | ||
168 | /* Return the number of elements that are explicitly encoded. The vec | |
169 | starts with these explicitly-encoded elements and may contain additional | |
170 | elided elements. */ | |
171 | ||
4ce6ab68 | 172 | template<typename T, typename Shape, typename Derived> |
734914b6 | 173 | inline unsigned int |
4ce6ab68 | 174 | vector_builder<T, Shape, Derived>::encoded_nelts () const |
734914b6 RS |
175 | { |
176 | return m_npatterns * m_nelts_per_pattern; | |
177 | } | |
178 | ||
179 | /* Return true if every element of the vector is explicitly encoded. */ | |
180 | ||
4ce6ab68 | 181 | template<typename T, typename Shape, typename Derived> |
734914b6 | 182 | inline bool |
4ce6ab68 | 183 | vector_builder<T, Shape, Derived>::encoded_full_vector_p () const |
734914b6 | 184 | { |
0ecc2b7d | 185 | return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); |
734914b6 RS |
186 | } |
187 | ||
188 | /* Start building a vector that has FULL_NELTS elements. Initially | |
189 | encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ | |
190 | ||
4ce6ab68 | 191 | template<typename T, typename Shape, typename Derived> |
734914b6 | 192 | void |
4ce6ab68 RS |
193 | vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts, |
194 | unsigned int npatterns, | |
195 | unsigned int nelts_per_pattern) | |
734914b6 RS |
196 | { |
197 | m_full_nelts = full_nelts; | |
198 | m_npatterns = npatterns; | |
199 | m_nelts_per_pattern = nelts_per_pattern; | |
200 | this->reserve (encoded_nelts ()); | |
201 | this->truncate (0); | |
202 | } | |
203 | ||
1a1c441d RS |
204 | /* Return true if this vector and OTHER have the same elements and |
205 | are encoded in the same way. */ | |
206 | ||
4ce6ab68 | 207 | template<typename T, typename Shape, typename Derived> |
1a1c441d | 208 | bool |
4ce6ab68 | 209 | vector_builder<T, Shape, Derived>::operator == (const Derived &other) const |
1a1c441d | 210 | { |
0ecc2b7d | 211 | if (maybe_ne (m_full_nelts, other.m_full_nelts) |
1a1c441d RS |
212 | || m_npatterns != other.m_npatterns |
213 | || m_nelts_per_pattern != other.m_nelts_per_pattern) | |
214 | return false; | |
215 | ||
216 | unsigned int nelts = encoded_nelts (); | |
217 | for (unsigned int i = 0; i < nelts; ++i) | |
218 | if (!derived ()->equal_p ((*this)[i], other[i])) | |
219 | return false; | |
220 | ||
221 | return true; | |
222 | } | |
223 | ||
abe73c3d RS |
224 | /* Return the value of vector element I, which might or might not be |
225 | encoded explicitly. */ | |
226 | ||
4ce6ab68 | 227 | template<typename T, typename Shape, typename Derived> |
abe73c3d | 228 | T |
4ce6ab68 | 229 | vector_builder<T, Shape, Derived>::elt (unsigned int i) const |
abe73c3d | 230 | { |
abe73c3d RS |
231 | /* First handle elements that are already present in the underlying |
232 | vector, regardless of whether they're part of the encoding or not. */ | |
233 | if (i < this->length ()) | |
234 | return (*this)[i]; | |
235 | ||
72ab1c51 RS |
236 | /* Extrapolation is only possible if the encoding has been fully |
237 | populated. */ | |
238 | gcc_checking_assert (encoded_nelts () <= this->length ()); | |
239 | ||
abe73c3d RS |
240 | /* Identify the pattern that contains element I and work out the index of |
241 | the last encoded element for that pattern. */ | |
242 | unsigned int pattern = i % m_npatterns; | |
243 | unsigned int count = i / m_npatterns; | |
244 | unsigned int final_i = encoded_nelts () - m_npatterns + pattern; | |
245 | T final = (*this)[final_i]; | |
246 | ||
247 | /* If there are no steps, the final encoded value is the right one. */ | |
248 | if (m_nelts_per_pattern <= 2) | |
249 | return final; | |
250 | ||
251 | /* Otherwise work out the value from the last two encoded elements. */ | |
252 | T prev = (*this)[final_i - m_npatterns]; | |
253 | return derived ()->apply_step (final, count - 2, | |
254 | derived ()->step (prev, final)); | |
255 | } | |
256 | ||
4ce6ab68 RS |
257 | /* Try to start building a new vector of shape SHAPE that holds the result of |
258 | a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the | |
259 | operation can handle stepped encodings directly, without having to expand | |
260 | the full sequence. | |
261 | ||
262 | Return true if the operation is possible, which it always is when | |
263 | ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ | |
264 | ||
265 | template<typename T, typename Shape, typename Derived> | |
266 | bool | |
267 | vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec, | |
268 | bool allow_stepped_p) | |
269 | { | |
270 | poly_uint64 full_nelts = Derived::shape_nelts (shape); | |
271 | gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec))); | |
272 | unsigned int npatterns = Derived::npatterns_of (vec); | |
273 | unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec); | |
274 | if (!allow_stepped_p && nelts_per_pattern > 2) | |
275 | { | |
276 | if (!full_nelts.is_constant ()) | |
277 | return false; | |
278 | npatterns = full_nelts.to_constant (); | |
279 | nelts_per_pattern = 1; | |
280 | } | |
281 | derived ()->new_vector (shape, npatterns, nelts_per_pattern); | |
282 | return true; | |
283 | } | |
284 | ||
285 | /* Try to start building a new vector of shape SHAPE that holds the result of | |
286 | a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is | |
287 | true if the operation can handle stepped encodings directly, without | |
288 | having to expand the full sequence. | |
289 | ||
290 | Return true if the operation is possible. Leave the builder unchanged | |
291 | otherwise. */ | |
292 | ||
293 | template<typename T, typename Shape, typename Derived> | |
294 | bool | |
295 | vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape, | |
296 | T vec1, T vec2, | |
297 | bool allow_stepped_p) | |
298 | { | |
299 | poly_uint64 full_nelts = Derived::shape_nelts (shape); | |
300 | gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1)) | |
301 | && known_eq (full_nelts, Derived::nelts_of (vec2))); | |
302 | /* Conceptually we split the patterns in VEC1 and VEC2 until we have | |
303 | an equal number for both. Each split pattern requires the same | |
304 | number of elements per pattern as the original. E.g. splitting: | |
305 | ||
306 | { 1, 2, 3, ... } | |
307 | ||
308 | into two gives: | |
309 | ||
310 | { 1, 3, 5, ... } | |
311 | { 2, 4, 6, ... } | |
312 | ||
313 | while splitting: | |
314 | ||
315 | { 1, 0, ... } | |
316 | ||
317 | into two gives: | |
318 | ||
319 | { 1, 0, ... } | |
320 | { 0, 0, ... }. */ | |
321 | unsigned int npatterns | |
322 | = least_common_multiple (Derived::npatterns_of (vec1), | |
323 | Derived::npatterns_of (vec2)); | |
324 | unsigned int nelts_per_pattern | |
325 | = MAX (Derived::nelts_per_pattern_of (vec1), | |
326 | Derived::nelts_per_pattern_of (vec2)); | |
327 | if (!allow_stepped_p && nelts_per_pattern > 2) | |
328 | { | |
329 | if (!full_nelts.is_constant ()) | |
330 | return false; | |
331 | npatterns = full_nelts.to_constant (); | |
332 | nelts_per_pattern = 1; | |
333 | } | |
334 | derived ()->new_vector (shape, npatterns, nelts_per_pattern); | |
335 | return true; | |
336 | } | |
337 | ||
338 | /* Return the number of elements that the caller needs to operate on in | |
339 | order to handle a binary operation on vector constants VEC1 and VEC2. | |
340 | This static function is used instead of new_binary_operation if the | |
341 | result of the operation is not a constant vector. */ | |
342 | ||
343 | template<typename T, typename Shape, typename Derived> | |
344 | unsigned int | |
345 | vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2) | |
346 | { | |
347 | poly_uint64 nelts = Derived::nelts_of (vec1); | |
348 | gcc_assert (known_eq (nelts, Derived::nelts_of (vec2))); | |
349 | /* See new_binary_operation for details. */ | |
350 | unsigned int npatterns | |
351 | = least_common_multiple (Derived::npatterns_of (vec1), | |
352 | Derived::npatterns_of (vec2)); | |
353 | unsigned int nelts_per_pattern | |
354 | = MAX (Derived::nelts_per_pattern_of (vec1), | |
355 | Derived::nelts_per_pattern_of (vec2)); | |
356 | unsigned HOST_WIDE_INT const_nelts; | |
357 | if (nelts.is_constant (&const_nelts)) | |
358 | return MIN (npatterns * nelts_per_pattern, const_nelts); | |
359 | return npatterns * nelts_per_pattern; | |
360 | } | |
361 | ||
3a0afad0 PK |
362 | /* Return the number of leading duplicate elements in the range |
363 | [START:END:STEP]. The value is always at least 1. */ | |
364 | ||
4ce6ab68 | 365 | template<typename T, typename Shape, typename Derived> |
3a0afad0 | 366 | unsigned int |
4ce6ab68 RS |
367 | vector_builder<T, Shape, Derived>::count_dups (int start, int end, |
368 | int step) const | |
3a0afad0 PK |
369 | { |
370 | gcc_assert ((end - start) % step == 0); | |
371 | ||
372 | unsigned int ndups = 1; | |
373 | for (int i = start + step; | |
374 | i != end && derived ()->equal_p (elt (i), elt (start)); | |
375 | i += step) | |
376 | ndups++; | |
377 | return ndups; | |
378 | } | |
379 | ||
734914b6 RS |
380 | /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, |
381 | but without changing the underlying vector. */ | |
382 | ||
4ce6ab68 | 383 | template<typename T, typename Shape, typename Derived> |
734914b6 | 384 | void |
4ce6ab68 RS |
385 | vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns, |
386 | unsigned int nelts_per_pattern) | |
734914b6 RS |
387 | { |
388 | unsigned int old_encoded_nelts = encoded_nelts (); | |
389 | unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; | |
390 | gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts); | |
391 | unsigned int next = new_encoded_nelts - npatterns; | |
392 | for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i) | |
393 | { | |
394 | derived ()->note_representative (&(*this)[next], (*this)[i]); | |
395 | next += 1; | |
396 | if (next == new_encoded_nelts) | |
397 | next -= npatterns; | |
398 | } | |
399 | m_npatterns = npatterns; | |
400 | m_nelts_per_pattern = nelts_per_pattern; | |
401 | } | |
402 | ||
403 | /* Return true if elements [START, END) contain a repeating sequence of | |
404 | STEP elements. */ | |
405 | ||
4ce6ab68 | 406 | template<typename T, typename Shape, typename Derived> |
734914b6 | 407 | bool |
4ce6ab68 RS |
408 | vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start, |
409 | unsigned int end, | |
410 | unsigned int step) | |
734914b6 RS |
411 | { |
412 | for (unsigned int i = start; i < end - step; ++i) | |
413 | if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) | |
414 | return false; | |
415 | return true; | |
416 | } | |
417 | ||
418 | /* Return true if elements [START, END) contain STEP interleaved linear | |
419 | series. */ | |
420 | ||
4ce6ab68 | 421 | template<typename T, typename Shape, typename Derived> |
734914b6 | 422 | bool |
4ce6ab68 RS |
423 | vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start, |
424 | unsigned int end, | |
425 | unsigned int step) | |
734914b6 RS |
426 | { |
427 | if (!derived ()->allow_steps_p ()) | |
428 | return false; | |
429 | ||
430 | for (unsigned int i = start + step * 2; i < end; ++i) | |
431 | { | |
432 | T elt1 = (*this)[i - step * 2]; | |
433 | T elt2 = (*this)[i - step]; | |
434 | T elt3 = (*this)[i]; | |
435 | ||
436 | if (!derived ()->integral_p (elt1) | |
437 | || !derived ()->integral_p (elt2) | |
438 | || !derived ()->integral_p (elt3)) | |
439 | return false; | |
440 | ||
6b0630fb RS |
441 | if (maybe_ne (derived ()->step (elt1, elt2), |
442 | derived ()->step (elt2, elt3))) | |
734914b6 RS |
443 | return false; |
444 | ||
445 | if (!derived ()->can_elide_p (elt3)) | |
446 | return false; | |
447 | } | |
448 | return true; | |
449 | } | |
450 | ||
451 | /* Try to change the number of encoded patterns to NPATTERNS, returning | |
452 | true on success. */ | |
453 | ||
4ce6ab68 | 454 | template<typename T, typename Shape, typename Derived> |
734914b6 | 455 | bool |
4ce6ab68 | 456 | vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns) |
734914b6 RS |
457 | { |
458 | if (m_nelts_per_pattern == 1) | |
459 | { | |
460 | /* See whether NPATTERNS is valid with the current 1-element-per-pattern | |
461 | encoding. */ | |
462 | if (repeating_sequence_p (0, encoded_nelts (), npatterns)) | |
463 | { | |
464 | reshape (npatterns, 1); | |
465 | return true; | |
466 | } | |
467 | ||
468 | /* We can only increase the number of elements per pattern if all | |
469 | elements are still encoded explicitly. */ | |
470 | if (!encoded_full_vector_p ()) | |
471 | return false; | |
472 | } | |
473 | ||
474 | if (m_nelts_per_pattern <= 2) | |
475 | { | |
476 | /* See whether NPATTERNS is valid with a 2-element-per-pattern | |
477 | encoding. */ | |
478 | if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns)) | |
479 | { | |
480 | reshape (npatterns, 2); | |
481 | return true; | |
482 | } | |
483 | ||
484 | /* We can only increase the number of elements per pattern if all | |
485 | elements are still encoded explicitly. */ | |
486 | if (!encoded_full_vector_p ()) | |
487 | return false; | |
488 | } | |
489 | ||
490 | if (m_nelts_per_pattern <= 3) | |
491 | { | |
492 | /* See whether we have NPATTERNS interleaved linear series, | |
493 | giving a 3-element-per-pattern encoding. */ | |
494 | if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns)) | |
495 | { | |
496 | reshape (npatterns, 3); | |
497 | return true; | |
498 | } | |
499 | return false; | |
500 | } | |
501 | ||
502 | gcc_unreachable (); | |
503 | } | |
504 | ||
505 | /* Replace the current encoding with the canonical form. */ | |
506 | ||
4ce6ab68 | 507 | template<typename T, typename Shape, typename Derived> |
734914b6 | 508 | void |
4ce6ab68 | 509 | vector_builder<T, Shape, Derived>::finalize () |
734914b6 RS |
510 | { |
511 | /* The encoding requires the same number of elements to come from each | |
512 | pattern. */ | |
0ecc2b7d | 513 | gcc_assert (multiple_p (m_full_nelts, m_npatterns)); |
734914b6 RS |
514 | |
515 | /* Allow the caller to build more elements than necessary. For example, | |
516 | it's often convenient to build a stepped vector from the natural | |
517 | encoding of three elements even if the vector itself only has two. */ | |
0ecc2b7d RS |
518 | unsigned HOST_WIDE_INT const_full_nelts; |
519 | if (m_full_nelts.is_constant (&const_full_nelts) | |
520 | && const_full_nelts <= encoded_nelts ()) | |
734914b6 | 521 | { |
0ecc2b7d | 522 | m_npatterns = const_full_nelts; |
734914b6 RS |
523 | m_nelts_per_pattern = 1; |
524 | } | |
525 | ||
526 | /* Try to whittle down the number of elements per pattern. That is: | |
527 | ||
528 | 1. If we have stepped patterns whose steps are all 0, reduce the | |
529 | number of elements per pattern from 3 to 2. | |
530 | ||
531 | 2. If we have background fill values that are the same as the | |
532 | foreground values, reduce the number of elements per pattern | |
533 | from 2 to 1. */ | |
534 | while (m_nelts_per_pattern > 1 | |
535 | && repeating_sequence_p (encoded_nelts () - m_npatterns * 2, | |
536 | encoded_nelts (), m_npatterns)) | |
537 | /* The last two sequences of M_NPATTERNS elements are equal, | |
538 | so remove the last one. */ | |
539 | reshape (m_npatterns, m_nelts_per_pattern - 1); | |
540 | ||
541 | if (pow2p_hwi (m_npatterns)) | |
542 | { | |
543 | /* Try to halve the number of patterns while doing so gives a | |
544 | valid pattern. This approach is linear in the number of | |
545 | elements, whereas searcing from 1 up would be O(n*log(n)). | |
546 | ||
547 | Each halving step tries to keep the number of elements per pattern | |
548 | the same. If that isn't possible, and if all elements are still | |
549 | explicitly encoded, the halving step can instead increase the number | |
550 | of elements per pattern. | |
551 | ||
552 | E.g. for: | |
553 | ||
554 | { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8 | |
555 | ||
556 | we first realize that the second half of the sequence is not | |
557 | equal to the first, so we cannot maintain 1 element per pattern | |
558 | for npatterns == 4. Instead we halve the number of patterns | |
559 | and double the number of elements per pattern, treating this | |
560 | as a "foreground" { 0, 2, 3, 4 } against a "background" of | |
561 | { 5, 6, 7, 8 | 5, 6, 7, 8 ... }: | |
562 | ||
563 | { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4 | |
564 | ||
565 | Next we realize that this is *not* a foreround of { 0, 2 } | |
566 | against a background of { 3, 4 | 3, 4 ... }, so the only | |
567 | remaining option for reducing the number of patterns is | |
568 | to use a foreground of { 0, 2 } against a stepped background | |
569 | of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still | |
570 | haven't elided any elements: | |
571 | ||
572 | { 0, 2 | 3, 4 | 5, 6 } npatterns == 2 | |
573 | ||
574 | This in turn can be reduced to a foreground of { 0 } against a | |
575 | stepped background of { 1 | 2 | 3 ... }: | |
576 | ||
577 | { 0 | 2 | 3 } npatterns == 1 | |
578 | ||
579 | This last step would not have been possible for: | |
580 | ||
581 | { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */ | |
582 | while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) | |
583 | continue; | |
584 | ||
585 | /* Builders of arbitrary fixed-length vectors can use: | |
586 | ||
587 | new_vector (x, x, 1) | |
588 | ||
589 | so that every element is specified explicitly. Handle cases | |
590 | that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 } | |
591 | would be for 2-bit elements. We'll have treated them as | |
592 | duplicates in the loop above. */ | |
593 | if (m_nelts_per_pattern == 1 | |
0ecc2b7d RS |
594 | && m_full_nelts.is_constant (&const_full_nelts) |
595 | && this->length () >= const_full_nelts | |
734914b6 | 596 | && (m_npatterns & 3) == 0 |
0ecc2b7d | 597 | && stepped_sequence_p (m_npatterns / 4, const_full_nelts, |
734914b6 RS |
598 | m_npatterns / 4)) |
599 | { | |
600 | reshape (m_npatterns / 4, 3); | |
601 | while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) | |
602 | continue; | |
603 | } | |
604 | } | |
605 | else | |
606 | /* For the non-power-of-2 case, do a simple search up from 1. */ | |
607 | for (unsigned int i = 1; i <= m_npatterns / 2; ++i) | |
608 | if (m_npatterns % i == 0 && try_npatterns (i)) | |
609 | break; | |
610 | } | |
611 | ||
612 | #endif |