]>
Commit | Line | Data |
---|---|---|
1 | /* A representation of vector permutation indices. | |
2 | Copyright (C) 2017-2024 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "vec-perm-indices.h" | |
24 | #include "tree.h" | |
25 | #include "fold-const.h" | |
26 | #include "tree-vector-builder.h" | |
27 | #include "backend.h" | |
28 | #include "rtl.h" | |
29 | #include "memmodel.h" | |
30 | #include "emit-rtl.h" | |
31 | #include "selftest.h" | |
32 | #include "rtx-vector-builder.h" | |
33 | ||
34 | /* Switch to a new permutation vector that selects between NINPUTS vector | |
35 | inputs that have NELTS_PER_INPUT elements each. Take the elements of the | |
36 | new permutation vector from ELEMENTS, clamping each one to be in range. */ | |
37 | ||
38 | void | |
39 | vec_perm_indices::new_vector (const vec_perm_builder &elements, | |
40 | unsigned int ninputs, | |
41 | poly_uint64 nelts_per_input) | |
42 | { | |
43 | m_ninputs = ninputs; | |
44 | m_nelts_per_input = nelts_per_input; | |
45 | /* If the vector has a constant number of elements, expand the | |
46 | encoding and clamp each element. E.g. { 0, 2, 4, ... } might | |
47 | wrap halfway if there is only one vector input, and we want | |
48 | the wrapped form to be the canonical one. | |
49 | ||
50 | If the vector has a variable number of elements, just copy | |
51 | the encoding. In that case the unwrapped form is canonical | |
52 | and there is no way of representing the wrapped form. */ | |
53 | poly_uint64 full_nelts = elements.full_nelts (); | |
54 | unsigned HOST_WIDE_INT copy_nelts; | |
55 | if (full_nelts.is_constant (©_nelts)) | |
56 | m_encoding.new_vector (full_nelts, copy_nelts, 1); | |
57 | else | |
58 | { | |
59 | copy_nelts = elements.encoded_nelts (); | |
60 | m_encoding.new_vector (full_nelts, elements.npatterns (), | |
61 | elements.nelts_per_pattern ()); | |
62 | } | |
63 | unsigned int npatterns = m_encoding.npatterns (); | |
64 | for (unsigned int i = 0; i < npatterns; ++i) | |
65 | m_encoding.quick_push (clamp (elements.elt (i))); | |
66 | /* Use the fact that: | |
67 | ||
68 | (a + b) % c == ((a % c) + (b % c)) % c | |
69 | ||
70 | to simplify the clamping of variable-length vectors. */ | |
71 | for (unsigned int i = npatterns; i < copy_nelts; ++i) | |
72 | { | |
73 | element_type step = clamp (elements.elt (i) | |
74 | - elements.elt (i - npatterns)); | |
75 | m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step)); | |
76 | } | |
77 | m_encoding.finalize (); | |
78 | } | |
79 | ||
80 | /* Switch to a new permutation vector that selects the same input elements | |
81 | as ORIG, but with each element split into FACTOR pieces. For example, | |
82 | if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is | |
83 | { 2, 3, 4, 5, 0, 1, 6, 7 }. */ | |
84 | ||
85 | void | |
86 | vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig, | |
87 | unsigned int factor) | |
88 | { | |
89 | m_ninputs = orig.m_ninputs; | |
90 | m_nelts_per_input = orig.m_nelts_per_input * factor; | |
91 | m_encoding.new_vector (orig.m_encoding.full_nelts () * factor, | |
92 | orig.m_encoding.npatterns () * factor, | |
93 | orig.m_encoding.nelts_per_pattern ()); | |
94 | unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); | |
95 | for (unsigned int i = 0; i < encoded_nelts; ++i) | |
96 | { | |
97 | element_type base = orig.m_encoding[i] * factor; | |
98 | for (unsigned int j = 0; j < factor; ++j) | |
99 | m_encoding.quick_push (base + j); | |
100 | } | |
101 | m_encoding.finalize (); | |
102 | } | |
103 | ||
104 | /* Check whether we can switch to a new permutation vector that | |
105 | selects the same input elements as ORIG, but with each element | |
106 | built up from FACTOR pieces. Return true if yes, otherwise | |
107 | return false. Every FACTOR permutation indexes should be | |
108 | continuous separately and the first one of each batch should | |
109 | be able to exactly modulo FACTOR. For example, if ORIG is | |
110 | { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation | |
111 | is { 1, 2, 0, 3 }. */ | |
112 | ||
113 | bool | |
114 | vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig, | |
115 | unsigned int factor) | |
116 | { | |
117 | gcc_assert (factor > 0); | |
118 | ||
119 | if (maybe_lt (orig.m_nelts_per_input, factor)) | |
120 | return false; | |
121 | ||
122 | poly_uint64 nelts; | |
123 | /* Invalid if vector units number isn't multiple of factor. */ | |
124 | if (!multiple_p (orig.m_nelts_per_input, factor, &nelts)) | |
125 | return false; | |
126 | ||
127 | /* Only handle the case that npatterns is multiple of factor. | |
128 | FIXME: Try to see whether we can reshape it by factor npatterns. */ | |
129 | if (orig.m_encoding.npatterns () % factor != 0) | |
130 | return false; | |
131 | ||
132 | unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); | |
133 | auto_vec<element_type, 32> encoding (encoded_nelts); | |
134 | /* Separate all encoded elements into batches by size factor, | |
135 | then ensure the first element of each batch is multiple of | |
136 | factor and all elements in each batch is consecutive from | |
137 | the first one. */ | |
138 | for (unsigned int i = 0; i < encoded_nelts; i += factor) | |
139 | { | |
140 | element_type first = orig.m_encoding[i]; | |
141 | element_type new_index; | |
142 | if (!multiple_p (first, factor, &new_index)) | |
143 | return false; | |
144 | for (unsigned int j = 1; j < factor; ++j) | |
145 | if (maybe_ne (first + j, orig.m_encoding[i + j])) | |
146 | return false; | |
147 | encoding.quick_push (new_index); | |
148 | } | |
149 | ||
150 | m_ninputs = orig.m_ninputs; | |
151 | m_nelts_per_input = nelts; | |
152 | poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor); | |
153 | unsigned int npatterns = orig.m_encoding.npatterns () / factor; | |
154 | ||
155 | m_encoding.new_vector (full_nelts, npatterns, | |
156 | orig.m_encoding.nelts_per_pattern ()); | |
157 | m_encoding.splice (encoding); | |
158 | m_encoding.finalize (); | |
159 | ||
160 | return true; | |
161 | } | |
162 | ||
163 | /* Rotate the inputs of the permutation right by DELTA inputs. This changes | |
164 | the values of the permutation vector but it doesn't change the way that | |
165 | the elements are encoded. */ | |
166 | ||
167 | void | |
168 | vec_perm_indices::rotate_inputs (int delta) | |
169 | { | |
170 | element_type element_delta = delta * m_nelts_per_input; | |
171 | for (unsigned int i = 0; i < m_encoding.length (); ++i) | |
172 | m_encoding[i] = clamp (m_encoding[i] + element_delta); | |
173 | } | |
174 | ||
175 | /* Return true if index OUT_BASE + I * OUT_STEP selects input | |
176 | element IN_BASE + I * IN_STEP. For example, the call to test | |
177 | whether a permute reverses a vector of N elements would be: | |
178 | ||
179 | series_p (0, 1, N - 1, -1) | |
180 | ||
181 | which would return true for { N - 1, N - 2, N - 3, ... }. | |
182 | The calls to test for an interleaving of elements starting | |
183 | at N1 and N2 would be: | |
184 | ||
185 | series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1). | |
186 | ||
187 | which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */ | |
188 | ||
189 | bool | |
190 | vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, | |
191 | element_type in_base, element_type in_step) const | |
192 | { | |
193 | /* Check the base value. */ | |
194 | if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base))) | |
195 | return false; | |
196 | ||
197 | element_type full_nelts = m_encoding.full_nelts (); | |
198 | unsigned int npatterns = m_encoding.npatterns (); | |
199 | ||
200 | /* Calculate which multiple of OUT_STEP elements we need to get | |
201 | back to the same pattern. */ | |
202 | unsigned int cycle_length = least_common_multiple (out_step, npatterns); | |
203 | ||
204 | /* Check the steps. */ | |
205 | in_step = clamp (in_step); | |
206 | out_base += out_step; | |
207 | unsigned int limit = 0; | |
208 | for (;;) | |
209 | { | |
210 | /* Succeed if we've checked all the elements in the vector. */ | |
211 | if (known_ge (out_base, full_nelts)) | |
212 | return true; | |
213 | ||
214 | if (out_base >= npatterns) | |
215 | { | |
216 | /* We've got to the end of the "foreground" values. Check | |
217 | 2 elements from each pattern in the "background" values. */ | |
218 | if (limit == 0) | |
219 | limit = out_base + cycle_length * 2; | |
220 | else if (out_base >= limit) | |
221 | return true; | |
222 | } | |
223 | ||
224 | element_type v0 = m_encoding.elt (out_base - out_step); | |
225 | element_type v1 = m_encoding.elt (out_base); | |
226 | if (maybe_ne (clamp (v1 - v0), in_step)) | |
227 | return false; | |
228 | ||
229 | out_base += out_step; | |
230 | } | |
231 | } | |
232 | ||
233 | /* Return true if all elements of the permutation vector are in the range | |
234 | [START, START + SIZE). */ | |
235 | ||
236 | bool | |
237 | vec_perm_indices::all_in_range_p (element_type start, element_type size) const | |
238 | { | |
239 | /* Check the first two elements of each pattern. */ | |
240 | unsigned int npatterns = m_encoding.npatterns (); | |
241 | unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern (); | |
242 | unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2); | |
243 | for (unsigned int i = 0; i < base_nelts; ++i) | |
244 | if (!known_in_range_p (m_encoding[i], start, size)) | |
245 | return false; | |
246 | ||
247 | /* For stepped encodings, check the full range of the series. */ | |
248 | if (nelts_per_pattern == 3) | |
249 | { | |
250 | element_type limit = input_nelts (); | |
251 | ||
252 | /* The number of elements in each pattern beyond the first two | |
253 | that we checked above. */ | |
254 | poly_int64 step_nelts = exact_div (m_encoding.full_nelts (), | |
255 | npatterns) - 2; | |
256 | for (unsigned int i = 0; i < npatterns; ++i) | |
257 | { | |
258 | /* BASE1 has been checked but BASE2 hasn't. */ | |
259 | element_type base1 = m_encoding[i + npatterns]; | |
260 | element_type base2 = m_encoding[i + base_nelts]; | |
261 | ||
262 | /* The step to add to get from BASE1 to each subsequent value. */ | |
263 | element_type step = clamp (base2 - base1); | |
264 | ||
265 | /* STEP has no inherent sign, so a value near LIMIT can | |
266 | act as a negative step. The series is in range if it | |
267 | is in range according to one of the two interpretations. | |
268 | ||
269 | Since we're dealing with clamped values, ELEMENT_TYPE is | |
270 | wide enough for overflow not to be a problem. */ | |
271 | element_type headroom_down = base1 - start; | |
272 | element_type headroom_up = size - headroom_down - 1; | |
273 | HOST_WIDE_INT diff; | |
274 | if ((!step.is_constant (&diff) | |
275 | || maybe_lt (headroom_up, diff * step_nelts)) | |
276 | && (!(limit - step).is_constant (&diff) | |
277 | || maybe_lt (headroom_down, diff * step_nelts))) | |
278 | return false; | |
279 | } | |
280 | } | |
281 | return true; | |
282 | } | |
283 | ||
284 | /* Try to read the contents of VECTOR_CST CST as a constant permutation | |
285 | vector. Return true and add the elements to BUILDER on success, | |
286 | otherwise return false without modifying BUILDER. */ | |
287 | ||
288 | bool | |
289 | tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst) | |
290 | { | |
291 | unsigned int encoded_nelts = vector_cst_encoded_nelts (cst); | |
292 | for (unsigned int i = 0; i < encoded_nelts; ++i) | |
293 | if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i))) | |
294 | return false; | |
295 | ||
296 | builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)), | |
297 | VECTOR_CST_NPATTERNS (cst), | |
298 | VECTOR_CST_NELTS_PER_PATTERN (cst)); | |
299 | for (unsigned int i = 0; i < encoded_nelts; ++i) | |
300 | builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i))); | |
301 | return true; | |
302 | } | |
303 | ||
304 | /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */ | |
305 | ||
306 | tree | |
307 | vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices) | |
308 | { | |
309 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ())); | |
310 | tree_vector_builder sel (type, indices.encoding ().npatterns (), | |
311 | indices.encoding ().nelts_per_pattern ()); | |
312 | unsigned int encoded_nelts = sel.encoded_nelts (); | |
313 | for (unsigned int i = 0; i < encoded_nelts; i++) | |
314 | sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i])); | |
315 | return sel.build (); | |
316 | } | |
317 | ||
318 | /* Return a CONST_VECTOR of mode MODE that contains the elements of | |
319 | INDICES. */ | |
320 | ||
321 | rtx | |
322 | vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) | |
323 | { | |
324 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT | |
325 | && known_eq (GET_MODE_NUNITS (mode), indices.length ())); | |
326 | rtx_vector_builder sel (mode, indices.encoding ().npatterns (), | |
327 | indices.encoding ().nelts_per_pattern ()); | |
328 | unsigned int encoded_nelts = sel.encoded_nelts (); | |
329 | for (unsigned int i = 0; i < encoded_nelts; i++) | |
330 | sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode))); | |
331 | return sel.build (); | |
332 | } | |
333 | ||
334 | #if CHECKING_P | |
335 | ||
336 | namespace selftest { | |
337 | ||
338 | /* Test a 12-element vector. */ | |
339 | ||
340 | static void | |
341 | test_vec_perm_12 (void) | |
342 | { | |
343 | vec_perm_builder builder (12, 12, 1); | |
344 | for (unsigned int i = 0; i < 4; ++i) | |
345 | { | |
346 | builder.quick_push (i * 5); | |
347 | builder.quick_push (3 + i); | |
348 | builder.quick_push (2 + 3 * i); | |
349 | } | |
350 | vec_perm_indices indices (builder, 1, 12); | |
351 | ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); | |
352 | ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); | |
353 | ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); | |
354 | ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); | |
355 | ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); | |
356 | ||
357 | ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); | |
358 | ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); | |
359 | ||
360 | ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); | |
361 | ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); | |
362 | ||
363 | ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); | |
364 | ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); | |
365 | ||
366 | ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); | |
367 | ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); | |
368 | ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); | |
369 | } | |
370 | ||
371 | /* Run selftests for this file. */ | |
372 | ||
373 | void | |
374 | vec_perm_indices_cc_tests () | |
375 | { | |
376 | test_vec_perm_12 (); | |
377 | } | |
378 | ||
379 | } // namespace selftest | |
380 | ||
381 | #endif |