]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/vec-perm-indices.c
Correct a function pre/postcondition [PR102403].
[thirdparty/gcc.git] / gcc / vec-perm-indices.c
CommitLineData
f151c9e1 1/* A representation of vector permutation indices.
99dee823 2 Copyright (C) 2017-2021 Free Software Foundation, Inc.
f151c9e1
RS
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "vec-perm-indices.h"
24#include "tree.h"
e3342de4
RS
25#include "fold-const.h"
26#include "tree-vector-builder.h"
f151c9e1
RS
27#include "backend.h"
28#include "rtl.h"
29#include "memmodel.h"
30#include "emit-rtl.h"
1a1c441d 31#include "selftest.h"
3d8ca53d 32#include "rtx-vector-builder.h"
f151c9e1 33
e3342de4
RS
34/* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
37
38void
39vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
0ecc2b7d 41 poly_uint64 nelts_per_input)
e3342de4
RS
42{
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
0ecc2b7d
RS
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
49
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (&copy_nelts))
56 m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 else
58 {
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, elements.npatterns (),
61 elements.nelts_per_pattern ());
62 }
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
e3342de4 65 m_encoding.quick_push (clamp (elements.elt (i)));
0ecc2b7d
RS
66 /* Use the fact that:
67
68 (a + b) % c == ((a % c) + (b % c)) % c
69
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
72 {
73 element_type step = clamp (elements.elt (i)
74 - elements.elt (i - npatterns));
75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
76 }
e3342de4
RS
77 m_encoding.finalize ();
78}
79
f151c9e1
RS
80/* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
84
85void
86vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
88{
e3342de4
RS
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 orig.m_encoding.npatterns () * factor,
93 orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
f151c9e1 96 {
e3342de4 97 element_type base = orig.m_encoding[i] * factor;
f151c9e1 98 for (unsigned int j = 0; j < factor; ++j)
e3342de4 99 m_encoding.quick_push (base + j);
f151c9e1 100 }
e3342de4
RS
101 m_encoding.finalize ();
102}
103
4a9f2306
KL
104/* Check whether we can switch to a new permutation vector that
105 selects the same input elements as ORIG, but with each element
106 built up from FACTOR pieces. Return true if yes, otherwise
107 return false. Every FACTOR permutation indexes should be
108 continuous separately and the first one of each batch should
109 be able to exactly modulo FACTOR. For example, if ORIG is
110 { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 is { 1, 2, 0, 3 }. */
112
113bool
114vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 unsigned int factor)
116{
117 gcc_assert (factor > 0);
118
119 if (maybe_lt (orig.m_nelts_per_input, factor))
120 return false;
121
122 poly_uint64 nelts;
123 /* Invalid if vector units number isn't multiple of factor. */
124 if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
125 return false;
126
127 /* Only handle the case that npatterns is multiple of factor.
128 FIXME: Try to see whether we can reshape it by factor npatterns. */
129 if (orig.m_encoding.npatterns () % factor != 0)
130 return false;
131
132 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 auto_vec<element_type, 32> encoding (encoded_nelts);
134 /* Separate all encoded elements into batches by size factor,
135 then ensure the first element of each batch is multiple of
136 factor and all elements in each batch is consecutive from
137 the first one. */
138 for (unsigned int i = 0; i < encoded_nelts; i += factor)
139 {
140 element_type first = orig.m_encoding[i];
141 element_type new_index;
142 if (!multiple_p (first, factor, &new_index))
143 return false;
144 for (unsigned int j = 1; j < factor; ++j)
145 if (maybe_ne (first + j, orig.m_encoding[i + j]))
146 return false;
147 encoding.quick_push (new_index);
148 }
149
150 m_ninputs = orig.m_ninputs;
151 m_nelts_per_input = nelts;
152 poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
153 unsigned int npatterns = orig.m_encoding.npatterns () / factor;
154
155 m_encoding.new_vector (full_nelts, npatterns,
156 orig.m_encoding.nelts_per_pattern ());
157 m_encoding.splice (encoding);
158 m_encoding.finalize ();
159
160 return true;
161}
162
e3342de4
RS
163/* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 the values of the permutation vector but it doesn't change the way that
165 the elements are encoded. */
166
167void
168vec_perm_indices::rotate_inputs (int delta)
169{
170 element_type element_delta = delta * m_nelts_per_input;
171 for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 m_encoding[i] = clamp (m_encoding[i] + element_delta);
f151c9e1
RS
173}
174
1a1c441d 175/* Return true if index OUT_BASE + I * OUT_STEP selects input
65aa25a4
RS
176 element IN_BASE + I * IN_STEP. For example, the call to test
177 whether a permute reverses a vector of N elements would be:
178
179 series_p (0, 1, N - 1, -1)
180
181 which would return true for { N - 1, N - 2, N - 3, ... }.
182 The calls to test for an interleaving of elements starting
183 at N1 and N2 would be:
184
185 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
186
187 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
1a1c441d
RS
188
189bool
190vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 element_type in_base, element_type in_step) const
192{
193 /* Check the base value. */
6b0630fb 194 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
1a1c441d
RS
195 return false;
196
0ecc2b7d 197 element_type full_nelts = m_encoding.full_nelts ();
1a1c441d
RS
198 unsigned int npatterns = m_encoding.npatterns ();
199
200 /* Calculate which multiple of OUT_STEP elements we need to get
201 back to the same pattern. */
202 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
203
204 /* Check the steps. */
205 in_step = clamp (in_step);
206 out_base += out_step;
207 unsigned int limit = 0;
208 for (;;)
209 {
210 /* Succeed if we've checked all the elements in the vector. */
0ecc2b7d 211 if (known_ge (out_base, full_nelts))
1a1c441d
RS
212 return true;
213
214 if (out_base >= npatterns)
215 {
216 /* We've got to the end of the "foreground" values. Check
217 2 elements from each pattern in the "background" values. */
218 if (limit == 0)
219 limit = out_base + cycle_length * 2;
220 else if (out_base >= limit)
221 return true;
222 }
223
224 element_type v0 = m_encoding.elt (out_base - out_step);
225 element_type v1 = m_encoding.elt (out_base);
6b0630fb 226 if (maybe_ne (clamp (v1 - v0), in_step))
1a1c441d
RS
227 return false;
228
229 out_base += out_step;
230 }
231 return true;
232}
233
f151c9e1
RS
234/* Return true if all elements of the permutation vector are in the range
235 [START, START + SIZE). */
236
237bool
238vec_perm_indices::all_in_range_p (element_type start, element_type size) const
239{
e3342de4
RS
240 /* Check the first two elements of each pattern. */
241 unsigned int npatterns = m_encoding.npatterns ();
242 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
243 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
244 for (unsigned int i = 0; i < base_nelts; ++i)
6b0630fb 245 if (!known_in_range_p (m_encoding[i], start, size))
f151c9e1 246 return false;
e3342de4
RS
247
248 /* For stepped encodings, check the full range of the series. */
249 if (nelts_per_pattern == 3)
250 {
251 element_type limit = input_nelts ();
252
253 /* The number of elements in each pattern beyond the first two
254 that we checked above. */
0ecc2b7d
RS
255 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
256 npatterns) - 2;
e3342de4
RS
257 for (unsigned int i = 0; i < npatterns; ++i)
258 {
259 /* BASE1 has been checked but BASE2 hasn't. */
260 element_type base1 = m_encoding[i + npatterns];
261 element_type base2 = m_encoding[i + base_nelts];
262
263 /* The step to add to get from BASE1 to each subsequent value. */
264 element_type step = clamp (base2 - base1);
265
266 /* STEP has no inherent sign, so a value near LIMIT can
267 act as a negative step. The series is in range if it
268 is in range according to one of the two interpretations.
269
270 Since we're dealing with clamped values, ELEMENT_TYPE is
271 wide enough for overflow not to be a problem. */
272 element_type headroom_down = base1 - start;
273 element_type headroom_up = size - headroom_down - 1;
6b0630fb
RS
274 HOST_WIDE_INT diff;
275 if ((!step.is_constant (&diff)
276 || maybe_lt (headroom_up, diff * step_nelts))
277 && (!(limit - step).is_constant (&diff)
278 || maybe_lt (headroom_down, diff * step_nelts)))
e3342de4
RS
279 return false;
280 }
281 }
f151c9e1
RS
282 return true;
283}
284
285/* Try to read the contents of VECTOR_CST CST as a constant permutation
286 vector. Return true and add the elements to BUILDER on success,
287 otherwise return false without modifying BUILDER. */
288
289bool
290tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
291{
e3342de4
RS
292 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
293 for (unsigned int i = 0; i < encoded_nelts; ++i)
6b0630fb 294 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
f151c9e1
RS
295 return false;
296
e3342de4
RS
297 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
298 VECTOR_CST_NPATTERNS (cst),
299 VECTOR_CST_NELTS_PER_PATTERN (cst));
300 for (unsigned int i = 0; i < encoded_nelts; ++i)
6b0630fb 301 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
f151c9e1
RS
302 return true;
303}
304
736d0f28
RS
305/* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
306
307tree
308vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
309{
0ecc2b7d 310 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
736d0f28
RS
311 tree_vector_builder sel (type, indices.encoding ().npatterns (),
312 indices.encoding ().nelts_per_pattern ());
313 unsigned int encoded_nelts = sel.encoded_nelts ();
314 for (unsigned int i = 0; i < encoded_nelts; i++)
315 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
316 return sel.build ();
317}
318
f151c9e1
RS
319/* Return a CONST_VECTOR of mode MODE that contains the elements of
320 INDICES. */
321
322rtx
323vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
324{
325 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
0ecc2b7d 326 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
3d8ca53d
RS
327 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
328 indices.encoding ().nelts_per_pattern ());
329 unsigned int encoded_nelts = sel.encoded_nelts ();
330 for (unsigned int i = 0; i < encoded_nelts; i++)
331 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
332 return sel.build ();
f151c9e1 333}
1a1c441d
RS
334
335#if CHECKING_P
336
337namespace selftest {
338
339/* Test a 12-element vector. */
340
341static void
342test_vec_perm_12 (void)
343{
344 vec_perm_builder builder (12, 12, 1);
345 for (unsigned int i = 0; i < 4; ++i)
346 {
347 builder.quick_push (i * 5);
348 builder.quick_push (3 + i);
349 builder.quick_push (2 + 3 * i);
350 }
351 vec_perm_indices indices (builder, 1, 12);
352 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
353 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
354 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
355 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
356 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
357
358 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
359 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
360
361 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
362 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
363
364 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
365 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
366
367 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
368 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
369 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
370}
371
372/* Run selftests for this file. */
373
374void
375vec_perm_indices_c_tests ()
376{
377 test_vec_perm_12 ();
378}
379
380} // namespace selftest
381
382#endif