bool need_insert = false;
bool have_series = false;
+ poly_int64 len = d->perm.length ();
+ bool need_modulo = !len.is_constant ();
+
/* Check for a full series. */
if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1))
have_series = true;
need_insert = true;
}
- if (!have_series)
+ /* A permute like {0, 3, 2, 1} is recognized as a series because series_p
+ also allows wrapping/modulo of the permute index. The step would be 3 and
+ the indices are correct modulo 4. As noted in expand_vec_perm, vrgather
+ does not handle wrapping but rather zeros out-of-bounds indices.
+ For fixed-length vectors this means we would need to emit an explicit
+ modulo operation here, which does not seem worth it. We rather defer to
+ the generic handling instead.
+ Even in the non-wrapping case it is doubtful whether
+ vid
+ vmul
+ vrgather
+ is preferable over
+ vle
+ vrgather.
+ If the permute mask can be reused there shouldn't be any difference and
+ otherwise it becomes a question of load bandwidth. */
+ if (have_series && len.is_constant ())
+ {
+ int64_t step = need_insert ? step2.to_constant () : step1.to_constant ();
+ int prec = GET_MODE_PRECISION (GET_MODE_INNER (d->vmode));
+ wide_int wlen = wide_int::from (len.to_constant (), prec * 2, SIGNED);
+ wide_int wstep = wide_int::from (step, prec * 2, SIGNED);
+ wide_int result = wi::mul (wlen, wstep);
+ if (wi::gt_p (result, wlen, SIGNED))
+ need_modulo = true;
+ }
+
+ if (!have_series || (len.is_constant () && need_modulo))
return false;
/* Disable shuffle if we can't find an appropriate integer index mode for
expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode),
gen_int_mode (need_insert ? step2 : step1, eltmode));
+ if (need_modulo)
+ {
+ rtx mod = gen_const_vector_dup (sel_mode, len - 1);
+ series = expand_simple_binop (sel_mode, AND, series, mod, NULL,
+ 0, OPTAB_DIRECT);
+ }
+
/* Insert the remaining element if necessary. */
if (need_insert)
{
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O0" } */
+
+#include <stdint.h>
+/* Element type of the vector under test. */
+typedef uint32_t a;
+typedef uint64_t uint64;
+
+/* Receives the element that c reads back from the permuted vector. */
+uint64 b;
+/* Vector of four elements of type a; only three initializers are given, so
+   the fourth element is zero-initialized. */
+__attribute__ ((__vector_size__ (4 * sizeof (a)))) a f = {504339, 7, 3};
+/* Pointer through which c stores its result; initialized to point at b. */
+uint64 *g = &b;
+
+int32_t *
+c (uint8_t, int32_t *, uint32_t, uint32_t, int64_t);
+/* Calls c with placeholder arguments and always returns 0. The pointer
+   returned by c is ignored; only c's effect on the globals is of interest. */
+int8_t
+d ()
+{
+ int32_t e;
+ c (0, &e, 0, 0, 1);
+ return 0;
+}
+
+/* Permutes the global vector f in place with the selector {0, 3, 2, 1},
+   stores element 2 of the result through g, and returns j unchanged.
+   The unnamed parameters are not used. */
+int32_t *
+c (uint8_t, int32_t *j, uint32_t, uint32_t, int64_t)
+{
+ /* Both shuffle operands are f, so every selected element comes from f. */
+ f = __builtin_shufflevector (f, f, 0, 3, 2, 1);
+ *g = f[2];
+ return j;
+}
+
+/* Runs the permute via d and checks the value stored in b. The selector
+   {0, 3, 2, 1} keeps element 2 of f in place, and that element is
+   initialized to 3, so any other value in b aborts the test. */
+int
+main ()
+{
+ d ();
+ if (b != 3)
+ __builtin_abort ();
+}