is the first element of OP0. */
bool slideup = false;
bool slidedown = false;
+ bool need_slideup_p = false;
/* For a slideup the permutation must start at OP0's first element. */
if (known_eq (d->perm[0], 0))
if (known_eq (d->perm[vlen - 1], 2 * vlen - 1))
slidedown = true;
+ int slideup_cnt = 0;
if (!slideup && !slidedown)
- return false;
+ {
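+ /* Check if the permutation starts with the end of OP0 followed by the
+ beginning of OP1. In this case we can slide OP1 up by SLIDEUP_CNT and
+ then do a slidedown.
+ NOTE(review): SLIDEUP_CNT is 0 when perm[vlen - 1] % vlen == vlen - 1,
+ which makes the index SLIDEUP_CNT - 1 below negative; confirm that case
+ cannot reach this point. */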
+ slideup_cnt = vlen - (d->perm[vlen - 1].to_constant () % vlen) - 1;
+ if (known_eq (d->perm[slideup_cnt], vlen) && known_eq (d->perm[slideup_cnt - 1], vlen - 1))
+ {
+ slidedown = true;
+ need_slideup_p = true;
+ }
+ else
+ return false;
+ }
/* Check for a monotonic sequence with one or two pivots. */
int pivot = -1;
}
else
{
+ rtx op1 = d->op1;
+ if (need_slideup_p)
+ {
+ op1 = gen_reg_rtx (vmode);
+ rtx ops[] = {op1, d->op1, gen_int_mode (slideup_cnt, Pmode)};
+ insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
len = pivot;
- rtx ops[] = {d->target, d->op1, d->op0,
+ rtx ops[] = {d->target, op1, d->op0,
gen_int_mode (slide_cnt, Pmode)};
icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode);
emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops,
/*
** build_linked_list:
** ...
-** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*tu,\s*ma
+** vslideup\.vi\s+v[0-9]+,\s*v[0-9]+,\s*1
** ...
-** vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0
+** vslidedown\.vi\s+v[0-9]+,\s*v[0-9]+,\s*7
** ...
-** vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0
-** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*ta,\s*ma
+** vslidedown\.vi\s+v[0-9]+,\s*v[0-9]+,\s*7
** ...
*/
void
--- /dev/null
+/* { dg-do compile { target { ! riscv_abi_e } } } */
+/* { dg-options "-O3 -march=rv64gcv -mrvv-max-lmul=m8 -Wno-overflow -mabi=lp64d" { target { rv64 } } } */
+/* { dg-options "-O3 -march=rv32gcv -mrvv-max-lmul=m8 -Wno-overflow -mabi=ilp32" { target { rv32 } } } */
+
+typedef int vnx4i __attribute__ ((vector_size (16)));
+
+vnx4i
+test (vnx4i x, vnx4i y)
+{
+ return __builtin_shufflevector (x, y, 2, 3, 4, 5); /* Indices 2..5 into the concatenation of x and y; with four ints per vector that is x[2], x[3], y[0], y[1]. */
+}
+
+/* { dg-final { scan-assembler-times "vslideup" 1 } } */
+/* { dg-final { scan-assembler-times "vslidedown" 1 } } */
+/* { dg-final { scan-assembler-not "vcompress" } } */