switch (mode)
{
case E_V16SImode:
- gen =gen_avx512f_permvarv16si;
+ gen = gen_avx512f_permvarv16si;
break;
case E_V16SFmode:
gen = gen_avx512f_permvarv16sf;
return;
case E_V4SImode:
+ if (one_operand_shuffle)
+ break; /* Handled below for TARGET_AVX. */
/* By combining the two 128-bit input vectors into one 256-bit
input vector, we can use VPERMD and VPERMPS for the full
two-operand shuffle. */
return;
case E_V4SFmode:
+ if (one_operand_shuffle)
+ break; /* Handled below for TARGET_AVX. */
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SImode);
mask = gen_lowpart (V4SImode, mask);
}
}
+ if (TARGET_AVX
+ && one_operand_shuffle
+ && (mode == V4SImode || mode == V4SFmode))
+ {
+ if (mode == V4SImode)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ t1 = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_avx_vpermilvarv4sf3 (t1, op0, mask));
+ emit_move_insn (target, gen_lowpart (mode, t1));
+ }
+ else
+ emit_insn (gen_avx_vpermilvarv4sf3 (target, op0, mask));
+ return;
+ }
+
if (TARGET_XOP)
{
/* The XOP VPPERM insn supports three inputs. By ignoring the
--- /dev/null
+/* PR target/125357 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mno-avx2 -mno-xop" } */
+/* { dg-final { scan-assembler-times "\tvpermilps\t" 2 } } */
+
+typedef int v4si __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+
+v4si
+foo (v4si x, v4si y) /* Int vector data, int vector selector.  */
+{
+ return __builtin_shuffle (x, y); /* One input (x); y picks its elements.  */
+}
+
+v4sf
+bar (v4sf x, v4si y) /* Float vector data, int vector selector.  */
+{
+ return __builtin_shuffle (x, y); /* One input (x); y picks its elements.  */
+}
--- /dev/null
+/* PR target/125357 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -mno-avx512f -mno-xop" } */
+/* { dg-final { scan-assembler-times "\tvpermilps\t" 2 } } */
+
+typedef int v4si __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+
+v4si
+foo (v4si x, v4si y) /* Int vector data, int vector selector.  */
+{
+ return __builtin_shuffle (x, y); /* One input (x); y picks its elements.  */
+}
+
+v4sf
+bar (v4sf x, v4si y) /* Float vector data, int vector selector.  */
+{
+ return __builtin_shuffle (x, y); /* One input (x); y picks its elements.  */
+}