(set_attr "mode" "<MODE>")])
;; xvpermi.q
-(define_insn "lasx_xvpermi_q_<LASX:mode>"
+(define_insn_and_split "lasx_xvpermi_q_<LASX:mode>"
[(set (match_operand:LASX 0 "register_operand" "=f")
(unspec:LASX
[(match_operand:LASX 1 "register_operand" "0")
"ISA_HAS_LASX"
{
return "xvpermi.q\t%u0,%u2,%3";
+}
+ "&& ((INTVAL (operands[3]) & 0xee) == 0x0
+ || (INTVAL (operands[3]) & 0xee) == 0x22)"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT selector = INTVAL (operands[3]);
+ /* Reduce the dependency caused by using output operands[0] as input.
+    The split condition above only admits selectors whose value masked
+    with 0xee is 0x0 or 0x22.  For an 8-bit immediate those are exactly
+    the eight values handled below: 0x0, 0x1, 0x10, 0x11, 0x22, 0x23,
+    0x32 and 0x33.  NOTE(review): the predicate of operands[3] is not
+    visible in this hunk -- confirm it is limited to 8 bits, otherwise
+    the default case could be reached.  */
+ switch (INTVAL (operands[3]))
+ {
+ case 0x22:
+ case 0x23:
+ case 0x33:
+ selector -= 0x22; /* Map 0x22/0x23/0x33 onto 0x0/0x1/0x11.  */
+ operands[2] = operands[1]; /* Permute operands[1] instead of operands[2].  */
+ /* FALLTHRU. */
+ case 0x0:
+ case 0x1:
+ case 0x11:
+ emit_insn (gen_lasx_xvpermi_d_<mode> (operands[0], operands[2],
+ GEN_INT (selector * 0xa + 0x44))); /* Immediate is 0x44, 0x4e or 0xee for
+    selector 0x0, 0x1 or 0x11 respectively.  */
+ break;
+ case 0x10: /* Emitted as a plain copy of operands[2].  */
+ emit_move_insn (operands[0], operands[2]);
+ break;
+ case 0x32: /* Emitted as a plain copy of operands[1].  */
+ emit_move_insn (operands[0], operands[1]);
+ break;
+ default: /* Every selector admitted by the split condition is listed above.  */
+ gcc_unreachable ();
+ }
+ DONE;
}
[(set_attr "type" "simd_splat")
(set_attr "mode" "<MODE>")])
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx -ftree-vectorize" } */
+/* Compile-only checks for __lasx_xvpermi_q with the eight immediates
+   0x00, 0x01, 0x10, 0x11, 0x22, 0x23, 0x32 and 0x33.  For every
+   function the assembly must not match xvld twice.  It must match
+   xvpermi.d exactly once, except for 0x10 and 0x32, where no xvpermi
+   of any kind may appear.  */
+#include <lasxintrin.h>
+/* TEST_FUNC (imm) defines test_<imm> (op0, op1), which returns
+   __lasx_xvpermi_q (op0, op1, imm).  The patterns in the final checks
+   below match between the test_<imm>: label and a later -test_<imm>
+   in the assembly output.  */
+#define TEST_FUNC(imm) \
+ __m256i \
+ test_##imm (__m256i op0, __m256i op1) \
+ { \
+ return __lasx_xvpermi_q (op0, op1, imm); \
+ }
+
+TEST_FUNC (0x00)
+/* { dg-final { scan-assembler-not "test_0x00:.*\txvld.*xvld.*-test_0x00"} } */
+/* { dg-final { scan-assembler-times "test_0x00:.*\txvpermi\\.d.*-test_0x00" 1 } } */
+
+TEST_FUNC (0x01)
+/* { dg-final { scan-assembler-not "test_0x01:.*\txvld.*xvld.*-test_0x01"} } */
+/* { dg-final { scan-assembler-times "test_0x01:.*\txvpermi\\.d.*-test_0x01" 1 } } */
+
+TEST_FUNC (0x10)
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvld.*xvld.*-test_0x10"} } */
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvpermi.*-test_0x10"} } */
+
+TEST_FUNC (0x11)
+/* { dg-final { scan-assembler-not "test_0x11:.*\txvld.*xvld.*-test_0x11"} } */
+/* { dg-final { scan-assembler-times "test_0x11:.*\txvpermi\\.d.*-test_0x11" 1 } } */
+
+TEST_FUNC (0x22)
+/* { dg-final { scan-assembler-not "test_0x22:.*\txvld.*xvld.*-test_0x22"} } */
+/* { dg-final { scan-assembler-times "test_0x22:.*\txvpermi\\.d.*-test_0x22" 1 } } */
+
+TEST_FUNC (0x23)
+/* { dg-final { scan-assembler-not "test_0x23:.*\txvld.*xvld.*-test_0x23"} } */
+/* { dg-final { scan-assembler-times "test_0x23:.*\txvpermi\\.d.*-test_0x23" 1 } } */
+
+TEST_FUNC (0x32)
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvld.*xvld.*-test_0x32"} } */
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvpermi.*-test_0x32"} } */
+
+TEST_FUNC (0x33)
+/* { dg-final { scan-assembler-not "test_0x33:.*\txvld.*xvld.*-test_0x33"} } */
+/* { dg-final { scan-assembler-times "test_0x33:.*\txvpermi\\.d.*-test_0x33" 1 } } */
+
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
void
test_vec_unpacks_hi_lo_v8si (void)
{
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
void
test_vec_unpacks_hi_lo_v16hi (void)
{
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
void
test_vec_unpacks_hi_lo_v32qi (void)
{
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
void
test_vec_unpacku_hi_lo_v8si (void)
{
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
void
test_vec_unpacku_hi_lo_v16hi (void)
{
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
void
test_vec_unpacku_hi_lo_v32qi (void)
{
v32i8
__lasx_xvpermi_q (v32i8 _1, v32i8 _2)
{
- return __builtin_lasx_xvpermi_q (_1, _2, 1);
+ return __builtin_lasx_xvpermi_q (_1, _2, 0x20); /* NOTE(review): 0x20 & 0xee
+    is 0x20, which fails the split condition of lasx_xvpermi_q (0x0 or
+    0x22), whereas the old immediate 1 satisfies it.  Presumably chosen
+    so this function is still emitted as xvpermi.q -- confirm against
+    the checks in this test file.  */
}
v4i64
__lasx_xvpermi_d (v4i64 _1)