From: Guo Jie Date: Sun, 2 Nov 2025 03:31:32 +0000 (+0800) Subject: LoongArch: Eliminate unnecessary dependencies introduced by xvpermi.q X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dddb16c8170d2a92f105817eb00cf04bfaf54bd5;p=thirdparty%2Fgcc.git LoongArch: Eliminate unnecessary dependencies introduced by xvpermi.q 1. When the selector is 0x0, 0x1, 0x10, or 0x11, the result of xvpermi.q does not depend on the output operand, thus eliminating the dependency chain of the output operand as input, which can reduce the number of instructions. 2. When the selector is 0x22, 0x23, 0x32, or 0x33, the result of xvpermi.q does not depend on the second input operand, thus eliminating the dependency chain of the second input operand, which can also reduce the number of instructions. gcc/ChangeLog: * config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>): Add new splitter for optimization. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vec_pack_unpack_256.c: Adjust to changed lasx_xvpermi_q_<LASX:mode> template. * gcc.target/loongarch/vector/lasx/lasx-builtin.c: Ditto. * gcc.target/loongarch/lasx-xvpermi_q-opt.c: New test. --- diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 7704f8c798e..71dd25d0b5a 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -515,7 +515,7 @@ (set_attr "mode" "<MODE>")]) ;; xvpermi.q -(define_insn "lasx_xvpermi_q_<LASX:mode>" +(define_insn_and_split "lasx_xvpermi_q_<LASX:mode>" [(set (match_operand:LASX 0 "register_operand" "=f") (unspec:LASX [(match_operand:LASX 1 "register_operand" "0") @@ -525,6 +525,37 @@ "ISA_HAS_LASX" { return "xvpermi.q\t%u0,%u2,%3"; +} + "&& ((INTVAL (operands[3]) & 0xee) == 0x0 + || (INTVAL (operands[3]) & 0xee) == 0x22)" + [(const_int 0)] +{ + HOST_WIDE_INT selector = INTVAL (operands[3]); + /* Reduce the dependency caused by using output operands[0] as input. 
*/ + switch (INTVAL (operands[3])) + { + case 0x22: + case 0x23: + case 0x33: + selector -= 0x22; + operands[2] = operands[1]; + /* FALLTHRU. */ + case 0x0: + case 0x1: + case 0x11: + emit_insn (gen_lasx_xvpermi_d_<mode> (operands[0], operands[2], + GEN_INT (selector * 0xa + 0x44))); + break; + case 0x10: + emit_move_insn (operands[0], operands[2]); + break; + case 0x32: + emit_move_insn (operands[0], operands[1]); + break; + default: + gcc_unreachable (); + } + DONE; } [(set_attr "type" "simd_splat") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c new file mode 100644 index 00000000000..16fb9dfecdc --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlasx -ftree-vectorize" } */ + +#include <lasxintrin.h> + +#define TEST_FUNC(imm) \ + __m256i \ + test_##imm (__m256i op0, __m256i op1) \ + { \ + return __lasx_xvpermi_q (op0, op1, imm); \ + } + +TEST_FUNC (0x00) +/* { dg-final { scan-assembler-not "test_0x00:.*\txvld.*xvld.*-test_0x00"} } */ +/* { dg-final { scan-assembler-times "test_0x00:.*\txvpermi\\.d.*-test_0x00" 1 } } */ + +TEST_FUNC (0x01) +/* { dg-final { scan-assembler-not "test_0x01:.*\txvld.*xvld.*-test_0x01"} } */ +/* { dg-final { scan-assembler-times "test_0x01:.*\txvpermi\\.d.*-test_0x01" 1 } } */ + +TEST_FUNC (0x10) +/* { dg-final { scan-assembler-not "test_0x10:.*\txvld.*xvld.*-test_0x10"} } */ +/* { dg-final { scan-assembler-not "test_0x10:.*\txvpermi.*-test_0x10"} } */ + +TEST_FUNC (0x11) +/* { dg-final { scan-assembler-not "test_0x11:.*\txvld.*xvld.*-test_0x11"} } */ +/* { dg-final { scan-assembler-times "test_0x11:.*\txvpermi\\.d.*-test_0x11" 1 } } */ + +TEST_FUNC (0x22) +/* { dg-final { scan-assembler-not "test_0x22:.*\txvld.*xvld.*-test_0x22"} } */ +/* { dg-final { scan-assembler-times "test_0x22:.*\txvpermi\\.d.*-test_0x22" 1 } } */ + +TEST_FUNC (0x23) +/* { dg-final { 
scan-assembler-not "test_0x23:.*\txvld.*xvld.*-test_0x23"} } */ +/* { dg-final { scan-assembler-times "test_0x23:.*\txvpermi\\.d.*-test_0x23" 1 } } */ + +TEST_FUNC (0x32) +/* { dg-final { scan-assembler-not "test_0x32:.*\txvld.*xvld.*-test_0x32"} } */ +/* { dg-final { scan-assembler-not "test_0x32:.*\txvpermi.*-test_0x32"} } */ + +TEST_FUNC (0x33) +/* { dg-final { scan-assembler-not "test_0x33:.*\txvld.*xvld.*-test_0x33"} } */ +/* { dg-final { scan-assembler-times "test_0x33:.*\txvpermi\\.d.*-test_0x33" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c index 506b7bdb03e..5b2fd9b0599 100644 --- a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c +++ b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c @@ -55,7 +55,8 @@ test_vec_unpacks_float_hi_lo_v8si (void) } /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */ -/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */ +/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8si" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */ void test_vec_unpacks_hi_lo_v8si (void) { @@ -64,7 +65,8 @@ test_vec_unpacks_hi_lo_v8si (void) } /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */ -/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */ +/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v16hi" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */ void test_vec_unpacks_hi_lo_v16hi (void) { @@ -73,7 +75,8 @@ 
test_vec_unpacks_hi_lo_v16hi (void) } /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */ -/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */ +/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v32qi" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */ void test_vec_unpacks_hi_lo_v32qi (void) { @@ -91,7 +94,8 @@ test_vec_unpacks_hi_lo_v8sf (void) } /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */ -/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */ +/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v8si" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */ void test_vec_unpacku_hi_lo_v8si (void) { @@ -100,7 +104,8 @@ test_vec_unpacku_hi_lo_v8si (void) } /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */ -/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */ +/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v16hi" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */ void test_vec_unpacku_hi_lo_v16hi (void) { @@ -109,7 +114,8 @@ test_vec_unpacku_hi_lo_v16hi (void) } /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */ -/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */ +/* 
{ dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v32qi" } } */ +/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */ void test_vec_unpacku_hi_lo_v32qi (void) { diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c index 64ff870a4c5..3f34a430c4e 100644 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c @@ -3301,7 +3301,7 @@ __lasx_vext2xv_du_bu (v32i8 _1) v32i8 __lasx_xvpermi_q (v32i8 _1, v32i8 _2) { - return __builtin_lasx_xvpermi_q (_1, _2, 1); + return __builtin_lasx_xvpermi_q (_1, _2, 0x20); } v4i64 __lasx_xvpermi_d (v4i64 _1)