]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
LoongArch: Eliminate unnecessary dependencies introduced by xvpermi.q
authorGuo Jie <guojie@loongson.cn>
Sun, 2 Nov 2025 03:31:32 +0000 (11:31 +0800)
committerLulu Cheng <chenglulu@loongson.cn>
Mon, 3 Nov 2025 08:09:39 +0000 (16:09 +0800)
1. When the selector is 0x0, 0x1, 0x10, or 0x11, the result of
xvpermi.q does not depend on the output operand, thus eliminating
the dependency chain of the output operand as input, which can
reduce the number of instructions.

2. When the selector is 0x22, 0x23, 0x32, or 0x33, the result of
xvpermi.q does not depend on the second input operand, thus
eliminating the dependency chain of the second input operand,
which can also reduce the number of instructions.

gcc/ChangeLog:

* config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>):
Add new splitter for optimization.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vec_pack_unpack_256.c: Adjust to changed
lasx_xvpermi_q_<LASX:mode> template.
* gcc.target/loongarch/vector/lasx/lasx-builtin.c: Ditto.
* gcc.target/loongarch/lasx-xvpermi_q-opt.c: New test.

gcc/config/loongarch/lasx.md
gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c [new file with mode: 0644]
gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c

index 7704f8c798e5674332576c6b60200c448b3ba7d6..71dd25d0b5a25fdf14331b63bec863b28993a753 100644 (file)
    (set_attr "mode" "<MODE>")])
 
 ;; xvpermi.q
-(define_insn "lasx_xvpermi_q_<LASX:mode>"
+(define_insn_and_split "lasx_xvpermi_q_<LASX:mode>"
   [(set (match_operand:LASX 0 "register_operand" "=f")
        (unspec:LASX
          [(match_operand:LASX 1 "register_operand" "0")
   "ISA_HAS_LASX"
 {
   return "xvpermi.q\t%u0,%u2,%3";
+}
+  "&& ((INTVAL (operands[3]) & 0xee) == 0x0
+       || (INTVAL (operands[3]) & 0xee) == 0x22)"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT selector = INTVAL (operands[3]);
+  /* Reduce the dependency caused by using output operands[0] as input.  */
+  switch (INTVAL (operands[3]))
+    {
+    case 0x22:
+    case 0x23:
+    case 0x33:
+      selector -= 0x22;
+      operands[2] = operands[1];
+    /* FALLTHRU.  */
+    case 0x0:
+    case 0x1:
+    case 0x11:
+      emit_insn (gen_lasx_xvpermi_d_<mode> (operands[0], operands[2],
+                                           GEN_INT (selector * 0xa + 0x44)));
+      break;
+    case 0x10:
+      emit_move_insn (operands[0], operands[2]);
+      break;
+    case 0x32:
+      emit_move_insn (operands[0], operands[1]);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
 }
   [(set_attr "type" "simd_splat")
    (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c
new file mode 100644 (file)
index 0000000..16fb9df
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx -ftree-vectorize" } */
+
+#include <lasxintrin.h>
+
+#define TEST_FUNC(imm)                                                        \
+  __m256i                                                                     \
+  test_##imm (__m256i op0, __m256i op1)                                       \
+  {                                                                           \
+    return __lasx_xvpermi_q (op0, op1, imm);                                  \
+  }
+
+TEST_FUNC (0x00)
+/* { dg-final { scan-assembler-not "test_0x00:.*\txvld.*xvld.*-test_0x00"} } */
+/* { dg-final { scan-assembler-times "test_0x00:.*\txvpermi\\.d.*-test_0x00" 1 } } */
+
+TEST_FUNC (0x01)
+/* { dg-final { scan-assembler-not "test_0x01:.*\txvld.*xvld.*-test_0x01"} } */
+/* { dg-final { scan-assembler-times "test_0x01:.*\txvpermi\\.d.*-test_0x01" 1 } } */
+
+TEST_FUNC (0x10)
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvld.*xvld.*-test_0x10"} } */
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvpermi.*-test_0x10"} } */
+
+TEST_FUNC (0x11)
+/* { dg-final { scan-assembler-not "test_0x11:.*\txvld.*xvld.*-test_0x11"} } */
+/* { dg-final { scan-assembler-times "test_0x11:.*\txvpermi\\.d.*-test_0x11" 1 } } */
+
+TEST_FUNC (0x22)
+/* { dg-final { scan-assembler-not "test_0x22:.*\txvld.*xvld.*-test_0x22"} } */
+/* { dg-final { scan-assembler-times "test_0x22:.*\txvpermi\\.d.*-test_0x22" 1 } } */
+
+TEST_FUNC (0x23)
+/* { dg-final { scan-assembler-not "test_0x23:.*\txvld.*xvld.*-test_0x23"} } */
+/* { dg-final { scan-assembler-times "test_0x23:.*\txvpermi\\.d.*-test_0x23" 1 } } */
+
+TEST_FUNC (0x32)
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvld.*xvld.*-test_0x32"} } */
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvpermi.*-test_0x32"} } */
+
+TEST_FUNC (0x33)
+/* { dg-final { scan-assembler-not "test_0x33:.*\txvld.*xvld.*-test_0x33"} } */
+/* { dg-final { scan-assembler-times "test_0x33:.*\txvpermi\\.d.*-test_0x33" 1 } } */
+
index 506b7bdb03e9abf8accac19a0224526559a8c952..5b2fd9b0599f3677d83694a9fcfa068e8c249ef4 100644 (file)
@@ -55,7 +55,8 @@ test_vec_unpacks_float_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
 void
 test_vec_unpacks_hi_lo_v8si (void)
 {
@@ -64,7 +65,8 @@ test_vec_unpacks_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
 void
 test_vec_unpacks_hi_lo_v16hi (void)
 {
@@ -73,7 +75,8 @@ test_vec_unpacks_hi_lo_v16hi (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
 void
 test_vec_unpacks_hi_lo_v32qi (void)
 {
@@ -91,7 +94,8 @@ test_vec_unpacks_hi_lo_v8sf (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
 void
 test_vec_unpacku_hi_lo_v8si (void)
 {
@@ -100,7 +104,8 @@ test_vec_unpacku_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
 void
 test_vec_unpacku_hi_lo_v16hi (void)
 {
@@ -109,7 +114,8 @@ test_vec_unpacku_hi_lo_v16hi (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
 void
 test_vec_unpacku_hi_lo_v32qi (void)
 {
index 64ff870a4c57c6f976d0b4e84bfbcea8c6742e2b..3f34a430c4e77351a77e7119b7256f5c07fd6da0 100644 (file)
@@ -3301,7 +3301,7 @@ __lasx_vext2xv_du_bu (v32i8 _1)
 v32i8
 __lasx_xvpermi_q (v32i8 _1, v32i8 _2)
 {
-  return __builtin_lasx_xvpermi_q (_1, _2, 1);
+  return __builtin_lasx_xvpermi_q (_1, _2, 0x20);
 }
 v4i64
 __lasx_xvpermi_d (v4i64 _1)