From: chenxiaolong <chenxiaolong@loongson.cn>
Date: Thu, 11 Dec 2025 02:49:05 +0000 (+0800)
Subject: LoongArch: Add support for the TARGET_MODES_TIEABLE_P vectorization type.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a38c4404955e793159d07d7b65f780545f3100d;p=thirdparty%2Fgcc.git

LoongArch: Add support for the TARGET_MODES_TIEABLE_P vectorization type.

v1->v2:
Add a description of the TARGET_MODES_TIEABLE_P hook and analyze why
the cost of subreg rtxes changes once vector modes are supported.

TARGET_MODES_TIEABLE_P returns true if a value of mode MODE1 is
accessible in mode MODE2 without copying. On LoongArch, the lower
128 bits of data can be shared between the vector modes V4SF and V8SF.
Once vector modes are supported in this hook, the cost of the subreg
that converts a V4SF register to a V8SF register can be made zero, and
some RTL optimizations can be completed during the combine pass. The
RTL before and after adding vector support is compared below:

Before this change:

(insn 7 4 9 2 (set (reg:V8SF 82 [ _6 ])
        (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0))
(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
        (plus:V8SF (reg:V8SF 82 [ _6 ])
            (reg:V8SF 82 [ _6 ])))

===>

After this change:

(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
        (plus:V8SF (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)
            (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)))

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (loongarch_modes_tieable_p):
	Add support for vector conversion.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/lasx/vect-extract-256-128.c:
	Adjust the expected output.  Now that the backend supports
	vector modes for subreg, the cost of the subreg rtx becomes 0.
	In the fwprop1 pass the memory-load rtx cannot be propagated
	into this insn, so xvld is no longer optimized into vld.
	* gcc.target/loongarch/vect-mode-tieable.c: New test.
---

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index cc0523f2ab7..06d0dcc5efd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7514,7 +7514,11 @@ loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 	  || (GET_MODE_CLASS(mode1) == MODE_FLOAT
 	      && GET_MODE_CLASS(mode2) == MODE_INT)
 	  || (GET_MODE_CLASS(mode2) == MODE_FLOAT
-	      && GET_MODE_CLASS(mode1) == MODE_INT));
+	      && GET_MODE_CLASS(mode1) == MODE_INT)
+	  || (GET_MODE_CLASS (mode1) == MODE_VECTOR_INT
+	      && GET_MODE_CLASS (mode2) == MODE_VECTOR_INT)
+	  || (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
+	      &&  GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT));
 }
 
 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
new file mode 100644
index 00000000000..d156f92761d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r5,0
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r6,1
+**	xvadd.d	(\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**	xvst	(\$xr[0-9]+),\$r4,0
+**	jr	\$r1
+*/
+__m256i
+foo1 (__m128i a)
+{
+  return __lasx_xvadd_d (__lasx_cast_128 (a), __lasx_cast_128 (a));
+}
+
+/*
+**foo2:
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r5,0
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r6,1
+**	xvfadd.s	(\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**	xvst	(\$xr[0-9]+),\$r4,0
+**	jr	\$r1
+*/
+__m256
+foo2 (__m128 a)
+{
+  return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
+}
+
+/*
+**foo3:
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r5,0
+**	vinsgr2vr.d	(\$vr[0-9]+),\$r6,1
+**	xvfadd.d	(\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**	xvst	(\$xr[0-9]+),\$r4,0
+**	jr	\$r1
+*/
+__m256d
+foo3 (__m128d a)
+{
+  return __lasx_xvfadd_d (__lasx_cast_128_d (a), __lasx_cast_128_d (a));
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
index d2219ea82de..bdf6e160ace 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
@@ -6,7 +6,7 @@
 
 /*
 **foo1_lo:
-**	vld	(\$vr[0-9]+),\$r4,0
+**	xvld	(\$xr[0-9]+),\$r4,0
 **	vpickve2gr.du	\$r4,(\$vr[0-9]+),0
 **	vpickve2gr.du	\$r5,(\$vr[0-9]+),1
 **	jr	\$r1
@@ -33,7 +33,7 @@ foo1_hi (__m256 x)
 
 /*
 **foo2_lo:
-**	vld	(\$vr[0-9]+),\$r4,0
+**	xvld	(\$xr[0-9]+),\$r4,0
 **	vpickve2gr.du	\$r4,(\$vr[0-9]+),0
 **	vpickve2gr.du	\$r5,(\$vr[0-9]+),1
 **	jr	\$r1
@@ -60,7 +60,7 @@ foo2_hi (__m256d x)
 
 /*
 **foo3_lo:
-**	vld	(\$vr[0-9]+),\$r4,0
+**	xvld	(\$xr[0-9]+),\$r4,0
 **	vpickve2gr.du	\$r4,(\$vr[0-9]+),0
 **	vpickve2gr.du	\$r5,(\$vr[0-9]+),1
 **	jr	\$r1