git.ipfire.org Git - thirdparty/gcc.git/commitdiff
LoongArch: Fix wrong code generated by TARGET_VECTORIZE_VEC_PERM_CONST [PR121064]
author Xi Ruoyao <xry111@xry111.site>
Mon, 14 Jul 2025 19:01:12 +0000 (03:01 +0800)
committer Xi Ruoyao <xry111@xry111.site>
Mon, 28 Jul 2025 01:45:07 +0000 (09:45 +0800)
When TARGET_VECTORIZE_VEC_PERM_CONST is called, target may be the
same pseudo as op0 and/or op1.  Loading the selector into target
would clobber the input, producing wrong code like

    vld     $vr0, $t0
    vshuf.w $vr0, $vr0, $vr1

So don't load the selector into d->target, use a new pseudo to hold the
selector instead.  The reload pass will load the pseudo for selector and
the pseudo for target into the same hard register (following our
constraint '0' on the shuf instructions) anyway.

gcc/ChangeLog:

PR target/121064
* config/loongarch/lsx.md (lsx_vshuf_<lsxfmt_f>): Add '@' to
generate a mode-aware helper.  Use <VIMODE> as the mode of the
operand 1 (selector).
* config/loongarch/lasx.md (lasx_xvshuf_<lasxfmt_f>): Likewise.
* config/loongarch/loongarch.cc
(loongarch_try_expand_lsx_vshuf_const): Create a new pseudo for
the selector.  Use the mode-aware helper to simplify the code.
(loongarch_expand_vec_perm_const): Likewise.

gcc/testsuite/ChangeLog:

PR target/121064
* gcc.target/loongarch/pr121064.c: New test.

(cherry picked from commit d626debcb3717f18bf2ee88f4281b109b13e1181)

gcc/config/loongarch/lasx.md
gcc/config/loongarch/loongarch.cc
gcc/config/loongarch/lsx.md
gcc/testsuite/gcc.target/loongarch/pr121064.c [new file with mode: 0644]

index 43e3ab0026abe16cff52034fc15486ec5292fc6e..3d71f30a54be83ff936e0bbf8fbb8eb744d6c4ad 100644 (file)
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "lasx_xvshuf_<lasxfmt_f>"
+(define_insn "@lasx_xvshuf_<lasxfmt_f>"
   [(set (match_operand:LASX_DWH 0 "register_operand" "=f")
-       (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0")
+       (unspec:LASX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
                          (match_operand:LASX_DWH 2 "register_operand" "f")
                          (match_operand:LASX_DWH 3 "register_operand" "f")]
                        UNSPEC_LASX_XVSHUF))]
index 7533e53839f43c7f82f2d36b00c05fd2de1384e2..ac24c6d768fbdcb2872f50657004b7d5c04e2aa0 100644 (file)
@@ -8382,7 +8382,7 @@ static bool
 loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
 {
   int i;
-  rtx target, op0, op1, sel, tmp;
+  rtx target, op0, op1;
   rtx rperm[MAX_VECT_LEN];
 
   if (GET_MODE_SIZE (d->vmode) == 16)
@@ -8401,47 +8401,23 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
       for (i = 0; i < d->nelt; i += 1)
          rperm[i] = GEN_INT (d->perm[i]);
 
-      if (d->vmode == E_V2DFmode)
-       {
-         sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
-         tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
-         emit_move_insn (tmp, sel);
-       }
-      else if (d->vmode == E_V4SFmode)
-       {
-         sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
-         tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
-         emit_move_insn (tmp, sel);
-       }
+      machine_mode sel_mode = related_int_vector_mode (d->vmode)
+       .require ();
+      rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+      /* Although vshuf.* (except vshuf.b) needs sel == target, we cannot
+        load sel into target right now: here we are dealing with
+        pseudo regs, and target may be the same pseudo as one of op0
+        or op1.  Then we'd clobber the input.  Instead, we use a new
+        pseudo reg here.  The reload pass will look at the constraint
+        of vshuf.* and move sel into target first if needed.  */
+      rtx sel = force_reg (sel_mode,
+                          gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+      if (d->vmode == E_V16QImode)
+       emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
       else
-       {
-         sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
-         emit_move_insn (d->target, sel);
-       }
-
-      switch (d->vmode)
-       {
-       case E_V2DFmode:
-         emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
-         break;
-       case E_V2DImode:
-         emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
-         break;
-       case E_V4SFmode:
-         emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
-         break;
-       case E_V4SImode:
-         emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
-         break;
-       case E_V8HImode:
-         emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
-         break;
-       case E_V16QImode:
-         emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
-         break;
-       default:
-         break;
-       }
+       emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0));
 
       return true;
     }
@@ -9437,7 +9413,7 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
   bool flag = false;
   unsigned int i;
   unsigned char idx;
-  rtx target, op0, op1, sel, tmp;
+  rtx target, op0, op1;
   rtx rperm[MAX_VECT_LEN];
   unsigned int remapped[MAX_VECT_LEN];
   unsigned char perm2[MAX_VECT_LEN];
@@ -9617,63 +9593,23 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
 expand_perm_const_end:
       if (flag)
        {
-         /* Copy selector vector from memory to vector register for later insn
-            gen function.
-            If vector's element in floating point value, we cannot fit
-            selector argument into insn gen function directly, because of the
-            insn template definition.  As a solution, generate a integral mode
-            subreg of target, then copy selector vector (that is in integral
-            mode) to this subreg.  */
-         switch (d->vmode)
-           {
-           case E_V4DFmode:
-             sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
-                                                                  rperm));
-             tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
-             emit_move_insn (tmp, sel);
-             break;
-           case E_V8SFmode:
-             sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
-                                                                  rperm));
-             tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
-             emit_move_insn (tmp, sel);
-             break;
-           default:
-             sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
-                                                                rperm));
-             emit_move_insn (d->target, sel);
-             break;
-           }
-
          target = d->target;
          op0 = d->op0;
          op1 = d->one_vector_p ? d->op0 : d->op1;
 
-         /* We FINALLY can generate xvshuf.* insn.  */
-         switch (d->vmode)
-           {
-           case E_V4DFmode:
-             emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
-             break;
-           case E_V4DImode:
-             emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
-             break;
-           case E_V8SFmode:
-             emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
-             break;
-           case E_V8SImode:
-             emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
-             break;
-           case E_V16HImode:
-             emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
-             break;
-           case E_V32QImode:
-             emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
-             break;
-           default:
-             gcc_unreachable ();
-             break;
-           }
+         machine_mode sel_mode = related_int_vector_mode (d->vmode)
+           .require ();
+         rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+         /* See the comment in loongarch_try_expand_lsx_vshuf_const for
+            why we don't simply use a SUBREG to pun target.  */
+         rtx sel = force_reg (sel_mode,
+                              gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+         if (d->vmode == E_V32QImode)
+           emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel));
+         else
+           emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0));
 
          return true;
        }
index 407c86870dfc75d8bbbe566d3129d76227a9dcf4..fb0236ba0f1b4191e715b9d04e52fbf93d0aec11 100644 (file)
   DONE;
 })
 
-(define_insn "lsx_vshuf_<lsxfmt_f>"
+(define_insn "@lsx_vshuf_<lsxfmt_f>"
   [(set (match_operand:LSX_DWH 0 "register_operand" "=f")
-       (unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0")
+       (unspec:LSX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
                         (match_operand:LSX_DWH 2 "register_operand" "f")
                         (match_operand:LSX_DWH 3 "register_operand" "f")]
                        UNSPEC_LSX_VSHUF))]
diff --git a/gcc/testsuite/gcc.target/loongarch/pr121064.c b/gcc/testsuite/gcc.target/loongarch/pr121064.c
new file mode 100644 (file)
index 0000000..a466c7a
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-require-effective-target loongarch_sx_hw } */
+/* { dg-do run } */
+/* { dg-options "-march=loongarch64 -mfpu=64 -mlsx -O3" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef unsigned __INT32_TYPE__ uint32_t;
+
+__attribute__ ((noipa)) static int32_t  /* noipa: keep the call in main opaque to interprocedural optimization.  */
+long_filter_ehigh_3830_1 (int32_t *buffer, int length)  /* PR121064 reproducer; returns the delay*coeffs sum of the final iteration (0 if LENGTH <= 0).  */
+{
+  int i, j;
+  int32_t dotprod = 0;
+  int32_t delay[4] = { 0 };  /* The four most recent samples, newest at index 0.  */
+  uint32_t coeffs[4] = { 0 };  /* Per-tap weights, adjusted by +/-1 on every sample.  */
+
+  for (i = 0; i < length; i++)
+    {
+      dotprod = 0;  /* Reset per sample, so only the last iteration's sum is returned.  */
+      for (j = 3; j >= 0; j--)
+        {
+          dotprod += delay[j] * coeffs[j];  /* int32_t * uint32_t: evaluated in unsigned arithmetic.  */
+          coeffs[j] += ((delay[j] >> 31) | 1);  /* Adds -1 for negative delay[j], else +1 (assumes arithmetic >>).  */
+        }
+      for (j = 3; j > 0; j--)  /* Age the delay line by one slot; presumably the part vectorized into a shuffle -- TODO confirm.  */
+        delay[j] = delay[j - 1];
+      delay[0] = buffer[i];  /* The newest sample enters at index 0.  */
+    }
+
+  return dotprod;
+}
+
+int
+main ()  /* Runtime check: runs the kernel on two samples and traps on a wrong result.  */
+{
+  int32_t buffer[] = { -1, 1 };  /* After the first sample: delay[0] == -1 and every coeff == 1.  */
+  if (long_filter_ehigh_3830_1 (buffer, 2) != -1)  /* Second pass sums -1 * 1 plus three zero terms, so -1 is expected.  */
+    __builtin_trap ();  /* Any other value fails the run test.  */
+}