]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Use more whole-reg loads/stores.
authorRobin Dapp <rdapp@oss.qualcomm.com>
Fri, 27 Mar 2026 15:36:23 +0000 (16:36 +0100)
committerRobin Dapp <robin.dapp@oss.qualcomm.com>
Wed, 6 May 2026 07:31:27 +0000 (09:31 +0200)
This patch allows pred_mov, which usually results in vle/vse insns to
split off whole-register loads and stores so we can emit more of them.
The advantage of whole-reg operations is that they don't require a vtype
and therefore allow more freedom in vsetvl placement.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p):
Rename from this...
(whole_reg_move_p): ...to this.
(whole_reg_loadstore_p): Declare.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
(whole_reg_move_p): Ditto.
(whole_reg_loadstore_p): New function.
* config/riscv/thead-vector.md: Use renamed function.
* config/riscv/vector.md (@pred_store<mode>): Use new function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c: Disable
instruction scheduling.
* gcc.target/riscv/rvv/base/vle-vl1r.c: New test.

gcc/config/riscv/riscv-protos.h
gcc/config/riscv/riscv-v.cc
gcc/config/riscv/thead-vector.md
gcc/config/riscv/vector.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c [new file with mode: 0644]

index e2858a8b19feee1545b4e9b2bf98b46daa2f163e..8b362e323d984b3ecb7db28e9a81de897364809d 100644 (file)
@@ -790,7 +790,9 @@ bool can_be_broadcast_p (rtx);
 bool strided_broadcast_p (rtx);
 bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
-bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
+bool whole_reg_move_p (rtx *, machine_mode, int);
+bool whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx
+                           avl_type);
 bool splat_to_scalar_move_p (rtx *);
 rtx get_fp_rounding_coefficient (machine_mode);
 }
index 3c7e749cb60310d19f35136f8afc6d6a6b028b8c..2103764da06e9c44f80239b7d9a5671902d203b3 100644 (file)
@@ -6451,7 +6451,7 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
 
 /* Return true it is whole register-register move.  */
 bool
-whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
+whole_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
 {
   /* An operation is a whole-register move if either
      (1) Its vlmax operand equals VLMAX
@@ -6469,6 +6469,32 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
               && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
        return true;
     }
+
+  return false;
+}
+
+/* Same but for a whole-register load or store.  */
+bool
+whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx avl_type)
+{
+  machine_mode mode = GET_MODE (dest);
+  if (!multiple_p (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
+    return false;
+
+  if (((memory_operand (dest, mode)
+       && register_operand (src, mode))
+      || (register_operand (dest, mode)
+         && memory_operand (src, mode)))
+      && satisfies_constraint_Wc1 (mask))
+    {
+      if (INTVAL (avl_type) == VLMAX)
+       return true;
+      /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+        into NON-VLMAX with LEN = NUNITS.  */
+      else if (CONST_INT_P (avl)
+              && known_eq (INTVAL (avl), GET_MODE_NUNITS (mode)))
+       return true;
+    }
   return false;
 }
 
index 5a02debdd207054750b411334cc107b058e2ea04..4ad37bb441d775eb0e1961cacc5a87476a8eeca2 100644 (file)
    vs<vlmem_op_attr>.v\t%3,%0%p1
    vmv.v.v\t%0,%3
    vmv.v.v\t%0,%3"
-  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
+  "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
index 45be9e6fe177b7654a4adbe827260fcbdbc3c754..136ecdc787e4a8175d1860ca1d9213b559a28eb3 100644 (file)
    vse<sew>.v\t%3,%0%p1
    vmv.v.v\t%0,%3
    vmv.v.v\t%0,%3"
-  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
+  "&& (register_operand (operands[0], <MODE>mode)
+       && register_operand (operands[3], <MODE>mode)
+       && riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7))
+      || ((memory_operand (operands[0], <MODE>mode)
+         || memory_operand (operands[3], <MODE>mode))
+        && operands[2] != operands[0]
+        && !reload_completed
+        && riscv_vector::whole_reg_loadstore_p (operands[0], operands[3],
+                                                operands[1], operands[4],
+                                                operands[7]))"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
 
 ;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov pattern to include
 ;; memory operand as input which will produce inferior codegen.
-(define_insn "@pred_store<mode>"
+(define_insn_and_split "@pred_store<mode>"
   [(set (match_operand:V_VLS 0 "memory_operand"                 "+m")
        (if_then_else:V_VLS
          (unspec:<VM>
          (match_dup 0)))]
   "TARGET_VECTOR"
   "vse<sew>.v\t%2,%0%p1"
+  "&& !reload_completed
+  && riscv_vector::whole_reg_loadstore_p (operands[0], operands[2],
+                                         operands[1], operands[3],
+                                         operands[4])"
+  [(set (match_dup 0) (match_dup 2))]
+  ""
   [(set_attr "type" "vste")
    (set_attr "mode" "<MODE>")
    (set (attr "avl_type_idx") (const_int 4))
    vmmv.m\t%0,%3
    vmclr.m\t%0
    vmset.m\t%0"
-  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
+  "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 5)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
index 1a99df6adf6d3d362db1686073cb6c56bc7a745b..498ede9d10d71afe20bd306d37071df371dd22fc 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
 
 #include "reduc_call-1.c"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
new file mode 100644 (file)
index 0000000..0dc3ff5
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d -mrvv-vector-bits=zvl" } */
+
+#include <riscv_vector.h>
+
+vfloat32m1_t
+foo (float *a)
+{
+    vfloat32m1_t a0 = __riscv_vle32_v_f32m1 (a, 4);
+    return a0;
+}
+
+/* { dg-final { scan-assembler-not "vle32" } } */
+/* { dg-final { scan-assembler-times "vl1re32.v" 1 } } */