]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Introduce -mvector-strict-align.
authorRobin Dapp <rdapp.gcc@gmail.com>
Tue, 28 May 2024 19:19:26 +0000 (21:19 +0200)
committerRobin Dapp <rdapp@ventanamicro.com>
Wed, 5 Jun 2024 19:32:45 +0000 (21:32 +0200)
This patch disables movmisalign by default and introduces
the -mno-vector-strict-align option to override it and re-enable
movmisalign.  For now, generic-ooo is the only uarch that supports
misaligned vector access.

The patch also adds a check_effective_target_riscv_v_misalign_ok to
the testsuite which enables or disables the vector misalignment tests
depending on whether the target under test can execute a misaligned
vle32.

Changes from v3:
 - Addressed Kito's comments.
 - Made -mscalar-strict-align a real alias.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
Move from here...
* config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
...to here and map to riscv_vector_unaligned_access_p.
* config/riscv/riscv.opt: Add -mvector-strict-align.
* config/riscv/riscv.cc (struct riscv_tune_param): Add
vector_unaligned_access.
(riscv_override_options_internal): Set
riscv_vector_unaligned_access_p.
* doc/invoke.texi: Document -mvector-strict-align.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add
check_effective_target_riscv_v_misalign_ok.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
-mno-vector-strict-align.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.

13 files changed:
gcc/config/riscv/riscv-opts.h
gcc/config/riscv/riscv.cc
gcc/config/riscv/riscv.h
gcc/config/riscv/riscv.opt
gcc/doc/invoke.texi
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c
gcc/testsuite/lib/target-supports.exp

index 1b2dd5757a8e1508fbc5fb11c8c0fde497e73bad..f58a07abffcd93b145479865043af2af03995e6c 100644 (file)
@@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
      ? 0                                                                       \
      : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
 
-/* TODO: Enable RVV movmisalign by default for now.  */
-#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
-
 /* The maximmum LMUL according to user configuration.  */
 #define TARGET_MAX_LMUL                                                        \
   (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
index c5c4c77734969c8fb27f0be6c8465adbd7c49047..9704ff9c6a02484b8d219583c39b902b3caf10f0 100644 (file)
@@ -288,6 +288,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool vector_unaligned_access;
   bool use_divmod_expansion;
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
@@ -300,6 +301,10 @@ struct riscv_tune_param
 /* Whether unaligned accesses execute very slowly.  */
 bool riscv_slow_unaligned_access_p;
 
+/* Whether misaligned vector accesses are supported (i.e. do not
+   throw an exception).  */
+bool riscv_vector_unaligned_access_p;
+
 /* Whether user explicitly passed -mstrict-align.  */
 bool riscv_user_wants_strict_align;
 
@@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
@@ -460,6 +466,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   3,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
@@ -478,6 +485,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   3,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
@@ -496,6 +504,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   3,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
@@ -514,6 +523,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   5,            /* memory_cost */
   8,           /* fmv_cost */
   false,            /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
@@ -532,6 +542,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   3,                                           /* memory_cost */
   3,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
@@ -550,6 +561,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   4,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   false,                                       /* slow_unaligned_access */
+  true,                                                /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   true,                                                /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
@@ -568,6 +580,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   2,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   false,                                       /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
@@ -9714,6 +9727,12 @@ riscv_override_options_internal (struct gcc_options *opts)
   riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
                                   || TARGET_STRICT_ALIGN);
 
+  /* By default, when -mno-vector-strict-align is not specified, do not allow
+     unaligned vector memory accesses except if -mtune's setting explicitly
+     allows it.  */
+  riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
+    || cpu->tune_param->vector_unaligned_access;
+
   /* Make a note if user explicitly passed -mstrict-align for later
      builtin macro generation.  Can't use target_flags_explicitly since
      it is set even for -mno-strict-align.  */
index d6b14c4d6205f9c3468ff9475bc81fcc5dc6ff2d..57910eecd3ea4e0699befaebf85b175fa8e22fad 100644 (file)
@@ -934,6 +934,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
   || (riscv_microarchitecture == sifive_p400) \
   || (riscv_microarchitecture == sifive_p600))
 
+/* True if the target supports misaligned vector loads and stores.  */
+#define TARGET_VECTOR_MISALIGN_SUPPORTED \
+   riscv_vector_unaligned_access_p
+
 #define LOGICAL_OP_NON_SHORT_CIRCUIT 0
 
 /* Control the assembler format that we output.  */
@@ -1161,6 +1165,7 @@ while (0)
 #ifndef USED_FOR_TARGET
 extern const enum reg_class riscv_regno_to_class[];
 extern bool riscv_slow_unaligned_access_p;
+extern bool riscv_vector_unaligned_access_p;
 extern bool riscv_user_wants_strict_align;
 extern unsigned riscv_stack_boundary;
 extern unsigned riscv_bytes_per_vector_chunk;
index 87f5833201681e235a5072279af7b88b088e14d8..78cb1c37e69fa53f949d01e24b1eaeadcb118612 100644 (file)
@@ -128,6 +128,14 @@ mstrict-align
 Target Mask(STRICT_ALIGN) Save
 Do not generate unaligned memory accesses.
 
+mscalar-strict-align
+Target Save Alias(mstrict-align)
+Do not generate unaligned scalar memory accesses.
+
+mvector-strict-align
+Target Save Var(rvv_vector_strict_align) Init(1)
+Do not create element-misaligned vector memory accesses.
+
 Enum
 Name(code_model) Type(enum riscv_code_model)
 Known code models (for use with the -mcmodel= option):
index 4e8967fd8ab5647d90497bea81c96c47b7bf4f29..e5a5d1d9335b4038b058dc3885b095cc35d5fdf3 100644 (file)
@@ -31104,6 +31104,23 @@ Do not or do generate unaligned memory accesses.  The default is set depending
 on whether the processor we are optimizing for supports fast unaligned access
 or not.
 
+@opindex mscalar-strict-align
+@opindex mno-scalar-strict-align
+@item -mscalar-strict-align
+@itemx -mno-scalar-strict-align
+Do not or do generate unaligned memory accesses.  The default is set depending
+on whether the processor we are optimizing for supports fast unaligned access
+or not.  This is an alias for @option{-mstrict-align}.
+
+@opindex mvector-strict-align
+@opindex mno-vector-strict-align
+@item -mvector-strict-align
+@itemx -mno-vector-strict-align
+Do not or do generate unaligned vector memory accesses.  By default,
+unaligned vector memory accesses are not generated unless the processor we
+are optimizing for explicitly supports element-misaligned vector memory access.
+
+
 @opindex mcmodel=
 @opindex mcmodel=medlow
 @item -mcmodel=medlow
index 49ea3c2cf7251b185660f6a4762f97db5b6c0e7c..754f84ae0a08b25aaf4552529fc82a06c5ae340b 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mno-vector-strict-align" } */
 
 int
 x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2)
index 144479324d7578127d50d736c4b45e227c442358..d0a0f4208eed4c478fbacbf582885af453d25bbb 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
index 13ae8bd3bcfac28e70ca833335ef88bb386aa998..5a779a9ee7569d543d14aadc33fb15806f02b782 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
index 1f9fa48264e1720090749d87679bb3bd145c3027..e7e4e841bb8abe6eb1cdef56650cc960392596ab 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
index ea6a7cbe2b17c051e5b654ebd596fd7a13cdd8ac..0e5b4522de5c5013254c9b47cfb09fa0ed0f2b53 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
index cb4abeca98989f25f2eaac706e4065f64bab6d31..5276e0b2f6c159188071d411454fb3e10e082206 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
index 1a076cbcd0f59a5a5d0bf95be353ce5174b51137..5184a295e16791482bb486cd32ccd47b709b9bfa 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns -mno-vector-strict-align" } */
 
 #include <stdlib.h>
 
index 836545b4e116bbae0c84b8e4b9c6de3e30194d9d..4766104c6d80331ac5a3703e2f5d8b9c8f6b2b78 100644 (file)
@@ -2044,7 +2044,7 @@ proc check_effective_target_riscv_zvfh_ok { } {
     # check if we can execute vector insns with the given hardware or
     # simulator
     set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
-    if { [check_runtime ${gcc_march}_exec {
+    if { [check_runtime ${gcc_march}_zvfh_exec {
        int main()
        {
            asm ("vsetivli zero,8,e16,m1,ta,ma");
@@ -2057,6 +2057,8 @@ proc check_effective_target_riscv_zvfh_ok { } {
     return 0
 }
 
+# Return 1 if we can execute code when using dg-add-options riscv_zvbb
+
 proc check_effective_target_riscv_zvbb_ok { } {
     # If the target already supports v without any added options,
     # we may assume we can execute just fine.
@@ -2076,6 +2078,28 @@ proc check_effective_target_riscv_zvbb_ok { } {
        } } "-march=${gcc_march}"] } {
            return 1
        }
+    return 0
+}
+
+# Return 1 if we can load a vector from a 1-byte aligned address.
+
+proc check_effective_target_riscv_v_misalign_ok { } {
+
+    if { ![check_effective_target_riscv_v_ok] } {
+       return 0
+    }
+
+    set gcc_march [riscv_get_arch]
+    if { [check_runtime ${gcc_march}_misalign_exec {
+         int main() {
+             unsigned char a[16]
+               = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+             asm ("vsetivli zero,7,e8,m1,ta,ma");
+             asm ("addi a7,%0,1" : : "r" (a) : "a7" );
+             asm ("vle8.v v8,0(a7)" : : : "v8");
+             return 0; } } "-march=${gcc_march}"] } {
+       return 1
+    }
 
     return 0
 }
@@ -8184,7 +8208,6 @@ proc check_effective_target_vect_hw_misalign { } {
             || ([istarget mips*-*-*] && [et-is-effective-target mips_msa])
             || ([istarget s390*-*-*]
                 && [check_effective_target_s390_vx])
-            || ([istarget riscv*-*-*])
             || ([istarget loongarch*-*-*])
             || [istarget amdgcn*-*-*] } {
          return 1
@@ -8193,6 +8216,11 @@ proc check_effective_target_vect_hw_misalign { } {
             && ![check_effective_target_arm_vect_no_misalign] } {
          return 1
        }
+       if { [istarget riscv*-*-*]
+            && [check_effective_target_riscv_v_misalign_ok] } {
+           return 1
+       }
+
         return 0
     }]
 }
@@ -11610,6 +11638,9 @@ proc check_vect_support_and_set_flags { } {
     } elseif [istarget riscv*-*-*] {
        if [check_effective_target_riscv_v] {
            set dg-do-what-default run
+           if [check_effective_target_riscv_v_misalign_ok] {
+               lappend DEFAULT_VECTCFLAGS "-mno-vector-strict-align"
+           }
        } else {
            foreach item [add_options_for_riscv_v ""] {
                lappend DEFAULT_VECTCFLAGS $item