This is almost exclusively work from the VRULL team.
As we've discussed in the Tuesday meeting in the past, we'd like to have a knob
in the tuning structure to indicate that overlapped stores during
move_by_pieces expansion of memcpy & friends are acceptable.
This patch adds that capability to our tuning structure. It's off for all
the uarchs upstream, but we have been using it inside Ventana for our uarch
with success. So technically it's NFC upstream, but puts in the infrastructure
multiple organizations likely need.
gcc/
* config/riscv/riscv.cc (struct riscv_tune_param): Add new
"overlap_op_by_pieces" field.
(rocket_tune_info, sifive_7_tune_info): Set it.
(sifive_p400_tune_info, sifive_p600_tune_info): Likewise.
(thead_c906_tune_info, xiangshan_nanhu_tune_info): Likewise.
(generic_ooo_tune_info, optimize_size_tune_info): Likewise.
(riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Define.
gcc/testsuite/
* gcc.target/riscv/memcpy-nonoverlapping.c: New test.
* gcc.target/riscv/memset-nonoverlapping.c: New test.
unsigned short fmv_cost;
bool slow_unaligned_access;
bool use_divmod_expansion;
+ bool overlap_op_by_pieces;
unsigned int fusible_ops;
const struct cpu_vector_cost *vec_costs;
};
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
};
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
};
4, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
};
4, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
};
8, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
};
3, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
NULL, /* vector cost */
};
4, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
};
8, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
};
return riscv_slow_unaligned_access_p;
}
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  Return true if the active
+   tuning permits overlapping (possibly unaligned) stores during
+   move_by_pieces/store_by_pieces expansion of memcpy and friends.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
static bool
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+/* Check that memcpy expansion emits NO overlapping stores when the
+   selected tuning (sifive-u74) leaves overlap_op_by_pieces disabled:
+   each tail is covered with exact-size loads/stores instead of one
+   overlapping doubleword access.  */
+
+#define COPY_N(N) \
+void copy##N (char *src, char *dst) \
+{ \
+  dst = __builtin_assume_aligned (dst, 4096); \
+  src = __builtin_assume_aligned (src, 4096); \
+  __builtin_memcpy (dst, src, N); \
+}
+
+/* Emits 1x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(11)
+
+/* Emits 1x {ld,sd} and 1x {lw,lbu,sw,sb}. */
+COPY_N(13)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,sw,sh}. */
+COPY_N(14)
+
+/* Emits 1x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(15)
+
+/* Emits 2x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(19)
+
+/* Emits 2x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes.
+   So the code below is emitted via cpymemsi/block_move_straight. */
+
+/* Emits 3x {ld,sd} and 1x {lhu,lbu,sh,sb}. */
+COPY_N(27)
+
+/* Emits 3x {ld,sd} and 1x {lw,lbu,sw,sb}. */
+COPY_N(29)
+
+/* Emits 3x {ld,sd} and 1x {lw,lhu,lbu,sw,sh,sb}. */
+COPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld\t" 17 } } */
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+
+/* { dg-final { scan-assembler-times "lw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+
+/* { dg-final { scan-assembler-times "lhu\t" 7 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+
+/* { dg-final { scan-assembler-times "lbu\t" 8 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mcpu=sifive-u74 -march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+/* Check that memset expansion emits NO overlapping stores when the
+   selected tuning (sifive-u74) leaves overlap_op_by_pieces disabled:
+   each tail is covered with exact-size stores instead of one
+   overlapping doubleword store.  */
+
+#define ZERO_N(N) \
+void zero##N (char *dst) \
+{ \
+  dst = __builtin_assume_aligned (dst, 4096); \
+  __builtin_memset (dst, 0, N); \
+}
+
+/* Emits 1x sd and 1x {sh,sb}. */
+ZERO_N(11)
+
+/* Emits 1x sd and 1x {sw,sb}. */
+ZERO_N(13)
+
+/* Emits 1x sd and 1x {sw,sh}. */
+ZERO_N(14)
+
+/* Emits 1x sd and 1x {sw,sh,sb}. */
+ZERO_N(15)
+
+/* Emits 2x sd and 1x {sh,sb}. */
+ZERO_N(19)
+
+/* Emits 2x sd and 1x {sw,sh,sb}. */
+ZERO_N(23)
+
+/* The by-pieces infrastructure handles up to 24 bytes, so the code
+   below is emitted via the block-set expansion.
+   NOTE(review): the original comment named "cpymemsi/block_move_straight",
+   which is the memcpy expansion path — confirm the actual memset route.  */
+
+/* Emits 3x sd and 1x {sh,sb}. */
+ZERO_N(27)
+
+/* Emits 3x sd and 1x {sw,sb}. */
+ZERO_N(29)
+
+/* Emits 3x sd and 1x {sw,sh,sb}. */
+ZERO_N(31)
+
+/* { dg-final { scan-assembler-times "sd\t" 17 } } */
+/* { dg-final { scan-assembler-times "sw\t" 6 } } */
+/* { dg-final { scan-assembler-times "sh\t" 7 } } */
+/* { dg-final { scan-assembler-times "sb\t" 8 } } */