aarch64: Leveraging the use of STP instruction for vec_duplicate

author Victor Do Nascimento <victor.donascimento@arm.com>

Tue, 25 Apr 2023 09:57:00 +0000 (10:57 +0100)

committer Victor Do Nascimento <victor.donascimento@arm.com>

Tue, 25 Apr 2023 10:44:42 +0000 (11:44 +0100)
author Victor Do Nascimento <victor.donascimento@arm.com>
Tue, 25 Apr 2023 09:57:00 +0000 (10:57 +0100)
committer Victor Do Nascimento <victor.donascimento@arm.com>
Tue, 25 Apr 2023 10:44:42 +0000 (11:44 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 9f2fce6f03310d1a541448327aa143f2fc927e1d..cfad812658fb5411f5a7d5073cfadedff1f4884c 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -257,6 +257,16 @@
    [(set_attr "type" "neon_stp")]
  )
  
+;; Store a VP_2E vector that is a vec_duplicate of scalar register operand 1
+;; to the pair-lanes memory operand 0, using a single STP that names the
+;; same source register twice.  The first alternative takes the scalar from
+;; a "w" register, the second from an "r" register; the "type" attribute
+;; lists one entry per alternative, in the same order.
+(define_insn "aarch64_simd_stp<mode>"
+  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
+       (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))]
+  "TARGET_SIMD"
+  "@
+   stp\\t%<Vetype>1, %<Vetype>1, %y0
+   stp\\t%<vw>1, %<vw>1, %y0"
+  [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")]
+)
+
  (define_insn "load_pair<VQ:mode><VQ2:mode>"
    [(set (match_operand:VQ 0 "register_operand" "=w")
         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md

index 5b20abc27e5c61068576a7f94282a4ca84e36612..6df1dbec2a8097abe9783ed1670c77a8fad4ca57 100644 (file)
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -287,7 +287,7 @@
  ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP
  ;; as a vector-concat.  The address mode uses the same constraints as if it
  ;; were for a single value.
-(define_memory_constraint "Umn"
+(define_relaxed_memory_constraint "Umn"
    "@internal
    A memory address suitable for a load/store pair operation."
    (and (match_code "mem")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 13a7e89777d70da9695cdf2e9d55e2560d43f092..1d0b4822102612bed51943ec83fd8da00b078495 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1020,6 +1020,9 @@
  ;; Likewise for load/store pair.
  (define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
  
+;; Size of element access for STP/LDP-generated vectors.  The value is
+;; substituted into the "store_<ldpstp_vel_sz>" type attribute used by
+;; the aarch64_simd_stp<mode> pattern.
+(define_mode_attr ldpstp_vel_sz [(V2SI "8") (V2SF "8") (V2DI "16") (V2DF "16")])
+
  ;; For inequal width int to float conversion
  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c

new file mode 100644 (file)

index 0000000..fc2c1ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__((vector_size (16)));
+typedef int v2si __attribute__((vector_size (8)));
+
+/* Define function stpv2di_<LAB>, which builds a v2di with both lanes set
+   to A and stores it to X[IDX].  IDX is an element index into X, so the
+   store address is offset by IDX * sizeof (v2di) bytes.  */
+#define TESTV2DI(lab, idx)                     \
+  void                                         \
+  stpv2di_##lab (v2di *x, long long a)         \
+  {                                            \
+    v2di tmp = {a, a};                         \
+    x[idx] = tmp;                              \
+  }
+
+
+#define TESTV2SI(lab, idx)                     \
+  void                                         \
+  stpv2si_##lab (v2si *x, int a)               \
+  {                                            \
+    v2si tmp = {a, a};                         \
+    x[idx] = tmp;                              \
+  }                                            \
+
+/* Core test, no imm assembler offset:  */
+
+TESTV2SI(0, 0)
+TESTV2DI(0, 0)
+/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+\]} } } */
+
+/* Lower offset bounds:  */
+
+/* Valid offsets:  */
+TESTV2SI(1, -32)
+TESTV2DI(1, -32)
+/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -256\]} } } */
+/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -512\]} } } */
+/* Invalid offsets:  */
+TESTV2SI(2, -33)
+TESTV2DI(2, -33)
+/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -264\]} } } */
+/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -528\]} } } */
+
+/* Upper offset bounds:   */
+
+/* Valid offsets:  */
+TESTV2SI(3, 31)
+TESTV2DI(3, 31)
+/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 248\]} } } */
+/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 496\]} } } */
+/* Invalid offsets:  */
+TESTV2SI(4, 32)
+TESTV2DI(4, 32)
+/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 256\]} } } */
+/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 512\]} } } */
+
+
author	Victor Do Nascimento <victor.donascimento@arm.com>
	Tue, 25 Apr 2023 09:57:00 +0000 (10:57 +0100)
committer	Victor Do Nascimento <victor.donascimento@arm.com>
	Tue, 25 Apr 2023 10:44:42 +0000 (11:44 +0100)
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/constraints.md		patch \| blob \| blame \| history
gcc/config/aarch64/iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c	[new file with mode: 0644]	patch \| blob