;; addp
-(define_insn "aarch64_addp<mode><vczle><vczbe>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
- (unspec:VDQ_I
- [(match_operand:VDQ_I 1 "register_operand" "w")
- (match_operand:VDQ_I 2 "register_operand" "w")]
- UNSPEC_ADDP))]
- "TARGET_SIMD"
+;; ADDP with two registers semantically concatenates them and performs
+;; a pairwise addition on the result. For 128-bit input modes we represent
+;; this as a concatenation of the pairwise addition results of the two input
+;; registers. This allows us to avoid using intermediate 256-bit modes.
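+;; For example, with V4SI operands x and y and the even/odd lane selectors
+;; { 0, 2 } and { 1, 3 }, the pattern below computes
+;; { x[0]+x[1], x[2]+x[3], y[0]+y[1], y[2]+y[3] }.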
+(define_insn "aarch64_addp<mode>_insn"
+ [(set (match_operand:VQ_I 0 "register_operand" "=w")
+ (vec_concat:VQ_I
+ (plus:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_I 1 "register_operand" "w")
+ (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
+ (vec_select:<VHALF>
+ (match_dup 1)
+ (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
+ (plus:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_I 2 "register_operand" "w")
+ (match_dup 3))
+ (vec_select:<VHALF>
+ (match_dup 2)
+ (match_dup 4)))))]
+ "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
+ "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+;; For 64-bit input modes an ADDP is represented as a concatenation
+;; of the input registers into a 128-bit register, which is then fed
+;; into a pairwise add. That way we avoid having to create intermediate
+;; 32-bit vector modes.
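+;; For example, with V2SI operands x and y the double-width concatenation
+;; is { x[0], x[1], y[0], y[1] }; selecting its even lanes { 0, 2 } and odd
+;; lanes { 1, 3 } and adding them yields { x[0]+x[1], y[0]+y[1] }.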
+(define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
+ [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
+ (plus:VD_BHSI
+ (vec_select:VD_BHSI
+ (vec_concat:<VDBL>
+ (match_operand:VD_BHSI 1 "register_operand" "w")
+ (match_operand:VD_BHSI 2 "register_operand" "w"))
+ (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
+ (vec_select:VD_BHSI
+ (vec_concat:<VDBL>
+ (match_dup 1)
+ (match_dup 2))
+ (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
+ "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
"addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
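+
+;; The expander below builds the even and odd lane selectors fed to the
+;; patterns above, e.g. (parallel [0 2]) and (parallel [1 3]) for V2SI or
+;; V4SI inputs.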
+(define_expand "aarch64_addp<mode>"
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand")]
+ "TARGET_SIMD"
+ {
+ int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
+ if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
+ nunits /= 2;
+ rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
+ rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
+ if (BYTES_BIG_ENDIAN)
+ std::swap (operands[1], operands[2]);
+ emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
+ operands[2], par_even, par_odd));
+ DONE;
+ }
+)
+
;; sqrt
(define_expand "sqrt<mode>2"