git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Improve representation of vpaddd intrinsics
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 6 Jun 2023 10:09:12 +0000 (11:09 +0100)
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 6 Jun 2023 10:09:12 +0000 (11:09 +0100)
The aarch64_addpdi pattern is redundant as the reduc_plus_scal_<mode> pattern can already generate
the required form of the ADDP instruction, and is mostly folded to GIMPLE early on so can benefit from more optimisations.
Though it turns out that we were missing the folding for the unsigned variants.
This patch adds that and wires up the vpaddd_u64 and vpaddd_s64 intrinsics through the above pattern instead
so that we can remove a redundant pattern and get more optimisation earlier.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (aarch64_general_gimple_fold_builtin):
Handle unsigned reduc_plus_scal_ builtins.
* config/aarch64/aarch64-simd-builtins.def (addp): Delete DImode instances.
* config/aarch64/aarch64-simd.md (aarch64_addpdi): Delete.
* config/aarch64/arm_neon.h (vpaddd_s64): Reimplement with
__builtin_aarch64_reduc_plus_scal_v2di.
(vpaddd_u64): Reimplement with __builtin_aarch64_reduc_plus_scal_v2di_uu.

gcc/config/aarch64/aarch64-builtins.cc
gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/arm_neon.h

index e0bb2128e02961435600bdfda2aac1bd5b329d85..50c20c8fa8696abac173ad5e28b749dc5fdaaf8c 100644 (file)
@@ -3049,6 +3049,7 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
   switch (fcode)
     {
       BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
+      BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, NONE)
        new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
index 1beaa08c1e7c94bc13a64865ddb677345534699c..94ff3f1852f2849a644a57813257bb59dfd9581e 100644 (file)
@@ -53,8 +53,6 @@
   BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
   BUILTIN_VDQ_I (BINOP, addp, 0, NONE)
   BUILTIN_VDQ_I (BINOPU, addp, 0, NONE)
-  VAR1 (UNOP, addp, 0, NONE, di)
-  VAR1 (UNOPU, addp, 0, NONE, di)
   BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, NONE)
   BUILTIN_VDQ_BHSI (UNOP, clz, 2, NONE)
   BUILTIN_VS (UNOP, ctz, 2, NONE)
index dd1b084f8569a74e24c8ddee52e7e1e1be2a30a2..dbd6fc68914dc3c24c35034c37dc5cfe7e699ec2 100644 (file)
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
-(define_insn "aarch64_addpdi"
-  [(set (match_operand:DI 0 "register_operand" "=w")
-        (unspec:DI
-          [(match_operand:V2DI 1 "register_operand" "w")]
-          UNSPEC_ADDP))]
-  "TARGET_SIMD"
-  "addp\t%d0, %1.2d"
-  [(set_attr "type" "neon_reduc_add")]
-)
-
 ;; sqrt
 
 (define_expand "sqrt<mode>2"
index afe205cb83cde89ddeede4c2b370a9de8911b172..0bb98396b4c9ec5a5e24edf1beb21bad2f9c1f53 100644 (file)
@@ -17588,14 +17588,14 @@ __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddd_s64 (int64x2_t __a)
 {
-  return __builtin_aarch64_addpdi (__a);
+  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddd_u64 (uint64x2_t __a)
 {
-  return __builtin_aarch64_addpdi_uu (__a);
+  return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a);
 }
 
 /* vqabs */