]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: PR target/99195 annotate simple ternary ops for vec-concat with zero
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 4 May 2023 08:42:37 +0000 (09:42 +0100)
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 4 May 2023 08:43:17 +0000 (09:43 +0100)
We're now moving onto various simple ternary instructions, including some lane forms.
These include intrinsics that map down to mla, mls, fma, aba, bsl instructions.
Tests are added for lane 0 and lane 1 as for some of these instructions the lane 0 variants
use separate simpler patterns that need a separate annotation.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_<su>aba<mode>): Rename to...
(aarch64_<su>aba<mode><vczle><vczbe>): ... This.
(aarch64_mla<mode>): Rename to...
(aarch64_mla<mode><vczle><vczbe>): ... This.
(*aarch64_mla_elt<mode>): Rename to...
(*aarch64_mla_elt<mode><vczle><vczbe>): ... This.
(*aarch64_mla_elt_<vswap_width_name><mode>): Rename to...
(*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>): ... This.
(aarch64_mla_n<mode>): Rename to...
(aarch64_mla_n<mode><vczle><vczbe>): ... This.
(aarch64_mls<mode>): Rename to...
(aarch64_mls<mode><vczle><vczbe>): ... This.
(*aarch64_mls_elt<mode>): Rename to...
(*aarch64_mls_elt<mode><vczle><vczbe>): ... This.
(*aarch64_mls_elt_<vswap_width_name><mode>): Rename to...
(*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>): ... This.
(aarch64_mls_n<mode>): Rename to...
(aarch64_mls_n<mode><vczle><vczbe>): ... This.
(fma<mode>4): Rename to...
(fma<mode>4<vczle><vczbe>): ... This.
(*aarch64_fma4_elt<mode>): Rename to...
(*aarch64_fma4_elt<mode><vczle><vczbe>): ... This.
(*aarch64_fma4_elt_<vswap_width_name><mode>): Rename to...
(*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>): ... This.
(*aarch64_fma4_elt_from_dup<mode>): Rename to...
(*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>): ... This.
(fnma<mode>4): Rename to...
(fnma<mode>4<vczle><vczbe>): ... This.
(*aarch64_fnma4_elt<mode>): Rename to...
(*aarch64_fnma4_elt<mode><vczle><vczbe>): ... This.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Rename to...
(*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>): ... This.
(*aarch64_fnma4_elt_from_dup<mode>): Rename to...
(*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>): ... This.
(aarch64_simd_bsl<mode>_internal): Rename to...
(aarch64_simd_bsl<mode>_internal<vczle><vczbe>): ... This.
(*aarch64_simd_bsl<mode>_alt): Rename to...
(*aarch64_simd_bsl<mode>_alt<vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

PR target/99195
* gcc.target/aarch64/simd/pr99195_3.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c [new file with mode: 0644]

index 511d1e78809d2185c9d6a390008cb1005899ae25..705c4b0b4b404355adb5f738be936dad46488a79 100644 (file)
   }
 )
 
-(define_insn "aarch64_<su>aba<mode>"
+(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (minus:VDQ_BHSI
                         (USMAX:VDQ_BHSI
 )
 
 
-(define_insn "aarch64_mla<mode>"
+(define_insn "aarch64_mla<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                        (match_operand:VDQ_BHSI 2 "register_operand" "w")
   [(set_attr "type" "neon_mla_<Vetype><q>")]
 )
 
-(define_insn "*aarch64_mla_elt<mode>"
+(define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
         (mult:VDQHS
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
         (mult:VDQHS
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "aarch64_mla_n<mode>"
+(define_insn "aarch64_mla_n<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "aarch64_mls<mode>"
+(define_insn "aarch64_mls<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
   [(set_attr "type" "neon_mla_<Vetype><q>")]
 )
 
-(define_insn "*aarch64_mls_elt<mode>"
+(define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "aarch64_mls_n<mode>"
+(define_insn "aarch64_mls_n<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
   }
 )
 
-(define_insn "fma<mode>4"
+(define_insn "fma<mode>4<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                  (match_operand:VHSDF 2 "register_operand" "w")
   [(set_attr "type" "neon_fp_mla_<stype><q>")]
 )
 
-(define_insn "*aarch64_fma4_elt<mode>"
+(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
     (fma:VDQF
       (vec_duplicate:VDQF
   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
     (fma:VDQSF
       (vec_duplicate:VDQSF
   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_fma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
   [(set (match_operand:VMUL 0 "register_operand" "=w")
     (fma:VMUL
       (vec_duplicate:VMUL
   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
 )
 
-(define_insn "fnma<mode>4"
+(define_insn "fnma<mode>4<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF
          (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
   [(set_attr "type" "neon_fp_mla_<stype><q>")]
 )
 
-(define_insn "*aarch64_fnma4_elt<mode>"
+(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
     (fma:VDQF
       (neg:VDQF
   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
     (fma:VDQSF
       (neg:VDQSF
   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
 )
 
-(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
   [(set (match_operand:VMUL 0 "register_operand" "=w")
     (fma:VMUL
       (neg:VMUL
 ;; Some forms of straight-line code may generate the equivalent form
 ;; in *aarch64_simd_bsl<mode>_alt.
 
-(define_insn "aarch64_simd_bsl<mode>_internal"
+(define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
        (xor:VDQ_I
           (and:VDQ_I
 ;; the first.  The two are equivalent but since recog doesn't try all
 ;; permutations of commutative operations, we have to have a separate pattern.
 
-(define_insn "*aarch64_simd_bsl<mode>_alt"
+(define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
        (xor:VDQ_I
           (and:VDQ_I
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c
new file mode 100644 (file)
index 0000000..c751924
--- /dev/null
@@ -0,0 +1,68 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define TERNARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c), zeros);      \
+}
+
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3)    \
+FUNC (T, IS, OS, OP1, S)        \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4)       \
+FUNC (T, IS, OS, OP1, S)                \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+OPTHREE (int8, 8, 16, s8, mla, mls, aba)
+OPTHREE (int16, 4, 8, s16, mla, mls, aba)
+OPTHREE (int32, 2, 4, s32, mla, mls, aba)
+
+OPFOUR (uint8, 8, 16, u8, mla, mls, aba, bsl)
+OPFOUR (uint16, 4, 8, u16, mla, mls, aba, bsl)
+OPFOUR (uint32, 2, 4, u32, mla, mls, aba, bsl)
+
+OPTHREE (float32, 2, 4, f32, mla, fma, fms)
+
+#undef FUNC
+#define TERNARY_LANE(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros);      \
+}                                              \
+OT                                              \
+foo_##OP##_##S##_lane1 (IT a, IT b, IT c)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros);      \
+}
+
+#define FUNC(T,IS,OS,OP,S) TERNARY_LANE (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, mla_lane, mls_lane)
+OPTWO (int32, 2, 4, s32, mla_lane, mls_lane)
+
+OPTWO (uint16, 4, 8, u16, mla_lane, mls_lane)
+OPTWO (uint32, 2, 4, u32, mla_lane, mls_lane)
+
+OPTHREE (float32, 2, 4, f32, mla_lane, fma_lane, fms_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+