aarch64: Make existing V2HF be usable.
author     Tamar Christina <tamar.christina@arm.com>
           Mon, 12 Dec 2022 15:15:07 +0000
committer  Tamar Christina <tamar.christina@arm.com>
           Mon, 12 Dec 2022 15:15:07 +0000
The backend has an existing V2HFmode that is used by pairwise operations.
This mode was, however, never made fully functional.  Amongst other things, it
was never declared as a vector type, which made it unusable from the mid-end.

It's also lacking an implementation for loads/stores, so reload ICEs if this
mode is ever used.  This patch finishes the implementation by providing the above.
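
As a hedged illustration (this sketch is mine, not part of the commit): in
GNU C, a 4-byte vector of _Float16 maps onto V2HFmode, so code as simple as
the following could previously ICE in reload once the value had to be loaded,
stored, or spilled.  The typedef name v2hf is made up for this example;
compile with something like -O2 -march=armv8.2-a+fp16.

    typedef _Float16 v2hf __attribute__ ((vector_size (4)));

    v2hf
    copy_v2hf (v2hf *p, v2hf *q)
    {
      v2hf tmp = *p;   /* needs the V2HF load pattern added below.   */
      *q = tmp;        /* needs the V2HF store pattern added below.  */
      return tmp;      /* needs the V2HF register-move alternative.  */
    }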

Note that I have created a new iterator VHSDF_P instead of extending VHSDF,
because the previous iterator is used in far more places than just loads/stores.

It's also used, for instance, in intrinsics, and extending it would force me to
provide support for mangling the type even though we never expose it through
intrinsics.
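
As a hedged, hypothetical contrast (my illustration, not from the patch):
VHSDF backs public intrinsics such as vmaxnmv_f16, whose float16x4_t argument
type needs a stable C++ name mangling, whereas arm_neon.h provides no
two-lane half-float vector type at all.

    #include <arm_neon.h>

    /* float16x4_t corresponds to V4HFmode, which VHSDF covers and which is
       therefore mangled for C++.  There is no float16x2_t, so V2HF stays
       internal and only joins the new VHSDF_P (and VMOVE) iterators.  */
    float16_t
    max_of_four (float16x4_t x)
    {
      return vmaxnmv_f16 (x);
    }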

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): New.
(mov<mode>, movmisalign<mode>, aarch64_dup_lane<mode>,
aarch64_store_lane0<mode>, aarch64_simd_vec_set<mode>,
@aarch64_simd_vec_copy_lane<mode>, vec_set<mode>,
reduc_<optab>_scal_<mode>, reduc_<fmaxmin>_scal_<mode>,
aarch64_reduc_<optab>_internal<mode>, aarch64_get_lane<mode>,
vec_init<mode><Vel>, vec_extract<mode><Vel>): Support V2HF.
(aarch64_simd_dupv2hf): New.
* config/aarch64/aarch64.cc (aarch64_classify_vector_mode):
Add E_V2HFmode.
* config/aarch64/iterators.md (VHSDF_P): New.
(V2F, VMOVE, nunits, Vtype, Vmtype, Vetype, stype, VEL,
Vel, q, vp): Add V2HF.
* config/arm/types.md (neon_fp_reduc_add_h): New.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/slp_1.c: Update testcase.

gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.cc
gcc/config/aarch64/iterators.md
gcc/config/arm/types.md
gcc/testsuite/gcc.target/aarch64/sve/slp_1.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 104088f67d2eb005880e2a62b9421421a8ba6a8c..c0e6164b3bde97c8ce1ebfae05aacb0409365f18 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-       (match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
+       (match_operand:VMOVE 1 "general_operand"))]
   "TARGET_FLOAT"
-  "
+{
   /* Force the operand into a register if it is not an
      immediate whose use can be replaced with xzr.
      If the mode is 16 bytes wide, then we will be doing
       aarch64_expand_vector_init (operands[0], operands[1]);
       DONE;
     }
-  "
-)
+})
 
 (define_expand "movmisalign<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-        (match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
+        (match_operand:VMOVE 1 "general_operand"))]
   "TARGET_FLOAT && !STRICT_ALIGNMENT"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
 )
 
+(define_insn "aarch64_simd_dupv2hf"
+  [(set (match_operand:V2HF 0 "register_operand" "=w")
+       (vec_duplicate:V2HF
+         (match_operand:HF 1 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "@
+   sli\\t%d0, %d0, 16"
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_insn "aarch64_simd_dup<mode>"
   [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
        (vec_duplicate:VDQF_F16
 )
 
 (define_insn "aarch64_dup_lane<mode>"
-  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
-       (vec_duplicate:VALL_F16
+  [(set (match_operand:VMOVE 0 "register_operand" "=w")
+       (vec_duplicate:VMOVE
          (vec_select:<VEL>
-           (match_operand:VALL_F16 1 "register_operand" "w")
+           (match_operand:VMOVE 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
           )))]
   "TARGET_SIMD"
    (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
 )
 
+(define_insn "*aarch64_simd_movv2hf"
+  [(set (match_operand:V2HF 0 "nonimmediate_operand"
+               "=w, m,  m,  w, ?r, ?w, ?r, w, w")
+       (match_operand:V2HF 1 "general_operand"
+               "m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
+  "TARGET_SIMD_F16INST
+   && (register_operand (operands[0], V2HFmode)
+       || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
+   "@
+    ldr\\t%s0, %1
+    str\\twzr, %0
+    str\\t%s1, %0
+    mov\\t%0.2s[0], %1.2s[0]
+    umov\\t%w0, %1.s[0]
+    fmov\\t%s0, %w1
+    mov\\t%w0, %w1
+    movi\\t%d0, 0
+    * return aarch64_output_simd_mov_immediate (operands[1], 32);"
+  [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
+                    neon_logic, neon_to_gp, f_mcr,\
+                    mov_reg, neon_move, neon_move")]
+)
+
 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
                "=w, Umn,  m,  w, ?r, ?w, ?r, w,  w")
 
 (define_insn "aarch64_store_lane0<mode>"
   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
-       (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
+       (vec_select:<VEL> (match_operand:VMOVE 1 "register_operand" "w")
                        (parallel [(match_operand 2 "const_int_operand" "n")])))]
   "TARGET_SIMD
    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 )
 
 (define_insn "aarch64_simd_vec_set<mode>"
-  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
-       (vec_merge:VALL_F16
-           (vec_duplicate:VALL_F16
+  [(set (match_operand:VMOVE 0 "register_operand" "=w,w,w")
+       (vec_merge:VMOVE
+           (vec_duplicate:VMOVE
                (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
-           (match_operand:VALL_F16 3 "register_operand" "0,0,0")
+           (match_operand:VMOVE 3 "register_operand" "0,0,0")
            (match_operand:SI 2 "immediate_operand" "i,i,i")))]
   "TARGET_SIMD"
   {
 )
 
 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
-  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
-       (vec_merge:VALL_F16
-           (vec_duplicate:VALL_F16
+  [(set (match_operand:VMOVE 0 "register_operand" "=w")
+       (vec_merge:VMOVE
+           (vec_duplicate:VMOVE
              (vec_select:<VEL>
-               (match_operand:VALL_F16 3 "register_operand" "w")
+               (match_operand:VMOVE 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
-           (match_operand:VALL_F16 1 "register_operand" "0")
+           (match_operand:VMOVE 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
   "TARGET_SIMD"
   {
 )
 
 (define_expand "vec_set<mode>"
-  [(match_operand:VALL_F16 0 "register_operand")
+  [(match_operand:VMOVE 0 "register_operand")
    (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
    (match_operand:SI 2 "immediate_operand")]
   "TARGET_SIMD"
 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
 (define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
-   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
                 FMAXMINV)]
   "TARGET_SIMD"
   {
 
 (define_expand "reduc_<fmaxmin>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
-   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
                 FMAXMINNMV)]
   "TARGET_SIMD"
   {
 )
 
 (define_insn "aarch64_reduc_<optab>_internal<mode>"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
-       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_P 0 "register_operand" "=w")
+       (unspec:VHSDF_P [(match_operand:VHSDF_P 1 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
 (define_insn_and_split "aarch64_get_lane<mode>"
   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
        (vec_select:<VEL>
-         (match_operand:VALL_F16 1 "register_operand" "w, w, w")
+         (match_operand:VMOVE 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
   "TARGET_SIMD"
   {
 ;; Standard pattern name vec_init<mode><Vel>.
 
 (define_expand "vec_init<mode><Vel>"
-  [(match_operand:VALL_F16 0 "register_operand")
+  [(match_operand:VMOVE 0 "register_operand")
    (match_operand 1 "" "")]
   "TARGET_SIMD"
 {
 
 (define_expand "vec_extract<mode><Vel>"
   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
-   (match_operand:VALL_F16 1 "register_operand")
+   (match_operand:VMOVE 1 "register_operand")
    (match_operand:SI 2 "immediate_operand")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 45d659a4a91d73ffea675d489ca1f3fe7768fd0f..fd92212f96a9062fa7a8182b55d9e03589e591cc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3634,6 +3634,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
+    case E_V2HFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD : 0;
 
     default:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7c7fcbbc24b99b3ad8687097a432fea64af47226..d10cf93572ef173c92d6d256bd937c069ce04d3e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
 (define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
                             (V8HF "TARGET_SIMD_F16INST")
                             V2SF V4SF V2DF])
+;; Advanced SIMD Float modes suitable for reduction or pairwise operations
+(define_mode_iterator VHSDF_P [(V4HF "TARGET_SIMD_F16INST")
+                              (V8HF "TARGET_SIMD_F16INST")
+                              V2SF V4SF V2DF (V2HF "TARGET_SIMD_F16INST")])
 
 ;; Advanced SIMD Float modes, and DF.
 (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
 (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
 
 ;; Advanced SIMD Float modes with 2 elements.
-(define_mode_iterator V2F [V2SF V2DF])
+(define_mode_iterator V2F [V2SF V2DF V2HF])
 
 ;; All Advanced SIMD modes on which we support any arithmetic operations.
 (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
 
-;; All Advanced SIMD modes suitable for moving, loading, and storing.
+;; The set of all modes for which vld1 intrinsics are provided.
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
                                V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
 
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including V2HF.
+(define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+                            V4HF V8HF V4BF V8BF V2SF V4SF V2DF
+                            (V2HF "TARGET_SIMD_F16INST")])
+
+
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
 (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
                                V4HF V8HF V2SF V4SF])
                          (V2SF "2") (V4SF "4")
                          (V1DF "1") (V2DF "2")
                          (DI "1") (DF "1")
-                         (V8DI "8")])
+                         (V8DI "8") (V2HF "2")])
 
 ;; Map a mode to the number of bits in it, if the size of the mode
 ;; is constant.
 (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
                          (V4HI "h") (V8HI  "h")
                          (V2SI "s") (V4SI  "s")
-                         (V2DI "d")
+                         (V2DI "d") (V2HF  "h")
                          (V4HF "h") (V8HF  "h")
                          (V2SF "s") (V4SF  "s")
                          (V2DF "d")
 ;; more accurately.
 (define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
                         (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
-                        (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
+                        (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d") (V2HF "s")
                         (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
                         (SI "s") (DI "d")])
 
                       (V4HF "HF") (V8HF  "HF")
                       (V2SF "SF") (V4SF  "SF")
                       (DF   "DF") (V2DF  "DF")
-                      (SI   "SI") (HI    "HI")
-                      (QI   "QI")
+                      (SI   "SI") (V2HF  "HF")
+                      (QI   "QI") (HI    "HI")
                       (V4BF "BF") (V8BF "BF")
                       (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
                       (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
                       (V2SF "sf") (V4SF "sf")
                       (V2DF "df") (DF   "df")
                       (SI   "si") (HI   "hi")
-                      (QI   "qi")
+                      (QI   "qi") (V2HF "hf")
                       (V4BF "bf") (V8BF "bf")
                       (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
                       (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
                     (V4HF "") (V8HF "_q")
                     (V4BF "") (V8BF "_q")
                     (V2SF "") (V4SF  "_q")
-                              (V2DF  "_q")
+                    (V2HF "") (V2DF  "_q")
                     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")
                     (V2x8QI "") (V2x16QI "_q")
                     (V2x4HI "") (V2x8HI "_q")
                      (V2SI "p") (V4SI  "v")
                      (V2DI "p") (V2DF  "p")
                      (V2SF "p") (V4SF  "v")
+                     (V2HF "p")
                      (V4HF "v") (V8HF  "v")])
 
 (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index d0d9997efd2a677e226a47eff125ff1760314fa8..880353dbeedbac3df5e237d78fa90651d6a91d52 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
 ; neon_fp_minmax_s_q
 ; neon_fp_minmax_d
 ; neon_fp_minmax_d_q
+; neon_fp_reduc_add_h
 ; neon_fp_reduc_add_s
 ; neon_fp_reduc_add_s_q
 ; neon_fp_reduc_add_d
   neon_fp_minmax_d,\
   neon_fp_minmax_d_q,\
 \
+  neon_fp_reduc_add_h,\
   neon_fp_reduc_add_s,\
   neon_fp_reduc_add_s_q,\
   neon_fp_reduc_add_d,\
           neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\
           neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\
           neon_fp_neg_s, neon_fp_neg_s_q, neon_fp_neg_d, neon_fp_neg_d_q,\
-          neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\
-          neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,
+          neon_fp_reduc_add_h, neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
+          neon_fp_reduc_add_d, neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,\
           neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\
           neon_fp_reduc_minmax_d_q,\
           neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
index 07d71a63414b1066ea431e287286ad048515711a..e6021c5a42748701e5326a5c387a39a0bbadc9e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -30,11 +30,9 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n)     \
 TEST_ALL (VEC_PERM)
 
 /* We should use one DUP for each of the 8-, 16- and 32-bit types,
-   although we currently use LD1RW for _Float16.  We should use two
-   DUPs for each of the three 64-bit types.  */
+   We should use two DUPs for each of the three 64-bit types.  */
 /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 3 } } */
 /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
 /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
 /* { dg-final { scan-assembler-not {\tzip2\t} } } */
@@ -53,7 +51,7 @@ TEST_ALL (VEC_PERM)
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
 /* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\tstr} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {\tstr} } } */
+/* { dg-final { scan-assembler-not {\tstr\th[0-9]+} } } */
 
 /* { dg-final { scan-assembler-not {\tuqdec} } } */