aarch64: Add support for fp8dot2 and fp8dot4

author Saurabh Jha <saurabh.jha@arm.com>

Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)
author Saurabh Jha <saurabh.jha@arm.com>
Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc

index 99be5935c544e9ddf452b1c0d31a981ed5eb5416..63e17eeb20e5d3e571350f0f2ededc77e4b8f1fe 100644 (file)
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1621,6 +1621,7 @@ enum class aarch64_builtin_signatures
    binary,
    binary_lane,
    ternary,
+  ternary_lane,
    unary,
  };
  
@@ -1713,6 +1714,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
        break;
  
      case aarch64_builtin_signatures::ternary:
+    case aarch64_builtin_signatures::ternary_lane:
        return_type = builtin_data.types[0].type ();
        for (int i = 1; i <= 3; ++i)
         arg_types.quick_push (builtin_data.types[i].type ());
@@ -1726,6 +1728,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
    switch (builtin_data.signature)
      {
      case aarch64_builtin_signatures::binary_lane:
+    case aarch64_builtin_signatures::ternary_lane:
        arg_types.quick_push (integer_type_node);
        break;
  
@@ -2592,6 +2595,7 @@ struct aarch64_pragma_builtins_checker
  
    bool require_immediate_range (unsigned int, HOST_WIDE_INT,
                                 HOST_WIDE_INT);
+  bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int);
  
    bool check ();
  
@@ -2639,6 +2643,22 @@ require_immediate_range (unsigned int argno, HOST_WIDE_INT min,
    return true;
  }
  
+/* Require argument LANE_ARGNO to be an immediate lane index into vector
+   argument VEC_ARGNO, given that each index selects enough data to fill
+   one element of argument ELT_ARGNO.  Return true if the argument
+   is valid.  */
+bool
+aarch64_pragma_builtins_checker::
+require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
+                             unsigned int elt_argno)
+{
+  auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno]));
+  auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno]));
+  auto nunits = exact_div (GET_MODE_SIZE (vec_mode),
+                          GET_MODE_UNIT_SIZE (elt_mode)).to_constant ();
+  return require_immediate_range (lane_argno, 0, nunits - 1);
+}
+
  /* Check the arguments to the intrinsic call and return true if they
     are valid.  */
  bool
@@ -2646,6 +2666,9 @@ aarch64_pragma_builtins_checker::check ()
  {
    switch (builtin_data.unspec)
      {
+    case UNSPEC_FDOT_LANE_FP8:
+      return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
+
      case UNSPEC_LUTI2:
      case UNSPEC_LUTI4:
        {
@@ -3656,6 +3679,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
      case UNSPEC_FAMIN:
      case UNSPEC_F1CVTL_FP8:
      case UNSPEC_F2CVTL_FP8:
+    case UNSPEC_FDOT_FP8:
      case UNSPEC_FSCALE:
        icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
        break;
@@ -3689,6 +3713,11 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
         break;
        }
  
+    case UNSPEC_FDOT_LANE_FP8:
+      icode = code_for_aarch64_lane (builtin_data.unspec,
+                                    ops[0].mode, ops[3].mode);
+      break;
+
      case UNSPEC_LUTI2:
      case UNSPEC_LUTI4:
        create_integer_operand (ops.safe_push ({}),
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc

index ff0e5d21e937fd63ea699c93654c7b43616bb099..7591f1622d2dda6872659491bed7fdbbd6840151 100644 (file)
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -270,6 +270,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
  
    aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
  
+  aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile);
+
+  aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
+
    aarch64_def_or_undef (TARGET_LS64,
                         "__ARM_FEATURE_LS64", pfile);
    aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def

index 6221652b38ff657f4cd319c660bc106a9e714169..19277860b8cea2d4666a699e32fc4fd66251738f 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -30,6 +30,10 @@
  #define ENTRY_TERNARY(N, T0, T1, T2, T3, U, F) \
    ENTRY (N, ternary, T0, T1, T2, T3, U, F)
  
+#undef ENTRY_TERNARY_LANE
+#define ENTRY_TERNARY_LANE(N, T0, T1, T2, T3, U, F)    \
+  ENTRY (N, ternary_lane, T0, T1, T2, T3, U, F)
+
  #undef ENTRY_UNARY
  #define ENTRY_UNARY(N, T0, T1, U, F)   \
    ENTRY (N, unary, T0, T1, none, none, U, F)
@@ -85,6 +89,21 @@
    ENTRY_UNARY (N##_bf16_mf8_fpm, bf16q, T1, UNSPEC, FLAGS)     \
    ENTRY_UNARY (N##_f16_mf8_fpm, f16q, T1, UNSPEC, FLAGS)
  
+#undef ENTRY_VDOT_FPM
+#define ENTRY_VDOT_FPM(T)                                              \
+  ENTRY_TERNARY (vdot_##T##_mf8_fpm, T, T, f8, f8,                     \
+                UNSPEC_FDOT_FP8, FP8)                                  \
+  ENTRY_TERNARY (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q,            \
+                UNSPEC_FDOT_FP8, FP8)                                  \
+  ENTRY_TERNARY_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8,           \
+                     UNSPEC_FDOT_LANE_FP8, FP8)                        \
+  ENTRY_TERNARY_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q,         \
+                     UNSPEC_FDOT_LANE_FP8, FP8)                        \
+  ENTRY_TERNARY_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8,   \
+                     UNSPEC_FDOT_LANE_FP8, FP8)                        \
+  ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \
+                     UNSPEC_FDOT_LANE_FP8, FP8)
+
  // faminmax
  #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
  ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP)
@@ -125,3 +144,13 @@ ENTRY_TERNARY (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q,
  #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
  ENTRY_BINARY_VHSDF_SIGNED (vscale, UNSPEC_FSCALE, FP)
  #undef REQUIRED_EXTENSIONS
+
+// fpm dot2 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2)
+ENTRY_VDOT_FPM (f16)
+#undef REQUIRED_EXTENSIONS
+
+// fpm dot4 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
+ENTRY_VDOT_FPM (f32)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index e3b4e609a199f144e0a68c7d3fb46cbbdd9c728b..69035c797fb0d686bebb6cc0dc2dfce01477840c 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -10097,3 +10097,30 @@
    "TARGET_FP8"
    "<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  )
+
+;; fpm vdot instructions.  The target requirements are enforced by
+;; VDQ_HSF_FDOT.
+(define_insn "@aarch64_<insn><mode>"
+  [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
+       (unspec:VDQ_HSF_FDOT
+        [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
+         (match_operand:<VNARROWB> 2 "register_operand" "w")
+         (match_operand:<VNARROWB> 3 "register_operand" "w")
+         (reg:DI FPM_REGNUM)]
+        FPM_FDOT))]
+  ""
+  "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>"
+)
+
+(define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>"
+  [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
+       (unspec:VDQ_HSF_FDOT
+        [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
+         (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w")
+         (match_operand:VB 3 "register_operand" "w")
+         (match_operand 4 "const_int_operand")
+         (reg:DI FPM_REGNUM)]
+        FPM_FDOT_LANE))]
+  ""
+  "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 33cb513390d710c8999429bd7227bcaac5b1d308..b28ba63cc9c0836a7a6f4db4531d4fb64185aaae 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -188,6 +188,11 @@
  ;; Quad vector Float modes with half/single elements.
  (define_mode_iterator VQ_HSF [V8HF V4SF])
  
+(define_mode_iterator VDQ_HSF_FDOT [(V4HF "TARGET_FP8DOT2")
+                                   (V8HF "TARGET_FP8DOT2")
+                                   (V2SF "TARGET_FP8DOT4")
+                                   (V4SF "TARGET_FP8DOT4")])
+
  ;; Modes suitable to use as the return type of a vcond expression.
  (define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
  
@@ -728,6 +733,8 @@
      UNSPEC_F1CVTL2_FP8 ; Used in aarch64-builtins.cc.
      UNSPEC_F2CVTL_FP8  ; Used in aarch64-simd.md.
      UNSPEC_F2CVTL2_FP8 ; Used in aarch64-builtins.cc.
+    UNSPEC_FDOT_FP8    ; Used in aarch64-simd.md.
+    UNSPEC_FDOT_LANE_FP8 ; Used in aarch64-simd.md.
      UNSPEC_FMAX                ; Used in aarch64-simd.md.
      UNSPEC_FMAXNMV     ; Used in aarch64-simd.md.
      UNSPEC_FMAXV       ; Used in aarch64-simd.md.
@@ -1813,6 +1820,18 @@
  (define_mode_attr VPACKB [(V4HF "V8QI") (V8HF "V16QI") (V4SF "V8QI")])
  (define_mode_attr VPACKBtype [(V4HF "8b") (V8HF "16b") (V4SF "8b")])
  
+;; Modes narrowed all the way to bytes.
+(define_mode_attr VNARROWB [(V4HF "V8QI") (V8HF "V16QI")
+                           (V2SF "V8QI") (V4SF "V16QI")])
+
+;; Register suffix for modes narrowed to bytes.
+(define_mode_attr Vnbtype [(V4HF "8b") (V8HF "16b")
+                          (V2SF "8b") (V4SF "16b")])
+
+;; Register suffix representing one group of byte elements per wider element.
+(define_mode_attr Vnbsubtype [(V4HF "2b") (V8HF "2b")
+                             (V2SF "4b") (V4SF "4b")])
+
  ;; Widened modes of vector modes.
  (define_mode_attr VWIDE [(V8QI  "V8HI")  (V4HI  "V4SI")
                          (V2SI  "V2DI")  (V16QI "V8HI")
@@ -3826,6 +3845,9 @@
  
  (define_int_iterator FSCALE_UNS [UNSPEC_FSCALE])
  
+(define_int_iterator FPM_FDOT [UNSPEC_FDOT_FP8])
+(define_int_iterator FPM_FDOT_LANE [UNSPEC_FDOT_LANE_FP8])
+
  ;; -------------------------------------------------------------------
  ;; Int Iterators Attributes.
  ;; -------------------------------------------------------------------
@@ -3835,6 +3857,8 @@
    [(UNSPEC_F1CVTL_FP8 "f1cvtl")
     (UNSPEC_F2CVTL_FP8 "f2cvtl")
     (UNSPEC_FCVTN_FP8 "fcvtn")
+   (UNSPEC_FDOT_FP8 "fdot")
+   (UNSPEC_FDOT_LANE_FP8 "fdot")
     (UNSPEC_FSCALE "fscale")])
  
  ;; The optab associated with an operation.  Note that for ANDF, IORF
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c

index e5a19aaefb65ca3723024cf15f6aec11aea09cc2..fb3dc139f1f7af67b3366d849b679049bcd9de16 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -273,3 +273,25 @@
  #ifndef __ARM_FEATURE_FP8
  #error Foo
  #endif
+
+#pragma GCC target "arch=armv9-a+fp8dot4"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv9-a+fp8dot2"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c

new file mode 100644 (file)

index 0000000..5fe1391
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f16_fpm:
+**     msr     fpmr, x0
+**     fdot    v0.4h, v1.8b, v2.8b
+**     ret
+*/
+float16x4_t
+test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f16_fpm:
+**     msr     fpmr, x0
+**     fdot    v0.8h, v1.16b, v2.16b
+**     ret
+*/
+float16x8_t
+test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f16_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.4h, v1.8b, v2.2b\[0\]
+**     ret
+*/
+float16x4_t
+test_vdot_lane_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_lane_f16_fpm_3:
+**     msr     fpmr, x0
+**     fdot    v0.4h, v1.8b, v2.2b\[3\]
+**     ret
+*/
+float16x4_t
+test_vdot_lane_f16_fpm_3 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_lane_f16_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdot_laneq_f16_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.4h, v1.8b, v2.2b\[0\]
+**     ret
+*/
+float16x4_t
+test_vdot_laneq_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdot_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_laneq_f16_fpm_7:
+**     msr     fpmr, x0
+**     fdot    v0.4h, v1.8b, v2.2b\[7\]
+**     ret
+*/
+float16x4_t
+test_vdot_laneq_f16_fpm_7 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdot_laneq_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vdotq_lane_f16_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.8h, v1.16b, v2.2b\[0\]
+**     ret
+*/
+float16x8_t
+test_vdotq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdotq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_lane_f16_fpm_3:
+**     msr     fpmr, x0
+**     fdot    v0.8h, v1.16b, v2.2b\[3\]
+**     ret
+*/
+float16x8_t
+test_vdotq_lane_f16_fpm_3 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdotq_lane_f16_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdotq_laneq_f16_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.8h, v1.16b, v2.2b\[0\]
+**     ret
+*/
+float16x8_t
+test_vdotq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_laneq_f16_fpm_7:
+**     msr     fpmr, x0
+**     fdot    v0.8h, v1.16b, v2.2b\[7\]
+**     ret
+*/
+float16x8_t
+test_vdotq_laneq_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_laneq_f16_mf8_fpm (a, b, c, 7, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c

new file mode 100644 (file)

index 0000000..e47a737
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f32_fpm:
+**     msr     fpmr, x0
+**     fdot    v0.2s, v1.8b, v2.8b
+**     ret
+*/
+float32x2_t
+test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f32_fpm:
+**     msr     fpmr, x0
+**     fdot    v0.4s, v1.16b, v2.16b
+**     ret
+*/
+float32x4_t
+test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f32_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.2s, v1.8b, v2.4b\[0\]
+**     ret
+*/
+float32x2_t
+test_vdot_lane_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_lane_f32_fpm_1:
+**     msr     fpmr, x0
+**     fdot    v0.2s, v1.8b, v2.4b\[1\]
+**     ret
+*/
+float32x2_t
+test_vdot_lane_f32_fpm_1 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdot_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdot_laneq_f32_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.2s, v1.8b, v2.4b\[0\]
+**     ret
+*/
+float32x2_t
+test_vdot_laneq_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdot_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_laneq_f32_fpm_3:
+**     msr     fpmr, x0
+**     fdot    v0.2s, v1.8b, v2.4b\[3\]
+**     ret
+*/
+float32x2_t
+test_vdot_laneq_f32_fpm_3 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdot_laneq_f32_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdotq_lane_f32_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.4s, v1.16b, v2.4b\[0\]
+**     ret
+*/
+float32x4_t
+test_vdotq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdotq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_lane_f32_fpm_1:
+**     msr     fpmr, x0
+**     fdot    v0.4s, v1.16b, v2.4b\[1\]
+**     ret
+*/
+float32x4_t
+test_vdotq_lane_f32_fpm_1 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vdotq_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_laneq_f32_fpm_0:
+**     msr     fpmr, x0
+**     fdot    v0.4s, v1.16b, v2.4b\[0\]
+**     ret
+*/
+float32x4_t
+test_vdotq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_laneq_f32_fpm_3:
+**     msr     fpmr, x0
+**     fdot    v0.4s, v1.16b, v2.4b\[3\]
+**     ret
+*/
+float32x4_t
+test_vdotq_laneq_f32_fpm_3 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vdotq_laneq_f32_mf8_fpm (a, b, c, 3, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c

new file mode 100644 (file)

index 0000000..7585cff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+fp8dot4+fp8dot2"
+
+void
+test(float16x4_t f16, float16x8_t f16q, float32x2_t f32,
+     float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x,
+     fpm_t fpm)
+{
+  vdot_lane_f16_mf8_fpm (f16, mf8, mf8, x, fpm); /* { dg-error {argument 4 of 'vdot_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdot_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+  vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vdotq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdotq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+
+  vdot_lane_f32_mf8_fpm (f32, mf8, mf8, x, fpm); /* { dg-error {argument 4 of 'vdot_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdot_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vdotq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdotq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+
+  vdot_lane_f16_mf8_fpm (f16, mf8, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+  vdot_lane_f16_mf8_fpm (f16, mf8, mf8, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+
+  vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+
+  vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+  vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+
+  vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+
+  vdot_lane_f32_mf8_fpm (f32, mf8, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */
+  vdot_lane_f32_mf8_fpm (f32, mf8, mf8, 2, fpm); /* { dg-error { passing 2 to argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */
+
+  vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+  vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+
+  vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */
+  vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 2, fpm); /* { dg-error { passing 2 to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */
+
+  vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+  vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */
+}
author	Saurabh Jha <saurabh.jha@arm.com>
	Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Tue, 10 Dec 2024 13:21:20 +0000 (13:21 +0000)
gcc/config/aarch64/aarch64-builtins.cc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-c.cc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd-pragma-builtins.def		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c	[new file with mode: 0644]	patch \| blob