#ifdef GCC_TARGET_H
bool verify_type_context (location_t, type_context_kind, const_tree, bool);
#endif
+ void add_sve_type_attribute (tree, unsigned int, unsigned int,
+ const char *, const char *);
}
extern void aarch64_split_combinev16qi (rtx operands[3]);
/* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors
and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined
- mangling of the type. ACLE_NAME is the <arm_sve.h> name of the type. */
-static void
+ mangling of the type. ACLE_NAME is the <arm_sve.h> name of the type,
+ or null if <arm_sve.h> does not provide the type. */
+void
add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
const char *mangled_name, const char *acle_name)
{
tree mangled_name_tree
= (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
+ tree acle_name_tree
+ = (acle_name ? get_identifier (acle_name) : NULL_TREE);
- tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
+ tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
value = tree_cons (NULL_TREE, mangled_name_tree, value);
value = tree_cons (NULL_TREE, size_int (num_pr), value);
value = tree_cons (NULL_TREE, size_int (num_zr), value);
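For reference, a hedged note on how the two name arguments are used; the specific strings below are assumptions about the ACLE registration side, not part of this hunk:

/* Two usage patterns for this helper:
   - ACLE registration passes both names, e.g. (assumed values)
       add_sve_type_attribute (type, 0, 1, "u10__SVBool_t", "svbool_t");
     so the attribute records the ABI mangling and the <arm_sve.h> name.
   - The simd-clone code later in this patch passes NULL for both, creating
     an anonymous "SVE type" attribute for vectors that have no ACLE
     equivalent (e.g. unpacked vectors).  */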
int num, bool explicit_p)
{
tree t, ret_type;
- unsigned int nds_elt_bits;
+ unsigned int nds_elt_bits, wds_elt_bits;
unsigned HOST_WIDE_INT const_simdlen;
if (!TARGET_SIMD)
if (TREE_CODE (ret_type) != VOID_TYPE)
{
nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type);
+ wds_elt_bits = nds_elt_bits;
vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits));
}
else
- nds_elt_bits = POINTER_SIZE;
+ {
+ nds_elt_bits = POINTER_SIZE;
+ wds_elt_bits = 0;
+ }
int i;
tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
t && t != void_list_node; t = TREE_CHAIN (t), i++)
{
- tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+ tree type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
- && !supported_simd_type (arg_type))
+ && !supported_simd_type (type))
{
if (!explicit_p)
;
- else if (COMPLEX_FLOAT_TYPE_P (ret_type))
+ else if (COMPLEX_FLOAT_TYPE_P (type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support argument type %qT "
- "for simd", arg_type);
+ "for simd", type);
else
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"unsupported argument type %qT for simd",
- arg_type);
+ type);
return 0;
}
- unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type);
+ unsigned lane_bits = lane_size (clonei->args[i].arg_type, type);
if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
- vec_elts.safe_push (std::make_pair (arg_type, lane_bits));
+ vec_elts.safe_push (std::make_pair (type, lane_bits));
if (nds_elt_bits > lane_bits)
nds_elt_bits = lane_bits;
+ if (wds_elt_bits < lane_bits)
+ wds_elt_bits = lane_bits;
}
- clonei->vecsize_mangle = 'n';
+ /* If we could not determine the widest data size (WDS) from the return
+ type or parameters, fall back to the size of uintptr_t. */
+ if (wds_elt_bits == 0)
+ wds_elt_bits = POINTER_SIZE;
+
clonei->mask_mode = VOIDmode;
poly_uint64 simdlen;
- auto_vec<poly_uint64> simdlens (2);
+ struct aarch64_clone_info
+ {
+ poly_uint64 len;
+ char mangle;
+ };
+ auto_vec<aarch64_clone_info, 3> clones;
+
/* Keep track of the possible simdlens the clones of this function can have,
and check them later to see if we support them. */
if (known_eq (clonei->simdlen, 0U))
{
simdlen = exact_div (poly_uint64 (64), nds_elt_bits);
if (maybe_ne (simdlen, 1U))
- simdlens.safe_push (simdlen);
- simdlens.safe_push (simdlen * 2);
+ clones.safe_push ({simdlen, 'n'});
+ clones.safe_push ({simdlen * 2, 'n'});
+ /* Only create an SVE simd clone if we aren't dealing with an unprototyped
+ function.
+ Support is also currently disabled for SVE simd clones of functions that
+ have a body, and for all SVE simd clones when -msve-vector-bits is used.
+ TODO: add support for these. */
+ if (prototype_p (TREE_TYPE (node->decl))
+ && !node->definition
+ && !aarch64_sve_vg.is_constant ())
+ clones.safe_push ({exact_div (BITS_PER_SVE_VECTOR, wds_elt_bits), 's'});
}
else
- simdlens.safe_push (clonei->simdlen);
+ clones.safe_push ({clonei->simdlen, 'n'});
clonei->vecsize_int = 0;
clonei->vecsize_float = 0;
simdclone would cause a vector type to be larger than 128-bits, and reject
such a clone. */
unsigned j = 0;
- while (j < simdlens.length ())
+ while (j < clones.length ())
{
bool remove_simdlen = false;
for (auto elt : vec_elts)
- if (known_gt (simdlens[j] * elt.second, 128U))
+ if (clones[j].mangle == 'n'
+ && known_gt (clones[j].len * elt.second, 128U))
{
/* Don't issue a warning for every simdclone when there is no
specific simdlen clause. */
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
"GCC does not currently support simdlen %wd for "
"type %qT",
- constant_lower_bound (simdlens[j]), elt.first);
+ constant_lower_bound (clones[j].len), elt.first);
remove_simdlen = true;
break;
}
if (remove_simdlen)
- simdlens.ordered_remove (j);
+ clones.ordered_remove (j);
else
j++;
}
-
- int count = simdlens.length ();
+ int count = clones.length ();
if (count == 0)
{
if (explicit_p && known_eq (clonei->simdlen, 0U))
}
gcc_assert (num < count);
- clonei->simdlen = simdlens[num];
+ clonei->simdlen = clones[num].len;
+ clonei->vecsize_mangle = clones[num].mangle;
+ /* SVE simd clones are always masked, so set inbranch to 1. */
+ if (clonei->vecsize_mangle == 's')
+ clonei->inbranch = 1;
return count;
}
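As a hedged worked example of the candidate selection above (the element sizes and resulting names follow the rules described in the comments and are illustrative, not output of this patch), consider int fn1 (short, int): the narrowest data size is 16 bits and the widest is 32 bits, giving two Advanced SIMD candidates and one VLA SVE candidate:

#include <stdio.h>

/* Sketch: reproduce the candidate simdlen arithmetic for
   int fn1 (short, int) under the rules described above.  */
int
main (void)
{
  unsigned nds_elt_bits = 16;	/* narrowest lane: short  */
  unsigned wds_elt_bits = 32;	/* widest lane: int (argument and return)  */

  /* Advanced SIMD ('n') candidates: 64-bit and 128-bit vectors of the
     narrowest element type.  */
  printf ("_ZGVnN%uvv_fn1\n", 64 / nds_elt_bits);	  /* _ZGVnN4vv_fn1  */
  printf ("_ZGVnN%uvv_fn1\n", 2 * (64 / nds_elt_bits));	  /* _ZGVnN8vv_fn1  */

  /* SVE ('s') candidate: the lane count is BITS_PER_SVE_VECTOR / wds_elt_bits,
     which is not a compile-time constant, so the length is mangled as 'x'.  */
  printf ("_ZGVsMxvv_fn1 (at least %u lanes per call)\n", 128 / wds_elt_bits);
  return 0;
}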
-/* Implement TARGET_SIMD_CLONE_ADJUST. */
+/* Helper function to adjust a vector type for an SVE simd clone. Returns an
+ SVE vector type based on the element type of the vector TYPE, with SIMDLEN
+ elements. If IS_MASK, instead returns an SVE predicate type suitable for
+ governing the SVE vector type it would otherwise return. */
+
+static tree
+simd_clone_adjust_sve_vector_type (tree type, bool is_mask, poly_uint64 simdlen)
+{
+ unsigned int num_zr = 0;
+ unsigned int num_pr = 0;
+ machine_mode vector_mode;
+ type = TREE_TYPE (type);
+ scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
+ vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
+ type = build_vector_type_for_mode (type, vector_mode);
+ if (is_mask)
+ {
+ type = truth_type_for (type);
+ num_pr = 1;
+ }
+ else
+ num_zr = 1;
+
+ /* We create new types here with the SVE type attribute instead of using ACLE
+ types as we need to support unpacked vectors which aren't available as
+ ACLE SVE types. */
+
+ /* ??? This creates anonymous "SVE type" attributes for all types,
+ even those that correspond to <arm_sve.h> types. This affects type
+ compatibility in C/C++, but not in gimple. (Gimple type equivalence
+ is instead decided by TARGET_COMPATIBLE_VECTOR_TYPES_P.)
+ Thus a C/C++ definition of the implementation function will have a
+ different function type from the declaration that this code creates.
+ However, it doesn't seem worth trying to fix that until we have a
+ way of handling implementations that operate on unpacked types. */
+ type = build_distinct_type_copy (type);
+ aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
+ return type;
+}
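For intuition, a hedged sketch of what this produces in ACLE terms for packed lanes; the declaration below is an assumed illustration of the resulting clone signature, not code generated by the patch:

#pragma GCC target ("+sve")
#include <arm_sve.h>

/* Assumed shape of the masked SVE clone of  int fn0 (int):  a packed SVE
   vector of int plus a trailing governing predicate (per the AArch64 vector
   function ABI), returning a packed SVE vector.  */
svint32_t _ZGVsMxv_fn0 (svint32_t x, svbool_t pg);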
+
+/* Implement TARGET_SIMD_CLONE_ADJUST. */
static void
aarch64_simd_clone_adjust (struct cgraph_node *node)
{
- /* Add aarch64_vector_pcs target attribute to SIMD clones so they
- use the correct ABI. */
-
tree t = TREE_TYPE (node->decl);
- TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
- TYPE_ATTRIBUTES (t));
+
+ if (node->simdclone->vecsize_mangle == 's')
+ {
+ /* This is additive and has no effect if SVE, or a superset thereof, is
+ already enabled. */
+ tree target = build_string (strlen ("+sve") + 1, "+sve");
+ if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0))
+ gcc_unreachable ();
+ push_function_decl (node->decl);
+ }
+ else
+ {
+ /* Add aarch64_vector_pcs target attribute to SIMD clones so they
+ use the correct ABI. */
+ TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
+ TYPE_ATTRIBUTES (t));
+ }
+
+ cgraph_simd_clone *sc = node->simdclone;
+
+ for (unsigned i = 0; i < sc->nargs; ++i)
+ {
+ bool is_mask = false;
+ tree type;
+ switch (sc->args[i].arg_type)
+ {
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ is_mask = true;
+ gcc_fallthrough ();
+ case SIMD_CLONE_ARG_TYPE_VECTOR:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
+ type = sc->args[i].vector_type;
+ gcc_assert (VECTOR_TYPE_P (type));
+ if (node->simdclone->vecsize_mangle == 's')
+ type = simd_clone_adjust_sve_vector_type (type, is_mask,
+ sc->simdlen);
+ sc->args[i].vector_type = type;
+ break;
+ default:
+ continue;
+ }
+ }
+ if (node->simdclone->vecsize_mangle == 's')
+ {
+ tree ret_type = TREE_TYPE (t);
+ if (VECTOR_TYPE_P (ret_type))
+ TREE_TYPE (t)
+ = simd_clone_adjust_sve_vector_type (ret_type, false,
+ node->simdclone->simdlen);
+ pop_function_decl ();
+ }
}
/* Implement TARGET_SIMD_CLONE_USABLE. */
if (!TARGET_SIMD || aarch64_sve_mode_p (vector_mode))
return -1;
return 0;
+ case 's':
+ if (!TARGET_SVE
+ || !aarch64_sve_mode_p (vector_mode))
+ return -1;
+ return 0;
default:
gcc_unreachable ();
}
pp_string (&pp, "_ZGV");
pp_character (&pp, vecsize_mangle);
pp_character (&pp, mask);
- /* For now, simdlen is always constant, while variable simdlen pp 'n'. */
- unsigned int len = simdlen.to_constant ();
- pp_decimal_int (&pp, (len));
+
+ unsigned HOST_WIDE_INT len;
+ if (simdlen.is_constant (&len))
+ pp_decimal_int (&pp, (int) (len));
+ else
+ pp_character (&pp, 'x');
for (n = 0; n < clone_info->nargs; ++n)
{
below). */
loop = alloc_loop ();
cfun->has_force_vectorize_loops = true;
- /* For now, simlen is always constant. */
- loop->safelen = node->simdclone->simdlen.to_constant ();
+ /* safelen only needs to be a lower bound, so use the minimum possible simdlen. */
+ loop->safelen = constant_lower_bound (node->simdclone->simdlen);
loop->force_vectorize = true;
loop->header = body_bb;
}
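A short hedged illustration of why the lower bound is safe; the poly_int value shown is an assumed example, not taken from the patch:

/* For an SVE clone the lane count has the form  min + min*x,  where x is the
   number of extra 128-bit SVE blocks the hardware provides (x >= 0).
   E.g. a clone with 32-bit lanes has simdlen 4 + 4x, so
     constant_lower_bound (4 + 4x) == 4,
   and 4 is a valid safelen because every execution has at least 4 lanes.  */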
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+ function calls from scalar versions in accordance with the Vector Function
+ Application Binary Interface Specification for AArch64 (AAVPCS).
+
+ We check for correctness in:
+ - Vector function name mangling, with the grammar:
+
+ <vector name> := <prefix> "_" <name>
+ <prefix> := "_ZGV" <isa> <mask> <len> <parameters>
+
+ where:
+ - <isa> := "s" for SVE
+ - <mask> := "M" for Mask
+ - <len> := "x" for VLA SVE
+
+ resulting in:
+ <prefix> := "_ZGVsMx" <parameters>
+
+ with each vector parameter contributing a "v" to the prefix.
+
+ - Parameter and return value mapping:
+ - Unless marked with uniform or linear OpenMP clauses, parameters and
+ return values are expected to map to vectors.
+ - Where the lane-size of a parameter is less than the widest data size
+ for a given function, the resulting vector should be unpacked and
+ populated via extending loads.
+
+ - Finally, we also make sure we can correctly generate calls to the same
+ function, differing only in the target architecture (i.e. SVE vs SIMD),
+ ensuring that each call points to the correctly-mangled vector function
+ and employs the correct ABI. For example, for `fn' we may expect:
+
+ for #pragma GCC target("+sve"): _ZGVsMxvv_fn
+ for #pragma GCC target("+simd"): _ZGVnN4vv_fn */
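Reading one of the expected strings against the grammar above (the component split is an illustration derived from that grammar):

/* _ZGVsMxvv__Z3fn1si
   _ZGV       mangling prefix
   s          <isa>: SVE
   M          <mask>: masked
   x          <len>: vector-length agnostic
   vv         two vector parameters
   _          separator
   _Z3fn1si   Itanium mangling of fn1 (short, int)  */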
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv__Z3fn0i\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn1si\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn2si\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn3ic\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
}
/* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */
/* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */
+/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */
#pragma omp declare simd uniform(a) linear (b)
void f05 (short a, short *b, short c)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+ function calls from scalar versions in accordance with the Vector Function
+ Application Binary Interface Specification for AArch64 (AAVPCS).
+
+ We check for correctness in:
+ - Vector function name mangling, with the grammar:
+
+ <vector name> := <prefix> "_" <name>
+ <prefix> := "_ZGV" <isa> <mask> <len> <parameters>
+
+ where:
+ - <isa> := "s" for SVE
+ - <mask> := "M" for Mask
+ - <len> := "x" for VLA SVE
+
+ resulting in:
+ <prefix> := "_ZGVsMx" <parameters>
+
+ with each vector parameter contributing a "v" to the prefix.
+
+ - Parameter and return value mapping:
+ - Unless marked with uniform or linear OpenMP clauses, parameters and
+ return values are expected to map to vectors.
+ - Where the lane-size of a parameter is less than the widest data size
+ for a given function, the resulting vector should be unpacked and
+ populated via extending loads.
+
+ - Finally, we also make sure we can correctly generate calls to the same
+ function, differing only in the target architecture (i.e. SVE vs SIMD),
+ ensuring that each call points to the correctly-mangled vector function
+ and employs the correct ABI. For example, for `fn' we may expect:
+
+ for #pragma GCC target("+sve"): _ZGVsMxvv_fn
+ for #pragma GCC target("+simd"): _ZGVnN4vv_fn */
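For contrast with the SVE names, the Advanced SIMD expectation at the end of this file decomposes as follows (illustrative, based on the same grammar):

/* _ZGVnN4vv_fn4
   _ZGV   mangling prefix
   n      <isa>: Advanced SIMD
   N      <mask>: unmasked
   4      <len>: four lanes
   vv     two vector parameters
   _fn4   "_" followed by the unmangled C name  */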
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn0\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn1\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn2\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVnN4vv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}