#include "coretypes.h"
#include "tm.h"
#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "memmodel.h"
+#include "insn-codes.h"
+#include "optabs.h"
+#include "recog.h"
+#include "expr.h"
+#include "basic-block.h"
+#include "function.h"
#include "fold-const.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "emit-rtl.h"
#include "langhooks.h"
#include "stringpool.h"
#include "attribs.h"
#include "arm-protos.h"
#include "arm-builtins.h"
#include "arm-mve-builtins.h"
+#include "arm-mve-builtins-base.h"
+#include "arm-mve-builtins-shapes.h"
namespace arm_mve {
const bool requires_float;
};
+/* Describes a function decl. */
/* NOTE(review): objects of this class are GC-allocated (ggc_alloc in
   function_builder::add_function) and kept alive through the GTY'd
   registered_functions vector.  'instance' is GTY((skip)) — presumably
   because function_instance contains no GC-managed trees; TODO confirm.  */
+class GTY(()) registered_function
+{
+public:
+ /* The ACLE function that the decl represents. */
+ function_instance instance GTY ((skip));
+
+ /* The decl itself. */
+ tree decl;
+
+ /* Whether the function requires a floating point abi. */
+ bool requires_float;
+
+ /* True if the decl represents an overloaded function that needs to be
+ resolved by function_resolver. */
+ bool overloaded_p;
+};
+
+/* Hash traits for registered_function. */
/* Lookups are keyed on the function_instance a decl implements; both
   hash () and equal () delegate to function_instance (see the inline
   definitions further down in this file).  */
+struct registered_function_hasher : nofree_ptr_hash <registered_function>
+{
+ typedef function_instance compare_type;
+
+ static hashval_t hash (value_type);
+ static bool equal (value_type, const compare_type &);
+};
+
/* Flag indicating whether the arm MVE types have been handled. */
static bool handle_arm_mve_types_p;
#define DEF_MVE_TYPE(ACLE_NAME, SCALAR_TYPE) \
{ #ACLE_NAME, REQUIRES_FLOAT },
#include "arm-mve-builtins.def"
-#undef DEF_MVE_TYPE
+};
+
+/* The function name suffix associated with each predication type. */
/* Indexed by predication_index; the final empty entry is the NUM_PREDS
   slot.  NOTE(review): the entry order must mirror the predication_index
   enum (presumably PRED_none first) — TODO confirm against the enum.  */
+static const char *const pred_suffixes[NUM_PREDS + 1] = {
+ "",
+ "_m",
+ "_p",
+ "_x",
+ "_z",
+ ""
+};
+
+/* Static information about each mode_suffix_index. */
/* The temporary VECTOR_TYPE_none macro makes DEF_MVE_MODE entries whose
   BASE or DISPLACEMENT is "none" expand to the NUM_VECTOR_TYPES sentinel;
   it is undefined again immediately after the .def expansion.  The last
   initializer is the terminator/sentinel entry.  */
+CONSTEXPR const mode_suffix_info mode_suffixes[] = {
+#define VECTOR_TYPE_none NUM_VECTOR_TYPES
+#define DEF_MVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) \
+ { "_" #NAME, VECTOR_TYPE_##BASE, VECTOR_TYPE_##DISPLACEMENT, UNITS_##UNITS },
+#include "arm-mve-builtins.def"
+#undef VECTOR_TYPE_none
+ { "", NUM_VECTOR_TYPES, NUM_VECTOR_TYPES, UNITS_none }
+};
+
+/* Static information about each type_suffix_index. */
/* Initializer field order per entry: suffix string, vector_type, type
   class, element bits, element bytes, integer_p, unsigned_p, float_p,
   <unnamed flag — TODO: name this field from type_suffix_info's
   declaration>, vector mode.  The final entry is the NUM_TYPE_SUFFIXES
   sentinel.  */
+CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
+#define DEF_MVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \
+ { "_" #NAME, \
+ VECTOR_TYPE_##ACLE_TYPE, \
+ TYPE_##CLASS, \
+ BITS, \
+ BITS / BITS_PER_UNIT, \
+ TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
+ TYPE_##CLASS == TYPE_unsigned, \
+ TYPE_##CLASS == TYPE_float, \
+ 0, \
+ MODE },
+#include "arm-mve-builtins.def"
+ { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false,
+ 0, VOIDmode }
+};
+
+/* Define a TYPES_<combination> macro for each combination of type
+ suffixes that an ACLE function can have, where <combination> is the
+ name used in DEF_MVE_FUNCTION entries.
+
+ Use S (T) for single type suffix T and D (T1, T2) for a pair of type
+ suffixes T1 and T2. Use commas to separate the suffixes.
+
+ Although the order shouldn't matter, the convention is to sort the
+ suffixes lexicographically after dividing suffixes into a type
+ class ("b", "f", etc.) and a numerical bit count. */
+
+/* _f16. */
+#define TYPES_float16(S, D) \
+ S (f16)
+
+/* _f16 _f32. */
+#define TYPES_all_float(S, D) \
+ S (f16), S (f32)
+
+/* _s8 _u8. */
+#define TYPES_integer_8(S, D) \
+ S (s8), S (u8)
+
+/* _s8 _s16
+ _u8 _u16. */
+#define TYPES_integer_8_16(S, D) \
+ S (s8), S (s16), S (u8), S(u16)
+
+/* _s16 _s32
+ _u16 _u32. */
+#define TYPES_integer_16_32(S, D) \
+ S (s16), S (s32), \
+ S (u16), S (u32)
+
+/* _s16 _s32. */
+#define TYPES_signed_16_32(S, D) \
+ S (s16), S (s32)
+
+/* _s8 _s16 _s32. */
+#define TYPES_all_signed(S, D) \
+ S (s8), S (s16), S (s32)
+
+/* _u8 _u16 _u32. */
+#define TYPES_all_unsigned(S, D) \
+ S (u8), S (u16), S (u32)
+
+/* _s8 _s16 _s32
+ _u8 _u16 _u32. */
+#define TYPES_all_integer(S, D) \
+ TYPES_all_signed (S, D), TYPES_all_unsigned (S, D)
+
+/* _s8 _s16 _s32 _s64
+ _u8 _u16 _u32 _u64. */
+#define TYPES_all_integer_with_64(S, D) \
+ TYPES_all_signed (S, D), S (s64), TYPES_all_unsigned (S, D), S (u64)
+
+/* _s32 _u32. */
+#define TYPES_integer_32(S, D) \
+ S (s32), S (u32)
+
+/* _s32. */
+#define TYPES_signed_32(S, D) \
+ S (s32)
+
+/* Describe a pair of type suffixes in which only the first is used. */
+#define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES }
+
+/* Describe a pair of type suffixes in which both are used. */
+#define DEF_DOUBLE_TYPE(X, Y) { TYPE_SUFFIX_ ## X, TYPE_SUFFIX_ ## Y }
+
+/* Create an array that can be used in arm-mve-builtins.def to
+ select the type suffixes in TYPES_<NAME>. */
+#define DEF_MVE_TYPES_ARRAY(NAME) \
+ static const type_suffix_pair types_##NAME[] = { \
+ TYPES_##NAME (DEF_VECTOR_TYPE, DEF_DOUBLE_TYPE), \
+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } \
+ }
+
+/* For functions that don't take any type suffixes. */
/* Both pairs are all-sentinel: the first is the single "no suffix" entry
   that callers index as types_none[0], the second terminates the list in
   the same way as the DEF_MVE_TYPES_ARRAY-generated tables.  */
+static const type_suffix_pair types_none[] = {
+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES },
+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }
+};
+
+DEF_MVE_TYPES_ARRAY (all_integer);
+DEF_MVE_TYPES_ARRAY (all_integer_with_64);
+DEF_MVE_TYPES_ARRAY (float16);
+DEF_MVE_TYPES_ARRAY (all_float);
+DEF_MVE_TYPES_ARRAY (all_signed);
+DEF_MVE_TYPES_ARRAY (all_unsigned);
+DEF_MVE_TYPES_ARRAY (integer_8);
+DEF_MVE_TYPES_ARRAY (integer_8_16);
+DEF_MVE_TYPES_ARRAY (integer_16_32);
+DEF_MVE_TYPES_ARRAY (integer_32);
+DEF_MVE_TYPES_ARRAY (signed_16_32);
+DEF_MVE_TYPES_ARRAY (signed_32);
+
+/* Used by functions that have no governing predicate. */
/* Each list below is terminated by NUM_PREDS; see the
   group.preds[pi] != NUM_PREDS loop in add_overloaded_functions.  */
+static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
+
+/* Used by functions that have the m (merging) predicated form, and in
+ addition have an unpredicated form. */
+static const predication_index preds_m_or_none[] = {
+ PRED_m, PRED_none, NUM_PREDS
+};
+
+/* Used by functions that have the mx (merging and "don't care")
+ predicated forms, and in addition have an unpredicated form. */
+static const predication_index preds_mx_or_none[] = {
+ PRED_m, PRED_x, PRED_none, NUM_PREDS
+};
+
+/* Used by functions that have the p predicated form, in addition to
+ an unpredicated form. */
+static const predication_index preds_p_or_none[] = {
+ PRED_p, PRED_none, NUM_PREDS
};
/* The scalar type associated with each vector type. */
-GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
+extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
+tree scalar_types[NUM_VECTOR_TYPES];
/* The single-predicate and single-vector types, with their built-in
"__simd128_..._t" name. Allow an index of NUM_VECTOR_TYPES, which always
static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
/* Same, but with the arm_mve.h names. */
-GTY(()) tree acle_vector_types[3][NUM_VECTOR_TYPES + 1];
+extern GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+
+/* The list of all registered function decls, indexed by code. */
+static GTY(()) vec<registered_function *, va_gc> *registered_functions;
+
+/* All registered function decls, hashed on the function_instance
+ that they implement. This is used for looking up implementations of
+ overloaded functions. */
+static hash_table<registered_function_hasher> *function_table;
+
+/* True if we've already complained about attempts to use functions
+ when the required extension is disabled. */
+static bool reported_missing_float_p;
/* Return the MVE abi type with element of type TYPE. */
static tree
#define DEF_MVE_TYPE(ACLE_NAME, SCALAR_TYPE) \
scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE;
#include "arm-mve-builtins.def"
-#undef DEF_MVE_TYPE
for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
{
if (vector_types[i].requires_float && !TARGET_HAVE_MVE_FLOAT)
static void
register_vector_type (vector_type_index type)
{
+
+ /* If the target does not have the mve.fp extension, but the type requires
+ it, then it needs to be assigned a non-dummy type so that functions
+ with those types in their signature can be registered. This allows for
+ diagnostics about the missing extension, rather than about a missing
+ function definition. */
if (vector_types[type].requires_float && !TARGET_HAVE_MVE_FLOAT)
- return;
+ {
+ acle_vector_types[0][type] = void_type_node;
+ return;
+ }
+
tree vectype = abi_vector_types[type];
tree id = get_identifier (vector_types[type].acle_name);
tree decl = build_decl (input_location, TYPE_DECL, id, vectype);
acle_vector_types[0][type] = vectype;
}
-/* Register tuple type TYPE with NUM_VECTORS arity under its
- arm_mve_types.h name. */
+/* Register tuple types of element type TYPE under their arm_mve_types.h
+ names. */
static void
register_builtin_tuple_types (vector_type_index type)
{
const vector_type_info* info = &vector_types[type];
+
+ /* If the target does not have the mve.fp extension, but the type requires
+ it, then it needs to be assigned a non-dummy type so that functions
+ with those types in their signature can be registered. This allows for
+ diagnostics about the missing extension, rather than about a missing
+ function definition. */
if (scalar_types[type] == boolean_type_node
|| (info->requires_float && !TARGET_HAVE_MVE_FLOAT))
+ {
+ for (unsigned int num_vectors = 2; num_vectors <= 4; num_vectors += 2)
+ acle_vector_types[num_vectors >> 1][type] = void_type_node;
return;
+ }
+
const char *vector_type_name = info->acle_name;
char buffer[sizeof ("float32x4x2_t")];
for (unsigned int num_vectors = 2; num_vectors <= 4; num_vectors += 2)
}
}
-} /* end namespace arm_mve */
+/* Implement #pragma GCC arm "arm_mve.h" <bool>. */
/* A non-null function_table doubles as the "header already processed"
   flag, so including arm_mve.h twice is diagnosed as a duplicate
   definition rather than re-registering every builtin.  */
+void
+handle_arm_mve_h (bool preserve_user_namespace)
+{
+ if (function_table)
+ {
+ error ("duplicate definition of %qs", "arm_mve.h");
+ return;
+ }
-using namespace arm_mve;
+ /* Define MVE functions. */
+ function_table = new hash_table<registered_function_hasher> (1023);
+}
+
+/* Return true if CANDIDATE is equivalent to MODEL_TYPE for overloading
+ purposes. */
+static bool
+matches_type_p (const_tree model_type, const_tree candidate)
+{
+ if (VECTOR_TYPE_P (model_type))
+ {
/* For vectors, require the same element count and machine mode, then
   fall through to compare the element types below.  */
+ if (!VECTOR_TYPE_P (candidate)
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (model_type),
+ TYPE_VECTOR_SUBPARTS (candidate))
+ || TYPE_MODE (model_type) != TYPE_MODE (candidate))
+ return false;
+
+ model_type = TREE_TYPE (model_type);
+ candidate = TREE_TYPE (candidate);
+ }
/* Scalars (and vector elements) match on their main variant, ignoring
   cv-qualifiers and typedef wrappers.  */
+ return (candidate != error_mark_node
+ && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT (candidate));
+}
+
+/* Report an error against LOCATION that the user has tried to use
+ a floating point function when the mve.fp extension is disabled. */
+static void
+report_missing_float (location_t location, tree fndecl)
+{
+ /* Avoid reporting a slew of messages for a single oversight. */
+ if (reported_missing_float_p)
+ return;
+
+ error_at (location, "ACLE function %qD requires ISA extension %qs",
+ fndecl, "mve.fp");
+ inform (location, "you can enable mve.fp by using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma")
/* Latch the flag so later calls return early (see the guard above).  */;
+ reported_missing_float_p = true;
+}
+
+/* Report that LOCATION has a call to FNDECL in which argument ARGNO
+ was not an integer constant expression. ARGNO counts from zero. */
/* All three reporters below print ARGNO + 1 because diagnostics use
   1-based argument numbers for the user.  */
+static void
+report_non_ice (location_t location, tree fndecl, unsigned int argno)
+{
+ error_at (location, "argument %d of %qE must be an integer constant"
+ " expression", argno + 1, fndecl);
+}
+
+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has
+ the value ACTUAL, whereas the function requires a value in the range
+ [MIN, MAX]. ARGNO counts from zero. */
+static void
+report_out_of_range (location_t location, tree fndecl, unsigned int argno,
+ HOST_WIDE_INT actual, HOST_WIDE_INT min,
+ HOST_WIDE_INT max)
+{
+ error_at (location, "passing %wd to argument %d of %qE, which expects"
+ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
+ min, max);
+}
+
+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has
+ the value ACTUAL, whereas the function requires a valid value of
+ enum type ENUMTYPE. ARGNO counts from zero. */
+static void
+report_not_enum (location_t location, tree fndecl, unsigned int argno,
+ HOST_WIDE_INT actual, tree enumtype)
+{
+ error_at (location, "passing %wd to argument %d of %qE, which expects"
+ " a valid %qT value", actual, argno + 1, fndecl, enumtype);
+}
+
+/* Checks that the mve.fp extension is enabled, given that REQUIRES_FLOAT
+ indicates whether it is required or not for function FNDECL.
+ Report an error against LOCATION if not. */
/* Returns true when the call is OK to proceed; the error path is
   rate-limited by report_missing_float.  */
+static bool
+check_requires_float (location_t location, tree fndecl,
+ bool requires_float)
+{
+ if (requires_float && !TARGET_HAVE_MVE_FLOAT)
+ {
+ report_missing_float (location, fndecl);
+ return false;
+ }
+
+ return true;
+}
+
+/* Return a hash code for a function_instance. */
/* Must stay consistent with operator== (used by
   registered_function_hasher::equal): every field hashed here must
   participate in equality, and vice versa.  */
+hashval_t
+function_instance::hash () const
+{
+ inchash::hash h;
+ /* BASE uniquely determines BASE_NAME, so we don't need to hash both. */
+ h.add_ptr (base);
+ h.add_ptr (shape);
+ h.add_int (mode_suffix_id);
+ h.add_int (type_suffix_ids[0]);
+ h.add_int (type_suffix_ids[1]);
+ h.add_int (pred);
+ return h.end ();
+}
+
+/* Return a set of CP_* flags that describe what the function could do,
+ taking the command-line flags into account. */
/* The base function supplies the raw property set; this wrapper only
   filters it against the current optimization/FP-model flags.  */
+unsigned int
+function_instance::call_properties () const
+{
+ unsigned int flags = base->call_properties (*this);
+
+ /* -fno-trapping-math means that we can assume any FP exceptions
+ are not user-visible. */
+ if (!flag_trapping_math)
+ flags &= ~CP_RAISE_FP_EXCEPTIONS;
+
+ return flags;
+}
+
+/* Return true if calls to the function could read some form of
+ global state. */
/* These three predicates drive the "pure"/"const"/"nothrow" attribute
   choices in function_builder::get_attributes.  */
+bool
+function_instance::reads_global_state_p () const
+{
+ unsigned int flags = call_properties ();
+
+ /* Preserve any dependence on rounding mode, flush to zero mode, etc.
+ There is currently no way of turning this off; in particular,
+ -fno-rounding-math (which is the default) means that we should make
+ the usual assumptions about rounding mode, which for intrinsics means
+ acting as the instructions do. */
+ if (flags & CP_READ_FPCR)
+ return true;
+
+ return false;
+}
+
+/* Return true if calls to the function could modify some form of
+ global state. */
+bool
+function_instance::modifies_global_state_p () const
+{
+ unsigned int flags = call_properties ();
+
+ /* Preserve any exception state written back to the FPCR,
+ unless -fno-trapping-math says this is unnecessary. */
+ if (flags & CP_RAISE_FP_EXCEPTIONS)
+ return true;
+
+ /* Handle direct modifications of global state. */
+ return flags & CP_WRITE_MEMORY;
+}
+
+/* Return true if calls to the function could raise a signal. */
/* Used to decide whether "nothrow" is safe under
   -fnon-call-exceptions; see get_attributes.  */
+bool
+function_instance::could_trap_p () const
+{
+ unsigned int flags = call_properties ();
+
+ /* Handle functions that could raise SIGFPE. */
+ if (flags & CP_RAISE_FP_EXCEPTIONS)
+ return true;
+
+ /* Handle functions that could raise SIGBUS or SIGSEGV. */
+ if (flags & (CP_READ_MEMORY | CP_WRITE_MEMORY))
+ return true;
+
+ return false;
+}
+
+/* Return true if the function has an implicit "inactive" argument.
+ This is the case of most _m predicated functions, but not all.
+ The list will be updated as needed. */
/* Currently every PRED_m function is assumed to take an inactive
   argument; exceptions will be carved out here as they are found.  */
+bool
+function_instance::has_inactive_argument () const
+{
+ if (pred != PRED_m)
+ return false;
+
+ return true;
+}
+
/* Hash a registered_function by the function_instance it implements,
   matching the compare_type used for table lookups.  */
+inline hashval_t
+registered_function_hasher::hash (value_type value)
+{
+ return value->instance.hash ();
+}
+
/* Equality for table lookups: compare the stored instance against the
   function_instance key.  */
+inline bool
+registered_function_hasher::equal (value_type value, const compare_type &key)
+{
+ return value->instance == key;
+}
+
/* Set up the builder: a dummy "void (void)" type for overload decls,
   direct-overload mode for C++ (which can resolve overloads itself),
   and the obstack used to assemble function names.  */
+function_builder::function_builder ()
+{
+ m_overload_type = build_function_type (void_type_node, void_list_node);
+ m_direct_overloads = lang_GNU_CXX ();
+ gcc_obstack_init (&m_string_obstack);
+}
+
/* Release every name still allocated on the obstack.  */
+function_builder::~function_builder ()
+{
+ obstack_free (&m_string_obstack, NULL);
+}
+
+/* Add NAME to the end of the function name being built. */
+void
+function_builder::append_name (const char *name)
+{
+ obstack_grow (&m_string_obstack, name, strlen (name));
+}
+
+/* Zero-terminate and complete the function name being built. */
/* Returns an obstack-owned string; callers free it (and anything
   allocated after it) with obstack_free.  */
+char *
+function_builder::finish_name ()
+{
+ obstack_1grow (&m_string_obstack, 0);
+ return (char *) obstack_finish (&m_string_obstack);
+}
+
+/* Return the overloaded or full function name for INSTANCE, with optional
+ prefix; PRESERVE_USER_NAMESPACE selects the prefix, and OVERLOADED_P
+ selects whether the overloaded or the full function name is built.
+ Allocate the string on m_string_obstack; the caller must use
+ obstack_free to free it after use. */
/* Full names always carry mode and type suffixes; overloaded names only
   carry the ones the shape declares as explicit for this predication.  */
+char *
+function_builder::get_name (const function_instance &instance,
+ bool preserve_user_namespace,
+ bool overloaded_p)
+{
+ if (preserve_user_namespace)
+ append_name ("__arm_");
+ append_name (instance.base_name);
+ append_name (pred_suffixes[instance.pred]);
+ if (!overloaded_p
+ || instance.shape->explicit_mode_suffix_p (instance.pred,
+ instance.mode_suffix_id))
+ append_name (instance.mode_suffix ().string);
+ for (unsigned int i = 0; i < 2; ++i)
+ if (!overloaded_p
+ || instance.shape->explicit_type_suffix_p (i, instance.pred,
+ instance.mode_suffix_id))
+ append_name (instance.type_suffix (i).string);
+ return finish_name ();
+}
+
+/* Add attribute NAME to ATTRS. */
/* Builds a plain (no-argument) attribute node prepended to ATTRS.  */
+static tree
+add_attribute (const char *name, tree attrs)
+{
+ return tree_cons (get_identifier (name), NULL_TREE, attrs);
+}
+
+/* Return the appropriate function attributes for INSTANCE. */
/* "const" for functions with no global reads/writes, "pure" when they
   only read global state; "nothrow" unless -fnon-call-exceptions and
   the function could trap; always "leaf".  */
+tree
+function_builder::get_attributes (const function_instance &instance)
+{
+ tree attrs = NULL_TREE;
+
+ if (!instance.modifies_global_state_p ())
+ {
+ if (instance.reads_global_state_p ())
+ attrs = add_attribute ("pure", attrs);
+ else
+ attrs = add_attribute ("const", attrs);
+ }
+
+ if (!flag_non_call_exceptions || !instance.could_trap_p ())
+ attrs = add_attribute ("nothrow", attrs);
+
+ return add_attribute ("leaf", attrs);
+}
+
+/* Add a function called NAME with type FNTYPE and attributes ATTRS.
+ INSTANCE describes what the function does and OVERLOADED_P indicates
+ whether it is overloaded. REQUIRES_FLOAT indicates whether the function
+ requires the mve.fp extension. */
/* The function code is the vector index shifted into the target-builtin
   numbering space; PLACEHOLDER_P reserves a code without creating a real
   decl (see the LTO comment below).  */
+registered_function &
+function_builder::add_function (const function_instance &instance,
+ const char *name, tree fntype, tree attrs,
+ bool requires_float,
+ bool overloaded_p,
+ bool placeholder_p)
+{
+ unsigned int code = vec_safe_length (registered_functions);
+ code = (code << ARM_BUILTIN_SHIFT) | ARM_BUILTIN_MVE;
+
+ /* We need to be able to generate placeholders to ensure that we have a
+ consistent numbering scheme for function codes between the C and C++
+ frontends, so that everything ties up in LTO.
+
+ Currently, tree-streamer-in.cc:unpack_ts_function_decl_value_fields
+ validates that tree nodes returned by TARGET_BUILTIN_DECL are non-NULL and
+ some node other than error_mark_node. This is a holdover from when builtin
+ decls were streamed by code rather than by value.
+
+ Ultimately, we should be able to remove this validation of BUILT_IN_MD
+ nodes and remove the target hook. For now, however, we need to appease the
+ validation and return a non-NULL, non-error_mark_node node, so we
+ arbitrarily choose integer_zero_node. */
+ tree decl = placeholder_p
+ ? integer_zero_node
+ : simulate_builtin_function_decl (input_location, name, fntype,
+ code, NULL, attrs);
+
/* GC-allocated so the decl stays reachable via registered_functions.  */
+ registered_function &rfn = *ggc_alloc <registered_function> ();
+ rfn.instance = instance;
+ rfn.decl = decl;
+ rfn.requires_float = requires_float;
+ rfn.overloaded_p = overloaded_p;
+ vec_safe_push (registered_functions, &rfn);
+
+ return rfn;
+}
+
+/* Add a built-in function for INSTANCE, with the argument types given
+ by ARGUMENT_TYPES and the return type given by RETURN_TYPE.
+ REQUIRES_FLOAT indicates whether the function requires the mve.fp extension,
+ and PRESERVE_USER_NAMESPACE indicates whether the function should also be
+ registered under its non-prefixed name. */
/* Registers up to four decls per instance: the prefixed full name (the
   only one entered in function_table), optionally the unprefixed full
   name, and — when the overloaded spelling differs — the prefixed and
   optionally unprefixed overloaded aliases.  */
+void
+function_builder::add_unique_function (const function_instance &instance,
+ tree return_type,
+ vec<tree> &argument_types,
+ bool preserve_user_namespace,
+ bool requires_float,
+ bool force_direct_overloads)
+{
+ /* Add the function under its full (unique) name with prefix. */
+ char *name = get_name (instance, true, false);
+ tree fntype = build_function_type_array (return_type,
+ argument_types.length (),
+ argument_types.address ());
+ tree attrs = get_attributes (instance);
+ registered_function &rfn = add_function (instance, name, fntype, attrs,
+ requires_float, false, false);
+
+ /* Enter the function into the hash table. */
+ hashval_t hash = instance.hash ();
+ registered_function **rfn_slot
+ = function_table->find_slot_with_hash (instance, hash, INSERT);
+ gcc_assert (!*rfn_slot);
+ *rfn_slot = &rfn;
+
+ /* Also add the non-prefixed non-overloaded function, if the user namespace
+ does not need to be preserved. */
+ if (!preserve_user_namespace)
+ {
+ char *noprefix_name = get_name (instance, false, false);
+ tree attrs = get_attributes (instance);
+ add_function (instance, noprefix_name, fntype, attrs, requires_float,
+ false, false);
+ }
+
+ /* Also add the function under its overloaded alias, if we want
+ a separate decl for each instance of an overloaded function. */
+ char *overload_name = get_name (instance, true, true);
+ if (strcmp (name, overload_name) != 0)
+ {
+ /* Attribute lists shouldn't be shared. */
+ tree attrs = get_attributes (instance);
+ bool placeholder_p = !(m_direct_overloads || force_direct_overloads);
+ add_function (instance, overload_name, fntype, attrs,
+ requires_float, false, placeholder_p);
+
+ /* Also add the non-prefixed overloaded function, if the user namespace
+ does not need to be preserved. */
+ if (!preserve_user_namespace)
+ {
+ char *noprefix_overload_name = get_name (instance, false, true);
+ tree attrs = get_attributes (instance);
+ add_function (instance, noprefix_overload_name, fntype, attrs,
+ requires_float, false, placeholder_p);
+ }
+ }
+
/* Freeing NAME also frees every later obstack allocation (the other
   name strings above) — standard obstack LIFO behaviour.  */
+ obstack_free (&m_string_obstack, name);
+}
+
+/* Add one function decl for INSTANCE, to be used with manual overload
+ resolution. REQUIRES_FLOAT indicates whether the function requires the
+ mve.fp extension.
+
+ For simplicity, partition functions by instance and required extensions,
+ and check whether the required extensions are available as part of resolving
+ the function to the relevant unique function. */
/* m_overload_names de-duplicates by spelling: several instances can
   share one overloaded name, and only the first registration creates a
   decl.  NOTE(review): duplicate names free their obstack copy because
   the map keeps the first copy alive.  */
+void
+function_builder::add_overloaded_function (const function_instance &instance,
+ bool preserve_user_namespace,
+ bool requires_float)
+{
+ char *name = get_name (instance, true, true);
+ if (registered_function **map_value = m_overload_names.get (name))
+ {
+ gcc_assert ((*map_value)->instance == instance);
+ obstack_free (&m_string_obstack, name);
+ }
+ else
+ {
+ registered_function &rfn
+ = add_function (instance, name, m_overload_type, NULL_TREE,
+ requires_float, true, m_direct_overloads);
+ m_overload_names.put (name, &rfn);
+ if (!preserve_user_namespace)
+ {
+ char *noprefix_name = get_name (instance, false, true);
+ registered_function &noprefix_rfn
+ = add_function (instance, noprefix_name, m_overload_type,
+ NULL_TREE, requires_float, true,
+ m_direct_overloads);
+ m_overload_names.put (noprefix_name, &noprefix_rfn);
+ }
+ }
+}
+
+/* If we are using manual overload resolution, add one function decl
+ for each overloaded function in GROUP. Take the function base name
+ from GROUP and the mode from MODE. */
/* Iterates over the group's NUM_PREDS-terminated predication list; for
   each predication either one catch-all overload is added (when neither
   type suffix is explicit) or one per type combination, with the
   implicit suffixes stubbed out as NUM_TYPE_SUFFIXES.  */
+void
+function_builder::add_overloaded_functions (const function_group_info &group,
+ mode_suffix_index mode,
+ bool preserve_user_namespace)
+{
+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+ {
+ unsigned int explicit_type0
+ = (*group.shape)->explicit_type_suffix_p (0, group.preds[pi], mode);
+ unsigned int explicit_type1
+ = (*group.shape)->explicit_type_suffix_p (1, group.preds[pi], mode);
+
+ if ((*group.shape)->skip_overload_p (group.preds[pi], mode))
+ continue;
+
+ if (!explicit_type0 && !explicit_type1)
+ {
+ /* Deal with the common case in which there is one overloaded
+ function for all type combinations. */
+ function_instance instance (group.base_name, *group.base,
+ *group.shape, mode, types_none[0],
+ group.preds[pi]);
+ add_overloaded_function (instance, preserve_user_namespace,
+ group.requires_float);
+ }
+ else
+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
+ ++ti)
+ {
+ /* Stub out the types that are determined by overload
+ resolution. */
+ type_suffix_pair types = {
+ explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
+ explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
+ };
+ function_instance instance (group.base_name, *group.base,
+ *group.shape, mode, types,
+ group.preds[pi]);
+ add_overloaded_function (instance, preserve_user_namespace,
+ group.requires_float);
+ }
+ }
+}
+
+/* Register all the functions in GROUP. */
/* Delegates entirely to the group's shape, which knows which concrete
   and overloaded decls the group needs.  */
+void
+function_builder::register_function_group (const function_group_info &group,
+ bool preserve_user_namespace)
+{
+ (*group.shape)->build (*this, group, preserve_user_namespace);
+}
+
/* Capture the call site (location and decl) alongside the instance.  */
+function_call_info::function_call_info (location_t location_in,
+ const function_instance &instance_in,
+ tree fndecl_in)
+ : function_instance (instance_in), location (location_in), fndecl (fndecl_in)
+{
+}
+
/* A resolver is a call_info plus the actual argument list to resolve
   against.  */
+function_resolver::function_resolver (location_t location,
+ const function_instance &instance,
+ tree fndecl, vec<tree, va_gc> &arglist)
+ : function_call_info (location, instance, fndecl), m_arglist (arglist)
+{
+}
+
+/* Return the vector type associated with type suffix TYPE. */
/* Index 0 of acle_vector_types holds the single-vector (non-tuple)
   types.  */
+tree
+function_resolver::get_vector_type (type_suffix_index type)
+{
+ return acle_vector_types[0][type_suffixes[type].vector_type];
+}
+
+/* Return the <stdint.h> name associated with TYPE. Using the <stdint.h>
+ name should be more user-friendly than the underlying canonical type,
+ since it makes the signedness and bitwidth explicit. */
/* The "+ 2" skips a two-character prefix of the ACLE name — presumably
   to strip a leading "__"; TODO confirm against vector_types' acle_name
   entries.  */
+const char *
+function_resolver::get_scalar_type_name (type_suffix_index type)
+{
+ return vector_types[type_suffixes[type].vector_type].acle_name + 2;
+}
+
+/* Return the type of argument I, or error_mark_node if it isn't
+ well-formed. */
+tree
+function_resolver::get_argument_type (unsigned int i)
+{
+ tree arg = m_arglist[i];
+ return arg == error_mark_node ? arg : TREE_TYPE (arg);
+}
+
+/* Return true if argument I is some form of scalar value. */
+bool
+function_resolver::scalar_argument_p (unsigned int i)
+{
+ tree type = get_argument_type (i);
+ return (INTEGRAL_TYPE_P (type)
+ /* Allow pointer types, leaving the frontend to warn where
+ necessary. */
+ || POINTER_TYPE_P (type)
+ || SCALAR_FLOAT_TYPE_P (type));
+}
+
+/* Report that the function has no form that takes type suffix TYPE.
+ Return error_mark_node. */
+tree
+function_resolver::report_no_such_form (type_suffix_index type)
+{
+ error_at (location, "%qE has no form that takes %qT arguments",
+ fndecl, get_vector_type (type));
+ return error_mark_node;
+}
+
+/* Silently check whether there is an instance of the function with the
+ mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1.
+ Return its function decl if so, otherwise return null. */
/* Builds a candidate function_instance with the resolver's own base,
   shape and predication and probes function_table for it.  */
+tree
+function_resolver::lookup_form (mode_suffix_index mode,
+ type_suffix_index type0,
+ type_suffix_index type1)
+{
+ type_suffix_pair types = { type0, type1 };
+ function_instance instance (base_name, base, shape, mode, types, pred);
+ registered_function *rfn
+ = function_table->find_with_hash (instance, instance.hash ());
+ return rfn ? rfn->decl : NULL_TREE;
+}
+
+/* Resolve the function to one with the mode suffix given by MODE and the
+ type suffixes given by TYPE0 and TYPE1. Return its function decl on
+ success, otherwise report an error and return error_mark_node. */
/* The error path blames whichever suffix was inferred (rather than
   fixed by the call's spelling); other mismatch shapes are not yet
   possible, hence the gcc_unreachable.  */
+tree
+function_resolver::resolve_to (mode_suffix_index mode,
+ type_suffix_index type0,
+ type_suffix_index type1)
+{
+ tree res = lookup_form (mode, type0, type1);
+ if (!res)
+ {
+ if (type1 == NUM_TYPE_SUFFIXES)
+ return report_no_such_form (type0);
+ if (type0 == type_suffix_ids[0])
+ return report_no_such_form (type1);
+ /* To be filled in when we have other cases. */
+ gcc_unreachable ();
+ }
+ return res;
+}
+
+/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
+ vectors; NUM_VECTORS is 1 for the former. Return the associated type
+ suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
+ and return NUM_TYPE_SUFFIXES on failure. */
+type_suffix_index
+function_resolver::infer_vector_or_tuple_type (unsigned int argno,
+ unsigned int num_vectors)
+{
+ tree actual = get_argument_type (argno);
+ if (actual == error_mark_node)
+ return NUM_TYPE_SUFFIXES;
+
+ /* A linear search should be OK here, since the code isn't hot and
+ the number of types is only small. */
/* NOTE(review): the match test "size_i + 1 == num_vectors" assumes
   acle_vector_types[k] holds the (k + 1)-vector tuple; the registration
   code visible in this file stores tuples at index num_vectors >> 1 —
   TODO verify the two index schemes agree for all tuple sizes.  */
+ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+ {
+ vector_type_index type_i = type_suffixes[suffix_i].vector_type;
+ tree type = acle_vector_types[size_i][type_i];
+ if (type && matches_type_p (type, actual))
+ {
+ if (size_i + 1 == num_vectors)
+ return type_suffix_index (suffix_i);
+
+ if (num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a single MVE vector rather than a tuple",
+ actual, argno + 1, fndecl);
+ else if (size_i == 0 && type_i != VECTOR_TYPE_mve_pred16_t)
+ /* num_vectors is always != 1, so the singular isn't needed. */
+ error_n (location, num_vectors, "%qT%d%qE%d",
+ "passing single vector %qT to argument %d"
+ " of %qE, which expects a tuple of %d vectors",
+ actual, argno + 1, fndecl, num_vectors);
+ else
+ /* num_vectors is always != 1, so the singular isn't needed. */
+ error_n (location, num_vectors, "%qT%d%qE%d",
+ "passing %qT to argument %d of %qE, which"
+ " expects a tuple of %d vectors", actual, argno + 1,
+ fndecl, num_vectors);
+ return NUM_TYPE_SUFFIXES;
+ }
+ }
+
/* No MVE type matched at all: complain generically.  */
+ if (num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects an MVE vector type", actual, argno + 1, fndecl);
+ else
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects an MVE tuple type", actual, argno + 1, fndecl);
+ return NUM_TYPE_SUFFIXES;
+}
+
+/* Require argument ARGNO to have some form of vector type. Return the
+ associated type suffix on success, using TYPE_SUFFIX_b for predicates.
+ Report an error and return NUM_TYPE_SUFFIXES on failure. */
/* Thin wrapper: a single vector is a 1-element "tuple".  */
+type_suffix_index
+function_resolver::infer_vector_type (unsigned int argno)
+{
+ return infer_vector_or_tuple_type (argno, 1);
+}
+
+/* Require argument ARGNO to be a vector or scalar argument. Return true
+ if it is, otherwise report an appropriate error. */
+bool
+function_resolver::require_vector_or_scalar_type (unsigned int argno)
+{
+ tree actual = get_argument_type (argno);
+ if (actual == error_mark_node)
+ return false;
+
+ if (!scalar_argument_p (argno) && !VECTOR_TYPE_P (actual))
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector or scalar type", actual, argno + 1, fndecl);
+ return false;
+ }
+
+ return true;
+}
+
+/* Require argument ARGNO to have vector type TYPE, in cases where this
+ requirement holds for all uses of the function. Return true if the
+ argument has the right form, otherwise report an appropriate error. */
+bool
+function_resolver::require_vector_type (unsigned int argno,
+ vector_type_index type)
+{
+ tree expected = acle_vector_types[0][type];
+ tree actual = get_argument_type (argno);
+ if (actual == error_mark_node)
+ return false;
+
+ if (!matches_type_p (expected, actual))
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects %qT", actual, argno + 1, fndecl, expected);
+ return false;
+ }
+ return true;
+}
+
+/* Like require_vector_type, but TYPE is inferred from previous arguments
+ rather than being a fixed part of the function signature. This changes
+ the nature of the error messages. */
/* A failed inference has already been diagnosed by infer_vector_type,
   so only the "wrong type relative to earlier arguments" case reports
   here.  */
+bool
+function_resolver::require_matching_vector_type (unsigned int argno,
+ type_suffix_index type)
+{
+ type_suffix_index new_type = infer_vector_type (argno);
+ if (new_type == NUM_TYPE_SUFFIXES)
+ return false;
+
+ if (type != new_type)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, but"
+ " previous arguments had type %qT",
+ get_vector_type (new_type), argno + 1, fndecl,
+ get_vector_type (type));
+ return false;
+ }
+ return true;
+}
+
+/* Require argument ARGNO to be a vector type with the following properties:
+
+ - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS
+ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
+
+ - the element size must be:
+
+ - the same as FIRST_TYPE's if EXPECTED_BITS == SAME_SIZE
+ - half of FIRST_TYPE's if EXPECTED_BITS == HALF_SIZE
+ - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE
+ - EXPECTED_BITS itself otherwise
+
+ Return true if the argument has the required type, otherwise report
+ an appropriate error.
+
+ FIRST_ARGNO is the first argument that is known to have type FIRST_TYPE.
+ Usually it comes before ARGNO, but sometimes it is more natural to resolve
+ arguments out of order.
+
+ If the required properties depend on FIRST_TYPE then both FIRST_ARGNO and
+ ARGNO contribute to the resolution process. If the required properties
+ are fixed, only FIRST_ARGNO contributes to the resolution process.
+
+ This function is a bit of a Swiss army knife. The complication comes
+ from trying to give good error messages when FIRST_ARGNO and ARGNO are
+ inconsistent, since either of them might be wrong. */
+bool function_resolver::
+require_derived_vector_type (unsigned int argno,
+ unsigned int first_argno,
+ type_suffix_index first_type,
+ type_class_index expected_tclass,
+ unsigned int expected_bits)
+{
+ /* If the type needs to match FIRST_ARGNO exactly, use the preferred
+ error message for that case. The VECTOR_TYPE_P test excludes tuple
+ types, which we handle below instead. */
+ bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno));
+ if (both_vectors_p
+ && expected_tclass == SAME_TYPE_CLASS
+ && expected_bits == SAME_SIZE)
+ {
+ /* There's no need to resolve this case out of order. */
+ gcc_assert (argno > first_argno);
+ return require_matching_vector_type (argno, first_type);
+ }
+
+ /* Use FIRST_TYPE to get the expected type class and element size. */
+ type_class_index orig_expected_tclass = expected_tclass;
+ if (expected_tclass == NUM_TYPE_CLASSES)
+ expected_tclass = type_suffixes[first_type].tclass;
+
+ unsigned int orig_expected_bits = expected_bits;
+ if (expected_bits == SAME_SIZE)
+ expected_bits = type_suffixes[first_type].element_bits;
+ else if (expected_bits == HALF_SIZE)
+ expected_bits = type_suffixes[first_type].element_bits / 2;
+ else if (expected_bits == QUARTER_SIZE)
+ expected_bits = type_suffixes[first_type].element_bits / 4;
+
+ /* If the expected type doesn't depend on FIRST_TYPE at all,
+ just check for the fixed choice of vector type. */
+ if (expected_tclass == orig_expected_tclass
+ && expected_bits == orig_expected_bits)
+ {
+ const type_suffix_info &expected_suffix
+ = type_suffixes[find_type_suffix (expected_tclass, expected_bits)];
+ return require_vector_type (argno, expected_suffix.vector_type);
+ }
+
+ /* Require the argument to be some form of MVE vector type,
+ without being specific about the type of vector we want. */
+ type_suffix_index actual_type = infer_vector_type (argno);
+ if (actual_type == NUM_TYPE_SUFFIXES)
+ return false;
+
+ /* Exit now if we got the right type. */
+ bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass);
+ bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits);
+ if (tclass_ok_p && size_ok_p)
+ return true;
+
+ /* First look for cases in which the actual type contravenes a fixed
+ size requirement, without having to refer to FIRST_TYPE. */
+ if (!size_ok_p && expected_bits == orig_expected_bits)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of %d-bit elements",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ expected_bits);
+ return false;
+ }
+
+ /* Likewise for a fixed type class requirement. This is only ever
+ needed for signed and unsigned types, so don't create unnecessary
+ translation work for other type classes. */
+ if (!tclass_ok_p && orig_expected_tclass == TYPE_signed)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of signed integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
+ return false;
+ }
+ if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of unsigned integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
+ return false;
+ }
+
+ /* Make sure that FIRST_TYPE itself is sensible before using it
+ as a basis for an error message. */
+ if (resolve_to (mode_suffix_id, first_type) == error_mark_node)
+ return false;
+
+ /* If the arguments have consistent type classes, but a link between
+ the sizes has been broken, try to describe the error in those terms. */
+ if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE)
+ {
+ /* Mention the lower-numbered argument first in the message. */
+ if (argno < first_argno)
+ {
+ std::swap (argno, first_argno);
+ std::swap (actual_type, first_type);
+ }
+ error_at (location, "arguments %d and %d of %qE must have the"
+ " same element size, but the values passed here have type"
+ " %qT and %qT respectively", first_argno + 1, argno + 1,
+ fndecl, get_vector_type (first_type),
+ get_vector_type (actual_type));
+ return false;
+ }
+
+ /* Likewise in reverse: look for cases in which the sizes are consistent
+ but a link between the type classes has been broken. */
+ if (both_vectors_p
+ && size_ok_p
+ && orig_expected_tclass == SAME_TYPE_CLASS
+ && type_suffixes[first_type].integer_p
+ && type_suffixes[actual_type].integer_p)
+ {
+ /* Mention the lower-numbered argument first in the message. */
+ if (argno < first_argno)
+ {
+ std::swap (argno, first_argno);
+ std::swap (actual_type, first_type);
+ }
+ error_at (location, "arguments %d and %d of %qE must have the"
+ " same signedness, but the values passed here have type"
+ " %qT and %qT respectively", first_argno + 1, argno + 1,
+ fndecl, get_vector_type (first_type),
+ get_vector_type (actual_type));
+ return false;
+ }
+
+ /* The two arguments are wildly inconsistent. */
+ type_suffix_index expected_type
+ = find_type_suffix (expected_tclass, expected_bits);
+ error_at (location, "passing %qT instead of the expected %qT to argument"
+ " %d of %qE, after passing %qT to argument %d",
+ get_vector_type (actual_type), get_vector_type (expected_type),
+ argno + 1, fndecl, get_argument_type (first_argno),
+ first_argno + 1);
+ return false;
+}
+
+/* Require argument ARGNO to be a (possibly variable) scalar, expecting it
+ to have the following properties:
+
+ - the type class must be the same as for type suffix 0 if EXPECTED_TCLASS
+ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
+
+ - the element size must be the same as for type suffix 0 if EXPECTED_BITS
+ is SAME_TYPE_SIZE, otherwise it must be EXPECTED_BITS itself.
+
+ Return true if the argument is valid, otherwise report an appropriate error.
+
+ Note that we don't check whether the scalar type actually has the required
+ properties, since that's subject to implicit promotions and conversions.
+ Instead we just use the expected properties to tune the error message. */
+bool function_resolver::
+require_derived_scalar_type (unsigned int argno,
+ type_class_index expected_tclass,
+ unsigned int expected_bits)
+{
+ gcc_assert (expected_tclass == SAME_TYPE_CLASS
+ || expected_tclass == TYPE_signed
+ || expected_tclass == TYPE_unsigned);
+
+ /* If the expected type doesn't depend on the type suffix at all,
+ just check for the fixed choice of scalar type. */
+ if (expected_tclass != SAME_TYPE_CLASS && expected_bits != SAME_SIZE)
+ {
+ type_suffix_index expected_type
+ = find_type_suffix (expected_tclass, expected_bits);
+ return require_scalar_type (argno, get_scalar_type_name (expected_type));
+ }
+
+ /* Any scalar is acceptable; promotions and conversions do the rest. */
+ if (scalar_argument_p (argno))
+ return true;
+
+ if (expected_tclass == SAME_TYPE_CLASS)
+ /* It doesn't really matter whether the element is expected to be
+ the same size as type suffix 0. */
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a scalar element", get_argument_type (argno),
+ argno + 1, fndecl);
+ else
+ /* It doesn't seem useful to distinguish between signed and unsigned
+ scalars here. */
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a scalar integer", get_argument_type (argno),
+ argno + 1, fndecl);
+ return false;
+}
+
+/* Require argument ARGNO to be suitable for an integer constant expression.
+ Return true if it is, otherwise report an appropriate error.
+
+ function_checker checks whether the argument is actually constant and
+ has a suitable range. The reason for distinguishing immediate arguments
+ here is because it provides more consistent error messages than
+ require_scalar_type would. */
+bool
+function_resolver::require_integer_immediate (unsigned int argno)
+{
+ /* Only the scalar-ness is checked here; constness and range are
+ checked later by function_checker. */
+ if (!scalar_argument_p (argno))
+ {
+ report_non_ice (location, fndecl, argno);
+ return false;
+ }
+ return true;
+}
+
+/* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED
+ as the name of its expected type. Return true if the argument has the
+ right form, otherwise report an appropriate error. EXPECTED is used
+ only in the error message, not for checking. */
+bool
+function_resolver::require_scalar_type (unsigned int argno,
+ const char *expected)
+{
+ if (!scalar_argument_p (argno))
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects %qs", get_argument_type (argno), argno + 1,
+ fndecl, expected);
+ return false;
+ }
+ return true;
+}
+
+/* Require the function to have exactly EXPECTED arguments. Return true
+ if it does, otherwise report an appropriate error. */
+bool
+function_resolver::check_num_arguments (unsigned int expected)
+{
+ /* Report the more specific of the two possible errors. */
+ if (m_arglist.length () < expected)
+ error_at (location, "too few arguments to function %qE", fndecl);
+ else if (m_arglist.length () > expected)
+ error_at (location, "too many arguments to function %qE", fndecl);
+ return m_arglist.length () == expected;
+}
+
+/* If the function is predicated, check that the last argument is a
+ suitable predicate. Also check that there are NOPS further
+ arguments before any predicate, but don't check what they are.
+
+ Return true on success, otherwise report a suitable error.
+ When returning true:
+
+ - set I to the number of the last unchecked argument.
+ - set NARGS to the total number of arguments. */
+bool
+function_resolver::check_gp_argument (unsigned int nops,
+ unsigned int &i, unsigned int &nargs)
+{
+ /* Default for the unpredicated case: the last operand. */
+ i = nops - 1;
+ if (pred != PRED_none)
+ {
+ switch (pred)
+ {
+ case PRED_m:
+ /* Add first inactive argument if needed, and final predicate. */
+ if (has_inactive_argument ())
+ nargs = nops + 2;
+ else
+ nargs = nops + 1;
+ break;
+
+ case PRED_p:
+ case PRED_x:
+ /* Add final predicate. */
+ nargs = nops + 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* The predicate is always the last argument and must be an
+ mve_pred16_t. */
+ if (!check_num_arguments (nargs)
+ || !require_vector_type (nargs - 1, VECTOR_TYPE_mve_pred16_t))
+ return false;
+
+ i = nargs - 2;
+ }
+ else
+ {
+ nargs = nops;
+ if (!check_num_arguments (nargs))
+ return false;
+ }
+
+ return true;
+}
+
+/* Finish resolving a function whose final argument can be a vector
+ or a scalar, with the function having an implicit "_n" suffix
+ in the latter case. This "_n" form might only exist for certain
+ type suffixes.
+
+ ARGNO is the index of the final argument. The inferred type suffix
+ was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE.
+ EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties
+ of the final vector or scalar argument, in the same way as for
+ require_derived_vector_type. INFERRED_TYPE is the inferred type
+ suffix itself, or NUM_TYPE_SUFFIXES if it's the same as FIRST_TYPE.
+
+ Return the function decl of the resolved function on success,
+ otherwise report a suitable error and return error_mark_node. */
+tree function_resolver::
+finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
+ type_suffix_index first_type,
+ type_class_index expected_tclass,
+ unsigned int expected_bits,
+ type_suffix_index inferred_type)
+{
+ if (inferred_type == NUM_TYPE_SUFFIXES)
+ inferred_type = first_type;
+ /* NULL_TREE if no _n form exists for INFERRED_TYPE. */
+ tree scalar_form = lookup_form (MODE_n, inferred_type);
+
+ /* Allow the final argument to be scalar, if an _n form exists. */
+ if (scalar_argument_p (argno))
+ {
+ if (scalar_form)
+ return scalar_form;
+
+ /* Check the vector form normally. If that succeeds, raise an
+ error about having no corresponding _n form. */
+ tree res = resolve_to (mode_suffix_id, inferred_type);
+ if (res != error_mark_node)
+ error_at (location, "passing %qT to argument %d of %qE, but its"
+ " %qT form does not accept scalars",
+ get_argument_type (argno), argno + 1, fndecl,
+ get_vector_type (first_type));
+ return error_mark_node;
+ }
+
+ /* If an _n form does exist, provide a more accurate message than
+ require_derived_vector_type would for arguments that are neither
+ vectors nor scalars. */
+ if (scalar_form && !require_vector_or_scalar_type (argno))
+ return error_mark_node;
+
+ /* Check for the correct vector type. */
+ if (!require_derived_vector_type (argno, first_argno, first_type,
+ expected_tclass, expected_bits))
+ return error_mark_node;
+
+ return resolve_to (mode_suffix_id, inferred_type);
+}
+
+/* Resolve a (possibly predicated) unary function. If the function uses
+ merge predication or if TREAT_AS_MERGE_P is true, there is an extra
+ vector argument before the governing predicate that specifies the
+ values of inactive elements. This argument has the following
+ properties:
+
+ - the type class must be the same as for active elements if MERGE_TCLASS
+ is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself.
+
+ - the element size must be the same as for active elements if MERGE_BITS
+ is SAME_TYPE_SIZE, otherwise it must be MERGE_BITS itself.
+
+ Return the function decl of the resolved function on success,
+ otherwise report a suitable error and return error_mark_node. */
+tree
+function_resolver::resolve_unary (type_class_index merge_tclass,
+ unsigned int merge_bits,
+ bool treat_as_merge_p)
+{
+ type_suffix_index type;
+ if (pred == PRED_m || treat_as_merge_p)
+ {
+ if (!check_num_arguments (3))
+ return error_mark_node;
+ if (merge_tclass == SAME_TYPE_CLASS && merge_bits == SAME_SIZE)
+ {
+ /* The inactive elements are the same as the active elements,
+ so we can use normal left-to-right resolution. */
+ if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+ /* Predicates are the last argument. */
+ || !require_vector_type (2 , VECTOR_TYPE_mve_pred16_t)
+ || !require_matching_vector_type (1 , type))
+ return error_mark_node;
+ }
+ else
+ {
+ /* The inactive element type is a function of the active one,
+ so resolve the active one first. */
+ /* NOTE(review): unlike the branch above, this expects the
+ predicate as argument 1 and the active vector as argument 2,
+ following the SVE argument order rather than the MVE
+ "predicate last" order used above -- confirm this is the
+ intended layout for the shapes that reach this branch. */
+ if (!require_vector_type (1, VECTOR_TYPE_mve_pred16_t)
+ || (type = infer_vector_type (2)) == NUM_TYPE_SUFFIXES
+ || !require_derived_vector_type (0, 2, type, merge_tclass,
+ merge_bits))
+ return error_mark_node;
+ }
+ }
+ else
+ {
+ /* We just need to check the predicate (if any) and the single
+ vector argument. */
+ unsigned int i, nargs;
+ if (!check_gp_argument (1, i, nargs)
+ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+ }
+
+ /* Handle convert-like functions in which the first type suffix is
+ explicit. */
+ if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES)
+ return resolve_to (mode_suffix_id, type_suffix_ids[0], type);
+
+ return resolve_to (mode_suffix_id, type);
+}
+
+/* Resolve a (possibly predicated) unary function taking a scalar
+ argument (_n suffix). If the function uses merge predication,
+ there is an extra vector argument in the first position, and the
+ final governing predicate that specifies the values of inactive
+ elements.
+
+ Return the function decl of the resolved function on success,
+ otherwise report a suitable error and return error_mark_node. */
+tree
+function_resolver::resolve_unary_n ()
+{
+ type_suffix_index type;
+
+ /* Currently only support overrides for _m (vdupq). */
+ if (pred != PRED_m)
+ return error_mark_node;
+
+ if (!check_num_arguments (3))
+ return error_mark_node;
+
+ /* The inactive elements are the same as the active elements,
+ so we can use normal left-to-right resolution. */
+ if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+ /* Predicates are the last argument. */
+ || !require_vector_type (2, VECTOR_TYPE_mve_pred16_t))
+ return error_mark_node;
+
+ /* Resolve only if argument 1 is a scalar and an _n form exists
+ for the inferred type. */
+ tree scalar_form = lookup_form (MODE_n, type);
+ if (scalar_argument_p (1) && scalar_form)
+ return scalar_form;
+
+ return error_mark_node;
+}
+
+/* Resolve a (possibly predicated) function that takes NOPS like-typed
+ vector arguments followed by NIMM integer immediates. Return the
+ function decl of the resolved function on success, otherwise report
+ a suitable error and return error_mark_node. */
+tree
+function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm)
+{
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!check_gp_argument (nops + nimm, i, nargs)
+ || (type = infer_vector_type (0 )) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ /* All vector operands must share the type inferred from argument 0.
+ The i == 0 iteration compares argument 0 with itself, which is
+ harmless. */
+ unsigned int last_arg = i + 1 - nimm;
+ for (i = 0; i < last_arg; i++)
+ if (!require_matching_vector_type (i, type))
+ return error_mark_node;
+
+ /* The trailing NIMM arguments must be integer immediates. */
+ for (i = last_arg; i < nargs; ++i)
+ if (!require_integer_immediate (i))
+ return error_mark_node;
+
+ return resolve_to (mode_suffix_id, type);
+}
+
+/* Resolve a (possibly predicated) function that offers a choice between
+ taking:
+
+ - NOPS like-typed vector arguments or
+ - NOPS - 1 like-typed vector arguments followed by a scalar argument
+
+ Return the function decl of the resolved function on success,
+ otherwise report a suitable error and return error_mark_node. */
+tree
+function_resolver::resolve_uniform_opt_n (unsigned int nops)
+{
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!check_gp_argument (nops, i, nargs)
+ /* Unary operators should use resolve_unary, so using i - 1 is
+ safe. */
+ || (type = infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ /* Skip last argument, may be scalar. */
+ unsigned int last_arg = i;
+ for (i = 0; i < last_arg; i++)
+ if (!require_matching_vector_type (i, type))
+ return error_mark_node;
+
+ /* The final argument determines whether the plain or _n form is used;
+ argument 0 supplied TYPE, hence first_argno == 0. */
+ return finish_opt_n_resolution (last_arg, 0, type);
+}
+
+/* If the call is erroneous, report an appropriate error and return
+ error_mark_node. Otherwise, if the function is overloaded, return
+ the decl of the non-overloaded function. Return NULL_TREE otherwise,
+ indicating that the call should be processed in the normal way. */
+tree
+function_resolver::resolve ()
+{
+ /* The shape knows the expected signature, so delegate to it. */
+ return shape->resolve (*this);
+}
+
+/* Construct a checker for a call to INSTANCE's decl FNDECL of type FNTYPE,
+ made at LOCATION with the NARGS arguments given by ARGS. */
+function_checker::function_checker (location_t location,
+ const function_instance &instance,
+ tree fndecl, tree fntype,
+ unsigned int nargs, tree *args)
+ : function_call_info (location, instance, fndecl),
+ m_fntype (fntype), m_nargs (nargs), m_args (args)
+{
+ /* m_base_arg skips over the leading inactive-value argument that
+ merge-predicated (_m) functions take. */
+ if (instance.has_inactive_argument ())
+ m_base_arg = 1;
+ else
+ m_base_arg = 0;
+}
+
+/* Return true if argument ARGNO exists, which it might not for
+ erroneous calls. It is safe to wave through checks if this
+ function returns false. */
+bool
+function_checker::argument_exists_p (unsigned int argno)
+{
+ gcc_assert (argno < (unsigned int) type_num_arguments (m_fntype));
+ return argno < m_nargs;
+}
+
+/* Check that argument ARGNO is an integer constant expression and
+ store its value in VALUE_OUT if so. Return true on success,
+ otherwise report an error and return false. The caller should
+ first check that argument ARGNO exists. */
+bool
+function_checker::require_immediate (unsigned int argno,
+ HOST_WIDE_INT &value_out)
+{
+ gcc_assert (argno < m_nargs);
+ tree arg = m_args[argno];
+
+ /* The type and range are unsigned, so read the argument as an
+ unsigned rather than signed HWI. */
+ if (!tree_fits_uhwi_p (arg))
+ {
+ report_non_ice (location, fndecl, argno);
+ return false;
+ }
+
+ /* ...but treat VALUE_OUT as signed for error reporting, since printing
+ -1 is more user-friendly than the maximum uint64_t value. */
+ value_out = tree_to_uhwi (arg);
+ return true;
+}
+
+/* Check that argument REL_ARGNO is an integer constant expression that has
+ a valid value for enumeration type TYPE. REL_ARGNO counts from the end
+ of the predication arguments. Returning true when the argument is
+ missing waves through erroneous calls that are diagnosed elsewhere. */
+bool
+function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
+{
+ unsigned int argno = m_base_arg + rel_argno;
+ if (!argument_exists_p (argno))
+ return true;
+
+ HOST_WIDE_INT actual;
+ if (!require_immediate (argno, actual))
+ return false;
+
+ /* Accept the value iff it matches one of the enumeration constants. */
+ for (tree entry = TYPE_VALUES (type); entry; entry = TREE_CHAIN (entry))
+ {
+ /* The value is an INTEGER_CST for C and a CONST_DECL wrapper
+ around an INTEGER_CST for C++. */
+ tree value = TREE_VALUE (entry);
+ if (TREE_CODE (value) == CONST_DECL)
+ value = DECL_INITIAL (value);
+ if (wi::to_widest (value) == actual)
+ return true;
+ }
+
+ report_not_enum (location, fndecl, argno, actual, type);
+ return false;
+}
+
+/* Check that argument REL_ARGNO is an integer constant expression in the
+ range [MIN, MAX]. REL_ARGNO counts from the end of the predication
+ arguments. Returning true when the argument is missing waves through
+ erroneous calls that are diagnosed elsewhere. */
+bool
+function_checker::require_immediate_range (unsigned int rel_argno,
+ HOST_WIDE_INT min,
+ HOST_WIDE_INT max)
+{
+ unsigned int argno = m_base_arg + rel_argno;
+ if (!argument_exists_p (argno))
+ return true;
+
+ /* Required because of the tree_to_uhwi -> HOST_WIDE_INT conversion
+ in require_immediate. */
+ gcc_assert (min >= 0 && min <= max);
+ HOST_WIDE_INT actual;
+ if (!require_immediate (argno, actual))
+ return false;
+
+ if (!IN_RANGE (actual, min, max))
+ {
+ report_out_of_range (location, fndecl, argno, actual, min, max);
+ return false;
+ }
+
+ return true;
+}
+
+/* Perform semantic checks on the call. Return true if the call is valid,
+ otherwise report a suitable error. */
+bool
+function_checker::check ()
+{
+ function_args_iterator iter;
+ tree type;
+ unsigned int i = 0;
+ /* Enum-typed arguments get an automatic range check against the
+ enumeration's values; everything else is left to the shape. */
+ FOREACH_FUNCTION_ARGS (m_fntype, type, iter)
+ {
+ if (type == void_type_node || i >= m_nargs)
+ break;
+
+ if (i >= m_base_arg
+ && TREE_CODE (type) == ENUMERAL_TYPE
+ && !require_immediate_enum (i - m_base_arg, type))
+ return false;
+
+ i += 1;
+ }
+
+ return shape->check (*this);
+}
+
+/* Construct a folder for CALL_IN, which calls INSTANCE's decl FNDECL. */
+gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
+ gcall *call_in)
+ : function_call_info (gimple_location (call_in), instance, fndecl),
+ call (call_in), lhs (gimple_call_lhs (call_in))
+{
+}
+
+/* Try to fold the call. Return the new statement on success and null
+ on failure. */
+gimple *
+gimple_folder::fold ()
+{
+ /* Don't fold anything when MVE is disabled; emit an error during
+ expansion instead. */
+ if (!TARGET_HAVE_MVE)
+ return NULL;
+
+ /* Punt if the function has a return type and no result location is
+ provided. The attributes should allow target-independent code to
+ remove the calls if appropriate. */
+ if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node)
+ return NULL;
+
+ /* The function_base knows how (and whether) to fold itself. */
+ return base->fold (*this);
+}
+
+/* Construct an expander for CALL_EXPR_IN, a call to INSTANCE's decl
+ FNDECL, with POSSIBLE_TARGET_IN as the preferred result location. */
+function_expander::function_expander (const function_instance &instance,
+ tree fndecl, tree call_expr_in,
+ rtx possible_target_in)
+ : function_call_info (EXPR_LOCATION (call_expr_in), instance, fndecl),
+ call_expr (call_expr_in), possible_target (possible_target_in)
+{
+}
+
+/* Return the handler of direct optab OP for the vector mode associated
+ with type suffix SUFFIX_I. */
+insn_code
+function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
+{
+ return ::direct_optab_handler (op, vector_mode (suffix_i));
+}
+
+/* For a function that does the equivalent of:
+
+ OUTPUT = COND ? FN (INPUTS) : FALLBACK;
+
+ return the value of FALLBACK.
+
+ MODE is the mode of OUTPUT.
+ MERGE_ARGNO is the argument that provides FALLBACK for _m functions,
+ or DEFAULT_MERGE_ARGNO if we should apply the usual rules.
+
+ ARGNO is the caller's index into args. If the returned value is
+ argument 0 (as for unary _m operations), increment ARGNO past the
+ returned argument. */
+rtx
+function_expander::get_fallback_value (machine_mode mode,
+ unsigned int merge_argno,
+ unsigned int &argno)
+{
+ /* Zero predication merges with zero rather than with an argument. */
+ if (pred == PRED_z)
+ return CONST0_RTX (mode);
+
+ gcc_assert (pred == PRED_m || pred == PRED_x);
+
+ /* When the fallback is argument 0, advance the caller's cursor so that
+ subsequent operands are read after it. */
+ if (merge_argno == 0)
+ return args[argno++];
+
+ return args[merge_argno];
+}
+
+/* Return a REG rtx that can be used for the result of the function,
+ using the preferred target if suitable. */
+rtx
+function_expander::get_reg_target ()
+{
+ machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
+ /* Reuse the preferred target only if it exists and has the right mode;
+ otherwise create a fresh pseudo. */
+ if (!possible_target || GET_MODE (possible_target) != target_mode)
+ possible_target = gen_reg_rtx (target_mode);
+ return possible_target;
+}
+
+/* Add an output operand to the instruction we're building, which has
+ code ICODE. Bind the output to the preferred target rtx if possible. */
+void
+function_expander::add_output_operand (insn_code icode)
+{
+ /* The operand being added is at index m_ops.length () in ICODE's
+ operand array. */
+ unsigned int opno = m_ops.length ();
+ machine_mode mode = insn_data[icode].operand[opno].mode;
+ m_ops.safe_grow (opno + 1, true);
+ create_output_operand (&m_ops.last (), possible_target, mode);
+}
+
+/* Add an input operand to the instruction we're building, which has
+ code ICODE. Calculate the value of the operand as follows:
+
+ - If the operand is a predicate, coerce X to have the
+ mode that the instruction expects.
+
+ - Otherwise use X directly. The expand machinery checks that X has
+ the right mode for the instruction. */
+void
+function_expander::add_input_operand (insn_code icode, rtx x)
+{
+ unsigned int opno = m_ops.length ();
+ const insn_operand_data &operand = insn_data[icode].operand[opno];
+ machine_mode mode = operand.mode;
+ if (mode == VOIDmode)
+ {
+ /* The only allowable use of VOIDmode is the wildcard
+ arm_any_register_operand, which is used to avoid
+ combinatorial explosion in the reinterpret patterns. */
+ gcc_assert (operand.predicate == arm_any_register_operand);
+ mode = GET_MODE (x);
+ }
+ else if (VALID_MVE_PRED_MODE (mode))
+ /* Reinterpret X in the predicate mode the instruction expects. */
+ x = gen_lowpart (mode, x);
+
+ m_ops.safe_grow (m_ops.length () + 1, true);
+ create_input_operand (&m_ops.last (), x, mode);
+}
+
+/* Add an integer (immediate) operand with value X to the instruction. */
+void
+function_expander::add_integer_operand (HOST_WIDE_INT x)
+{
+ m_ops.safe_grow (m_ops.length () + 1, true);
+ create_integer_operand (&m_ops.last (), x);
+}
+
+/* Generate instruction ICODE, given that its operands have already
+ been added to M_OPS. Return the value of the first operand, or
+ const0_rtx for functions that return void. */
+rtx
+function_expander::generate_insn (insn_code icode)
+{
+ expand_insn (icode, m_ops.length (), m_ops.address ());
+ return function_returns_void_p () ? const0_rtx : m_ops[0].value;
+}
+
+/* Implement the call using instruction ICODE, with a 1:1 mapping between
+ arguments and input operands. */
+rtx
+function_expander::use_exact_insn (insn_code icode)
+{
+ unsigned int nops = insn_data[icode].n_operands;
+ if (!function_returns_void_p ())
+ {
+ /* Discount the output operand from the input count. */
+ add_output_operand (icode);
+ nops -= 1;
+ }
+ for (unsigned int i = 0; i < nops; ++i)
+ add_input_operand (icode, args[i]);
+ return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, which does not use a
+ predicate. */
+rtx
+function_expander::use_unpred_insn (insn_code icode)
+{
+ gcc_assert (pred == PRED_none);
+ /* Discount the output operand. */
+ unsigned int nops = insn_data[icode].n_operands - 1;
+ unsigned int i = 0;
+
+ add_output_operand (icode);
+ for (; i < nops; ++i)
+ add_input_operand (icode, args[i]);
+
+ return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, which is a predicated
+ operation that returns arbitrary values for inactive lanes. */
+rtx
+function_expander::use_pred_x_insn (insn_code icode)
+{
+ gcc_assert (pred == PRED_x);
+ unsigned int nops = args.length ();
+
+ add_output_operand (icode);
+ /* Use first operand as arbitrary inactive input. */
+ add_input_operand (icode, possible_target);
+ /* The clobber tells the optimizers that POSSIBLE_TARGET starts out
+ undefined here, so its previous value carries no dependency. */
+ emit_clobber (possible_target);
+ /* Copy remaining arguments, including the final predicate. */
+ for (unsigned int i = 0; i < nops; ++i)
+ add_input_operand (icode, args[i]);
+
+ return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, which does the equivalent of:
+
+ OUTPUT = COND ? FN (INPUTS) : FALLBACK;
+
+ The instruction operands are in the order above: OUTPUT, COND, INPUTS
+ and FALLBACK. MERGE_ARGNO is the argument that provides FALLBACK for _m
+ functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */
+rtx
+function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno)
+{
+ /* At present we never need to handle PRED_none, which would involve
+ creating a new predicate rather than using one supplied by the user. */
+ gcc_assert (pred != PRED_none);
+ /* For MVE, we only handle PRED_m at present. */
+ gcc_assert (pred == PRED_m);
+
+ /* Discount the output, predicate and fallback value. */
+ unsigned int nops = insn_data[icode].n_operands - 3;
+ machine_mode mode = insn_data[icode].operand[0].mode;
+
+ /* get_fallback_value advances OPNO past the fallback when the fallback
+ is argument 0, so the NOPS inputs start at args[opno]. */
+ unsigned int opno = 0;
+ rtx fallback_arg = get_fallback_value (mode, merge_argno, opno);
+ rtx pred_arg = args[nops + 1];
+
+ add_output_operand (icode);
+ add_input_operand (icode, fallback_arg);
+ for (unsigned int i = 0; i < nops; ++i)
+ add_input_operand (icode, args[opno + i]);
+ add_input_operand (icode, pred_arg);
+ return generate_insn (icode);
+}
+
+/* Implement the call using a normal unpredicated optab for PRED_none.
+
+ <optab> corresponds to:
+
+ - CODE_FOR_SINT for signed integers
+ - CODE_FOR_UINT for unsigned integers
+ - CODE_FOR_FP for floating-point values */
+rtx
+function_expander::map_to_rtx_codes (rtx_code code_for_sint,
+ rtx_code code_for_uint,
+ rtx_code code_for_fp)
+{
+ gcc_assert (pred == PRED_none);
+ /* Pick the rtx code from type suffix 0's class and signedness. */
+ rtx_code code = type_suffix (0).integer_p ?
+ (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint)
+ : code_for_fp;
+ /* code_to_optab maps the rtx code to its direct optab; the pattern is
+ expected to exist for every mode this function is used with. */
+ insn_code icode = direct_optab_handler (code_to_optab (code), 0);
+ if (icode == CODE_FOR_nothing)
+ gcc_unreachable ();
+
+ return use_unpred_insn (icode);
+}
+
+/* Expand the call and return its lhs. */
+rtx
+function_expander::expand ()
+{
+ /* Expand each argument to an rtx before handing off to the base. */
+ unsigned int nargs = call_expr_nargs (call_expr);
+ args.reserve (nargs);
+ for (unsigned int i = 0; i < nargs; ++i)
+ args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i)));
+
+ return base->expand (*this);
+}
+
+/* If we're implementing manual overloading, check whether the MVE
+ function with subcode CODE is overloaded, and if so attempt to
+ determine the corresponding non-overloaded function. The call
+ occurs at location LOCATION and has the arguments given by ARGLIST.
+
+ If the call is erroneous, report an appropriate error and return
+ error_mark_node. Otherwise, if the function is overloaded, return
+ the decl of the non-overloaded function. Return NULL_TREE otherwise,
+ indicating that the call should be processed in the normal way. */
+tree
+resolve_overloaded_builtin (location_t location, unsigned int code,
+ vec<tree, va_gc> *arglist)
+{
+ /* Out-of-range subcodes are not ours to resolve. */
+ if (code >= vec_safe_length (registered_functions))
+ return NULL_TREE;
+
+ registered_function &rfn = *(*registered_functions)[code];
+ if (rfn.overloaded_p)
+ return function_resolver (location, rfn.instance, rfn.decl,
+ *arglist).resolve ();
+ return NULL_TREE;
+}
+
+/* Perform any semantic checks needed for a call to the MVE function
+ with subcode CODE, such as testing for integer constant expressions.
+ The call occurs at location LOCATION and has NARGS arguments,
+ given by ARGS. FNDECL is the original function decl, before
+ overload resolution. The unnamed vec<location_t> argument
+ (per-argument locations) is currently unused.
+
+ Return true if the call is valid, otherwise report a suitable error. */
+bool
+check_builtin_call (location_t location, vec<location_t>, unsigned int code,
+ tree fndecl, unsigned int nargs, tree *args)
+{
+ const registered_function &rfn = *(*registered_functions)[code];
+ if (!check_requires_float (location, rfn.decl, rfn.requires_float))
+ return false;
+
+ return function_checker (location, rfn.instance, fndecl,
+ TREE_TYPE (rfn.decl), nargs, args).check ();
+}
+
+/* Attempt to fold STMT, given that it's a call to the MVE function
+ with subcode CODE. Return the new statement on success and null
+ on failure. (Unlike the AArch64 SVE hook, no statement iterator is
+ taken here, so no additional statements can be inserted.) */
+gimple *
+gimple_fold_builtin (unsigned int code, gcall *stmt)
+{
+ registered_function &rfn = *(*registered_functions)[code];
+ return gimple_folder (rfn.instance, rfn.decl, stmt).fold ();
+}
+
+/* Expand a call to the MVE function with subcode CODE. EXP is the call
+ expression and TARGET is the preferred location for the result.
+ Return the value of the lhs. */
+rtx
+expand_builtin (unsigned int code, tree exp, rtx target)
+{
+ registered_function &rfn = *(*registered_functions)[code];
+ /* Diagnose use of floating-point intrinsics under a non-FP ABI here,
+ since folding was suppressed for that case. */
+ if (!check_requires_float (EXPR_LOCATION (exp), rfn.decl,
+ rfn.requires_float))
+ return target;
+ return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
+}
+
+} /* end namespace arm_mve */
+
+using namespace arm_mve;
+
+/* GC marking hook for function_instance. A no-op: the only pointer to
+ one lives in registered_function, where it is marked GTY ((skip)). */
+inline void
+gt_ggc_mx (function_instance *)
+{
+}
+
+/* PCH note-pointers hook for function_instance; a no-op for the same
+ reason as gt_ggc_mx. */
+inline void
+gt_pch_nx (function_instance *)
+{
+}
+
+/* PCH pointer-relocation hook for function_instance; a no-op for the
+ same reason as gt_ggc_mx. */
+inline void
+gt_pch_nx (function_instance *, gt_pointer_operator, void *)
+{
+}
#include "gt-arm-mve-builtins.h"
#ifndef GCC_ARM_MVE_BUILTINS_H
#define GCC_ARM_MVE_BUILTINS_H
+/* The full name of an MVE ACLE function is the concatenation of:
+
+ - the base name ("vadd", etc.)
+ - the "mode" suffix ("_n", "_index", etc.)
+ - the type suffixes ("_s32", "_b8", etc.)
+ - the predication suffix ("_x", "_z", etc.)
+
+ Each piece of information is individually useful, so we retain this
+ classification throughout:
+
+ - function_base represents the base name
+
+ - mode_suffix_index represents the mode suffix
+
+ - type_suffix_index represents individual type suffixes, while
+ type_suffix_pair represents a pair of them
+
+   - predication_index extends the predication suffix with an additional
+ alternative: PRED_implicit for implicitly-predicated operations
+
+ In addition to its unique full name, a function may have a shorter
+ overloaded alias. This alias removes pieces of the suffixes that
+ can be inferred from the arguments, such as by shortening the mode
+ suffix or dropping some of the type suffixes. The base name and the
+ predication suffix stay the same.
+
+ The function_shape class describes what arguments a given function
+ takes and what its overloaded alias is called. In broad terms,
+ function_base describes how the underlying instruction behaves while
+ function_shape describes how that instruction has been presented at
+ the language level.
+
+ The static list of functions uses function_group to describe a group
+ of related functions. The function_builder class is responsible for
+ expanding this static description into a list of individual functions
+ and registering the associated built-in functions. function_instance
+ describes one of these individual functions in terms of the properties
+ described above.
+
+ The classes involved in compiling a function call are:
+
+ - function_resolver, which resolves an overloaded function call to a
+ specific function_instance and its associated function decl
+
+ - function_checker, which checks whether the values of the arguments
+ conform to the ACLE specification
+
+ - gimple_folder, which tries to fold a function call at the gimple level
+
+ - function_expander, which expands a function call into rtl instructions
+
+ function_resolver and function_checker operate at the language level
+ and so are associated with the function_shape. gimple_folder and
+ function_expander are concerned with the behavior of the function
+ and so are associated with the function_base.
+
+ Note that we've specifically chosen not to fold calls in the frontend,
+ since MVE intrinsics will hardly ever fold a useful language-level
+ constant. */
namespace arm_mve {
+/* The maximum number of vectors in an ACLE tuple type.  */
+const unsigned int MAX_TUPLE_SIZE = 3;
+
+/* Used to represent the default merge argument index for _m functions.
+   The actual index depends on how many arguments the function takes.  */
+const unsigned int DEFAULT_MERGE_ARGNO = 0;
+
+/* Flags that describe what a function might do, in addition to reading
+   its arguments and returning a result.  They combine into the bitmask
+   returned by function_base::call_properties.  */
+const unsigned int CP_READ_FPCR = 1U << 0;
+const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1;
+const unsigned int CP_READ_MEMORY = 1U << 2;
+const unsigned int CP_WRITE_MEMORY = 1U << 3;
/* Enumerates the MVE predicate and (data) vector types, together called
"vector types" for brevity. */
VECTOR_TYPE_ ## ACLE_NAME,
#include "arm-mve-builtins.def"
NUM_VECTOR_TYPES
-#undef DEF_MVE_TYPE
};
+/* Classifies the available measurement units for an address displacement.  */
+enum units_index
+{
+  /* The mode takes no displacement (see mode_suffix_info).  */
+  UNITS_none,
+  /* The displacement is measured in bytes.  */
+  UNITS_bytes
+};
+
+/* Describes the various uses of a governing predicate.  */
+enum predication_index
+{
+  /* No governing predicate is present.  */
+  PRED_none,
+
+  /* Merging predication: copy inactive lanes from the first data argument
+     to the vector result.  */
+  PRED_m,
+
+  /* Plain predication: inactive lanes are not used to compute the
+     scalar result.  */
+  PRED_p,
+
+  /* "Don't care" predication: set inactive lanes of the vector result
+     to arbitrary values.  */
+  PRED_x,
+
+  /* Zero predication: set inactive lanes of the vector result to zero.  */
+  PRED_z,
+
+  /* Number of predication types; also used to terminate the predication
+     lists in function_group_info::preds.  */
+  NUM_PREDS
+};
+
+/* Classifies element types, based on type suffixes with the bit count
+   removed.  */
+enum type_class_index
+{
+  TYPE_bool,
+  TYPE_float,
+  TYPE_signed,
+  TYPE_unsigned,
+  /* Also reused by function_resolver::SAME_TYPE_CLASS to mean "keep
+     the existing type class".  */
+  NUM_TYPE_CLASSES
+};
+
+/* Classifies an operation into "modes"; for example, to distinguish
+   vector-scalar operations from vector-vector operations, or to
+   distinguish between different addressing modes.  This classification
+   accounts for the function suffixes that occur between the base name
+   and the first type suffix.  */
+enum mode_suffix_index
+{
+#define DEF_MVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) MODE_##NAME,
+#include "arm-mve-builtins.def"
+  /* "No mode suffix".  Deliberately last, so that it doubles as the
+     index of the sentinel entry of the mode_suffixes table (declared
+     below with MODE_none + 1 elements).  */
+  MODE_none
+};
+
+/* Enumerates the possible type suffixes.  Each suffix is associated with
+   a vector type, but for predicates provides extra information about the
+   element size.  */
+enum type_suffix_index
+{
+#define DEF_MVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \
+  TYPE_SUFFIX_ ## NAME,
+#include "arm-mve-builtins.def"
+  /* Also used to mean "no type suffix in this position"; see e.g. the
+     defaults in function_resolver::lookup_form.  */
+  NUM_TYPE_SUFFIXES
+};
+
+/* Combines two type suffixes.  The second element is NUM_TYPE_SUFFIXES
+   when only one suffix is present -- see the defaults used by
+   function_resolver::lookup_form and resolve_to.  */
+typedef enum type_suffix_index type_suffix_pair[2];
+
+/* Forward declarations.  */
+class function_base;
+class function_shape;
+
+/* Static information about a mode suffix.  Instances are looked up by
+   field values via find_mode_suffix, below.  */
+struct mode_suffix_info
+{
+  /* The suffix string itself.  */
+  const char *string;
+
+  /* The type of the vector base address, or NUM_VECTOR_TYPES if the
+     mode does not include a vector base address.  */
+  vector_type_index base_vector_type;
+
+  /* The type of the vector displacement, or NUM_VECTOR_TYPES if the
+     mode does not include a vector displacement.  (Note that scalar
+     displacements are always int64_t.)  */
+  vector_type_index displacement_vector_type;
+
+  /* The units in which the vector or scalar displacement is measured,
+     or UNITS_none if the mode doesn't take a displacement.  */
+  units_index displacement_units;
+};
+
+/* Static information about a type suffix.  Bit-fields keep the struct
+   compact, since there is one entry per suffix in the static
+   type_suffixes table.  */
+struct type_suffix_info
+{
+  /* The suffix string itself.  */
+  const char *string;
+
+  /* The associated ACLE vector or predicate type.  */
+  vector_type_index vector_type : 8;
+
+  /* What kind of type the suffix represents.  */
+  type_class_index tclass : 8;
+
+  /* The number of bits and bytes in an element.  For predicates this
+     measures the associated data elements.  */
+  unsigned int element_bits : 8;
+  unsigned int element_bytes : 8;
+
+  /* True if the suffix is for an integer type.  */
+  unsigned int integer_p : 1;
+  /* True if the suffix is for an unsigned type.  */
+  unsigned int unsigned_p : 1;
+  /* True if the suffix is for a floating-point type.  */
+  unsigned int float_p : 1;
+  /* Unused padding bits.  */
+  unsigned int spare : 13;
+
+  /* The associated vector or predicate mode.  */
+  machine_mode vector_mode : 16;
+};
+
+/* Static information about a set of functions.  Consumed by
+   function_builder::register_function_group and by the shape's
+   build hook.  */
+struct function_group_info
+{
+  /* The base name, as a string.  */
+  const char *base_name;
+
+  /* Describes the behavior associated with the function base name.  */
+  const function_base *const *base;
+
+  /* The shape of the functions, as described above the class definition.
+     It's possible to have entries with the same base name but different
+     shapes.  */
+  const function_shape *const *shape;
+
+  /* A list of the available type suffixes, and of the available predication
+     types.  The function supports every combination of the two.
+
+     The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES
+     while the list of predication types is terminated by NUM_PREDS.
+     The list of type suffixes is lexicographically ordered based
+     on the index value.  */
+  const type_suffix_pair *types;
+  const predication_index *preds;
+
+  /* Whether the function group requires a floating point abi.  */
+  bool requires_float;
+};
+
+/* Describes a single fully-resolved function (i.e. one that has a
+   unique full name).  */
+class GTY((user)) function_instance
+{
+public:
+  function_instance (const char *, const function_base *,
+		     const function_shape *, mode_suffix_index,
+		     const type_suffix_pair &, predication_index);
+
+  /* Equality and hashing, so that a function_instance can act as a
+     hash-table key (see registered_function_hasher).  */
+  bool operator== (const function_instance &) const;
+  bool operator!= (const function_instance &) const;
+  hashval_t hash () const;
+
+  /* Global-state and trapping queries, presumably derived from the
+     CP_* flags returned by call_properties -- TODO confirm against
+     their definitions.  */
+  unsigned int call_properties () const;
+  bool reads_global_state_p () const;
+  bool modifies_global_state_p () const;
+  bool could_trap_p () const;
+
+  unsigned int vectors_per_tuple () const;
+
+  /* Accessors for the static suffix tables and the types/modes they
+     reference (defined inline at the end of this file).  */
+  const mode_suffix_info &mode_suffix () const;
+
+  const type_suffix_info &type_suffix (unsigned int) const;
+  tree scalar_type (unsigned int) const;
+  tree vector_type (unsigned int) const;
+  tree tuple_type (unsigned int) const;
+  machine_mode vector_mode (unsigned int) const;
+  machine_mode gp_mode (unsigned int) const;
+
+  bool has_inactive_argument () const;
+
+  /* The properties of the function.  (The explicit "enum"s are required
+     for gengtype.)  */
+  const char *base_name;
+  const function_base *base;
+  const function_shape *shape;
+  enum mode_suffix_index mode_suffix_id;
+  type_suffix_pair type_suffix_ids;
+  enum predication_index pred;
+};
+
+class registered_function;
+
+/* A class for building and registering function decls.  */
+class function_builder
+{
+public:
+  function_builder ();
+  ~function_builder ();
+
+  void add_unique_function (const function_instance &, tree,
+			    vec<tree> &, bool, bool, bool);
+  void add_overloaded_function (const function_instance &, bool, bool);
+  void add_overloaded_functions (const function_group_info &,
+				 mode_suffix_index, bool);
+
+  void register_function_group (const function_group_info &, bool);
+
+private:
+  /* Helpers for accumulating a function name in m_string_obstack.  */
+  void append_name (const char *);
+  char *finish_name ();
+
+  char *get_name (const function_instance &, bool, bool);
+
+  tree get_attributes (const function_instance &);
+
+  registered_function &add_function (const function_instance &,
+				     const char *, tree, tree,
+				     bool, bool, bool);
+
+  /* The function type to use for functions that are resolved by
+     function_resolver.  */
+  tree m_overload_type;
+
+  /* True if we should create a separate decl for each instance of an
+     overloaded function, instead of using function_resolver.  */
+  bool m_direct_overloads;
+
+  /* Used for building up function names.  */
+  obstack m_string_obstack;
+
+  /* Maps all overloaded function names that we've registered so far
+     to their associated function_instances.  */
+  hash_map<nofree_string_hash, registered_function *> m_overload_names;
+};
+
+/* A base class for handling calls to built-in functions.  Deriving from
+   function_instance gives subclasses direct access to the suffix and
+   predication queries above.  */
+class function_call_info : public function_instance
+{
+public:
+  function_call_info (location_t, const function_instance &, tree);
+
+  /* True if the call produces no result; defined inline below.  */
+  bool function_returns_void_p ();
+
+  /* The location of the call.  */
+  location_t location;
+
+  /* The FUNCTION_DECL that is being called.  */
+  tree fndecl;
+};
+
+/* A class for resolving an overloaded function call.  */
+class function_resolver : public function_call_info
+{
+public:
+  /* Special element-size values meaning "same as", "half of" and
+     "quarter of" the existing suffix's element size; 256 is presumably
+     chosen to be above any real element bit count -- TODO confirm.  */
+  enum { SAME_SIZE = 256, HALF_SIZE, QUARTER_SIZE };
+  /* Likewise "keep the existing type class".  */
+  static const type_class_index SAME_TYPE_CLASS = NUM_TYPE_CLASSES;
+
+  function_resolver (location_t, const function_instance &, tree,
+		     vec<tree, va_gc> &);
+
+  /* Queries about suffix types and argument types.  */
+  tree get_vector_type (type_suffix_index);
+  const char *get_scalar_type_name (type_suffix_index);
+  tree get_argument_type (unsigned int);
+  bool scalar_argument_p (unsigned int);
+
+  /* Lookup of a specific non-overloaded form; NUM_TYPE_SUFFIXES means
+     "no type suffix in that position".  */
+  tree report_no_such_form (type_suffix_index);
+  tree lookup_form (mode_suffix_index,
+		    type_suffix_index = NUM_TYPE_SUFFIXES,
+		    type_suffix_index = NUM_TYPE_SUFFIXES);
+  tree resolve_to (mode_suffix_index,
+		   type_suffix_index = NUM_TYPE_SUFFIXES,
+		   type_suffix_index = NUM_TYPE_SUFFIXES);
+
+  /* Deduce type suffixes from the actual argument types.  */
+  type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
+  type_suffix_index infer_vector_type (unsigned int);
+
+  /* Per-argument requirements; each returns false if the requirement
+     does not hold (presumably diagnosing the mismatch -- confirm
+     against the definitions).  */
+  bool require_vector_or_scalar_type (unsigned int);
+
+  bool require_vector_type (unsigned int, vector_type_index);
+  bool require_matching_vector_type (unsigned int, type_suffix_index);
+  bool require_derived_vector_type (unsigned int, unsigned int,
+				    type_suffix_index,
+				    type_class_index = SAME_TYPE_CLASS,
+				    unsigned int = SAME_SIZE);
+  bool require_integer_immediate (unsigned int);
+  bool require_scalar_type (unsigned int, const char *);
+  bool require_derived_scalar_type (unsigned int, type_class_index,
+				    unsigned int = SAME_SIZE);
+
+  /* Drivers for resolving common signature patterns.  */
+  bool check_num_arguments (unsigned int);
+  bool check_gp_argument (unsigned int, unsigned int &, unsigned int &);
+  tree resolve_unary (type_class_index = SAME_TYPE_CLASS,
+		      unsigned int = SAME_SIZE, bool = false);
+  tree resolve_unary_n ();
+  tree resolve_uniform (unsigned int, unsigned int = 0);
+  tree resolve_uniform_opt_n (unsigned int);
+  tree finish_opt_n_resolution (unsigned int, unsigned int, type_suffix_index,
+				type_class_index = SAME_TYPE_CLASS,
+				unsigned int = SAME_SIZE,
+				type_suffix_index = NUM_TYPE_SUFFIXES);
+
+  /* Resolve the call; see function_shape::resolve.  */
+  tree resolve ();
+
+private:
+  /* The arguments to the overloaded function.  */
+  vec<tree, va_gc> &m_arglist;
+};
+
+/* A class for checking that the semantic constraints on a function call are
+   satisfied, such as arguments being integer constant expressions with
+   a particular range.  The parent class's FNDECL is the decl that was
+   called in the original source, before overload resolution.  */
+class function_checker : public function_call_info
+{
+public:
+  function_checker (location_t, const function_instance &, tree,
+		    tree, unsigned int, tree *);
+
+  /* Constraints on immediate (integer constant expression) arguments;
+     each returns false if the constraint does not hold.  */
+  bool require_immediate_enum (unsigned int, tree);
+  bool require_immediate_lane_index (unsigned int, unsigned int = 1);
+  bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
+
+  /* Perform the checks for this call (see function_shape::check).  */
+  bool check ();
+
+private:
+  bool argument_exists_p (unsigned int);
+
+  bool require_immediate (unsigned int, HOST_WIDE_INT &);
+
+  /* The type of the resolved function.  */
+  tree m_fntype;
+
+  /* The arguments to the function.  */
+  unsigned int m_nargs;
+  tree *m_args;
+
+  /* The first argument not associated with the function's predication
+     type.  */
+  unsigned int m_base_arg;
+};
+
+/* A class for folding a gimple function call.  The folding itself is
+   presumably delegated to the function_base's fold hook (which takes a
+   gimple_folder &) -- see function_base::fold.  */
+class gimple_folder : public function_call_info
+{
+public:
+  gimple_folder (const function_instance &, tree,
+		 gcall *);
+
+  /* Return the folded statement, or null if no fold was possible.  */
+  gimple *fold ();
+
+  /* The call we're folding.  */
+  gcall *call;
+
+  /* The result of the call, or null if none.  */
+  tree lhs;
+};
+
+/* A class for expanding a function call into RTL.  */
+class function_expander : public function_call_info
+{
+public:
+  function_expander (const function_instance &, tree, tree, rtx);
+  rtx expand ();
+
+  insn_code direct_optab_handler (optab, unsigned int = 0);
+
+  rtx get_fallback_value (machine_mode, unsigned int, unsigned int &);
+  rtx get_reg_target ();
+
+  /* Helpers that append operands to m_ops, before generate_insn emits
+     the instruction using them.  */
+  void add_output_operand (insn_code);
+  void add_input_operand (insn_code, rtx);
+  void add_integer_operand (HOST_WIDE_INT);
+  rtx generate_insn (insn_code);
+
+  /* Emit the call using a specific instruction pattern; the variants
+     presumably differ in how predication is handled -- see their
+     definitions in the .cc file.  */
+  rtx use_exact_insn (insn_code);
+  rtx use_unpred_insn (insn_code);
+  rtx use_pred_x_insn (insn_code);
+  rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
+
+  rtx map_to_rtx_codes (rtx_code, rtx_code, rtx_code);
+
+  /* The function call expression.  */
+  tree call_expr;
+
+  /* For functions that return a value, this is the preferred location
+     of that value.  It could be null or could have a different mode
+     from the function return type.  */
+  rtx possible_target;
+
+  /* The expanded arguments.  */
+  auto_vec<rtx, 16> args;
+
+private:
+  /* Used to build up the operands to an instruction.  */
+  auto_vec<expand_operand, 8> m_ops;
+};
+
+/* Provides information about a particular function base name, and handles
+   tasks related to the base name.
+
+   NOTE(review): no virtual destructor; instances are presumably static
+   singletons that are never deleted through a base pointer -- confirm.  */
+class function_base
+{
+public:
+  /* Return a set of CP_* flags that describe what the function might do,
+     in addition to reading its arguments and returning a result.  The
+     default implementation (defined at the end of this file) flags FPCR
+     reads and FP exceptions for floating-point type suffixes.  */
+  virtual unsigned int call_properties (const function_instance &) const;
+
+  /* If the function operates on tuples of vectors, return the number
+     of vectors in the tuples, otherwise return 1.  */
+  virtual unsigned int vectors_per_tuple () const { return 1; }
+
+  /* Try to fold the given gimple call.  Return the new gimple statement
+     on success, otherwise return null.  */
+  virtual gimple *fold (gimple_folder &) const { return NULL; }
+
+  /* Expand the given call into rtl.  Return the result of the function,
+     or an arbitrary value if the function doesn't return a result.  */
+  virtual rtx expand (function_expander &) const = 0;
+};
+
+/* Classifies functions into "shapes".  The idea is to take all the
+   type signatures for a set of functions, and classify what's left
+   based on:
+
+   - the number of arguments
+
+   - the process of determining the types in the signature from the mode
+     and type suffixes in the function name (including types that are not
+     affected by the suffixes)
+
+   - which arguments must be integer constant expressions, and what range
+     those arguments have
+
+   - the process for mapping overloaded names to "full" names.  */
+class function_shape
+{
+public:
+  /* Return true if type suffix I must appear explicitly in the function
+     name for predication type PRED and mode suffix MODE, rather than
+     being inferred from the arguments -- TODO confirm exact contract
+     against the shape implementations.  */
+  virtual bool explicit_type_suffix_p (unsigned int, enum predication_index,
+				       enum mode_suffix_index) const = 0;
+
+  /* Likewise for the mode suffix itself.  */
+  virtual bool explicit_mode_suffix_p (enum predication_index,
+				       enum mode_suffix_index) const = 0;
+
+  /* Return true if no overloaded form should be registered for the
+     given predication type and mode suffix.  */
+  virtual bool skip_overload_p (enum predication_index,
+				enum mode_suffix_index) const = 0;
+
+  /* Define all functions associated with the given group.  */
+  virtual void build (function_builder &,
+		      const function_group_info &,
+		      bool) const = 0;
+
+  /* Try to resolve the overloaded call.  Return the non-overloaded
+     function decl on success and error_mark_node on failure.  */
+  virtual tree resolve (function_resolver &) const = 0;
+
+  /* Check whether the given call is semantically valid.  Return true
+     if it is, otherwise report an error and return false.  */
+  virtual bool check (function_checker &) const { return true; }
+};
+
+/* Static descriptions of each type suffix and mode suffix; both tables
+   end with a sentinel entry (index NUM_TYPE_SUFFIXES / MODE_none).  */
+extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
+extern const mode_suffix_info mode_suffixes[MODE_none + 1];
+
+/* Row N - 1 of acle_vector_types holds the types with N-vector tuples,
+   row 0 being the single-vector types; see function_instance::tuple_type.  */
extern tree scalar_types[NUM_VECTOR_TYPES];
-extern tree acle_vector_types[3][NUM_VECTOR_TYPES + 1];
+extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+
+/* Return the ACLE type mve_pred16_t.  Row 0 of acle_vector_types holds
+   the single-vector (non-tuple) types.  */
+inline tree
+get_mve_pred16_t (void)
+{
+  return acle_vector_types[0][VECTOR_TYPE_mve_pred16_t];
+}
+
+/* Scan the mode_suffixes table for an entry whose base vector type,
+   displacement vector type and displacement units all match the given
+   values.  Return its index on success or MODE_none on failure.  */
+inline mode_suffix_index
+find_mode_suffix (vector_type_index base_vector_type,
+		  vector_type_index displacement_vector_type,
+		  units_index displacement_units)
+{
+  unsigned int num_modes = ARRAY_SIZE (mode_suffixes);
+  for (unsigned int i = 0; i < num_modes; ++i)
+    if (mode_suffixes[i].base_vector_type == base_vector_type
+	&& (mode_suffixes[i].displacement_vector_type
+	    == displacement_vector_type)
+	&& mode_suffixes[i].displacement_units == displacement_units)
+      return mode_suffix_index (i);
+  return MODE_none;
+}
+
+/* Return the type suffix associated with ELEMENT_BITS-bit elements of type
+   class TCLASS.  Such a suffix is required to exist.  */
+inline type_suffix_index
+find_type_suffix (type_class_index tclass, unsigned int element_bits)
+{
+  for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+    {
+      const type_suffix_info &suffix = type_suffixes[suffix_i];
+      if (suffix.tclass == tclass && suffix.element_bits == element_bits)
+	return type_suffix_index (suffix_i);
+    }
+  gcc_unreachable ();
+}
+
+/* Construct a function_instance from its individual fields.  The type
+   suffix array is filled in with memcpy because array members cannot
+   be copy-initialized in the member-initializer list.  */
+inline function_instance::
+function_instance (const char *base_name_in,
+		   const function_base *base_in,
+		   const function_shape *shape_in,
+		   mode_suffix_index mode_suffix_id_in,
+		   const type_suffix_pair &type_suffix_ids_in,
+		   predication_index pred_in)
+  : base_name (base_name_in), base (base_in), shape (shape_in),
+    mode_suffix_id (mode_suffix_id_in), pred (pred_in)
+{
+  memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids));
+}
+
+/* Return true if this instance names the same function as OTHER: same
+   base, shape, mode suffix, type suffixes and predication.  base_name
+   is not compared separately -- it is presumably implied by BASE;
+   TODO confirm.  */
+inline bool
+function_instance::operator== (const function_instance &other) const
+{
+  return (base == other.base
+	  && shape == other.shape
+	  && mode_suffix_id == other.mode_suffix_id
+	  && pred == other.pred
+	  && type_suffix_ids[0] == other.type_suffix_ids[0]
+	  && type_suffix_ids[1] == other.type_suffix_ids[1]);
+}
+
+/* Return true if this instance does not name the same function as
+   OTHER; simply the negation of operator==.  */
+inline bool
+function_instance::operator!= (const function_instance &other) const
+{
+  return !(*this == other);
+}
+
+/* If the function operates on tuples of vectors, return the number
+   of vectors in the tuples, otherwise return 1.  Forwards to the
+   function_base hook, whose default returns 1.  */
+inline unsigned int
+function_instance::vectors_per_tuple () const
+{
+  return base->vectors_per_tuple ();
+}
+
+/* Return information about the function's mode suffix.  The table has
+   MODE_none + 1 entries, so indexing with MODE_none (the "no suffix"
+   sentinel) is in range.  */
+inline const mode_suffix_info &
+function_instance::mode_suffix () const
+{
+  return mode_suffixes[mode_suffix_id];
+}
+
+/* Return information about type suffix I.  type_suffixes has a sentinel
+   entry at index NUM_TYPE_SUFFIXES, so an absent suffix is safe to
+   query.  */
+inline const type_suffix_info &
+function_instance::type_suffix (unsigned int i) const
+{
+  return type_suffixes[type_suffix_ids[i]];
+}
+
+/* Return the scalar type associated with type suffix I, looked up via
+   the suffix's vector type in the scalar_types table.  */
+inline tree
+function_instance::scalar_type (unsigned int i) const
+{
+  return scalar_types[type_suffix (i).vector_type];
+}
+
+/* Return the vector type associated with type suffix I.  Row 0 of
+   acle_vector_types holds the single-vector (non-tuple) types.  */
+inline tree
+function_instance::vector_type (unsigned int i) const
+{
+  return acle_vector_types[0][type_suffix (i).vector_type];
+}
+
+/* If the function operates on tuples of vectors, return the tuple type
+   associated with type suffix I, otherwise return the vector type
+   associated with type suffix I (row 0 of acle_vector_types).  */
+inline tree
+function_instance::tuple_type (unsigned int i) const
+{
+  unsigned int row = vectors_per_tuple () - 1;
+  return acle_vector_types[row][type_suffix (i).vector_type];
+}
+
+/* Return the vector or predicate mode associated with type suffix I,
+   taken from the static type_suffixes table.  */
+inline machine_mode
+function_instance::vector_mode (unsigned int i) const
+{
+  return type_suffix (i).vector_mode;
+}
+
+/* Return true if the function has no return value, i.e. if the decl's
+   declared return type is exactly void_type_node.  */
+inline bool
+function_call_info::function_returns_void_p ()
+{
+  return TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
+}
+
+/* Default implementation of function_base::call_properties, with
+   conservatively correct behavior for floating-point instructions:
+   any function with a floating-point type suffix is treated as reading
+   the FPCR and potentially raising FP exceptions.  */
+inline unsigned int
+function_base::call_properties (const function_instance &instance) const
+{
+  bool is_float = (instance.type_suffix (0).float_p
+		   || instance.type_suffix (1).float_p);
+  return is_float ? CP_READ_FPCR | CP_RAISE_FP_EXCEPTIONS : 0;
+}
} /* end namespace arm_mve */