git.ipfire.org Git - thirdparty/gcc.git/commitdiff
arm: [MVE intrinsics] rework vuninitialized
author Christophe Lyon <christophe.lyon@linaro.org>
Fri, 19 Sep 2025 12:20:27 +0000 (12:20 +0000)
committer Christophe Lyon <christophe.lyon@linaro.org>
Tue, 18 Nov 2025 17:02:33 +0000 (17:02 +0000)
Implement vuninitialized using the new MVE builtins framework.

This patch is a bit more intrusive than other similar patches because
the two vuninitialized variants use different signatures.

To handle this situation, the patch makes the inherent shape derive
from overloaded_base instead of nonoverloaded_base, and adds a new
parameter to add_unique_function so that we can choose between adding
only the nonoverloaded version, only the overloaded ones, or both (the
default).

The patch introduces pop_and_resolve_to, which pops an argument (the
only one in this case) and then calls resolve_to, as many other shapes
do.  The pop is needed because the overloaded version takes an argument
(used to resolve which nonoverloaded version to call) that is not
present in the nonoverloaded version's prototype.

gcc/ChangeLog:
* config/arm/arm-mve-builtins-shapes.cc (build_one): Add
which_overload parameter.
(inherent): Derive from overloaded_base<0>.  Add support for
overloaded version.
* config/arm/arm-mve-builtins-shapes.h (inherent): Update comment.
* config/arm/arm-mve-builtins.cc (add_unique_function): Add
support for new which_overload parameter.
(pop_and_resolve_to): New.
* config/arm/arm-mve-builtins.h (NONOVERLOADED_FORM)
(OVERLOADED_FORM): New.
(add_unique_function): Update prototype.
(pop_and_resolve_to): New prototype.
* config/arm/arm_mve.h (vuninitializedq): Delete.
(vuninitializedq_u8): Delete.
(vuninitializedq_u16): Delete.
(vuninitializedq_u32): Delete.
(vuninitializedq_u64): Delete.
(vuninitializedq_s8): Delete.
(vuninitializedq_s16): Delete.
(vuninitializedq_s32): Delete.
(vuninitializedq_s64): Delete.
(vuninitializedq_f16): Delete.
(vuninitializedq_f32): Delete.
(__arm_vuninitializedq): Delete.

gcc/config/arm/arm-mve-builtins-shapes.cc
gcc/config/arm/arm-mve-builtins-shapes.h
gcc/config/arm/arm-mve-builtins.cc
gcc/config/arm/arm-mve-builtins.h
gcc/config/arm/arm_mve.h

index dd3be87fc9cec73776ec32efeb646815e9e2d03e..b1dc1dd1172df5b09ed78bd195e9091cfec4e26e 100644 (file)
@@ -249,7 +249,8 @@ static void
 build_one (function_builder &b, const char *signature,
           const function_group_info &group, mode_suffix_index mode_suffix_id,
           unsigned int ti, unsigned int pi, bool preserve_user_namespace,
-          bool force_direct_overloads)
+          bool force_direct_overloads,
+          unsigned int which_overload = NONOVERLOADED_FORM | OVERLOADED_FORM)
 {
   /* Current functions take at most five arguments.  Match
      parse_signature parameter below.  */
@@ -261,7 +262,7 @@ build_one (function_builder &b, const char *signature,
   apply_predication (instance, return_type, argument_types);
   b.add_unique_function (instance, return_type, argument_types,
                         preserve_user_namespace, group.requires_float,
-                        force_direct_overloads);
+                        force_direct_overloads, which_overload);
 }
 
 /* Add a function instance for every type and predicate combination in
@@ -1509,18 +1510,51 @@ struct getq_lane_def : public overloaded_base<0>
 SHAPE (getq_lane)
 
 /* <T0>[xN]_t vfoo_t0().
+   <T0>[xN]_t vfoo(<T0>_t).
 
    Example: vuninitializedq.
    int8x16_t [__arm_]vuninitializedq_s8(void)
    int8x16_t [__arm_]vuninitializedq(int8x16_t t)  */
-struct inherent_def : public nonoverloaded_base
+struct inherent_def : public overloaded_base<0>
 {
   void
   build (function_builder &b, const function_group_info &group,
         bool preserve_user_namespace) const override
   {
-    build_all (b, "t0", group, MODE_none, preserve_user_namespace);
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+
+    /* Overloaded and non-overloaded forms have different signatures, so call
+       build_one with either OVERLOADED_FORM or NONOVERLOADED_FORM.  */
+    unsigned int pi = 0;
+    bool force_direct_overloads = false;
+    for (unsigned int ti = 0;
+        ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+      {
+       /* For int8x16_t [__arm_]vuninitializedq(int8x16_t t), generate only
+          the overloaded form, i.e. without type suffix.  */
+       build_one (b, "t0,t0", group, MODE_none, ti, pi,
+                  preserve_user_namespace, force_direct_overloads,
+                  OVERLOADED_FORM);
+       /* For int8x16_t [__arm_]vuninitializedq_s8(void), generate only the
+          non-overloaded form, i.e. with type suffix.  */
+       build_one (b, "t0", group, MODE_none, ti, pi,
+                  preserve_user_namespace, force_direct_overloads,
+                  NONOVERLOADED_FORM);
+      }
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    type_suffix_index type;
+    if (!r.check_num_arguments (1)
+       || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    /* We need to pop the useless argument for the non-overloaded function.  */
+    return r.pop_and_resolve_to (r.mode_suffix_id, type);
   }
+
 };
 SHAPE (inherent)
 
index 22d06ce0ebd369ca3142019a263e366aa3f52e90..56bba6dbf876f69811aaaa164bb7bf47b4c17b46 100644 (file)
@@ -29,7 +29,8 @@ namespace arm_mve
 
      Also:
 
-     - "inherent" means that the function takes no arguments.  */
+     - "inherent" means that the function takes no arguments, except in its
+       overloaded form.  */
 
   namespace shapes
   {
index b37c91c541bc1a7ff72a3771413db9b5afaaedc8..ecf5196437bf2457156c13adb2ff3c0c981f11bd 100644 (file)
@@ -994,7 +994,8 @@ function_builder::add_unique_function (const function_instance &instance,
                                       vec<tree> &argument_types,
                                       bool preserve_user_namespace,
                                       bool requires_float,
-                                      bool force_direct_overloads)
+                                      bool force_direct_overloads,
+                                      unsigned int which_overload)
 {
   /* Add the function under its full (unique) name with prefix.  */
   char *name = get_name (instance, true, false);
@@ -1002,27 +1003,31 @@ function_builder::add_unique_function (const function_instance &instance,
                                           argument_types.length (),
                                           argument_types.address ());
   tree attrs = get_attributes (instance);
-  registered_function &rfn = add_function (instance, name, fntype, attrs,
-                                          requires_float, false, false);
-
-  /* Enter the function into the hash table.  */
-  hashval_t hash = instance.hash ();
-  registered_function **rfn_slot
-    = function_table->find_slot_with_hash (instance, hash, INSERT);
-  gcc_assert (!*rfn_slot);
-  *rfn_slot = &rfn;
-
-  /* Also add the non-prefixed non-overloaded function, as placeholder
-     if the user namespace does not need to be preserved.  */
-  char *noprefix_name = get_name (instance, false, false);
-  attrs = get_attributes (instance);
-  add_function (instance, noprefix_name, fntype, attrs, requires_float,
-               false, preserve_user_namespace);
+  if (which_overload & NONOVERLOADED_FORM)
+    {
+      registered_function &rfn = add_function (instance, name, fntype, attrs,
+                                              requires_float, false, false);
+
+      /* Enter the function into the hash table.  */
+      hashval_t hash = instance.hash ();
+      registered_function **rfn_slot
+       = function_table->find_slot_with_hash (instance, hash, INSERT);
+      gcc_assert (!*rfn_slot);
+      *rfn_slot = &rfn;
+
+      /* Also add the non-prefixed non-overloaded function, as placeholder
+        if the user namespace does not need to be preserved.  */
+      char *noprefix_name = get_name (instance, false, false);
+      attrs = get_attributes (instance);
+      add_function (instance, noprefix_name, fntype, attrs, requires_float,
+                   false, preserve_user_namespace);
+    }
 
   /* Also add the function under its overloaded alias, if we want
      a separate decl for each instance of an overloaded function.  */
   char *overload_name = get_name (instance, true, true);
-  if (strcmp (name, overload_name) != 0)
+  if ((which_overload & OVERLOADED_FORM)
+      && (strcmp (name, overload_name) != 0))
     {
       /* Attribute lists shouldn't be shared.  */
       attrs = get_attributes (instance);
@@ -1231,6 +1236,18 @@ function_resolver::resolve_to (mode_suffix_index mode,
   return res;
 }
 
+/* Pop an argument and resolve the function to one with the mode suffix given
+   by MODE and the type suffixes given by TYPE0 and TYPE1.  Return its function
+   decl on success, otherwise report an error and return error_mark_node.  */
+tree
+function_resolver::pop_and_resolve_to (mode_suffix_index mode,
+                                      type_suffix_index type0,
+                                      type_suffix_index type1)
+{
+  m_arglist.pop ();
+  return resolve_to (mode, type0, type1);
+}
+
 /* Require argument ARGNO to be a pointer to a scalar type that has a
    corresponding type suffix.  Return that type suffix on success,
    otherwise report an error and return NUM_TYPE_SUFFIXES.  */
index 3a0d50dc64cedf9534bfb17c398a385bd4bcc4f5..5d25b166b730c9cb5f22542c7f2f57eb6c7033cc 100644 (file)
@@ -94,6 +94,13 @@ const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1;
 const unsigned int CP_READ_MEMORY = 1U << 2;
 const unsigned int CP_WRITE_MEMORY = 1U << 3;
 
+/* Flags that describe which forms of an intrinsic to generate: non-overloaded
+   and/or overloaded ones.  In general we want both, but for vuninitialized the
+   two forms have different signatures and we need to generate them
+   separately.  */
+const unsigned int NONOVERLOADED_FORM = 1U << 0;
+const unsigned int OVERLOADED_FORM = 1U << 1;
+
 /* Enumerates the MVE predicate and (data) vector types, together called
    "vector types" for brevity.  */
 enum vector_type_index
@@ -311,7 +318,7 @@ public:
   ~function_builder ();
 
   void add_unique_function (const function_instance &, tree,
-                           vec<tree> &, bool, bool, bool);
+                           vec<tree> &, bool, bool, bool, unsigned int);
   void add_overloaded_function (const function_instance &, bool, bool);
   void add_overloaded_functions (const function_group_info &,
                                 mode_suffix_index, bool);
@@ -383,6 +390,9 @@ public:
   tree resolve_to (mode_suffix_index,
                   type_suffix_index = NUM_TYPE_SUFFIXES,
                   type_suffix_index = NUM_TYPE_SUFFIXES);
+  tree pop_and_resolve_to (mode_suffix_index,
+                          type_suffix_index = NUM_TYPE_SUFFIXES,
+                          type_suffix_index = NUM_TYPE_SUFFIXES);
 
   type_suffix_index infer_pointer_type (unsigned int);
   type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
index b66b712c4ce2d3d80288c1a1b877e7a4c4395ea9..58e339ba8b0579b5d3923ed3afc42c1cfa683ea2 100644 (file)
 #pragma GCC arm "arm_mve.h" false
 #endif
 
-#ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
-#define vuninitializedq(__v) __arm_vuninitializedq(__v)
-
-
-#define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void)
-#define vuninitializedq_u16(void) __arm_vuninitializedq_u16(void)
-#define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void)
-#define vuninitializedq_u64(void) __arm_vuninitializedq_u64(void)
-#define vuninitializedq_s8(void) __arm_vuninitializedq_s8(void)
-#define vuninitializedq_s16(void) __arm_vuninitializedq_s16(void)
-#define vuninitializedq_s32(void) __arm_vuninitializedq_s32(void)
-#define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void)
-#define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void)
-#define vuninitializedq_f32(void) __arm_vuninitializedq_f32(void)
-#endif
 
 
 #ifdef __cplusplus
 
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (uint8x16_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_u8 ();
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (uint16x8_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_u16 ();
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (uint32x4_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_u32 ();
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (uint64x2_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_u64 ();
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (int8x16_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_s8 ();
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (int16x8_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_s16 ();
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (int32x4_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_s32 ();
-}
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (int64x2_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_s64 ();
-}
-
-#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (float16x8_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_f16 ();
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vuninitializedq (float32x4_t /* __v ATTRIBUTE UNUSED */)
-{
- return __arm_vuninitializedq_f32 ();
-}
-#endif /* __ARM_FEATURE_MVE & 2 (MVE floating point)  */
 
 #else
 enum {
@@ -371,36 +286,6 @@ extern void *__ARM_undef;
     _Generic(param, type: param, const type: param, default: _Generic (param, float*: param, default: *(type *)__ARM_undef))
 #endif
 
-#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
-
-#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 (), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vuninitializedq_f16 (), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vuninitializedq_f32 ());})
-
-#else /* MVE Integer.  */
-
-#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());})
-
-#endif /* MVE Integer.  */
-
 #endif /* __cplusplus  */
 #endif /* __ARM_FEATURE_MVE  */
 #endif /* _GCC_ARM_MVE_H.  */