Implement vctp using the new MVE builtins framework.
2024-08-21 Christophe Lyon <christophe.lyon@linaro.org>
gcc/ChangeLog:
* config/arm/arm-mve-builtins-base.cc (class vctpq_impl): New.
(vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-base.def (vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-base.h (vctp16q): New.
(vctp32q): New.
(vctp64q): New.
(vctp8q): New.
* config/arm/arm-mve-builtins-shapes.cc (vctp): New.
* config/arm/arm-mve-builtins-shapes.h (vctp): New.
* config/arm/arm-mve-builtins.cc
(function_instance::has_inactive_argument): Add support for vctp.
* config/arm/arm_mve.h (vctp16q): Delete.
(vctp32q): Delete.
(vctp64q): Delete.
(vctp8q): Delete.
(vctp8q_m): Delete.
(vctp64q_m): Delete.
(vctp32q_m): Delete.
(vctp16q_m): Delete.
(__arm_vctp16q): Delete.
(__arm_vctp32q): Delete.
(__arm_vctp64q): Delete.
(__arm_vctp8q): Delete.
(__arm_vctp8q_m): Delete.
(__arm_vctp64q_m): Delete.
(__arm_vctp32q_m): Delete.
(__arm_vctp16q_m): Delete.
* config/arm/mve.md (mve_vctp<MVE_vctp>q<MVE_vpred>): Add '@'
prefix.
(mve_vctp<MVE_vctp>q_m<MVE_vpred>): Likewise.
}
};
+ /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics.
+ Each instance is constructed with a machine mode, stored in m_mode.
+ expand accepts only MODE_none as mode suffix and only the PRED_none
+ and PRED_m predications; any other combination reaches
+ gcc_unreachable.  After emitting the selected insn, expand copies the
+ low part of its result, viewed as HImode, into a new HImode register
+ and returns that register. */
+class vctpq_impl : public function_base
+{
+public:
+ CONSTEXPR vctpq_impl (machine_mode mode)
+ : m_mode (mode)
+ {}
+
+ /* Mode this intrinsic operates on.  It is passed as both mode
+ arguments to code_for_mve_vctpq and code_for_mve_vctpq_m in
+ expand. */
+ machine_mode m_mode;
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code code;
+ rtx target;
+
+ if (e.mode_suffix_id != MODE_none)
+ gcc_unreachable ();
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate, no suffix: look up the insn with
+ code_for_mve_vctpq and expand it with use_exact_insn. */
+ code = code_for_mve_vctpq (m_mode, m_mode);
+ target = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ /* No suffix, "m" predicate: look up the insn with
+ code_for_mve_vctpq_m and expand it with use_cond_insn, passing
+ 0 as its second argument. */
+ code = code_for_mve_vctpq_m (m_mode, m_mode);
+ target = e.use_cond_insn (code, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+};
+
/* Implements vcvtq intrinsics. */
class vcvtq_impl : public function_base
{
FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
+FUNCTION (vctp8q, vctpq_impl, (V16BImode))
+FUNCTION (vctp16q, vctpq_impl, (V8BImode))
+FUNCTION (vctp32q, vctpq_impl, (V4BImode))
+FUNCTION (vctp64q, vctpq_impl, (V2QImode))
FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ)
FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
FUNCTION (vcvtq, vcvtq_impl,)
DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
+DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none)
DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
extern const function_base *const vcmulq_rot270;
extern const function_base *const vcmulq_rot90;
extern const function_base *const vcreateq;
+extern const function_base *const vctp16q;
+extern const function_base *const vctp32q;
+extern const function_base *const vctp64q;
+extern const function_base *const vctp8q;
extern const function_base *const vcvtaq;
extern const function_base *const vcvtbq;
extern const function_base *const vcvtmq;
};
SHAPE (unary_widen_acc)
+/* mve_pred16_t foo_t0(uint32_t)
+
+ Example: vctp16q.
+ mve_pred16_t [__arm_]vctp16q(uint32_t a)
+ mve_pred16_t [__arm_]vctp16q_m(uint32_t a, mve_pred16_t p)
+
+ Non-overloaded shape: build registers the functions of GROUP through
+ build_all, using the signature string "p,su32" and MODE_none, and
+ forwarding PRESERVE_USER_NAMESPACE unchanged.
+ NOTE(review): "p,su32" presumably encodes the mve_pred16_t return
+ type and the uint32_t argument shown above -- confirm against the
+ signature parser. */
+struct vctp_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ build_all (b, "p,su32", group, MODE_none, preserve_user_namespace);
+ }
+};
+SHAPE (vctp)
+
/* <T0>_t foo_t0[_t1](<T1>_t)
<T0>_t foo_t0_n[_t1](<T1>_t, const int)
extern const function_shape *const unary_n;
extern const function_shape *const unary_widen;
extern const function_shape *const unary_widen_acc;
+ extern const function_shape *const vctp;
extern const function_shape *const vcvt;
extern const function_shape *const vcvt_f16_f32;
extern const function_shape *const vcvt_f32_f16;
|| base == functions::vcmpltq
|| base == functions::vcmpcsq
|| base == functions::vcmphiq
+ || base == functions::vctp16q
+ || base == functions::vctp32q
+ || base == functions::vctp64q
+ || base == functions::vctp8q
|| (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
|| (base == functions::vcvttq && type_suffix (0).element_bits == 16)
|| base == functions::vfmaq
#define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
#define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
#define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vctp16q(__a) __arm_vctp16q(__a)
-#define vctp32q(__a) __arm_vctp32q(__a)
-#define vctp64q(__a) __arm_vctp64q(__a)
-#define vctp8q(__a) __arm_vctp8q(__a)
#define vpnot(__a) __arm_vpnot(__a)
-#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p)
-#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
-#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
-#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm)
#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm)
#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm)
__builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
}
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q (uint32_t __a)
-{
- return __builtin_mve_vctp16qv8bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q (uint32_t __a)
-{
- return __builtin_mve_vctp32qv4bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q (uint32_t __a)
-{
- return __builtin_mve_vctp64qv2qi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q (uint32_t __a)
-{
- return __builtin_mve_vctp8qv16bi (__a);
-}
-
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vpnot (mve_pred16_t __a)
return __builtin_mve_vpnotv16bi (__a);
}
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp8q_mv16bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp64q_mv2qi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp32q_mv4bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp16q_mv8bi (__a, __p);
-}
-
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
;;
;; [vctp8q vctp16q vctp32q vctp64q])
;;
-(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q<MVE_vpred>"
[
(set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")]
;;
;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m])
;;
-(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q_m<MVE_vpred>"
[
(set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")