}
};
-class svcvtnt_impl : public CODE_FOR_MODE0 (aarch64_sve_cvtnt)
-{
-public:
- gimple *
- fold (gimple_folder &f) const override
- {
- if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
- f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
- return NULL;
- }
-};
-
class svdiv_impl : public rtx_code_function
{
public:
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
-FUNCTION (svcvtnt, svcvtnt_impl,)
+FUNCTION (svcvtnt, NARROWING_TOP_CONVERT0 (aarch64_sve_cvtnt),)
FUNCTION (svdiv, svdiv_impl,)
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
int m_unspec_for_uint;
};
+template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
+class narrowing_top_convert : public code_for_mode_function <CODE_FOR_MODE, N>
+{
+ using base = code_for_mode_function <CODE_FOR_MODE, N>;
+
+public:
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
+ return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
+ return NULL;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ /* If the instruction is predicated, Add a selector argument for the
+ values of inactive lanes, which is equal to all ones for merging
+ predication and to all zeros for zeroing predication. */
+ if (e.pred == PRED_none)
+ ;
+ else if (e.pred == PRED_z)
+ {
+ e.args.quick_push (CONST0_RTX (e.result_mode ()));
+ }
+ else
+ {
+ gcc_assert (e.pred == PRED_m || e.pred == PRED_x);
+ e.args.quick_push (CONST1_RTX (e.result_mode ()));
+ }
+
+ return base::expand (e);
+ }
+};
+
+#define NARROWING_TOP_CONVERT0(PATTERN)\
+ narrowing_top_convert<code_for_##PATTERN, 0>
+#define NARROWING_TOP_CONVERT1(PATTERN)\
+ narrowing_top_convert<code_for_##PATTERN, 1>
+
}
/* Declare the global function base NAME, creating it from an instance
}
};
-class svcvtxnt_impl : public CODE_FOR_MODE1 (aarch64_sve2_cvtxnt)
-{
-public:
- gimple *
- fold (gimple_folder &f) const override
- {
- if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
- return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
- return NULL;
- }
-};
-
class svdup_laneq_impl : public function_base
{
public:
FUNCTION (svcvtn, svcvtn_impl,)
FUNCTION (svcvtnb, fixed_insn_function, (CODE_FOR_aarch64_sve2_fp8_cvtnbvnx16qi))
FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
-FUNCTION (svcvtxnt, svcvtxnt_impl,)
+FUNCTION (svcvtxnt, NARROWING_TOP_CONVERT1 (aarch64_sve2_cvtxnt),)
FUNCTION (svdup_laneq, svdup_laneq_impl,)
FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1))
DEF_SVE_FUNCTION (svst1wq, store, s_data, implicit)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p2, AARCH64_FL_SME2p2)
+DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
+DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
+#undef REQUIRED_EXTENSIONS
+
#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
;;
;; This instructions does not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
- [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
(unspec:VNx8BF_ONLY
- [(match_operand:VNx4BI 2 "register_operand" "Upl")
+ [(match_operand:VNx4BI 2 "register_operand")
(const_int SVE_STRICT_GP)
- (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
- (match_operand:VNx4SF 3 "register_operand" "w")]
+ (match_operand:VNx8BF_ONLY 1 "register_operand")
+ (match_operand:VNx8BF_ONLY 4 "aarch64_constant_vector_operand")
+ (match_operand:VNx4SF 3 "register_operand")]
UNSPEC_COND_FCVTNT))]
- "TARGET_SVE_BF16"
- "bfcvtnt\t%0.h, %2/m, %3.s"
+ "TARGET_SVE_BF16 || TARGET_SVE2p2_OR_SME2p2"
+ {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
+ [ w , 0 , Upl , w , vs1 ; * ] bfcvtnt\t%0.h, %2/m, %3.s
+ [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] bfcvtnt\t%0.h, %2/z, %3.s
+ }
[(set_attr "sve_type" "sve_fp_cvt")]
)
;; These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
- [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
(unspec:SVE_FULL_SDF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ [(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_SDF
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:<VNARROW> 2 "register_operand" "w")]
+ (match_operand:<VNARROW> 2 "register_operand")]
SVE2_COND_FP_UNARY_LONG)
- (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
- "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ]
+ [ w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
+ [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
+ }
"&& !rtx_equal_p (operands[1], operands[4])"
{
operands[4] = copy_rtx (operands[1]);
)
(define_insn "*cond_<sve_fp_op><mode>_strict"
- [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
(unspec:SVE_FULL_SDF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ [(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_SDF
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:<VNARROW> 2 "register_operand" "w")]
+ (match_operand:<VNARROW> 2 "register_operand")]
SVE2_COND_FP_UNARY_LONG)
- (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
- "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ]
+ [ w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
+ [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
+ }
[(set_attr "sve_type" "sve_fp_cvt")]
)
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
- [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
(unspec:SVE_FULL_HSF
- [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
+ [(match_operand:<VWIDE_PRED> 2 "register_operand")
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
- (match_operand:<VWIDE> 3 "register_operand" "w")]
+ (match_operand:SVE_FULL_HSF 1 "register_operand")
+ (match_operand:SVE_FULL_HSF 4 "aarch64_constant_vector_operand")
+ (match_operand:<VWIDE> 3 "register_operand")]
UNSPEC_COND_FCVTNT))]
"TARGET_SVE2"
- "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
+ {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
+ [ w , 0 , Upl , w , vs1 ; * ] fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>
+ [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] fcvtnt\t%0.<Vetype>, %2/z, %3.<Vewtype>
+ }
[(set_attr "sve_type" "sve_fp_cvt")]
)
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
- [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ [(set (match_operand:<VNARROW> 0 "register_operand")
(unspec:<VNARROW>
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ [(match_operand:<VPRED> 2 "register_operand")
(const_int SVE_STRICT_GP)
- (match_operand:<VNARROW> 1 "register_operand" "0")
- (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
+ (match_operand:<VNARROW> 1 "register_operand")
+ (match_operand:<VNARROW> 4 "aarch64_constant_vector_operand")
+ (match_operand:VNx2DF_ONLY 3 "register_operand")]
UNSPEC_COND_FCVTXNT))]
"TARGET_SVE2"
- "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+ {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
+ [ w , 0 , Upl , w , vs1 ; * ] fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>
+ [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] fcvtxnt\t%0.<Ventype>, %2/z, %3.<Vetype>
+ }
[(set_attr "sve_type" "sve_fp_cvt")]
)
+
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Multi-vector widening conversions
;; -------------------------------------------------------------------------
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtlt_f32_f16_z_tied1:
+** fcvtlt z0\.s, p0/z, z0\.h
+** ret
+*/
+TEST_DUAL_Z_REV (cvtlt_f32_f16_z_tied1, svfloat32_t, svfloat16_t,
+ z0_res = svcvtlt_f32_f16_z (p0, z0),
+ z0_res = svcvtlt_f32_z (p0, z0))
+
+/*
+** cvtlt_f32_f16_z_untied:
+** fcvtlt z0\.s, p0/z, z4\.h
+** ret
+*/
+TEST_DUAL_Z (cvtlt_f32_f16_z_untied, svfloat32_t, svfloat16_t,
+ z0 = svcvtlt_f32_f16_z (p0, z4),
+ z0 = svcvtlt_f32_z (p0, z4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtlt_f64_f32_z_tied1:
+** fcvtlt z0\.d, p0/z, z0\.s
+** ret
+*/
+TEST_DUAL_Z_REV (cvtlt_f64_f32_z_tied1, svfloat64_t, svfloat32_t,
+ z0_res = svcvtlt_f64_f32_z (p0, z0),
+ z0_res = svcvtlt_f64_z (p0, z0))
+
+/*
+** cvtlt_f64_f32_z_untied:
+** fcvtlt z0\.d, p0/z, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvtlt_f64_f32_z_untied, svfloat64_t, svfloat32_t,
+ z0 = svcvtlt_f64_f32_z (p0, z4),
+ z0 = svcvtlt_f64_z (p0, z4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2+bf16"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_bf16_f32_z_tied1:
+** bfcvtnt z0\.h, p0/z, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_z (z0, p0, z4),
+ z0 = svcvtnt_bf16_z (z0, p0, z4))
+
+/*
+** cvtnt_bf16_f32_z_untied:
+** (
+** mov z0\.d, z1\.d
+** bfcvtnt z0\.h, p0/z, z4\.s
+** |
+** bfcvtnt z1\.h, p0/z, z4\.s
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_z (z1, p0, z4),
+ z0 = svcvtnt_bf16_z (z1, p0, z4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_f16_f32_z_tied1:
+** fcvtnt z0\.h, p0/z, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvtnt_f16_f32_z_tied1, svfloat16_t, svfloat32_t,
+ z0 = svcvtnt_f16_f32_z (z0, p0, z4),
+ z0 = svcvtnt_f16_z (z0, p0, z4))
+
+/*
+** cvtnt_f16_f32_z_untied:
+** (
+** mov z0\.d, z1\.d
+** fcvtnt z0\.h, p0/z, z4\.s
+** |
+** fcvtnt z1\.h, p0/z, z4\.s
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtnt_f16_f32_z_untied, svfloat16_t, svfloat32_t,
+ z0 = svcvtnt_f16_f32_z (z1, p0, z4),
+ z0 = svcvtnt_f16_z (z1, p0, z4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_f32_f64_z_tied1:
+** fcvtnt z0\.s, p0/z, z4\.d
+** ret
+*/
+TEST_DUAL_Z (cvtnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
+ z0 = svcvtnt_f32_f64_z (z0, p0, z4),
+ z0 = svcvtnt_f32_z (z0, p0, z4))
+
+/*
+** cvtnt_f32_f64_z_untied:
+** (
+** mov z0\.d, z1\.d
+** fcvtnt z0\.s, p0/z, z4\.d
+** |
+** fcvtnt z1\.s, p0/z, z4\.d
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
+ z0 = svcvtnt_f32_f64_z (z1, p0, z4),
+ z0 = svcvtnt_f32_z (z1, p0, z4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtxnt_f32_f64_z_tied1:
+** fcvtxnt z0\.s, p0/z, z4\.d
+** ret
+*/
+TEST_DUAL_Z (cvtxnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
+ z0 = svcvtxnt_f32_f64_z (z0, p0, z4),
+ z0 = svcvtxnt_f32_z (z0, p0, z4))
+
+/*
+** cvtxnt_f32_f64_z_untied:
+** (
+** mov z0\.d, z1\.d
+** fcvtxnt z0\.s, p0/z, z4\.d
+** |
+** fcvtxnt z1\.s, p0/z, z4\.d
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtxnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
+ z0 = svcvtxnt_f32_f64_z (z1, p0, z4),
+ z0 = svcvtxnt_f32_z (z1, p0, z4))