From: Artemiy Volkov Date: Fri, 9 Jan 2026 19:30:52 +0000 (+0000) Subject: aarch64: add zeroing forms for predicated SVE int-/FP-to-FP conversions X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7c7ace08e4aab91ca0ce4bbd318af181da7c460f;p=thirdparty%2Fgcc.git aarch64: add zeroing forms for predicated SVE int-/FP-to-FP conversions SVE2.2 (or in streaming mode, SME2.2) adds support for zeroing predication for the following SVE FP conversion instructions: SVE1: - SCVTF (Signed integer convert to floating-point (predicated)) - UCVTF (Unsigned integer convert to floating-point (predicated)) - FCVT (Floating-point convert (predicated)) - BFCVT (Single-precision convert to BFloat16 (predicated)) SVE2: - FCVTX (Double-precision convert to single-precision, rounding to odd (predicated)) The SVE1 instructions are spread over several patterns for various combinations of source/destination widths and FP semantics, and the FCVTX instruction is serviced by two patterns in the aarch64-sve2.md file via the SVE2_COND_FP_UNARY_NARROWB iterator (one for strict, the other for relaxed FP semantics). The patch adds an alternative that emits a single zeroing-predication version of an instruction whenever the merge operand is a constant zero vector and the sve2p2_or_sme2p2 condition holds. As with the original cvt_b?f* tests in the sve/acle/asm directory, testcases for conversions from both integral and floating-point types coexist in the same files and are grouped only by the destination type. FCVTX tests are added in a separate file. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (*cond__nonextend_relaxed): New alternative for zeroing predication. Add `arch` attribute to every alternative. (*cond__nonextend_relaxed): Likewise. (*cond__nonextend_strict): Likewise. (*cond__extend): Likewise. (*cond__trunc): Likewise. (*cond__trunc): Likewise. (*cond__trunc): Likewise. (*cond__nontrunc): Likewise. (*cond__nontrunc_relaxed): Likewise. * config/aarch64/aarch64-sve2.md (*cond__any_relaxed): Likewise. (*cond__any_strict): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c: New test. * gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c: Likewise. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a4954df9d7a..c7fe14973f9 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -11027,10 +11027,11 @@ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && >= " - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] cvtf\t%0., %1/m, %2. - [ &w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] cvtf\t%0., %1/m, %2. + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] cvtf\t%0., %1/z, %2. + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11053,10 +11054,11 @@ UNSPEC_SEL))] "TARGET_SVE && (~( | ) & ) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] cvtf\t%0., %1/m, %2. - [ &w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] cvtf\t%0., %1/m, %2. + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] cvtf\t%0., %1/z, %2. + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11076,10 +11078,11 @@ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && >= " - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] cvtf\t%0., %1/m, %2. - [ &w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] cvtf\t%0., %1/m, %2. + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] cvtf\t%0., %1/z, %2. + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. } [(set_attr "sve_type" "sve_int_cvt")] ) @@ -11111,10 +11114,11 @@ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] cvtf\t%0., %1/m, %2. - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] cvtf\t%0., %1/m, %2. + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] cvtf\t%0., %1/z, %2. + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;cvtf\t%0., %1/m, %2. } [(set_attr "sve_type" "sve_int_cvt")] ) @@ -11270,10 +11274,11 @@ (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && > " - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0., %1/m, %2. + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0., %1/z, %2. + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11291,10 +11296,11 @@ (match_operand:SVE_PARTIAL_HSF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (~ & ) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0., %1/m, %2. + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0., %1/z, %2. + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11353,10 +11359,11 @@ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE_BF16" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] bfcvt\t%0.h, %1/m, %2.s + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] bfcvt\t%0.h, %1/z, %2.s + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11493,10 +11500,11 @@ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && > " - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0., %1/m, %2. + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0., %1/z, %2. + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11514,10 +11522,11 @@ (match_operand:SVE_SDF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (~ & ) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0., %1/m, %2. + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0., %1/z, %2. + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0., %1/m, %2. } "&& !rtx_equal_p (operands[1], operands[4])" { diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index f4e8709cb52..995b08f084c 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -3596,10 +3596,11 @@ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] \t%0., %1/m, %2. - [ &w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. - [ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] \t%0., %1/m, %2. + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] \t%0., %1/z, %2. + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + [ &w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;\t%0., %1/m, %2. } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -3620,10 +3621,11 @@ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] \t%0., %1/m, %2. - [ &w , Upl , w , Dz ; yes ] movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. - [ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;\t%0., %1/m, %2. + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] \t%0., %1/m, %2. + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] \t%0., %1/z, %2. + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + [ &w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;\t%0., %1/m, %2. } [(set_attr "sve_type" "sve_fp_cvt")] ) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c new file mode 100644 index 00000000000..b9421e60c91 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c @@ -0,0 +1,28 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2+bf16" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_bf16_f32_z_tied1: +** bfcvt z0\.h, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_z (p0, z0), + z0_res = svcvt_bf16_z (p0, z0)) + +/* +** cvt_bf16_f32_z_untied: +** bfcvt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_z (p0, z4), + z0 = svcvt_bf16_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c new file mode 100644 index 00000000000..7b164b73587 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c @@ -0,0 +1,160 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f16_f32_z_tied1: +** fcvt z0\.h, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_f32_z_tied1, svfloat16_t, svfloat32_t, + z0_res = svcvt_f16_f32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_f32_z_untied: +** fcvt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_f32_z_untied, svfloat16_t, svfloat32_t, + z0 = svcvt_f16_f32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_f64_z_tied1: +** fcvt z0\.h, p0/z, z0\.d +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_f64_z_tied1, svfloat16_t, svfloat64_t, + z0_res = svcvt_f16_f64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_f64_z_untied: +** fcvt z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_f64_z_untied, svfloat16_t, svfloat64_t, + z0 = svcvt_f16_f64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s16_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.h, p0/z, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_s16_z_tied1, svfloat16_t, svint16_t, + z0_res = svcvt_f16_s16_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s16_z_untied: +** scvtf z0\.h, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f16_s16_z_untied, svfloat16_t, svint16_t, + z0 = svcvt_f16_s16_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.h, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_s32_z_tied1, svfloat16_t, svint32_t, + z0_res = svcvt_f16_s32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s32_z_untied: +** scvtf z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_s32_z_untied, svfloat16_t, svint32_t, + z0 = svcvt_f16_s32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.h, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_s64_z_tied1, svfloat16_t, svint64_t, + z0_res = svcvt_f16_s64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s64_z_untied: +** scvtf z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_s64_z_untied, svfloat16_t, svint64_t, + z0 = svcvt_f16_s64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u16_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.h, p0/z, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u16_z_tied1, svfloat16_t, svuint16_t, + z0_res = svcvt_f16_u16_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u16_z_untied: +** ucvtf z0\.h, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f16_u16_z_untied, svfloat16_t, svuint16_t, + z0 = svcvt_f16_u16_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.h, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u32_z_tied1, svfloat16_t, svuint32_t, + z0_res = svcvt_f16_u32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u32_z_untied: +** ucvtf z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_u32_z_untied, svfloat16_t, svuint32_t, + z0 = svcvt_f16_u32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.h, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u64_z_tied1, svfloat16_t, svuint64_t, + z0_res = svcvt_f16_u64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u64_z_untied: +** ucvtf z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_u64_z_untied, svfloat16_t, svuint64_t, + z0 = svcvt_f16_u64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c new file mode 100644 index 00000000000..2f970f2f61b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c @@ -0,0 +1,122 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f32_f16_z_tied1: +** fcvt z0\.s, p0/z, z0\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_f16_z_tied1, svfloat32_t, svfloat16_t, + z0_res = svcvt_f32_f16_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_f16_z_untied: +** fcvt z0\.s, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f32_f16_z_untied, svfloat32_t, svfloat16_t, + z0 = svcvt_f32_f16_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_f64_z_tied1: +** fcvt z0\.s, p0/z, z0\.d +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0_res = svcvt_f32_f64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_f64_z_untied: +** fcvt z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvt_f32_f64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_s32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.s, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_s32_z_tied1, svfloat32_t, svint32_t, + z0_res = svcvt_f32_s32_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_s32_z_untied: +** scvtf z0\.s, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f32_s32_z_untied, svfloat32_t, svint32_t, + z0 = svcvt_f32_s32_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_s64_z_tied1, svfloat32_t, svint64_t, + z0_res = svcvt_f32_s64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_s64_z_untied: +** scvtf z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_s64_z_untied, svfloat32_t, svint64_t, + z0 = svcvt_f32_s64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_u32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.s, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_u32_z_tied1, svfloat32_t, svuint32_t, + z0_res = svcvt_f32_u32_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_u32_z_untied: +** ucvtf z0\.s, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f32_u32_z_untied, svfloat32_t, svuint32_t, + z0 = svcvt_f32_u32_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_u64_z_tied1, svfloat32_t, svuint64_t, + z0_res = svcvt_f32_u64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_u64_z_untied: +** ucvtf z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_u64_z_untied, svfloat32_t, svuint64_t, + z0 = svcvt_f32_u64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c new file mode 100644 index 00000000000..5a202a1f46f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c @@ -0,0 +1,120 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f64_f16_z_tied1: +** fcvt z0\.d, p0/z, z0\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_f16_z_tied1, svfloat64_t, svfloat16_t, + z0_res = svcvt_f64_f16_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_f16_z_untied: +** fcvt z0\.d, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f64_f16_z_untied, svfloat64_t, svfloat16_t, + z0 = svcvt_f64_f16_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_f32_z_tied1: +** fcvt z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_f32_z_tied1, svfloat64_t, svfloat32_t, + z0_res = svcvt_f64_f32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_f32_z_untied: +** fcvt z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_f32_z_untied, svfloat64_t, svfloat32_t, + z0 = svcvt_f64_f32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_s32_z_tied1: +** scvtf z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_s32_z_tied1, svfloat64_t, svint32_t, + z0_res = svcvt_f64_s32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_s32_z_untied: +** scvtf z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_s32_z_untied, svfloat64_t, svint32_t, + z0 = svcvt_f64_s32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.d, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_s64_z_tied1, svfloat64_t, svint64_t, + z0_res = svcvt_f64_s64_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_s64_z_untied: +** scvtf z0\.d, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f64_s64_z_untied, svfloat64_t, svint64_t, + z0 = svcvt_f64_s64_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_u32_z_tied1: +** ucvtf z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_u32_z_tied1, svfloat64_t, svuint32_t, + z0_res = svcvt_f64_u32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_u32_z_untied: +** ucvtf z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_u32_z_untied, svfloat64_t, svuint32_t, + z0 = svcvt_f64_u32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.d, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_u64_z_tied1, svfloat64_t, svuint64_t, + z0_res = svcvt_f64_u64_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_u64_z_untied: +** ucvtf z0\.d, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f64_u64_z_untied, svfloat64_t, svuint64_t, + z0 = svcvt_f64_u64_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c new file mode 100644 index 00000000000..d5d94f227d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c @@ -0,0 +1,29 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtx_f32_f64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** fcvtx z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvtx_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0_res = svcvtx_f32_f64_z (p0, z0), + z0_res = svcvtx_f32_z (p0, z0)) + +/* +** cvtx_f32_f64_z_untied: +** fcvtx z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvtx_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvtx_f32_f64_z (p0, z4), + z0 = svcvtx_f32_z (p0, z4))