1 /* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
2 /* { dg-require-effective-target aarch64_asm_bf16_ok } */
3 /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
5 #include "test_sve_acle.h"
8 ** cvt_bf16_f32_m_tied1:
9 ** bfcvt z0\.h, p0/m, z4\.s
12 TEST_DUAL_Z (cvt_bf16_f32_m_tied1
, svbfloat16_t
, svfloat32_t
,
13 z0
= svcvt_bf16_f32_m (z0
, p0
, z4
),
14 z0
= svcvt_bf16_m (z0
, p0
, z4
))
17 ** cvt_bf16_f32_m_tied2:
18 ** mov (z[0-9]+)\.d, z0\.d
20 ** bfcvt z0\.h, p0/m, \1\.s
23 TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2
, svbfloat16_t
, svfloat32_t
,
24 z0_res
= svcvt_bf16_f32_m (z4
, p0
, z0
),
25 z0_res
= svcvt_bf16_m (z4
, p0
, z0
))
28 ** cvt_bf16_f32_m_untied:
30 ** bfcvt z0\.h, p0/m, z4\.s
33 TEST_DUAL_Z (cvt_bf16_f32_m_untied
, svbfloat16_t
, svfloat32_t
,
34 z0
= svcvt_bf16_f32_m (z1
, p0
, z4
),
35 z0
= svcvt_bf16_m (z1
, p0
, z4
))
38 ** cvt_bf16_f32_z_tied1:
39 ** mov (z[0-9]+)\.d, z0\.d
40 ** movprfx z0\.s, p0/z, \1\.s
41 ** bfcvt z0\.h, p0/m, \1\.s
44 TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1
, svbfloat16_t
, svfloat32_t
,
45 z0_res
= svcvt_bf16_f32_z (p0
, z0
),
46 z0_res
= svcvt_bf16_z (p0
, z0
))
49 ** cvt_bf16_f32_z_untied:
50 ** movprfx z0\.s, p0/z, z4\.s
51 ** bfcvt z0\.h, p0/m, z4\.s
54 TEST_DUAL_Z (cvt_bf16_f32_z_untied
, svbfloat16_t
, svfloat32_t
,
55 z0
= svcvt_bf16_f32_z (p0
, z4
),
56 z0
= svcvt_bf16_z (p0
, z4
))
59 ** cvt_bf16_f32_x_tied1:
60 ** bfcvt z0\.h, p0/m, z0\.s
63 TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1
, svbfloat16_t
, svfloat32_t
,
64 z0_res
= svcvt_bf16_f32_x (p0
, z0
),
65 z0_res
= svcvt_bf16_x (p0
, z0
))
68 ** cvt_bf16_f32_x_untied:
69 ** bfcvt z0\.h, p0/m, z4\.s
72 TEST_DUAL_Z (cvt_bf16_f32_x_untied
, svbfloat16_t
, svfloat32_t
,
73 z0
= svcvt_bf16_f32_x (p0
, z4
),
74 z0
= svcvt_bf16_x (p0
, z4
))
77 ** ptrue_cvt_bf16_f32_x_tied1:
79 ** ptrue p[0-9]+\.b[^\n]*
83 TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1
, svbfloat16_t
, svfloat32_t
,
84 z0_res
= svcvt_bf16_f32_x (svptrue_b32 (), z0
),
85 z0_res
= svcvt_bf16_x (svptrue_b32 (), z0
))
88 ** ptrue_cvt_bf16_f32_x_untied:
90 ** ptrue p[0-9]+\.b[^\n]*
94 TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied
, svbfloat16_t
, svfloat32_t
,
95 z0
= svcvt_bf16_f32_x (svptrue_b32 (), z4
),
96 z0
= svcvt_bf16_x (svptrue_b32 (), z4
))