if (eax & bit_AVX512BF16)
set_feature (FEATURE_AVX512BF16);
}
+ if (amx_usable)
+ {
+ if (eax & bit_AMX_FP16)
+ set_feature (FEATURE_AMX_FP16);
+ }
}
/* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */
#define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
#define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT
#define OPTION_MASK_ISA2_CMPCCXADD_SET OPTION_MASK_ISA2_CMPCCXADD
+#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
#define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT
#define OPTION_MASK_ISA2_CMPCCXADD_UNSET OPTION_MASK_ISA2_CMPCCXADD
+#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
}
return true;
+ case OPT_mamx_fp16:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
FEATURE_AVXVNNIINT8,
FEATURE_AVXNECONVERT,
FEATURE_CMPCCXADD,
+ FEATURE_AMX_FP16,
CPU_FEATURE_MAX
};
ISA_NAMES_TABLE_ENTRY("avxneconvert", FEATURE_AVXNECONVERT,
P_NONE, "-mavxneconvert")
ISA_NAMES_TABLE_ENTRY("cmpccxadd", FEATURE_CMPCCXADD, P_NONE, "-mcmpccxadd")
+ ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
ISA_NAMES_TABLE_END
hresetintrin.h keylockerintrin.h avxvnniintrin.h
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
- cmpccxaddintrin.h"
+ cmpccxaddintrin.h amxfp16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
--- /dev/null
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxfp16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXFP16INTRIN_H_INCLUDED
+#define _AMXFP16INTRIN_H_INCLUDED
+
+#if defined(__x86_64__)
+#define _tile_dpfp16ps_internal(dst,src1,src2) \
+ __asm__ volatile \
+ ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpfp16ps(dst,src1,src2) \
+ _tile_dpfp16ps_internal (dst,src1,src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_FP16__
+#undef __DISABLE_AMX_FP16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_FP16__ */
+
+#endif /* _AMXFP16INTRIN_H_INCLUDED */
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_CMPCCXADD (1 << 7)
+#define bit_AMX_FP16 (1 << 21)
#define bit_HRESET (1 << 22)
#define bit_AVXIFMA (1 << 23)
def_or_undef (parse_in, "__AVXNECONVERT__");
if (isa_flag2 & OPTION_MASK_ISA2_CMPCCXADD)
def_or_undef (parse_in, "__CMPCCXADD__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16)
+ def_or_undef (parse_in, "__AMX_FP16__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
DEF_PTA(AVXVNNIINT8)
DEF_PTA(AVXNECONVERT)
DEF_PTA(CMPCCXADD)
+DEF_PTA(AMX_FP16)
{ "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
{ "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 },
{ "-mavxneconvert", OPTION_MASK_ISA2_AVXNECONVERT },
- { "-mcmpccxadd", OPTION_MASK_ISA2_CMPCCXADD }
+ { "-mcmpccxadd", OPTION_MASK_ISA2_CMPCCXADD },
+ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
IX86_ATTR_ISA ("avxneconvert", OPT_mavxneconvert),
IX86_ATTR_ISA ("cmpccxadd", OPT_mcmpccxadd),
+ IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
Target Mask(ISA2_CMPCCXADD) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
CMPCCXADD build-in functions and code generation.
+
+mamx-fp16
+Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
+Support AMX-FP16 built-in functions and code generation.
#include <keylockerintrin.h>
+#include <amxfp16intrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
@cindex @code{target("cmpccxadd")} function attribute, x86
Enable/disable the generation of the CMPccXADD instructions.
+@item amx-fp16
+@itemx no-amx-fp16
+@cindex @code{target("amx-fp16")} function attribute, x86
+Enable/disable the generation of the AMX-FP16 instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
--mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd @gol
+-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 @gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@need 200
@itemx -mcmpccxadd
@opindex mcmpccxadd
+@need 200
+@itemx -mamx-fp16
+@opindex mamx-fp16
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD or CLDEMOTE extended instruction
-sets. Each has a corresponding @option{-mno-} option to disable use of these
-instructions.
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16 or CLDEMOTE extended
+instruction sets. Each has a corresponding @option{-mno-} option to disable
+use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@item amx_bf16
Target supports the execution of @code{amx-bf16} instructions.
+@item amx_fp16
+Target supports the execution of @code{amx-fp16} instructions.
+
@item cell_hw
Test system can execute AltiVec and Cell PPU instructions.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
#ifdef AMX_BF16
&& __builtin_cpu_supports ("amx-bf16")
#endif
+#ifdef AMX_FP16
+ && __builtin_cpu_supports ("amx-fp16")
+#endif
#ifdef __linux__
&& request_perm_xtile_data ()
#endif
--- /dev/null
+#ifndef AMX_HELPER_H_INCLUDED
+#define AMX_HELPER_H_INCLUDED
+#if defined(AMX_FP16)
+#include <immintrin.h>
+#include <xmmintrin.h>
+#endif
+#include "amx-check.h"
+
+typedef union
+{
+ _Float16 f16;
+ uint16_t u;
+} union16f_uw;
+
+#if defined(AMX_FP16)
+/* Transformation functions between fp16/float */
+static uint16_t make_f32_fp16 (float f)
+{
+ union16f_uw tmp;
+ __m128 b = _mm_set_ss (f);
+ __m128h a;
+ tmp.f16 = _mm_cvtsh_h (_mm_cvtss_sh (a, b));
+ return tmp.u;
+}
+
+static float make_fp16_f32 (uint16_t fp)
+{
+ union16f_uw tmp;
+ tmp.u = fp;
+ __m128h b = _mm_set_sh (tmp.f16);
+ __m128 a;
+ return _mm_cvtss_f32 (_mm_cvtsh_ss (a, b));
+}
+
+/* Init tile buffer with fp16 pairs */
+void init_fp16_max_tile_buffer (uint8_t* buf)
+{
+ int i, j;
+ uint16_t* ptr = (uint16_t *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 32; j++)
+ {
+ float f = 2.5f * i + 1.25f * j;
+ ptr[i * 32 + j] = make_f32_fp16 (f);
+ }
+}
+
+/* Init tile fp16 pair buffer with zero */
+void init_fp16_max_tile_zero_buffer (uint8_t* buf)
+{
+ int i, j;
+ uint16_t* ptr = (uint16_t *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 32; j++)
+ ptr[i * 32 + j] = make_f32_fp16 (0.0f);
+}
+#endif
+
+#endif
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-fp16" } */
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
+#include <immintrin.h>
+
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+void TEST ()
+{
+ _tile_dpfp16ps (TMM1, TMM2, TMM3);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-O2 -mamx-fp16 -masm=intel" } */
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
+#include <immintrin.h>
+
+void TEST ()
+{
+ _tile_dpfp16ps (1, 2, 3);
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_tile } */
+/* { dg-require-effective-target amx_fp16 } */
+/* { dg-require-effective-target avx512fp16 } */
+/* { dg-options "-O2 -mamx-tile -mamx-fp16 -mavx512fp16" } */
+#define AMX_FP16
+#define DO_TEST test_amx_fp16_dpfp16ps
+void test_amx_fp16_dpfp16ps ();
+#include "amx-helper.h"
+
+void calc_matrix_dpfp16ps (__tile *dst, __tile *src1, __tile *src2)
+{
+ uint16_t *src1_buf = (uint16_t *)src1->buf;
+ uint16_t *src2_buf = (uint16_t *)src2->buf;
+ float *dst_buf = (float *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 2; t+=2)
+ {
+ dst_buf[i * K + k] +=
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) +
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
+ }
+
+}
+
+void test_amx_fp16_dpfp16ps ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
+
+ init_fp16_max_tile_buffer (tmp_dst_buf);
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
+
+ calc_matrix_dpfp16ps (&dst, &src1, &src2);
+
+ _tile_dpfp16ps (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_float_tile_register (&dst_ref, &dst))
+ abort ();
+}
extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
extern void test_avxneconvert (void) __attribute__((__target__("avxneconvert")));
extern void test_cmpccxadd (void) __attribute__((__target__("cmpccxadd")));
+extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8")));
extern void test_no_avxneconvert (void) __attribute__((__target__("no-avxneconvert")));
extern void test_no_cmpccxadd (void) __attribute__((__target__("no-cmpccxadd")));
+extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1)
#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16")
#include <x86intrin.h>
} "-mamx-bf16" ]
}
+# Return 1 if amx-fp16 instructions can be compiled.
+proc check_effective_target_amx_fp16 { } {
+ return [check_no_compiler_messages amx_fp16 object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tdpfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
+ }
+ } "-mamx-fp16" ]
+}
+
# Return 1 if vpclmulqdq instructions can be compiled.
proc check_effective_target_vpclmulqdq { } {
return [check_no_compiler_messages vpclmulqdq object {