set_feature (FEATURE_AMX_TRANSPOSE);
if (eax & bit_AMX_FP8)
set_feature (FEATURE_AMX_FP8);
+ if (eax & bit_AMX_MOVRS)
+ set_feature (FEATURE_AMX_MOVRS);
}
}
#define OPTION_MASK_ISA2_AMX_FP8_SET \
(OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_FP8)
#define OPTION_MASK_ISA2_MOVRS_SET OPTION_MASK_ISA2_MOVRS
+#define OPTION_MASK_ISA2_AMX_MOVRS_SET \
+ (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_MOVRS)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
| OPTION_MASK_ISA2_AMX_BF16_UNSET | OPTION_MASK_ISA2_AMX_FP16_UNSET \
| OPTION_MASK_ISA2_AMX_COMPLEX_UNSET | OPTION_MASK_ISA2_AMX_AVX512_UNSET \
| OPTION_MASK_ISA2_AMX_TF32_UNSET | OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET \
- | OPTION_MASK_ISA2_AMX_FP8_UNSET)
+ | OPTION_MASK_ISA2_AMX_FP8_UNSET | OPTION_MASK_ISA2_AMX_MOVRS_UNSET)
#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
#define OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET OPTION_MASK_ISA2_AMX_TRANSPOSE
#define OPTION_MASK_ISA2_AMX_FP8_UNSET OPTION_MASK_ISA2_AMX_FP8
#define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS
+#define OPTION_MASK_ISA2_AMX_MOVRS_UNSET OPTION_MASK_ISA2_AMX_MOVRS
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
}
return true;
+ case OPT_mamx_movrs:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_MOVRS_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_MOVRS_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_MOVRS_UNSET;
+ opts->x_ix86_isa_flags2_explicit |=
+ OPTION_MASK_ISA2_AMX_MOVRS_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
FEATURE_AMX_TRANSPOSE,
FEATURE_AMX_FP8,
FEATURE_MOVRS,
+ FEATURE_AMX_MOVRS,
CPU_FEATURE_MAX
};
P_NONE, "-mamx-transpose")
ISA_NAMES_TABLE_ENTRY("amx-fp8", FEATURE_AMX_FP8, P_NONE, "-mamx-fp8")
ISA_NAMES_TABLE_ENTRY("movrs", FEATURE_MOVRS, P_NONE, "-mmovrs")
+ ISA_NAMES_TABLE_ENTRY("amx-movrs", FEATURE_AMX_MOVRS, P_NONE, "-mamx-movrs")
ISA_NAMES_TABLE_END
avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
avx10_2copyintrin.h amxavx512intrin.h amxtf32intrin.h
- amxtransposeintrin.h amxfp8intrin.h movrsintrin.h"
+ amxtransposeintrin.h amxfp8intrin.h movrsintrin.h
+ amxmovrsintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
--- /dev/null
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMX_MOVRSINTRIN_H_INCLUDED
+#define _AMX_MOVRSINTRIN_H_INCLUDED
+
+#if defined(__x86_64__)
+
+#if !defined(__AMX_MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("amx-movrs")
+#define __DISABLE_AMX_MOVRS__
+#endif /* __AMX_MOVRS__ */
+
+/* Emit a tileloaddrs instruction whose tile operand is %tmm<TDST> and
+   whose memory operand is BASE + STRIDE * 1.  The {att|intel} alternatives
+   supply the operand order for both assembler dialects.  STRIDE is
+   converted to __PTRDIFF_TYPE__ rather than long so that the index
+   register always has the same width as the BASE pointer register, also
+   on targets where long is narrower than a pointer.  */
+#define _tile_loaddrs_internal(tdst, base, stride) \
+__asm__ volatile \
+  ("{tileloaddrs\t(%0,%1,1), %%tmm"#tdst \
+   "|tileloaddrs\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+/* Same as _tile_loaddrs_internal, but emits tileloaddrst1.  */
+#define _tile_loaddrst1_internal(tdst, base, stride) \
+__asm__ volatile \
+  ("{tileloaddrst1\t(%0,%1,1), %%tmm"#tdst \
+   "|tileloaddrst1\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+/* User-visible wrappers.  The extra macro level lets an argument such as
+   a TMM1 macro be expanded to its number before the *_internal macro
+   stringifies it with #tdst.  */
+#define _tile_loaddrs(tdst, base, stride) \
+  _tile_loaddrs_internal(tdst, base, stride)
+
+#define _tile_loaddrst1(tdst, base, stride) \
+  _tile_loaddrst1_internal(tdst, base, stride)
+
+#ifdef __DISABLE_AMX_MOVRS__
+#undef __DISABLE_AMX_MOVRS__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_MOVRS__ */
+
+#if !defined(__AMX_MOVRS__) || !defined (__AMX_TRANSPOSE__)
+#pragma GCC push_options
+#pragma GCC target("amx-movrs,amx-transpose")
+#define __DISABLE_AMX_MOVRS_TRANSPOSE__
+#endif /* !__AMX_MOVRS__ || !__AMX_TRANSPOSE__ */
+
+/* Each *_internal macro below emits the instruction it is named after,
+   with tile operand %tmm<TDST> and memory operand BASE + STRIDE * 1.  The
+   {att|intel} alternatives supply the operand order for both assembler
+   dialects.  STRIDE is converted to __PTRDIFF_TYPE__ rather than long so
+   that the index register always has the same width as the BASE pointer
+   register, also on targets where long is narrower than a pointer.  */
+#define _tile_2rpntlvwz0rs_internal(tdst, base, stride) \
+  __asm__ volatile \
+  ("{t2rpntlvwz0rs\t(%0,%1,1), %%tmm"#tdst \
+   "|t2rpntlvwz0rs\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+#define _tile_2rpntlvwz0rst1_internal(tdst, base, stride) \
+  __asm__ volatile \
+  ("{t2rpntlvwz0rst1\t(%0,%1,1), %%tmm"#tdst \
+   "|t2rpntlvwz0rst1\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+#define _tile_2rpntlvwz1rs_internal(tdst, base, stride) \
+  __asm__ volatile \
+  ("{t2rpntlvwz1rs\t(%0,%1,1), %%tmm"#tdst \
+   "|t2rpntlvwz1rs\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+#define _tile_2rpntlvwz1rst1_internal(tdst, base, stride) \
+  __asm__ volatile \
+  ("{t2rpntlvwz1rst1\t(%0,%1,1), %%tmm"#tdst \
+   "|t2rpntlvwz1rst1\t%%tmm"#tdst", [%0+%1*1]}" \
+   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))
+
+/* User-visible wrappers.  The extra macro level lets an argument such as
+   a TMM0 macro be expanded to its number before the *_internal macro
+   stringifies it with #tdst.  */
+#define _tile_2rpntlvwz0rs(tdst, base, stride) \
+  _tile_2rpntlvwz0rs_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz0rst1(tdst, base, stride) \
+  _tile_2rpntlvwz0rst1_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz1rs(tdst, base, stride) \
+  _tile_2rpntlvwz1rs_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz1rst1(tdst, base, stride) \
+  _tile_2rpntlvwz1rst1_internal(tdst, base, stride)
+
+#ifdef __DISABLE_AMX_MOVRS_TRANSPOSE__
+#undef __DISABLE_AMX_MOVRS_TRANSPOSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_MOVRS_TRANSPOSE__ */
+
+#endif /* __x86_64__ */
+
+#endif /* _AMX_MOVRSINTRIN_H_INCLUDED */
#define bit_AMX_TRANSPOSE (1 << 5)
#define bit_AMX_TF32 (1 << 6)
#define bit_AMX_AVX512 (1 << 7)
+#define bit_AMX_MOVRS (1 << 8)
/* AVX10 sub leaf (%eax == 0x24) */
/* %ebx */
def_or_undef (parse_in, "__AMX_FP8__");
if (isa_flag2 & OPTION_MASK_ISA2_MOVRS)
def_or_undef (parse_in, "__MOVRS__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_MOVRS)
+ def_or_undef (parse_in, "__AMX_MOVRS__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
DEF_PTA(AMX_TRANSPOSE)
DEF_PTA(AMX_FP8)
DEF_PTA(MOVRS)
+DEF_PTA(AMX_MOVRS)
{ "-mamx-tf32", OPTION_MASK_ISA2_AMX_TF32 },
{ "-mamx-transpose", OPTION_MASK_ISA2_AMX_TRANSPOSE },
{ "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 },
- { "-mmovrs", OPTION_MASK_ISA2_MOVRS }
+ { "-mmovrs", OPTION_MASK_ISA2_MOVRS },
+ { "-mamx-movrs", OPTION_MASK_ISA2_AMX_MOVRS }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("amx-transpose", OPT_mamx_transpose),
IX86_ATTR_ISA ("amx-fp8", OPT_mamx_fp8),
IX86_ATTR_ISA ("movrs", OPT_mmovrs),
+ IX86_ATTR_ISA ("amx-movrs", OPT_mamx_movrs),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
mmovrs
Target Mask(ISA2_MOVRS) Var(ix86_isa_flags2) Save
Support MOVRS built-in functions and code generation.
+
+mamx-movrs
+Target Mask(ISA2_AMX_MOVRS) Var(ix86_isa_flags2) Save
+Support AMX-MOVRS built-in functions and code generation.
mmovrs
UrlSuffix(gcc/x86-Options.html#index-mmovrs)
+mamx-movrs
+UrlSuffix(gcc/x86-Options.html#index-mamx-movrs)
+
#include <movrsintrin.h>
+#include <amxmovrsintrin.h>
#endif /* _IMMINTRIN_H_INCLUDED */
@itemx no-movrs
Enable/disable the generation of the MOVRS instructions.
+@cindex @code{target("amx-movrs")} function attribute, x86
+@item amx-movrs
+@itemx no-amx-movrs
+Enable/disable the generation of the AMX-MOVRS instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf
-musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256
--mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs
+-mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs -mamx-movrs
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@need 200
@opindex mmovrs
@itemx -mmovrs
+@need 200
+@opindex mamx-movrs
+@itemx -mamx-movrs
These switches enable the use of instructions in the MMX, SSE,
AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES,
PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT,
CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512,
SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512, AMX-TF32, AMX-TRANSPOSE,
-AMX-FP8, MOVRS or CLDEMOTE extended instruction sets. Each has a corresponding
-@option{-mno-} option to disable use of these instructions.
+AMX-FP8, MOVRS, AMX-MOVRS or CLDEMOTE extended instruction sets. Each has a
+corresponding @option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@item amx_fp16
Target supports the execution of @code{amx-fp16} instructions.
+@item amx_movrs
+Target supports the execution of @code{amx-movrs} instructions.
+
@item amx_tf32
Target supports the execution of @code{amx-tf32} instructions.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
#ifdef AMX_FP8
&& __builtin_cpu_supports ("amx-fp8")
#endif
+#ifdef AMX_MOVRS
+ && __builtin_cpu_supports ("amx-movrs")
+#endif
#ifdef __linux__
&& request_perm_xtile_data ()
#endif
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-movrs -mamx-transpose" } */
+/* { dg-final { scan-assembler "tileloaddrs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tileloaddrst1\[ \\t]+\[^\n\]*\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz0rs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz0rst1\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz1rs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz1rst1\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */
+#include <immintrin.h>
+
+extern const void* base;
+extern const int stride;
+
+#define TMM0 0
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+/* Expand every AMX-MOVRS intrinsic macro once so that each dg-final
+   scan-assembler directive at the top of this file has an instruction
+   to match in the AT&T-syntax output.  */
+void TEST()
+{
+ _tile_loaddrs (TMM1, base, stride);
+ _tile_loaddrst1 (TMM1, base, stride);
+ _tile_2rpntlvwz0rs (TMM0, base, stride);
+ _tile_2rpntlvwz0rst1 (TMM1, base, stride);
+ _tile_2rpntlvwz1rs (TMM2, base, stride);
+ _tile_2rpntlvwz1rst1 (TMM3, base, stride);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-O2 -mamx-movrs -mamx-transpose -masm=intel" } */
+/* { dg-final { scan-assembler-times "tileloaddrs\[ \\t]%tmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "tileloaddrst1\[ \\t]%tmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler "t2rpntlvwz0rs\[ \\t]%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz0rst1\[ \\t]%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz1rs\[ \\t]%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "t2rpntlvwz1rst1\[ \\t]%tmm\[0-9\]" } } */
+#include <immintrin.h>
+
+extern const void* base;
+extern const int stride;
+
+#define TMM0 0
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+/* Expand every AMX-MOVRS intrinsic macro once so that each dg-final
+   scan-assembler directive at the top of this file has an instruction
+   to match in the -masm=intel output.  */
+void TEST()
+{
+ _tile_loaddrs (TMM1, base, stride);
+ _tile_loaddrst1 (TMM1, base, stride);
+ _tile_2rpntlvwz0rs (TMM0, base, stride);
+ _tile_2rpntlvwz0rst1 (TMM1, base, stride);
+ _tile_2rpntlvwz1rs (TMM2, base, stride);
+ _tile_2rpntlvwz1rst1 (TMM3, base, stride);
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_movrs } */
+/* { dg-require-effective-target amx_transpose } */
+/* { dg-require-effective-target avx512fp16 } */
+/* { dg-options "-O2 -mamx-movrs -mamx-transpose -mavx512fp16 -mavx512bf16" } */
+#define AMX_MOVRS
+#define AMX_TRANSPOSE
+#define DO_TEST test_amx_movrs_t2rpntlvw
+void test_amx_movrs_t2rpntlvw ();
+#include "amx-helper.h"
+
+/* Fill SRC from BUFFER with init_pair_tile_src, then invoke the AMX-MOVRS
+   intrinsic _tile_2rpntlvwz<ZTYPE>rs<WTYPE> on tile TMM_NUM.  WTYPE is
+   either empty or t1.  The "rs" infix is required: without it the macro
+   would expand to the plain AMX-TRANSPOSE intrinsics and this test would
+   never execute an AMX-MOVRS instruction.  */
+#define init_pair_tile_reg_and_src_z_t1(tmm_num, src, buffer, ztype, wtype)\
+{ \
+  init_pair_tile_src (tmm_num, &src, buffer, ztype); \
+  _tile_2rpntlvwz##ztype##rs##wtype (tmm_num, buffer, _STRIDE);\
+}
+
+/* Run each of the four t2rpntlvwz*rs* intrinsics, store tiles 0 and 1 back
+   to memory and abort if they do not match the reference pair SRC.  */
+void test_amx_movrs_t2rpntlvw ()
+{
+  __tilecfg_u cfg;
+  __tilepair src;
+  __tile ref_0, ref_1;
+  uint8_t buffer[2048];
+  int i;
+
+  init_tile_config (&cfg);
+
+  for (i = 0; i < 2048; i++)
+    buffer[i] = i % 256;
+
+  /* Check t2rpntlvwz0rs.  */
+  init_pair_tile_reg_and_src_z_t1 (0, src, buffer, 0,);
+  _tile_stored (0, ref_0.buf, _STRIDE);
+  _tile_stored (1, ref_1.buf, _STRIDE);
+  if (!check_pair_tile_register (&ref_0, &ref_1, &src))
+    abort ();
+
+  /* Check t2rpntlvwz1rs.  */
+  init_pair_tile_reg_and_src_z_t1 (1, src, buffer, 1,);
+  _tile_stored (0, ref_0.buf, _STRIDE);
+  _tile_stored (1, ref_1.buf, _STRIDE);
+  if (!check_pair_tile_register (&ref_0, &ref_1, &src))
+    abort ();
+
+  /* Check t2rpntlvwz0rst1.  */
+  init_pair_tile_reg_and_src_z_t1 (0, src, buffer, 0, t1);
+  _tile_stored (0, ref_0.buf, _STRIDE);
+  _tile_stored (1, ref_1.buf, _STRIDE);
+  if (!check_pair_tile_register (&ref_0, &ref_1, &src))
+    abort ();
+
+  /* Check t2rpntlvwz1rst1.  */
+  init_pair_tile_reg_and_src_z_t1 (1, src, buffer, 1, t1);
+  _tile_stored (0, ref_0.buf, _STRIDE);
+  _tile_stored (1, ref_1.buf, _STRIDE);
+  if (!check_pair_tile_register (&ref_0, &ref_1, &src))
+    abort ();
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_movrs } */
+/* { dg-options "-O2 -mamx-movrs" } */
+#define AMX_MOVRS
+#define DO_TEST test_amx_movrs_tileloaddrs
+void test_amx_movrs_tileloaddrs ();
+#include "amx-helper.h"
+
+/* Load a tile with _tile_loaddrs and another with _tile_loaddrst1, store
+   each back to memory with _tile_stored, and abort if the stored tile
+   does not match the source it was loaded from.  */
+void test_amx_movrs_tileloaddrs ()
+{
+  __tilecfg_u cfg;
+  __tile reg_src0, reg_src1, reg_ref0, reg_ref1;
+  uint8_t buffer[1024];
+  int i;
+
+  for (i = 0; i < 1024; i++)
+    buffer[i] = i % 256;
+
+  init_tile_config (&cfg);
+
+  /* Check tileloaddrs on tile 0.  */
+  init_tile_src (0, &reg_src0, buffer);
+  _tile_loaddrs (0, reg_src0.buf, _STRIDE);
+  _tile_stored (0, reg_ref0.buf, _STRIDE);
+  if (!check_tile_register (&reg_ref0, &reg_src0))
+    abort ();
+
+  /* Check tileloaddrst1 on tile 1.  */
+  init_tile_src (1, &reg_src1, buffer);
+  _tile_loaddrst1 (1, reg_src1.buf, _STRIDE);
+  _tile_stored (1, reg_ref1.buf, _STRIDE);
+  if (!check_tile_register (&reg_ref1, &reg_src1))
+    abort ();
+}
extern void test_amx_transpose (void) __attribute__((__target__("amx-transpose")));
extern void test_amx_fp8 (void) __attribute__((__target__("amx-fp8")));
extern void test_movrs (void) __attribute__((__target__("movrs")));
+extern void test_amx_movrs (void) __attribute__((__target__("amx-movrs")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq")));
extern void test_no_amx_transpose (void) __attribute__((__target__("no-amx-transpose")));
extern void test_no_amx_fp8 (void) __attribute__((__target__("no-amx-fp8")));
extern void test_no_movrs (void) __attribute__((__target__("no-movrs")));
+extern void test_no_amx_movrs (void) __attribute__((__target__("no-amx-movrs")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs")
#include <x86intrin.h>
} "-mamx-fp8" ]
}
+# Return 1 if amx-movrs instructions can be compiled.
+# The probe builds, with -mamx-movrs, an object file from a function whose
+# only statement is an inline-asm tileloaddrs instruction, and passes the
+# result of check_no_compiler_messages straight through.
+proc check_effective_target_amx_movrs { } {
+ return [check_no_compiler_messages amx_movrs object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tileloaddrs\t(%%r9), %%tmm3" ::);
+ }
+ } "-mamx-movrs" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {