}
else
op1 = expand_normal (arg1);
- /* Argument 1 must be either zero or one. */
- if (INTVAL (op1) != 0 && INTVAL (op1) != 1)
+ /* Argument 1 must be 0, 1 or 2. */
+ if (INTVAL (op1) < 0 || INTVAL (op1) > 2)
{
warning (0, "invalid second argument to %<__builtin_prefetch%>;"
" using zero");
set_feature (FEATURE_RAOINT);
if (edx & bit_USER_MSR)
set_feature (FEATURE_USER_MSR);
+ if (eax & bit_MOVRS)
+ set_feature (FEATURE_MOVRS);
if (avx_usable)
{
if (eax & bit_AVXVNNI)
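
(Illustrative, not part of the patch: a standalone runtime check, assuming
MOVRS is enumerated in CPUID.(EAX=7,ECX=1):EAX bit 31, as the bit_MOVRS
definition added later in this patch encodes.)

#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;
  /* Leaf 7, sub-leaf 1; MOVRS is reported in %eax bit 31 (bit_MOVRS).  */
  if (__get_cpuid_count (7, 1, &eax, &ebx, &ecx, &edx)
      && (eax & (1u << 31)))
    puts ("MOVRS supported");
  return 0;
}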
(OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_TRANSPOSE)
#define OPTION_MASK_ISA2_AMX_FP8_SET \
(OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_FP8)
+#define OPTION_MASK_ISA2_MOVRS_SET OPTION_MASK_ISA2_MOVRS
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
#define OPTION_MASK_ISA2_AMX_TF32_UNSET OPTION_MASK_ISA2_AMX_TF32
#define OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET OPTION_MASK_ISA2_AMX_TRANSPOSE
#define OPTION_MASK_ISA2_AMX_FP8_UNSET OPTION_MASK_ISA2_AMX_FP8
+#define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
   as -mno-sse4.1. */
}
return true;
+ case OPT_mmovrs:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_MOVRS_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_MOVRS_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_MOVRS_UNSET;
+ opts->x_ix86_isa_flags2_explicit |=
+ OPTION_MASK_ISA2_MOVRS_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
FEATURE_AMX_TF32,
FEATURE_AMX_TRANSPOSE,
FEATURE_AMX_FP8,
+ FEATURE_MOVRS,
CPU_FEATURE_MAX
};
ISA_NAMES_TABLE_ENTRY("amx-transpose", FEATURE_AMX_TRANSPOSE,
P_NONE, "-mamx-transpose")
ISA_NAMES_TABLE_ENTRY("amx-fp8", FEATURE_AMX_FP8, P_NONE, "-mamx-fp8")
+ ISA_NAMES_TABLE_ENTRY("movrs", FEATURE_MOVRS, P_NONE, "-mmovrs")
ISA_NAMES_TABLE_END
avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
avx10_2copyintrin.h amxavx512intrin.h amxtf32intrin.h
- amxtransposeintrin.h amxfp8intrin.h"
+ amxtransposeintrin.h amxfp8intrin.h movrsintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
#define bit_AMX_FP16 (1 << 21)
#define bit_HRESET (1 << 22)
#define bit_AVXIFMA (1 << 23)
+#define bit_MOVRS (1 << 31)
/* %edx */
#define bit_AVXVNNIINT8 (1 << 4)
# SM4 builtins
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
+
+# MOVRS builtins
+DEF_FUNCTION_TYPE (CHAR, PCCHAR)
+DEF_FUNCTION_TYPE (SHORT, PCSHORT)
+DEF_FUNCTION_TYPE (INT, PCINT)
+DEF_FUNCTION_TYPE (INT64, PCINT64)
BDESC (0, OPTION_MASK_ISA2_PREFETCHI, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT)
BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT)
+/* MOVRS */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsqi, "__builtin_ia32_movrsqi", IX86_BUILTIN_MOVRSQI, UNKNOWN, (int) CHAR_FTYPE_PCCHAR)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrshi, "__builtin_ia32_movrshi", IX86_BUILTIN_MOVRSHI, UNKNOWN, (int) SHORT_FTYPE_PCSHORT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrssi, "__builtin_ia32_movrssi", IX86_BUILTIN_MOVRSSI, UNKNOWN, (int) INT_FTYPE_PCINT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsdi, "__builtin_ia32_movrsdi", IX86_BUILTIN_MOVRSDI, UNKNOWN, (int) INT64_FTYPE_PCINT64)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsbv64qi_mask, "__builtin_ia32_vmovrsb512_mask", IX86_BUILTIN_VMOVRSB_512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsdv16si_mask, "__builtin_ia32_vmovrsd512_mask", IX86_BUILTIN_VMOVRSD_512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsqv8di_mask, "__builtin_ia32_vmovrsq512_mask", IX86_BUILTIN_VMOVRSQ_512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrswv32hi_mask, "__builtin_ia32_vmovrsw512_mask", IX86_BUILTIN_VMOVRSW_512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv32qi_mask, "__builtin_ia32_vmovrsb256_mask", IX86_BUILTIN_VMOVRSB_256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv8si_mask, "__builtin_ia32_vmovrsd256_mask", IX86_BUILTIN_VMOVRSD_256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv4di_mask, "__builtin_ia32_vmovrsq256_mask", IX86_BUILTIN_VMOVRSQ_256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv16hi_mask, "__builtin_ia32_vmovrsw256_mask", IX86_BUILTIN_VMOVRSW_256, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv16qi_mask, "__builtin_ia32_vmovrsb128_mask", IX86_BUILTIN_VMOVRSB_128, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv4si_mask, "__builtin_ia32_vmovrsd128_mask", IX86_BUILTIN_VMOVRSD_128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv2di_mask, "__builtin_ia32_vmovrsq128_mask", IX86_BUILTIN_VMOVRSQ_128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv8hi_mask, "__builtin_ia32_vmovrsw128_mask", IX86_BUILTIN_VMOVRSW_128, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
+
BDESC_END (SPECIAL_ARGS, PURE_ARGS)
/* AVX */
def_or_undef (parse_in, "__AMX_TRANSPOSE__");
if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP8)
def_or_undef (parse_in, "__AMX_FP8__");
+ if (isa_flag2 & OPTION_MASK_ISA2_MOVRS)
+ def_or_undef (parse_in, "__MOVRS__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
klass = load;
memory = 0;
break;
+ case CHAR_FTYPE_PCCHAR:
+ case SHORT_FTYPE_PCSHORT:
+ case INT_FTYPE_PCINT:
+ case INT64_FTYPE_PCINT64:
case UINT64_FTYPE_PUNSIGNED:
case V2DI_FTYPE_PV2DI:
case V4DI_FTYPE_PV4DI:
DEF_PTA(AMX_TF32)
DEF_PTA(AMX_TRANSPOSE)
DEF_PTA(AMX_FP8)
+DEF_PTA(MOVRS)
{ "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 },
{ "-mamx-tf32", OPTION_MASK_ISA2_AMX_TF32 },
{ "-mamx-transpose", OPTION_MASK_ISA2_AMX_TRANSPOSE },
- { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 }
+ { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 },
+ { "-mmovrs", OPTION_MASK_ISA2_MOVRS }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32),
IX86_ATTR_ISA ("amx-transpose", OPT_mamx_transpose),
IX86_ATTR_ISA ("amx-fp8", OPT_mamx_fp8),
+ IX86_ATTR_ISA ("movrs", OPT_mmovrs),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
;; For AMX-TILE
UNSPECV_LDTILECFG
UNSPECV_STTILECFG
+
+ ;; For MOVRS support
+ UNSPECV_MOVRS
])
;; Constants to represent rounding modes in the ROUND instruction
[(prefetch (match_operand 0 "address_operand")
(match_operand:SI 1 "const_int_operand")
(match_operand:SI 2 "const_int_operand"))]
- "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW"
+ "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW
+ || TARGET_MOVRS"
{
- bool write = operands[1] != const0_rtx;
+ int write = INTVAL (operands[1]);
int locality = INTVAL (operands[2]);
gcc_assert (IN_RANGE (locality, 0, 3));
+ gcc_assert (IN_RANGE (write, 0, 2));
/* Use 3dNOW prefetch in case we are asking for write prefetch not
supported by SSE counterpart (non-SSE2 athlon machines) or the
SSE prefetch is not available (K6 machines). Otherwise use SSE
prefetch as it allows specifying of locality. */
- if (write)
+ if (write == 1)
{
if (TARGET_PRFCHW)
	operands[2] = GEN_INT (3);
else if (TARGET_PREFETCH_SSE)
operands[1] = const0_rtx;
      else
{
gcc_assert (TARGET_3DNOW);
operands[2] = GEN_INT (3);
}
    }
+  else if (write == 2)
+    {
+      if (TARGET_MOVRS)
+	;
+      else if (TARGET_PREFETCH_SSE)
+	operands[1] = const0_rtx;
+      else
+	{
+	  gcc_assert (TARGET_3DNOW);
+	  operands[1] = const0_rtx;
+	  operands[2] = GEN_INT (3);
+	}
+    }
else
{
(symbol_ref "memory_address_length (operands[0], false)"))
(set_attr "memory" "none")])
+(define_insn "*prefetch_rst2"
+ [(prefetch (match_operand 0 "address_operand" "p")
+ (const_int 2)
+ (const_int 1))]
+ "TARGET_MOVRS"
+ "prefetchrst2\t%a0"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0], false)"))
+ (set_attr "memory" "none")])
+
(define_insn "sse4_2_crc32<mode>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
(set_attr "prefix" "maybe_evex")
(set_attr "memory" "store")])
+(define_insn "movrs<mode>"
+ [(set (match_operand:SWI1248x 0 "register_operand" "=r")
+ (unspec_volatile:SWI1248x
+ [(match_operand:SWI1248x 1 "memory_operand" "m")]
+ UNSPECV_MOVRS))]
+ "TARGET_MOVRS && TARGET_64BIT"
+ "movrs<imodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "orig")
+ (set_attr "type" "other")
+ (set_attr "mode" "<MODE>")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
mamx-fp8
Target Mask(ISA2_AMX_FP8) Var(ix86_isa_flags2) Save
Support AMX-FP8 built-in functions and code generation.
+
+mmovrs
+Target Mask(ISA2_MOVRS) Var(ix86_isa_flags2) Save
+Support MOVRS built-in functions and code generation.
mamx-fp8
UrlSuffix(gcc/x86-Options.html#index-mamx-fp8)
+mmovrs
+UrlSuffix(gcc/x86-Options.html#index-mmovrs)
+
#include <avx10_2copyintrin.h>
+#include <movrsintrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
--- /dev/null
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <movrsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _MOVRSINTRIN_H_INCLUDED
+#define _MOVRSINTRIN_H_INCLUDED
+
+#ifndef __MOVRS__
+#pragma GCC push_options
+#pragma GCC target("movrs")
+#define __DISABLE_MOVRS__
+#endif /* __MOVRS__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchrs (void* __P)
+{
+ __builtin_ia32_prefetch (__P, 2, 1, 0 /* _MM_HINT_RST2 */);
+}
+
+#ifdef __x86_64__
+
+extern __inline char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i8 (void const * __P)
+{
+ return (char) __builtin_ia32_movrsqi ((const char *) __P);
+}
+
+extern __inline short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i16 (void const * __P)
+{
+ return (short) __builtin_ia32_movrshi ((const short *) __P);
+}
+
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i32 (void const * __P)
+{
+ return (int) __builtin_ia32_movrssi ((const int *) __P);
+}
+
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i64 (void const * __P)
+{
+ return (long long) __builtin_ia32_movrsdi ((const long long *) __P);
+}
+
+#endif /* __x86_64__ */
+
+#ifdef __DISABLE_MOVRS__
+#undef __DISABLE_MOVRS__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS__ */
+
+#ifdef __x86_64__
+
+#if !defined (__AVX10_2_256__) || !defined (__MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2,movrs")
+#define __DISABLE_MOVRS_AVX10_2__
+#endif /* __MOVRS_AVX10_2__ */
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi8 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi8 (__m256i __D, __mmask32 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi) __D,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi8 (__mmask32 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi32 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi32 (__m256i __D, __mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi32 (__mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi64 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi64 (__m256i __D, __mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi16 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi16 (__m256i __D, __mmask16 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi16 (__mmask16 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi8 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi8 (__m128i __D, __mmask16 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi8 (__mmask16 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi32 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi32 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi32 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi64 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi64 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi16 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi16 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi16 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+#ifdef __DISABLE_MOVRS_AVX10_2__
+#undef __DISABLE_MOVRS_AVX10_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS_AVX10_2__ */
+
+#if !defined (__AVX10_2_512__) || !defined (__MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512,movrs")
+#define __DISABLE_MOVRS_AVX10_2_512__
+#endif /* __MOVRS_AVX10_2_512__ */
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi8 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi8 (__m512i __D, __mmask64 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi) __D,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi8 (__mmask64 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi32 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi32 (__m512i __D, __mmask16 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi32 (__mmask16 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi64 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi64 (__m512i __D, __mmask8 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi16 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi16 (__m512i __D, __mmask32 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi) __D,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi16 (__mmask32 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+#ifdef __DISABLE_MOVRS_AVX10_2_512__
+#undef __DISABLE_MOVRS_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS_AVX10_2_512__ */
+
+#endif /* __x86_64__ */
+
+#endif /* _MOVRSINTRIN_H_INCLUDED */
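
(Illustrative usage of the new header through <immintrin.h>; x86-64 only.
Compile with -mmovrs for the scalar forms and additionally -mavx10.2 for
the vector forms, e.g. gcc -O2 -mmovrs -mavx10.2.)

#include <immintrin.h>

long long
scalar_rs (const long long *p)
{
  return _movrs_i64 (p);		/* movrsq */
}

__m256i
vector_rs (const void *p)
{
  return _mm256_loadrs_epi32 (p);	/* vmovrsd */
}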
UNSPEC_UFIX_SATURATION
UNSPEC_MINMAXNEPBF16
UNSPEC_MINMAX
+
+  ;; For MOVRS support
+ UNSPEC_VMOVRS
])
(define_c_enum "unspecv" [
(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+(define_mode_iterator VI1248_AVX10_2
+ [(V64QI "TARGET_AVX10_2_512") V32QI V16QI
+ (V32HI "TARGET_AVX10_2_512") V16HI V8HI
+ (V16SI "TARGET_AVX10_2_512") V8SI V4SI
+ (V8DI "TARGET_AVX10_2_512") V4DI V2DI])
+
(define_mode_iterator VF_AVX512VL
[(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
"vminmax<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>"
+ [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v")
+ (unspec:VI1248_AVX10_2
+ [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")]
+ UNSPEC_VMOVRS))]
+ "TARGET_AVX10_2_256 && TARGET_MOVRS"
+ "vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<sseinsnmode>")])
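
(A C sketch, with hypothetical names, of the merge-masking semantics that
the <mask_name> expansion gives the V8SI instance of this pattern.)

void
vmovrsd_ymm_mask_sketch (int dst[8], const int src[8],
			 const int merge[8], unsigned char k)
{
  for (int i = 0; i < 8; i++)
    /* A set mask bit selects a read-shared load of the source element;
       a clear bit keeps the merge operand (zero for the maskz forms).  */
    dst[i] = ((k >> i) & 1) ? src[i] : merge[i];
}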
{
_MM_HINT_IT0 = 19,
_MM_HINT_IT1 = 18,
+ _MM_HINT_RST2 = 9,
/* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */
_MM_HINT_ET0 = 7,
_MM_HINT_T0 = 3,
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch (const void *__P, enum _mm_hint __I)
{
- __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2,
+ __builtin_ia32_prefetch (__P, (__I & 0xC) >> 2,
__I & 0x3, (__I & 0x10) >> 4);
}
#else
#define _mm_prefetch(P, I) \
- __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
+ __builtin_ia32_prefetch ((P), ((I) & 0xC) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
#endif
#ifndef __SSE__
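
(Worked decomposition, for illustration, of why the mask above widens from
0x4 to 0xC.)

/* _MM_HINT_RST2 == 9 == 0b01001:
     locality = 9 & 0x3          == 1
     rws      = (9 & 0xC) >> 2   == 2   (read-shared; 0x4 could only give 0/1)
     instr    = (9 & 0x10) >> 4  == 0   (data prefetch)
   so _mm_prefetch (p, _MM_HINT_RST2) lowers to
   __builtin_ia32_prefetch (p, 2, 1, 0), matching _m_prefetchrs.  */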
@itemx no-amx-fp8
Enable/disable the generation of the AMX-FP8 instructions.
+@cindex @code{target("movrs")} function attribute, x86
+@item movrs
+@itemx no-movrs
+Enable/disable the generation of the MOVRS instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
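
(Illustrative, not part of the patch: the new attribute keyword enables
MOVRS code generation per function without a global -mmovrs; the function
name is hypothetical, x86-64 only.)

#include <immintrin.h>

__attribute__((target("movrs")))
long long
load_shared (const long long *p)
{
  return _movrs_i64 (p);
}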
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf
-musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256
--mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose
+-mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@need 200
@itemx -mamx-fp8
@opindex mamx-fp8
+@need 200
+@opindex mmovrs
+@itemx -mmovrs
These switches enable the use of instructions in the MMX, SSE,
AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES,
PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT,
CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512,
SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512, AMX-TF32, AMX-TRANSPOSE,
-AMX-FP8 or CLDEMOTE extended instruction sets. Each has a corresponding
+AMX-FP8, MOVRS or CLDEMOTE extended instruction sets. Each has a corresponding
@option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
and of @var{min} and @var{max} to @var{base}. See rtl.def for details.
@findex prefetch
-@item (prefetch:@var{m} @var{addr} @var{rw} @var{locality})
+@item (prefetch:@var{m} @var{addr} @var{rws} @var{locality})
Represents prefetch of memory at address @var{addr}.
-Operand @var{rw} is 1 if the prefetch is for data to be written, 0 otherwise;
-targets that do not support write prefetches should treat this as a normal
-prefetch.
+Operand @var{rws} is 0 if the prefetch is for data to be read, 1 if it is
+for data to be written, or 2 if it is for data to be read and shared;
+targets that do not support write or read-shared prefetches should treat
+these as normal prefetches.
Operand @var{locality} specifies the amount of temporal locality; 0 if there
is none or 1, 2, or 3 for increasing levels of temporal locality;
targets that do not support locality hints should ignore this.
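
(An illustrative instance of the extended rtx; the register number is
arbitrary.)

(prefetch (reg:DI 100)
	  (const_int 2)		;; rws: read shared
	  (const_int 1))	;; locality: low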
@item hard_float
Target supports FPU instructions.
+@item movrs
+Target supports the execution of @code{movrs} instructions.
+
@item non_strict_align
Target does not require strict alignment.
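
(Illustrative, not part of the patch: a runtime test would gate on the new
keyword in the usual way.)

/* { dg-do run } */
/* { dg-require-effective-target movrs } */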
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
#define MODERATE_TEMPORAL_LOCALITY 1
#define HIGH_TEMPORAL_LOCALITY 3
+#define READ_SHARED 2
#define WRITE_ACCESS 1
#define READ_ACCESS 0
enum locality { none, low, moderate, high };
-enum rw { read, write };
+enum rws { read, write, read_shared };
int arr[10];
extern void exit (int);
enum locality { none, low, moderate, high, bogus };
-enum rw { read, write };
+enum rws { read, write, read_shared };
int arr[10];
bad (int *p)
{
__builtin_prefetch (p, -1, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */
- __builtin_prefetch (p, 2, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */
+ __builtin_prefetch (p, 3, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */
__builtin_prefetch (p, bogus, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */
__builtin_prefetch (p, 0, -1); /* { dg-warning "invalid third argument to '__builtin_prefetch'; using zero" } */
__builtin_prefetch (p, 0, 4); /* { dg-warning "invalid third argument to '__builtin_prefetch'; using zero" } */
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mprefetchi -mavx10.2-512" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mprefetchi -mavx10.2-512 -mmovrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mmovrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -mmovrs -O2" } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+
+#include <immintrin.h>
+
+__m512i *px;
+volatile __m512i x;
+volatile __mmask64 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+volatile __mmask32 m4;
+
+void extern
+avx512movrs_test (void)
+{
+ x = _mm512_loadrs_epi8(px);
+ x = _mm512_mask_loadrs_epi8(x, m1, px);
+ x = _mm512_maskz_loadrs_epi8(m1, px);
+ x = _mm512_loadrs_epi32(px);
+ x = _mm512_mask_loadrs_epi32(x, m2, px);
+ x = _mm512_maskz_loadrs_epi32(m2, px);
+ x = _mm512_loadrs_epi64(px);
+ x = _mm512_mask_loadrs_epi64(x, m3, px);
+ x = _mm512_maskz_loadrs_epi64(m3, px);
+ x = _mm512_loadrs_epi16(px);
+ x = _mm512_mask_loadrs_epi16(x, m4, px);
+ x = _mm512_maskz_loadrs_epi16(m4, px);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v3 -mavx10.2 -mmovrs -O2" } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */
+
+#include <immintrin.h>
+
+__m256i *px1;
+volatile __m256i x1;
+__m128i *px2;
+volatile __m128i x2;
+volatile __mmask32 m1;
+volatile __mmask8 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512movrs_test (void)
+{
+ x1 = _mm256_loadrs_epi8(px1);
+ x1 = _mm256_mask_loadrs_epi8(x1, m1, px1);
+ x1 = _mm256_maskz_loadrs_epi8(m1, px1);
+ x1 = _mm256_loadrs_epi32(px1);
+ x1 = _mm256_mask_loadrs_epi32(x1, m2, px1);
+ x1 = _mm256_maskz_loadrs_epi32(m2, px1);
+ x1 = _mm256_loadrs_epi64(px1);
+ x1 = _mm256_mask_loadrs_epi64(x1, m2, px1);
+ x1 = _mm256_maskz_loadrs_epi64(m2, px1);
+ x1 = _mm256_loadrs_epi16(px1);
+ x1 = _mm256_mask_loadrs_epi16(x1, m3, px1);
+ x1 = _mm256_maskz_loadrs_epi16(m3, px1);
+
+ x2 = _mm_loadrs_epi8(px2);
+ x2 = _mm_mask_loadrs_epi8(x2, m3, px2);
+ x2 = _mm_maskz_loadrs_epi8(m3, px2);
+ x2 = _mm_loadrs_epi32(px2);
+ x2 = _mm_mask_loadrs_epi32(x2, m2, px2);
+ x2 = _mm_maskz_loadrs_epi32(m2, px2);
+ x2 = _mm_loadrs_epi64(px2);
+ x2 = _mm_mask_loadrs_epi64(x2, m2, px2);
+ x2 = _mm_maskz_loadrs_epi64(m2, px2);
+ x2 = _mm_loadrs_epi16(px2);
+ x2 = _mm_mask_loadrs_epi16(x2, m2, px2);
+ x2 = _mm_maskz_loadrs_epi16(m2, px2);
+}
extern void test_amx_tf32 (void) __attribute__((__target__("amx-tf32")));
extern void test_amx_transpose (void) __attribute__((__target__("amx-transpose")));
extern void test_amx_fp8 (void) __attribute__((__target__("amx-fp8")));
+extern void test_movrs (void) __attribute__((__target__("movrs")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq")));
extern void test_no_amx_tf32 (void) __attribute__((__target__("no-amx-tf32")));
extern void test_no_amx_transpose (void) __attribute__((__target__("no-amx-transpose")));
extern void test_no_amx_fp8 (void) __attribute__((__target__("no-amx-fp8")));
+extern void test_no_movrs (void) __attribute__((__target__("no-movrs")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mmovrs -O2" } */
+/* { dg-final { scan-assembler-times "movrsb\[ \\t\]\+\\(%r.x\\), %.l" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movrsw\[ \\t\]\+\\(%r.x\\), %.x" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movrsl\[ \\t\]\+\\(%r.x\\), %e.x" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movrsq\[ \\t\]\+\\(%r.x\\), %r.x" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "prefetchrst2\[ \\t\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile char x1;
+volatile short x2;
+volatile int x3;
+volatile long long x4;
+char * p1;
+short * p2;
+int * p3;
+long long * p4;
+
+void extern
+movrs_test (void)
+{
+ _m_prefetchrs (p1);
+#ifdef __x86_64__
+ x1 = _movrs_i8 (p1);
+ x2 = _movrs_i16 (p2);
+ x3 = _movrs_i32 (p3);
+ x4 = _movrs_i64 (p4);
+#endif
+}
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs")
#include <x86intrin.h>