set_feature (FEATURE_AVXVNNIINT16);
if (eax & bit_SM3)
set_feature (FEATURE_SM3);
+ if (eax & bit_SHA512)
+ set_feature (FEATURE_SHA512);
}
if (avx512_usable)
{
(OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
#define OPTION_MASK_ISA2_AVXVNNIINT16_SET OPTION_MASK_ISA2_AVXVNNIINT16
#define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3
+#define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
#define OPTION_MASK_ISA2_AVXVNNIINT16_UNSET OPTION_MASK_ISA2_AVXVNNIINT16
#define OPTION_MASK_ISA2_SM3_UNSET OPTION_MASK_ISA2_SM3
+#define OPTION_MASK_ISA2_SHA512_UNSET OPTION_MASK_ISA2_SHA512
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
OPTION_MASK_ISA2_SSE_UNSET
#define OPTION_MASK_ISA2_AVX_UNSET \
(OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET \
- | OPTION_MASK_ISA2_SM3_UNSET)
+ | OPTION_MASK_ISA2_SM3_UNSET | OPTION_MASK_ISA2_SHA512_UNSET)
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
}
return true;
+ case OPT_msha512:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_SHA512_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SHA512_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SHA512_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SHA512_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
FEATURE_AMX_COMPLEX,
FEATURE_AVXVNNIINT16,
FEATURE_SM3,
+ FEATURE_SHA512,
CPU_FEATURE_MAX
};
ISA_NAMES_TABLE_ENTRY("avxvnniint16", FEATURE_AVXVNNIINT16,
P_NONE, "-mavxvnniint16")
ISA_NAMES_TABLE_ENTRY("sm3", FEATURE_SM3, P_NONE, "-msm3")
+ ISA_NAMES_TABLE_ENTRY("sha512", FEATURE_SHA512, P_NONE, "-msha512")
ISA_NAMES_TABLE_END
avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h
raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
- sm3intrin.h"
+ sm3intrin.h sha512intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
/* Extended Features Sub-leaf (%eax == 7, %ecx == 1) */
/* %eax */
+#define bit_SHA512 (1 << 0)
#define bit_SM3 (1 << 1)
#define bit_RAOINT (1 << 3)
#define bit_AVXVNNI (1 << 4)
# SM3 builtins
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT)
+
+# SHA512 builtins
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V2DI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3msg2, "__builtin_ia32_vsm3msg2", IX86_BUILTIN_VSM3MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin_ia32_vsm3rnds2", IX86_BUILTIN_VSM3RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+/* SHA512 */
+BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
+BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg2, "__builtin_ia32_vsha512msg2", IX86_BUILTIN_VSHA512MSG2, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
+BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512rnds2, "__builtin_ia32_vsha512rnds2", IX86_BUILTIN_VSHA512RNDS2, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V2DI)
+
/* AVX512VL. */
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT)
def_or_undef (parse_in, "__AVXVNNIINT16__");
if (isa_flag2 & OPTION_MASK_ISA2_SM3)
def_or_undef (parse_in, "__SM3__");
+ if (isa_flag2 & OPTION_MASK_ISA2_SHA512)
+ def_or_undef (parse_in, "__SHA512__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
case V4SF_FTYPE_V4SF_UINT:
case V4SF_FTYPE_V4SF_DI:
case V4SF_FTYPE_V4SF_SI:
+ case V4DI_FTYPE_V4DI_V2DI:
case V2DI_FTYPE_V2DI_V2DI:
case V2DI_FTYPE_V16QI_V16QI:
case V2DI_FTYPE_V4SI_V4SI:
case V8HI_FTYPE_V8DI_V8HI_UQI:
case V8SI_FTYPE_V8DI_V8SI_UQI:
case V4SI_FTYPE_V4SI_V4SI_V4SI:
+ case V4DI_FTYPE_V4DI_V4DI_V2DI:
case V16SI_FTYPE_V16SI_V16SI_V16SI:
case V8DI_FTYPE_V8DI_V8DI_V8DI:
case V32HI_FTYPE_V32HI_V32HI_V32HI:
DEF_PTA(AMX_COMPLEX)
DEF_PTA(AVXVNNIINT16)
DEF_PTA(SM3)
+DEF_PTA(SHA512)
{ "-mraoint", OPTION_MASK_ISA2_RAOINT },
{ "-mamx-complex", OPTION_MASK_ISA2_AMX_COMPLEX },
{ "-mavxvnniint16", OPTION_MASK_ISA2_AVXVNNIINT16 },
- { "-msm3", OPTION_MASK_ISA2_SM3 }
+ { "-msm3", OPTION_MASK_ISA2_SM3 },
+ { "-msha512", OPTION_MASK_ISA2_SHA512 }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("amx-complex", OPT_mamx_complex),
IX86_ATTR_ISA ("avxvnniint16", OPT_mavxvnniint16),
IX86_ATTR_ISA ("sm3", OPT_msm3),
+ IX86_ATTR_ISA ("sha512", OPT_msha512),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
Target Mask(ISA2_SM3) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
SM3 built-in functions and code generation.
+
+mvpinsrvpextr
+Target Mask(ISA2_VPINSRVPEXTR) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F,
+AVX512VL and VPINSRVPEXTR built-in functions and code generation.
+
+msha512
+Target Mask(ISA2_SHA512) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
+SHA512 built-in functions and code generation.
#include <sm3intrin.h>
+#include <sha512intrin.h>
+
#include <fmaintrin.h>
#include <f16cintrin.h>
--- /dev/null
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _SHA512INTRIN_H_INCLUDED
+#define _SHA512INTRIN_H_INCLUDED
+
+#ifndef __SHA512__
+#pragma GCC push_options
+#pragma GCC target("sha512")
+#define __DISABLE_SHA512__
+#endif /* __SHA512__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sha512msg1_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_vsha512msg1 ((__v4di) __A, (__v2di) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sha512msg2_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vsha512msg2 ((__v4di) __A, (__v4di) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sha512rnds2_epi64 (__m256i __A, __m256i __B, __m128i __C)
+{
+ return (__m256i) __builtin_ia32_vsha512rnds2 ((__v4di) __A, (__v4di) __B,
+ (__v2di) __C);
+}
+
+#ifdef __DISABLE_SHA512__
+#undef __DISABLE_SHA512__
+#pragma GCC pop_options
+#endif /* __DISABLE_SHA512__ */
+
+#endif /* _SHA512INTRIN_H_INCLUDED */
UNSPEC_VPDPWSUDS
UNSPEC_VPDPWUUD
UNSPEC_VPDPWUUDS
+
+ ;; For SHA512 support
+ UNSPEC_SHA512MSG1
+ UNSPEC_SHA512MSG2
+ UNSPEC_SHA512RNDS2
+
])
(define_c_enum "unspecv" [
(set_attr "mode" "TI")
(set_attr "length_immediate" "1")])
+(define_insn "vsha512msg1"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_SHA512MSG1))]
+ "TARGET_SHA512"
+ "vsha512msg1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "OI")])
+
+(define_insn "vsha512msg2"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "x")]
+ UNSPEC_SHA512MSG2))]
+ "TARGET_SHA512"
+ "vsha512msg2\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "OI")])
+
+(define_insn "vsha512rnds2"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "x")
+ (match_operand:V2DI 3 "register_operand" "x")]
+ UNSPEC_SHA512RNDS2))]
+ "TARGET_SHA512"
+ "vsha512rnds2\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "OI")])
+
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
[(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
(vec_concat:AVX512MODE2P
@itemx no-sm3
Enable/disable the generation of the SM3 instructions.
+@cindex @code{target("sha512")} function attribute, x86
+@item sha512
+@itemx no-sha512
+Enable/disable the generation of the SHA512 instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
--mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3
+-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@need 200
@opindex msm3
@itemx -msm3
+@need 200
+@opindex msha512
+@itemx -msha512
These switches enable the use of instructions in the MMX, SSE,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT,
-AMX-COMPLEX, AVXVNNIINT16, SM3 or CLDEMOTE extended instruction sets. Each has a
-corresponding @option{-mno-} option to disable use of these instructions.
+AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512 or CLDEMOTE extended instruction sets.
+Each has a corresponding @option{-mno-} option to disable use of these
+instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@item rdrand
Target supports x86 @code{rdrand} instruction.
+@item sha512
+Target supports the execution of @code{sha512} instructions.
+
@item sm3
Target supports the execution of @code{sm3} instructions.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
extern void test_amx_complex (void) __attribute__((__target__("amx-complex")));
extern void test_avxvnniint16 (void) __attribute__((__target__("avxvnniint16")));
extern void test_sm3 (void) __attribute__((__target__("sm3")));
+extern void test_sha512 (void) __attribute__((__target__("sha512")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
extern void test_no_amx_complex (void) __attribute__((__target__("no-amx-complex")));
extern void test_no_avxvnniint16 (void) __attribute__((__target__("no-avxvnniint16")));
extern void test_no_sm3 (void) __attribute__((__target__("no-sm3")));
+extern void test_no_sha512 (void) __attribute__((__target__("no-sha512")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha512" } */
+/* { dg-final { scan-assembler "vsha512msg1\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsha512msg2\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsha512rnds2\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+volatile __m256i y;
+
+void extern
+sha512_test (void)
+{
+ y = _mm256_sha512msg1_epi64(y, x);
+ y = _mm256_sha512msg2_epi64(y, y);
+ y = _mm256_sha512rnds2_epi64(y, y, x);
+}
--- /dev/null
+#include <stdlib.h>
+#include "m256-check.h"
+
+static void sha512_test (void);
+
+static unsigned long long
+ror64 (unsigned long long w, int n)
+{
+ int count = n % 64;
+ return ((w >> n) | (w << (64 - n)));
+}
+
+static unsigned long long
+shr64 (unsigned long long w, int n)
+{
+ return (w >> n);
+}
+
+static void
+__attribute__ ((noinline))
+do_test(void)
+{
+ sha512_test ();
+}
+
+int
+main ()
+{
+ /* Check CPU support for SHA512. */
+ if (__builtin_cpu_supports ("sha512"))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -msha512" } */
+/* { dg-require-effective-target sha512 } */
+
+#include "sha512-check.h"
+#include <emmintrin.h>
+#include <immintrin.h>
+
+static unsigned long long
+s0 (unsigned long long w)
+{
+ return ror64 (w, 1) ^ ror64 (w, 8) ^ shr64 (w, 7);
+}
+
+static void
+compute_sha512msg1(long long* src1, long long* src2, long long* res)
+{
+ unsigned long long w0, w1, w2, w3, w4;
+
+ w0 = src1[0];
+ w1 = src1[1];
+ w2 = src1[2];
+ w3 = src1[3];
+ w4 = src2[0];
+
+ res[0] = w0 + s0 (w1);
+ res[1] = w1 + s0 (w2);
+ res[2] = w2 + s0 (w3);
+ res[3] = w3 + s0 (w4);
+}
+
+static void
+sha512_test(void)
+{
+ union256i_q s1, res;
+ union128i_q s2;
+ long long res_ref[4];
+
+ s1.x = _mm256_set_epi64x (111, 222, 333, 444);
+ s2.x = _mm_set_epi64x (0, 555);
+
+ res.x = _mm256_sha512msg1_epi64 (s1.x, s2.x);
+
+ compute_sha512msg1 (s1.a, s2.a, res_ref);
+
+ if (check_union256i_q (res, res_ref))
+ abort();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -msha512" } */
+/* { dg-require-effective-target sha512 } */
+
+#include "sha512-check.h"
+#include <immintrin.h>
+
+static unsigned long long
+s1 (unsigned long long w)
+{
+ return ror64 (w, 19) ^ ror64 (w, 61) ^ shr64 (w, 6);
+}
+
+static void
+compute_sha512msg2 (long long* src1, long long* src2, long long* res)
+{
+ unsigned long long w14, w15, w16, w17, w18, w19;
+
+ w14 = src2[2];
+ w15 = src2[3];
+ w16 = src1[0] + s1 (w14);
+ w17 = src1[1] + s1 (w15);
+ w18 = src1[2] + s1 (w16);
+ w19 = src1[3] + s1 (w17);
+
+ res[0] = w16;
+ res[1] = w17;
+ res[2] = w18;
+ res[3] = w19;
+}
+
+static void
+sha512_test (void)
+{
+ union256i_q s1, s2, res;
+ long long res_ref[4];
+
+ s1.x = _mm256_set_epi64x (111, 222, 333, 444);
+ s2.x = _mm256_set_epi64x (555, 666, 0, 0);
+
+ res.x = _mm256_sha512msg2_epi64 (s1.x, s2.x);
+
+ compute_sha512msg2 (s1.a, s2.a, res_ref);
+
+ if (check_union256i_q (res, res_ref))
+ abort();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -msha512" } */
+/* { dg-require-effective-target sha512 } */
+
+#include "sha512-check.h"
+#include <emmintrin.h>
+#include <immintrin.h>
+
+static unsigned long long
+ch (unsigned long long e, unsigned long long f, unsigned long long g)
+{
+ return (e & f) ^ (~e & g);
+}
+
+static unsigned long long
+maj (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ return (a & b) ^ (a & c) ^ (b & c);
+}
+
+static unsigned long long
+s0 (unsigned long long w)
+{
+ return ror64 (w, 28) ^ ror64 (w, 34) ^ ror64 (w, 39);
+}
+
+static unsigned long long
+s1 (unsigned long long w)
+{
+ return ror64 (w, 14) ^ ror64 (w, 18) ^ ror64 (w, 41);
+}
+
+static void
+compute_sha512rnds2(long long* src0, long long* src1, long long* src2, long long* res)
+{
+ unsigned long long wk[2] = { src2[0], src2[1] };
+ unsigned long long a[3], b[3], c[3], d[3], e[3], f[3], g[3], h[3];
+
+ a[0] = src1[3];
+ b[0] = src1[2];
+ c[0] = src0[3];
+ d[0] = src0[2];
+ e[0] = src1[1];
+ f[0] = src1[0];
+ g[0] = src0[1];
+ h[0] = src0[0];
+
+ int i;
+ for (i = 0; i <= 1; i++)
+ {
+ a[i + 1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i]
+ + maj (a[i], b[i], c[i]) + s0 (a[i]);
+ b[i + 1] = a[i];
+ c[i + 1] = b[i];
+ d[i + 1] = c[i];
+ e[i + 1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i] + d[i];
+ f[i + 1] = e[i];
+ g[i + 1] = f[i];
+ h[i + 1] = g[i];
+ }
+
+ res[0] = f[2];
+ res[1] = e[2];
+ res[2] = b[2];
+ res[3] = a[2];
+}
+
+static void
+sha512_test (void)
+{
+ union256i_q s0, s1, res;
+ union128i_q s2;
+ long long res_ref[4];
+
+ s0.x = _mm256_set_epi64x (111, 222, 333, 444);
+ s1.x = _mm256_set_epi64x (555, 666, 777, 888);
+ s2.x = _mm_set_epi64x (999, 123);
+
+ res.x = _mm256_sha512rnds2_epi64 (s0.x, s1.x, s2.x);
+
+ compute_sha512rnds2 (s0.a, s1.a, s2.a, res_ref);
+
+ if (check_union256i_q (res, res_ref))
+ abort();
+}
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
/* sm3intrin.h */
#define __builtin_ia32_vsm3rnds2(A, B, C, D) __builtin_ia32_vsm3rnds2 (A, B, C, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512")
#include <x86intrin.h>
} "-msm3" ]
}
+# Return 1 if sha512 instructions can be compiled.
+proc check_effective_target_sha512 { } {
+ return [check_no_compiler_messages sha512 object {
+ typedef long long __m256i __attribute__ ((__vector_size__ (32)));
+ typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+ __m256i
+ _mm256_sha512msg2_epi64 (__m256i __A, __m256i __B)
+ {
+ return (__m256i) __builtin_ia32_vsha512msg2 ((__v4di) __A,
+ (__v4di) __B);
+ }
+ } "-msha512" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {