set_feature (FEATURE_SM3);
if (eax & bit_SHA512)
set_feature (FEATURE_SHA512);
+ if (eax & bit_SM4)
+ set_feature (FEATURE_SM4);
}
if (avx512_usable)
{
#define OPTION_MASK_ISA2_AVXVNNIINT16_SET OPTION_MASK_ISA2_AVXVNNIINT16
#define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3
#define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512
+#define OPTION_MASK_ISA2_SM4_SET OPTION_MASK_ISA2_SM4
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
#define OPTION_MASK_ISA2_AVXVNNIINT16_UNSET OPTION_MASK_ISA2_AVXVNNIINT16
#define OPTION_MASK_ISA2_SM3_UNSET OPTION_MASK_ISA2_SM3
#define OPTION_MASK_ISA2_SHA512_UNSET OPTION_MASK_ISA2_SHA512
+#define OPTION_MASK_ISA2_SM4_UNSET OPTION_MASK_ISA2_SM4
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
OPTION_MASK_ISA2_SSE_UNSET
#define OPTION_MASK_ISA2_AVX_UNSET \
(OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET \
- | OPTION_MASK_ISA2_SM3_UNSET | OPTION_MASK_ISA2_SHA512_UNSET)
+ | OPTION_MASK_ISA2_SM3_UNSET | OPTION_MASK_ISA2_SHA512_UNSET \
+ | OPTION_MASK_ISA2_SM4_UNSET)
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
}
return true;
+ case OPT_msm4:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_SM4_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SM4_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SM4_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SM4_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
FEATURE_AVXVNNIINT16,
FEATURE_SM3,
FEATURE_SHA512,
+ FEATURE_SM4,
CPU_FEATURE_MAX
};
P_NONE, "-mavxvnniint16")
ISA_NAMES_TABLE_ENTRY("sm3", FEATURE_SM3, P_NONE, "-msm3")
ISA_NAMES_TABLE_ENTRY("sha512", FEATURE_SHA512, P_NONE, "-msha512")
+ ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4")
ISA_NAMES_TABLE_END
avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h
raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
- sm3intrin.h sha512intrin.h"
+ sm3intrin.h sha512intrin.h sm4intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
/* %eax */
#define bit_SHA512 (1 << 0)
#define bit_SM3 (1 << 1)
+#define bit_SM4 (1 << 2)
#define bit_RAOINT (1 << 3)
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3msg2, "__builtin_ia32_vsm3msg2", IX86_BUILTIN_VSM3MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin_ia32_vsm3rnds2", IX86_BUILTIN_VSM3RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+/* SM4 */
+BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+
/* SHA512 */
BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg2, "__builtin_ia32_vsha512msg2", IX86_BUILTIN_VSHA512MSG2, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
def_or_undef (parse_in, "__SM3__");
if (isa_flag2 & OPTION_MASK_ISA2_SHA512)
def_or_undef (parse_in, "__SHA512__");
+ if (isa_flag2 & OPTION_MASK_ISA2_SM4)
+ def_or_undef (parse_in, "__SM4__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
DEF_PTA(AVXVNNIINT16)
DEF_PTA(SM3)
DEF_PTA(SHA512)
+DEF_PTA(SM4)
{ "-mamx-complex", OPTION_MASK_ISA2_AMX_COMPLEX },
{ "-mavxvnniint16", OPTION_MASK_ISA2_AVXVNNIINT16 },
{ "-msm3", OPTION_MASK_ISA2_SM3 },
- { "-msha512", OPTION_MASK_ISA2_SHA512 }
+ { "-msha512", OPTION_MASK_ISA2_SHA512 },
+ { "-msm4", OPTION_MASK_ISA2_SM4 }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("avxvnniint16", OPT_mavxvnniint16),
IX86_ATTR_ISA ("sm3", OPT_msm3),
IX86_ATTR_ISA ("sha512", OPT_msha512),
+ IX86_ATTR_ISA ("sm4", OPT_msm4),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
Target Mask(ISA2_SHA512) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
SHA512 built-in functions and code generation.
+
+msm4
+Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
+SM4 built-in functions and code generation.
#include <sha512intrin.h>
+#include <sm4intrin.h>
+
#include <fmaintrin.h>
#include <f16cintrin.h>
--- /dev/null
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <sm4intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _SM4INTRIN_H_INCLUDED
+#define _SM4INTRIN_H_INCLUDED
+
+#ifndef __SM4__
+#pragma GCC push_options
+#pragma GCC target("sm4")
+#define __DISABLE_SM4__
+#endif /* __SM4__ */
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sm4key4_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vsm4key4128 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sm4key4_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vsm4key4256 ((__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sm4rnds4_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vsm4rnds4128 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vsm4rnds4256 ((__v8si) __A, (__v8si) __B);
+}
+
+#ifdef __DISABLE_SM4__
+#undef __DISABLE_SM4__
+#pragma GCC pop_options
+#endif /* __DISABLE_SM4__ */
+
+#endif /* _SM4INTRIN_H_INCLUDED */
UNSPEC_SHA512MSG2
UNSPEC_SHA512RNDS2
+ ;; For SM4 support
+ UNSPEC_SM4KEY4
+ UNSPEC_SM4RNDS4
+
])
(define_c_enum "unspecv" [
[(set_attr "type" "sselog1")
(set_attr "mode" "OI")])
+(define_insn "vsm4key4_<mode>"
+ [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
+ (unspec:VI4_AVX
+ [(match_operand:VI4_AVX 1 "register_operand" "x")
+ (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ UNSPEC_SM4KEY4))]
+ "TARGET_SM4"
+ "vsm4key4\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "other")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vsm4rnds4_<mode>"
+ [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
+ (unspec:VI4_AVX
+ [(match_operand:VI4_AVX 1 "register_operand" "x")
+ (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ UNSPEC_SM4RNDS4))]
+ "TARGET_SM4"
+ "vsm4rnds4\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "other")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
[(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
(vec_concat:AVX512MODE2P
@itemx no-sha512
Enable/disable the generation of the SHA512 instructions.
+@cindex @code{target("sm4")} function attribute, x86
+@item sm4
+@itemx no-sm4
+Enable/disable the generation of the SM4 instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
--mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512
+-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@need 200
@opindex msha512
@itemx -msha512
+@need 200
+@opindex msm4
+@itemx -msm4
These switches enable the use of instructions in the MMX, SSE,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT,
-AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512 or CLDEMOTE extended instruction sets.
-Each has a corresponding @option{-mno-} option to disable use of these
+AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4 or CLDEMOTE extended instruction
+sets. Each has a corresponding @option{-mno-} option to disable use of these
instructions.
These extensions are also available as built-in functions: see
@item sm3
Target supports the execution of @code{sm3} instructions.
+@item sm4
+Target supports the execution of @code{sm4} instructions.
+
@item sqrt_insn
Target has a square root instruction that the compiler can generate.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
extern void test_avxvnniint16 (void) __attribute__((__target__("avxvnniint16")));
extern void test_sm3 (void) __attribute__((__target__("sm3")));
extern void test_sha512 (void) __attribute__((__target__("sha512")));
+extern void test_sm4 (void) __attribute__((__target__("sm4")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
extern void test_no_avxvnniint16 (void) __attribute__((__target__("no-avxvnniint16")));
extern void test_no_sm3 (void) __attribute__((__target__("no-sm3")));
extern void test_no_sha512 (void) __attribute__((__target__("no-sha512")));
+extern void test_no_sm4 (void) __attribute__((__target__("no-sm4")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msm4" } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i a, b, c;
+volatile __m256i d, e, f;
+
+void extern
+sm4_test (void)
+{
+ a = _mm_sm4key4_epi32 (b, c);
+ d = _mm256_sm4key4_epi32 (e, f);
+ a = _mm_sm4rnds4_epi32 (b, c);
+ d = _mm256_sm4rnds4_epi32 (e, f);
+}
--- /dev/null
+#include <stdlib.h>
+#include "m256-check.h"
+
+static void sm4_test (void);
+
+typedef union
+{
+ unsigned int x;
+ unsigned char a[4];
+} union32ui_ub;
+
+unsigned char sbox[256] = {
+0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
+0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
+0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
+0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
+0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
+0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
+0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
+0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
+0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
+0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B,
+0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
+0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2,
+0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
+0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
+0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
+0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5,
+0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
+0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55,
+0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
+0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
+0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
+0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F,
+0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
+0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
+0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
+0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
+0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
+0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
+0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
+0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
+0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
+};
+
+static unsigned
+rol32 (unsigned w, int n)
+{
+ int count = n % 32;
+ return ((w << count) | (w >> (32 - count)));
+}
+
+static unsigned char
+sbox_byte (unsigned w, int i)
+{
+ union32ui_ub tmp;
+ tmp.x = w;
+ return sbox[tmp.a[i]];
+}
+
+static unsigned
+lower_t (unsigned w)
+{
+ union32ui_ub tmp;
+ tmp.a[0] = sbox_byte (w, 0);
+ tmp.a[1] = sbox_byte (w, 1);
+ tmp.a[2] = sbox_byte (w, 2);
+ tmp.a[3] = sbox_byte (w, 3);
+ return tmp.x;
+}
+
+static unsigned
+l_key (unsigned w)
+{
+ return w ^ rol32 (w, 13) ^ rol32 (w, 23);
+}
+
+static unsigned
+l_rnds (unsigned w)
+{
+ unsigned tmp = w;
+ tmp = tmp ^ rol32 (w, 2);
+ tmp = tmp ^ rol32 (w, 10);
+ tmp = tmp ^ rol32 (w, 18);
+ tmp = tmp ^ rol32 (w, 24);
+ return tmp;
+}
+
+#define SM4_FUNC(name) \
+static unsigned \
+t_##name (unsigned w) \
+{ \
+ return l_##name (lower_t (w)); \
+} \
+ \
+static unsigned \
+f_##name (unsigned x0, unsigned x1, unsigned x2, unsigned x3, unsigned k) \
+{ \
+ return x0 ^ t_##name (x1 ^ x2 ^ x3 ^ k); \
+} \
+ \
+static void \
+compute_sm4##name##4 (int *dst, int *src1, int *src2, int vl) \
+{ \
+ unsigned c[4], p[4]; \
+ \
+ int kl = vl / 128; \
+ int i; \
+ \
+ for (i = 0; i < kl; i++) \
+ { \
+ p[0] = src1[4 * i]; \
+ p[1] = src1[4 * i + 1]; \
+ p[2] = src1[4 * i + 2]; \
+ p[3] = src1[4 * i + 3]; \
+ \
+ c[0] = f_##name (p[0], p[1], p[2], p[3], src2[4 * i]); \
+ c[1] = f_##name (p[1], p[2], p[3], c[0], src2[4 * i + 1]); \
+ c[2] = f_##name (p[2], p[3], c[0], c[1], src2[4 * i + 2]); \
+ c[3] = f_##name (p[3], c[0], c[1], c[2], src2[4 * i + 3]); \
+ \
+ dst[4 * i] = c[0]; \
+ dst[4 * i + 1] = c[1]; \
+ dst[4 * i + 2] = c[2]; \
+ dst[4 * i + 3] = c[3]; \
+ } \
+}
+
+#define SM4_AVX_SIMULATE(name) \
+ union128i_d src1, src2, res1; \
+ int dst1[4] = {0, 0, 0, 0}; \
+ \
+ src1.x = _mm_set_epi32 (111, 222, 333, 444); \
+ src2.x = _mm_set_epi32 (555, 666, 777, 888); \
+ res1.x = _mm_set_epi32 (0, 0, 0, 0); \
+ \
+ res1.x = _mm_sm4##name##4_epi32 (src1.x, src2.x); \
+ \
+ compute_sm4##name##4 (dst1, src1.a, src2.a, 128); \
+ \
+ if (check_union128i_d (res1, dst1)) \
+ abort (); \
+ \
+ union256i_d src3, src4, res2; \
+ int dst2[8] = {0, 0, 0, 0, 0, 0, 0, 0}; \
+ \
+ src3.x = _mm256_set_epi32 (111, 222, 333, 444, 555, 666, 777, 888); \
+ src4.x = _mm256_set_epi32 (999, 123, 456, 789, 135, 792, 468, 147); \
+ res2.x = _mm256_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0); \
+ \
+ res2.x = _mm256_sm4##name##4_epi32 (src3.x, src4.x); \
+ \
+ compute_sm4##name##4 (dst2, src3.a, src4.a, 256); \
+ \
+ if (check_union256i_d (res2, dst2)) \
+ abort ();
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+ sm4_test ();
+}
+
+int
+main ()
+{
+ /* Check CPU support for SM4. */
+ if (__builtin_cpu_supports ("sm4"))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -msm4" } */
+/* { dg-require-effective-target sm4 } */
+
+#include "sm4-check.h"
+
+char key;
+SM4_FUNC (key);
+
+static void
+sm4_test (void)
+{
+ SM4_AVX_SIMULATE (key);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -msm4" } */
+/* { dg-require-effective-target sm4 } */
+
+#include "sm4-check.h"
+
+char rnds;
+SM4_FUNC (rnds);
+
+static void
+sm4_test (void)
+{
+ SM4_AVX_SIMULATE (rnds);
+}
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
/* sm3intrin.h */
#define __builtin_ia32_vsm3rnds2(A, B, C, D) __builtin_ia32_vsm3rnds2 (A, B, C, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4")
#include <x86intrin.h>
} "-msha512" ]
}
+# Return 1 if sm4 instructions can be compiled.
+proc check_effective_target_sm4 { } {
+ return [check_no_compiler_messages sm4 object {
+ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+ typedef int __v4si __attribute__ ((__vector_size__ (16)));
+ __m128i
+ _mm_sm4key4_epi32 (__m128i __A, __m128i __B)
+ {
+ return (__m128i) __builtin_ia32_vsm4key4128 ((__v4si) __A,
+ (__v4si) __B);
+ }
+ } "-msm4" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {