DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
+
+# SM4 builtins: binary V16SI signature for the 512-bit AVX10.2 forms.
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
/* SM4.  The 512-bit (V16SI) entries additionally require AVX10.2-512.  */
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
/* SHA512 */
BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
case V16QI_FTYPE_V8HI_V8HI:
case V16HF_FTYPE_V16HF_V16HF:
case V16SF_FTYPE_V16SF_V16SF:
+ case V16SI_FTYPE_V16SI_V16SI:
case V8QI_FTYPE_V8QI_V8QI:
case V8QI_FTYPE_V4HI_V4HI:
case V8HI_FTYPE_V8HI_V8HI:
#pragma GCC pop_options
#endif /* __DISABLE_SM4__ */
+/* Temporarily enable SM4 and AVX10.2-512 for the 512-bit intrinsics
+   below when the translation unit lacks either feature.  */
+#if !defined (__SM4__) || !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("sm4,avx10.2-512")
+#define __DISABLE_SM4_512__
+#endif /* __SM4__ || __AVX10_2_512__ */
+
+/* Four rounds of SM4 key expansion on each 128-bit lane of __A, using
+   the corresponding lane of __B as round-constant input (VSM4KEY4).  */
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4key4_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B);
+}
+
+/* Four rounds of SM4 encryption on each 128-bit lane of __A, using the
+   corresponding lane of __B as round-key input (VSM4RNDS4).  */
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4rnds4_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vsm4rnds4512 ((__v16si) __A, (__v16si) __B);
+}
+
+#ifdef __DISABLE_SM4_512__
+#undef __DISABLE_SM4_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_SM4_512__ */
+
#endif /* _SM4INTRIN_H_INCLUDED */
(set_attr "mode" "OI")])
+;; SM4 key expansion.  Alternative 0 ("x", isa avx) is the VEX-encoded
+;; form limited to xmm0-15; alternative 1 ("v", isa avx10_2) permits the
+;; EVEX-encoded form (extended registers and the V16SI mode).
(define_insn "vsm4key4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4KEY4))]
"TARGET_SM4"
"vsm4key4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
+;; SM4 encryption rounds.  Same alternative scheme as vsm4key4_<mode>:
+;; "x"/avx is the VEX form, "v"/avx10_2 the EVEX form.
(define_insn "vsm4rnds4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4RNDS4))]
"TARGET_SM4"
"vsm4rnds4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2" } */
+/* Verify that with SM4 + AVX10.2 the EVEX-encoded vsm4key4/vsm4rnds4
+   forms can address the extended registers xmm16-31/ymm16-31.  */
+
+#include <immintrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+  register __m128i a __asm("xmm16");
+  register __m128i b __asm("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm_sm4key4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m256i x, __m256i y)
+{
+  register __m256i a __asm("ymm16");
+  register __m256i b __asm("ymm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm256_sm4key4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m128i x, __m128i y)
+{
+  register __m128i a __asm("xmm16");
+  register __m128i b __asm("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm_sm4rnds4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x, __m256i y)
+{
+  register __m256i a __asm("ymm16");
+  register __m256i b __asm("ymm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm256_sm4rnds4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* Verify that the new 512-bit intrinsics emit the zmm forms of
+   vsm4key4/vsm4rnds4.  */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+
+void extern
+sm4_test (void)
+{
+  x = _mm512_sm4key4_epi32 (y, z);
+  x = _mm512_sm4rnds4_epi32 (y, z);
+}
#include <stdlib.h>
-#include "m256-check.h"
+#include "m512-check.h"
+/* Tests that define AVX10_2_512 provide the 512-bit entry point
+   sm4_avx512f_test instead of the 128/256-bit sm4_test.  */
+#ifdef AVX10_2_512
+static void sm4_avx512f_test (void);
+#else
static void sm4_test (void);
+#endif
typedef union
{
if (check_union256i_d (res2, dst2)) \
abort ();
+/* Run _mm512_sm4<name>4_epi32 on fixed 512-bit inputs and compare the
+   result with the scalar reference compute_sm4<name>4, aborting on any
+   mismatch.  `name' is `key' or `rnds'.  */
+#define SM4_AVX512F_SIMULATE(name) \
+  union512i_d src5, src6, res3; \
+  int dst3[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; \
+  \
+  src5.x = _mm512_set_epi32 (111, 222, 333, 444, 555, 666, 777, 888, \
+			     999, 123, 456, 789, 135, 792, 468, 147); \
+  src6.x = _mm512_set_epi32 (258, 369, 159, 483, 726, 162, 738, 495, \
+			     174, 285, 396, 186, 429, 752, 198, 765); \
+  res3.x = _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); \
+  \
+  res3.x = _mm512_sm4##name##4_epi32 (src5.x, src6.x); \
+  \
+  compute_sm4##name##4 (dst3, src5.a, src6.a, 512); \
+  \
+  if (check_union512i_d (res3, dst3)) \
+    abort ();
+
static void
__attribute__ ((noinline))
do_test (void)
{
+#ifdef AVX10_512BIT
+ sm4_avx512f_test ();
+#else
sm4_test ();
+#endif
}
int
main ()
{
/* Check CPU support for SM4. */
- if (__builtin_cpu_supports ("sm4"))
+ if (__builtin_cpu_supports ("sm4")
+#ifdef AVX10_2
+ && __builtin_cpu_supports ("avx10.2")
+#endif
+#ifdef AVX10_2_512
+ && __builtin_cpu_supports ("avx10.2-512")
+#endif
+ )
{
do_test ();
#ifdef DEBUG
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+/* Runtime check of the 512-bit SM4 key-expansion intrinsic against the
+   scalar reference implementation in sm4-check.h.  */
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char key;
+SM4_FUNC (key);
+
+static void
+sm4_avx512f_test (void)
+{
+  SM4_AVX512F_SIMULATE (key);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+/* Runtime check of the 512-bit SM4 encryption-rounds intrinsic against
+   the scalar reference implementation in sm4-check.h.  */
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char rnds;
+SM4_FUNC (rnds);
+
+static void
+sm4_avx512f_test (void)
+{
+  SM4_AVX512F_SIMULATE (rnds);
+}