From: Julia Koval Date: Wed, 20 Dec 2017 06:20:44 +0000 (+0100) Subject: Enable VPCLMULQDQ support X-Git-Tag: basepoints/gcc-9~2545 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6557be99afd301b8d7f2b142b12fb47ae6cb823d;p=thirdparty%2Fgcc.git Enable VPCLMULQDQ support gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_VPCLMULQDQ_SET, OPTION_MASK_ISA_VPCLMULQDQ_UNSET): New. (ix86_handle_option): Handle -mvpclmulqdq, move cx6 to flags2. * config.gcc: Include vpclmulqdqintrin.h. * config/i386/cpuid.h: Handle bit_VPCLMULQDQ. * config/i386/driver-i386.c (host_detect_local_cpu): Handle -mvpclmulqdq. * config/i386/i386-builtin.def (__builtin_ia32_vpclmulqdq_v2di, __builtin_ia32_vpclmulqdq_v4di, __builtin_ia32_vpclmulqdq_v8di): New. * config/i386/i386-c.c (__VPCLMULQDQ__): New. * config/i386/i386.c (isa2_opts): Add -mcx16. (isa_opts): Add -mpclmulqdq, remove -mcx16. (ix86_option_override_internal): Move mcx16 to flags2. (ix86_valid_target_attribute_inner_p): Add vpclmulqdq. (ix86_expand_builtin): Handle OPTION_MASK_ISA_VPCLMULQDQ. * config/i386/i386.h (TARGET_VPCLMULQDQ, TARGET_VPCLMULQDQ_P): New. * config/i386/i386.opt: Add mvpclmulqdq, move mcx16 to flags2. * config/i386/immintrin.h: Include vpclmulqdqintrin.h. * config/i386/sse.md (vpclmulqdq_): New pattern. * config/i386/vpclmulqdqintrin.h (_mm512_clmulepi64_epi128, _mm_clmulepi64_epi128, _mm256_clmulepi64_epi128): New intrinsics. * doc/invoke.texi: Add -mvpclmulqdq. gcc/testsuite/ * gcc.target/i386/avx-1.c: Handle new intrinsics. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512-check.h: Handle bit_VPCLMULQDQ. * gcc.target/i386/avx512f-vpclmulqdq-2.c: New test. * gcc.target/i386/avx512vl-vpclmulqdq-2.c: Ditto. * gcc.target/i386/vpclmulqdq.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_vpclmulqdq): New. From-SVN: r255850 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9284a8c07fc3..c90777035d0e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2017-12-20 Julia Koval + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_VPCLMULQDQ_SET, + OPTION_MASK_ISA_VPCLMULQDQ_UNSET): New. + (ix86_handle_option): Handle -mvpclmulqdq, move cx6 to flags2. + * config.gcc: Include vpclmulqdqintrin.h. + * config/i386/cpuid.h: Handle bit_VPCLMULQDQ. + * config/i386/driver-i386.c (host_detect_local_cpu): Handle + -mvpclmulqdq. + * config/i386/i386-builtin.def (__builtin_ia32_vpclmulqdq_v2di, + __builtin_ia32_vpclmulqdq_v4di, __builtin_ia32_vpclmulqdq_v8di): New. + * config/i386/i386-c.c (__VPCLMULQDQ__): New. + * config/i386/i386.c (isa2_opts): Add -mcx16. + (isa_opts): Add -mpclmulqdq, remove -mcx16. + (ix86_option_override_internal): Move mcx16 to flags2. + (ix86_valid_target_attribute_inner_p): Add vpclmulqdq. + (ix86_expand_builtin): Handle OPTION_MASK_ISA_VPCLMULQDQ. + * config/i386/i386.h (TARGET_VPCLMULQDQ, TARGET_VPCLMULQDQ_P): New. + * config/i386/i386.opt: Add mvpclmulqdq, move mcx16 to flags2. + * config/i386/immintrin.h: Include vpclmulqdqintrin.h. + * config/i386/sse.md (vpclmulqdq_): New pattern. + * config/i386/vpclmulqdqintrin.h (_mm512_clmulepi64_epi128, + _mm_clmulepi64_epi128, _mm256_clmulepi64_epi128): New intrinsics. + * doc/invoke.texi: Add -mvpclmulqdq. + 2017-12-20 Tom de Vries PR middle-end/83423 diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 575a914311f2..00eb01754bfb 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_SET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_SET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX and SSE ISAs are handled separately. */ @@ -214,6 +215,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_UNSET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_UNSET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_UNSET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -554,6 +556,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mvpclmulqdq: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + } + return true; + case OPT_mavx5124fmaps: if (value) { @@ -889,13 +904,13 @@ ix86_handle_option (struct gcc_options *opts, case OPT_mcx16: if (value) { - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_SET; } else { - opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_UNSET; } return true; diff --git a/gcc/config.gcc b/gcc/config.gcc index e208d00bd5bf..4c2b3824f981 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -381,7 +381,8 @@ i[34567]86-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -408,7 +409,8 @@ x86_64-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 41369c2e3485..37f3e1a96bb3 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -102,6 +102,7 @@ #define bit_GFNI (1 << 8) #define bit_VAES (1 << 9) #define bit_AVX512VNNI (1 << 11) +#define bit_VPCLMULQDQ (1 << 10) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 013107ae7220..99826fd651e8 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -420,6 +420,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_gfni = 0, has_avx512vbmi2 = 0; unsigned int has_ibt = 0, has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; + unsigned int has_vpclmulqdq = 0; bool arch; @@ -513,6 +514,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_rdpid = ecx & bit_RDPID; has_gfni = ecx & bit_GFNI; has_vaes = ecx & bit_VAES; + has_vpclmulqdq = ecx & bit_VPCLMULQDQ; has_avx5124vnniw = edx & bit_AVX5124VNNIW; has_avx5124fmaps = edx & bit_AVX5124FMAPS; @@ -1080,6 +1082,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *ibt = has_ibt ? " -mibt" : " -mno-ibt"; const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; + const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1090,7 +1093,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) xsavec, xsaves, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, - avx512vbmi2, avx512vnni, vaes, NULL); + avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL); } done: diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index e3b12bdc318e..7d65b0b3fc4f 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2414,6 +2414,11 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v32q BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) +/* VPCLMULQDQ */ +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) + /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 072e49bbc0fb..de1b0e299b68 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -486,6 +486,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, } if (isa_flag2 & OPTION_MASK_ISA_VAES) def_or_undef (parse_in, "__VAES__"); + if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ) + def_or_undef (parse_in, "__VPCLMULQDQ__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 48d5640f84ed..ef321d32c0ba 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2751,6 +2751,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, ISAs come first. Target string will be displayed in the same order. */ static struct ix86_target_opts isa2_opts[] = { + { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-mmpx", OPTION_MASK_ISA_MPX }, { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 }, { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI }, @@ -2765,6 +2766,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, }; static struct ix86_target_opts isa_opts[] = { + { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ }, { "-mgfni", OPTION_MASK_ISA_GFNI }, { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, @@ -2811,7 +2813,6 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, - { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-msahf", OPTION_MASK_ISA_SAHF }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mcrc32", OPTION_MASK_ISA_CRC32 }, @@ -3998,8 +3999,8 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; if (processor_alias_table[i].flags & PTA_CX16 - && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16; + && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; @@ -5330,6 +5331,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("ibt", OPT_mibt), IX86_ATTR_ISA ("shstk", OPT_mshstk), IX86_ATTR_ISA ("vaes", OPT_mvaes), + IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -35376,10 +35378,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, at all, -m64 is a whole TU option. */ if (((ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI)) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ)) && !(ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ) & ix86_isa_flags)) || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL) && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 01fd6ce52bbd..7da8573bc405 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -111,6 +111,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_GFNI_P(x) TARGET_ISA_GFNI_P(x) #define TARGET_VAES TARGET_ISA_VAES #define TARGET_VAES_P(x) TARGET_ISA_VAES_P(x) +#define TARGET_VPCLMULQDQ TARGET_ISA_VPCLMULQDQ +#define TARGET_VPCLMULQDQ_P(x) TARGET_ISA_VPCLMULQDQ_P(x) #define TARGET_BMI TARGET_ISA_BMI #define TARGET_BMI_P(x) TARGET_ISA_BMI_P(x) #define TARGET_BMI2 TARGET_ISA_BMI2 diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 04e391d311d0..0e58d3862f5d 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -789,6 +789,10 @@ mvaes Target Report Mask(ISA_VAES) Var(ix86_isa_flags2) Save Support VAES built-in functions and code generation. +mvpclmulqdq +Target Report Mask(ISA_VPCLMULQDQ) Var(ix86_isa_flags) Save +Support VPCLMULQDQ built-in functions and code generation. + mbmi Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save Support BMI built-in functions and code generation. @@ -854,7 +858,7 @@ Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save Support TBM built-in functions and code generation. mcx16 -Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save +Target Report Mask(ISA_CX16) Var(ix86_isa_flags2) Save Support code generation of cmpxchg16b instruction. msahf diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index a6e27dd43a3f..7fcaa695b0a7 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -104,6 +104,8 @@ #include +#include + #ifndef __RDRND__ #pragma GCC push_options #pragma GCC target("rdrnd") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c1469f457f5d..20e7b160ac9a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -178,6 +178,9 @@ UNSPEC_VAESDECLAST UNSPEC_VAESENC UNSPEC_VAESENCLAST + + ;; For VPCLMULQDQ support + UNSPEC_VPCLMULQDQ ]) (define_c_enum "unspecv" [ @@ -340,6 +343,9 @@ (define_mode_iterator VI8 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) +(define_mode_iterator VI8_FVL + [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VI8_AVX512VL [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) @@ -20498,3 +20504,13 @@ "TARGET_VAES" "vaesenclast\t{%2, %1, %0|%0, %1, %2}" ) + +(define_insn "vpclmulqdq_" + [(set (match_operand:VI8_FVL 0 "register_operand" "=v") + (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v") + (match_operand:VI8_FVL 2 "vector_operand" "vm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_VPCLMULQDQ))] + "TARGET_VPCLMULQDQ" + "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "mode" "DI")]) diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h new file mode 100644 index 000000000000..483e1602c002 --- /dev/null +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -0,0 +1,108 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _VPCLMULQDQINTRIN_H_INCLUDED +#define _VPCLMULQDQINTRIN_H_INCLUDED + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512f") +#define __DISABLE_VPCLMULQDQF__ +#endif /* __VPCLMULQDQF__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C) +{ + return (__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)__A, + (__v8di) __B, __C); +} +#else +#define _mm512_clmulepi64_epi128(A, B, C) \ + ((__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQF__ +#undef __DISABLE_VPCLMULQDQF__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQF__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQVL__ +#endif /* __VPCLMULQDQVL__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_clmulepi64_epi128 (__m128i __A, __m128i __B, const int __C) +{ + return (__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)__A, + (__v2di) __B, __C); +} +#else +#define _mm_clmulepi64_epi128(A, B, C) \ + ((__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQVL__ +#undef __DISABLE_VPCLMULQDQVL__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQVL__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQ__ +#endif /* __VPCLMULQDQ__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_clmulepi64_epi128 (__m256i __A, __m256i __B, const int __C) +{ + return (__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)__A, + (__v4di) __B, __C); +} +#else +#define _mm256_clmulepi64_epi128(A, B, C) \ + ((__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQ__ +#undef __DISABLE_VPCLMULQDQ__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQ__ */ + + +#endif /* _VPCLMULQDQINTRIN_H_INCLUDED */ + diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2049c2789db0..cde0c73f9e00 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1208,6 +1208,7 @@ See RS/6000 and PowerPC Options. -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol -mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol -mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol +-mvpclmulqdq @gol -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol @@ -26160,13 +26161,17 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mvaes @opindex mvaes +@need 200 +@itemx -mvpclmulqdq +@opindex mvpclmulqdq These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2, -GFNI, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. Each has a -corresponding @option{-mno-} option to disable use of these instructions. +GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. +Each has a corresponding @option{-mno-} option to disable use of these +instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb9e01223d42..aaa8c0551fe1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2017-12-20 Julia Koval + + * gcc.target/i386/avx-1.c: Handle new intrinsics. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/avx512-check.h: Handle bit_VPCLMULQDQ. + * gcc.target/i386/avx512f-vpclmulqdq-2.c: New test. + * gcc.target/i386/avx512vl-vpclmulqdq-2.c: Ditto. + * gcc.target/i386/vpclmulqdq.c: Ditto. + * gcc.target/i386/i386.exp (check_effective_target_vpclmulqdq): New. + 2017-12-19 Martin Sebor PR c++/83394 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index bbb4ae2a24a0..db772449c42e 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -655,6 +655,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include #include #include diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 8ea8751990e0..2d174f9df5d1 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -86,6 +86,9 @@ main () #endif #ifdef VAES && (ecx & bit_VAES) +#endif +#ifdef VPCLMULQDQ + && (ecx & bit_VPCLMULQDQ) #endif && avx512f_os_support ()) { diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c new file mode 100644 index 000000000000..fe746a1095cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512F + +#define VPCLMULQDQ +#include "avx512f-helper.h" + +#define SIZE (AVX512F_LEN / 64) + +#include "avx512f-mask-type.h" + +static void +CALC (unsigned long long *r, unsigned long long *s1, unsigned long long *s2, unsigned char imm) +{ + for (int len = 0; len < SIZE/2; len++) + { + unsigned long long src1, src2; + src1 = (imm & 1) ? s1[len*2 + 1] : s1[len*2]; + src2 = ((imm >> 4) & 1) ? s2[len*2 + 1] : s2[len*2]; + for (int i = 0; i < 64; i++) + { + if ((src1 >> i) & 1) + { + if (i) + r[len*2 + 1] ^= src2 >> (64 - i); + r[len*2] ^= src2 << i; + } + } + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_q) res, src1, src2; + unsigned long long res_ref[SIZE]; + unsigned char imm = 1; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 0xFFFFFFFFF + i; + src2.a[i] = 0xFFFFFFFFF + i*i; + } + + for (i = 0; i < SIZE; i++) + { + res.a[i] = 0; + res_ref[i] = 0; + } + + CALC (res_ref, src1.a, src2.a, imm); + res.x = INTRINSIC (_clmulepi64_epi128) (src1.x, src2.x, imm); + + if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c new file mode 100644 index 000000000000..61288a00307c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index bebc6dd7935c..0b53023945c0 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -470,6 +470,19 @@ proc check_effective_target_avx512vaes { } { } "-mvaes" ] } +# Return 1 if vpclmulqdq instructions can be compiled. +proc check_effective_target_vpclmulqdq { } { + return [check_no_compiler_messages vpclmulqdq object { + typedef long long __v4di __attribute__ ((__vector_size__ (32))); + + __v4di + _mm256_clmulepi64_epi128 (__v4di __A, __v4di __B) + { + return (__v4di) __builtin_ia32_vpclmulqdq_v4di (__A, __B, 0); + } + } "-mvpclmulqdq -mavx512vl" ] +} + # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 89feeca7d3fb..62f87f00b074 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -672,4 +672,9 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index c1ae48b87d9a..65f6ccffe250 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -633,7 +633,6 @@ #define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E) #define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E) - /* avx512vbmi2intrin.h */ #define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1) #define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E) __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E) @@ -672,6 +671,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2") +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq") #include diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c new file mode 100644 index 000000000000..0ce1a061298f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512i x1, x2; +volatile __m256i x3, x4; +volatile __m128i x5, x6; + +void extern +avx512vl_test (void) +{ + x1 = _mm512_clmulepi64_epi128(x1, x2, 3); + x3 = _mm256_clmulepi64_epi128(x3, x4, 3); + x5 = _mm_clmulepi64_epi128(x5, x6, 3); +} +