From 825d0041380378d978dfed6ea313e2ff9d2fce4c Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Tue, 28 Sep 2021 15:30:14 +0800 Subject: [PATCH] Support Intel AVX-IFMA gcc/ * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET, OPTION_MASK_ISA2_AVX2_UNSET): New macro. (ix86_handle_option): Handle -mavxifma. * common/config/i386/i386-cpuinfo.h (processor_types): Add FEATURE_AVXIFMA. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for avxifma. * common/config/i386/cpuinfo.h (get_available_features): Detect avxifma. * config.gcc: Add avxifmaintrin.h * config/i386/avx512ifmavlintrin.h: (_mm_madd52lo_epu64): Change to macro. (_mm_madd52hi_epu64): Likewise. (_mm256_madd52lo_epu64): Likewise. (_mm256_madd52hi_epu64): Likewise. * config/i386/avxifmaintrin.h: New header. * config/i386/cpuid.h (bit_AVXIFMA): New. * config/i386/i386-builtin.def: Add new builtins, and correct pattern names for AVX512IFMA. * config/i386/i386-builtins.cc (def_builtin): Handle AVX-IFMA builtins like AVX-VNNI. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AVXIFMA__. * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): Relax ISA masks for AVXIFMA. * config/i386/i386-isa.def: Add AVXIFMA. * config/i386/i386-options.cc (isa2_opts): Add -mavxifma. (ix86_valid_target_attribute_inner_p): Handle avxifma. * config/i386/i386.md (isa): Add attr avxifma and avxifmavl. * config/i386/i386.opt: Add option -mavxifma. * config/i386/immintrin.h: Inculde avxifmaintrin.h. * config/i386/sse.md (avx_vpmadd52_): Remove. (vpamdd52): Remove. (vpamdd52huq_maskz): Rename to ... (vpmadd52huq_maskz): ... this. (vpamdd52luq_maskz): Rename to ... (vpmadd52luq_maskz): ... this. (vpmadd52): New define_insn. (vpmadd52v8di): Likewise. (vpmadd52_maskz_1): Likewise. (vpamdd52_mask): Rename to ... (vpmadd52_mask): ... this. * doc/invoke.texi: Document -mavxifma. * doc/extend.texi: Document avxifma. * doc/sourcebuild.texi: Document target avxifma. gcc/testsuite/ * gcc.target/i386/avx-check.h: Add avxifma check. * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Remane.. * gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this. * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test. * gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto. * gcc.target/i386/avx-ifma-1.c: Ditto. * gcc.target/i386/avx-ifma-2.c: Ditto. * gcc.target/i386/avx-ifma-3.c: Ditto. * gcc.target/i386/avx-ifma-4.c: Ditto. * gcc.target/i386/avx-ifma-5.c: Ditto. * gcc.target/i386/avx-ifma-6.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto. * gcc.target/i386/sse-12.c: Add -mavxifma. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avxifma): New. --- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.cc | 20 ++++- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 3 +- gcc/config/i386/avx512ifmavlintrin.h | 59 +++++--------- gcc/config/i386/avxifmaintrin.h | 78 +++++++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin.def | 28 ++++--- gcc/config/i386/i386-builtins.cc | 8 +- gcc/config/i386/i386-c.cc | 2 + gcc/config/i386/i386-expand.cc | 13 ++++ gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.cc | 4 +- gcc/config/i386/i386.md | 6 +- gcc/config/i386/i386.opt | 5 ++ gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 56 ++++++++++--- gcc/doc/extend.texi | 5 ++ gcc/doc/invoke.texi | 9 ++- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/avx-check.h | 6 +- gcc/testsuite/gcc.target/i386/avx-ifma-1.c | 20 +++++ gcc/testsuite/gcc.target/i386/avx-ifma-2.c | 21 +++++ gcc/testsuite/gcc.target/i386/avx-ifma-3.c | 16 ++++ gcc/testsuite/gcc.target/i386/avx-ifma-4.c | 16 ++++ gcc/testsuite/gcc.target/i386/avx-ifma-5.c | 10 +++ gcc/testsuite/gcc.target/i386/avx-ifma-6.c | 20 +++++ .../gcc.target/i386/avx-ifma-vpmaddhuq-2.c | 72 +++++++++++++++++ .../gcc.target/i386/avx-ifma-vpmaddluq-2.c | 61 +++++++++++++++ ...pmaddhuq-1.c => avx512ifma-vpmaddhuq-1a.c} | 0 .../gcc.target/i386/avx512ifma-vpmaddhuq-1b.c | 33 ++++++++ ...pmaddluq-1.c => avx512ifma-vpmaddluq-1a.c} | 0 .../gcc.target/i386/avx512ifma-vpmaddluq-1b.c | 33 ++++++++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- gcc/testsuite/lib/target-supports.exp | 12 +++ 43 files changed, 563 insertions(+), 82 deletions(-) create mode 100644 gcc/config/i386/avxifmaintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-5.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-6.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c rename gcc/testsuite/gcc.target/i386/{avx512ifma-vpmaddhuq-1.c => avx512ifma-vpmaddhuq-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c rename gcc/testsuite/gcc.target/i386/{avx512ifma-vpmaddluq-1.c => avx512ifma-vpmaddluq-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index b5c1b21e5547..9bb21c6caccb 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -793,6 +793,8 @@ get_available_features (struct __processor_model *cpu_model, { if (eax & bit_AVXVNNI) set_feature (FEATURE_AVXVNNI); + if (eax & bit_AVXIFMA) + set_feature (FEATURE_AVXIFMA); } if (avx512_usable) { diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index d6a68dc9b1df..4de7906b2477 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -76,6 +76,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512IFMA_SET \ (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA2_AVXIFMA_SET OPTION_MASK_ISA2_AVXIFMA #define OPTION_MASK_ISA_AVX512VBMI_SET \ (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET) #define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS @@ -212,7 +213,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX2_UNSET \ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) #define OPTION_MASK_ISA2_AVX2_UNSET \ - (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET) + (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \ + | OPTION_MASK_ISA2_AVX512F_UNSET) #define OPTION_MASK_ISA_AVX512F_UNSET \ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ @@ -230,6 +232,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET) #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL #define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA +#define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA #define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI #define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW @@ -1124,6 +1127,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavxifma: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXIFMA_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXIFMA_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 643fbd973784..968f9a56a6cd 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -240,6 +240,7 @@ enum processor_features FEATURE_X86_64_V2, FEATURE_X86_64_V3, FEATURE_X86_64_V4, + FEATURE_AVXIFMA, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 2d0646a68f83..b05b4bb8f0d8 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL) ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL) ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL) + ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 2fbf2e6fa699..dab622892edb 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -421,7 +421,8 @@ i[34567]86-*-* | x86_64-*-*) tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h hresetintrin.h keylockerintrin.h avxvnniintrin.h - mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h" + mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h + avxifmaintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h index a7a50d89df4c..506dce8e4772 100644 --- a/gcc/config/i386/avx512ifmavlintrin.h +++ b/gcc/config/i386/avx512ifmavlintrin.h @@ -34,45 +34,26 @@ #define __DISABLE_AVX512IFMAVL__ #endif /* __AVX512IFMAVL__ */ -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} +#define _mm_madd52lo_epu64(A, B, C) \ + ((__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) (A), \ + (__v2di) (B), \ + (__v2di) (C))) + +#define _mm_madd52hi_epu64(A, B, C) \ + ((__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) (A), \ + (__v2di) (B), \ + (__v2di) (C))) + +#define _mm256_madd52lo_epu64(A, B, C) \ + ((__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) (A), \ + (__v4di) (B), \ + (__v4di) (C))) + + +#define _mm256_madd52hi_epu64(A, B, C) \ + ((__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) (A), \ + (__v4di) (B), \ + (__v4di) (C))) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avxifmaintrin.h b/gcc/config/i386/avxifmaintrin.h new file mode 100644 index 000000000000..3878d10f991d --- /dev/null +++ b/gcc/config/i386/avxifmaintrin.h @@ -0,0 +1,78 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVXIFMAINTRIN_H_INCLUDED +#define _AVXIFMAINTRIN_H_INCLUDED + +#ifndef __AVXIFMA__ +#pragma GCC push_options +#pragma GCC target("avxifma") +#define __DISABLE_AVXIFMA__ +#endif /* __AVXIFMA__ */ + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +#ifdef __DISABLE_AVXIFMA__ +#undef __DISABLE_AVXIFMA__ +#pragma GCC pop_options +#endif /* __DISABLE_AVXIFMA__ */ + +#endif /* _AVXIFMAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index a4c2fed7edab..9885699efd50 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -28,6 +28,7 @@ #define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) #define bit_HRESET (1 << 22) +#define bit_AVXIFMA (1 << 23) /* %ecx */ #define bit_SSE3 (1 << 0) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index dea52a28d285..d22d79df0548 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2486,18 +2486,22 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builti BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) /* AVX512IFMA */ -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv4di, "__builtin_ia32_vpmadd52luq256", IX86_BUINTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv4di, "__builtin_ia32_vpmadd52huq256", IX86_BUINTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv2di, "__builtin_ia32_vpmadd52luq128", IX86_BUINTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUINTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) /* AVX512VBMI */ BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 76668ccf4c1b..9412cf1acc86 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -279,10 +279,12 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - /* "Unified" builtin used by either AVXVNNI intrinsics or AVX512VNNIVL - non-mask intrinsics should be defined whenever avxvnni - or avx512vnni && avx512vl exist. */ + /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics + or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be + defined whenever avxvnni/avxifma or avx512vnni/avxifma && + avx512vl exist. */ || (mask2 == OPTION_MASK_ISA2_AVXVNNI) + || (mask2 == OPTION_MASK_ISA2_AVXIFMA) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index eb0e3b36a764..3494ec035d59 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__WIDEKL__"); if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) def_or_undef (parse_in, "__AVXVNNI__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA) + def_or_undef (parse_in, "__AVXIFMA__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 70fd82b27d44..0e8ba1445510 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12367,6 +12367,8 @@ ix86_check_builtin_isa_match (unsigned int fcode, OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or OPTION_MASK_ISA2_AVXVNNI + (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512IFMA) or + OPTION_MASK_ISA2_AVXIFMA where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */ @@ -12396,6 +12398,17 @@ ix86_check_builtin_isa_match (unsigned int fcode, isa2 |= OPTION_MASK_ISA2_AVXVNNI; } + if ((((bisa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + || (bisa2 & OPTION_MASK_ISA2_AVXIFMA) != 0) + && (((isa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + || (isa2 & OPTION_MASK_ISA2_AVXIFMA) != 0)) + { + isa |= OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL; + isa2 |= OPTION_MASK_ISA2_AVXIFMA; + } + if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE /* __builtin_ia32_maskmovq requires MMX registers. */ && fcode != IX86_BUILTIN_MASKMOVQ) diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 83659d0bea43..6e0254ce4183 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -109,3 +109,4 @@ DEF_PTA(KL) DEF_PTA(WIDEKL) DEF_PTA(AVXVNNI) DEF_PTA(AVX512FP16) +DEF_PTA(AVXIFMA) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index acb2291e70f8..5facb64c2a84 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -226,7 +226,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mkl", OPTION_MASK_ISA2_KL }, { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, - { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA } }; static struct ix86_target_opts isa_opts[] = { @@ -1072,6 +1073,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("hreset", OPT_mhreset), IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), + IX86_ATTR_ISA ("avxifma", OPT_mavxifma), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6688d92b63c0..93538c5b3c6b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -835,7 +835,8 @@ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, - avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16" + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, + avx512ifmavl" (const_string "base")) ;; Define instruction set of MMX instructions @@ -891,6 +892,9 @@ (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") (eq_attr "isa" "avx512fp16") (symbol_ref "TARGET_AVX512FP16") + (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA") + (eq_attr "isa" "avx512ifmavl") + (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 0dbaacb57ed2..36e28b7063d6 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1214,3 +1214,8 @@ Do not use GOT to access external symbols. -param=x86-stlf-window-ninsns= Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param Instructions number above which STFL stall penalty can be compensated. + +mavxifma +Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and +AVXIFMA built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 6afd78c2b6f5..e9d4e9752436 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -44,6 +44,8 @@ #include +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 076064f97e64..33f306a0c757 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -27867,7 +27867,7 @@ (define_int_attr vpmadd52type [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")]) -(define_expand "vpamdd52huq_maskz" +(define_expand "vpmadd52huq_maskz" [(match_operand:VI8_AVX512VL 0 "register_operand") (match_operand:VI8_AVX512VL 1 "register_operand") (match_operand:VI8_AVX512VL 2 "register_operand") @@ -27875,13 +27875,13 @@ (match_operand: 4 "register_operand")] "TARGET_AVX512IFMA" { - emit_insn (gen_vpamdd52huq_maskz_1 ( + emit_insn (gen_vpmadd52huq_maskz_1 ( operands[0], operands[1], operands[2], operands[3], CONST0_RTX (mode), operands[4])); DONE; }) -(define_expand "vpamdd52luq_maskz" +(define_expand "vpmadd52luq_maskz" [(match_operand:VI8_AVX512VL 0 "register_operand") (match_operand:VI8_AVX512VL 1 "register_operand") (match_operand:VI8_AVX512VL 2 "register_operand") @@ -27889,26 +27889,58 @@ (match_operand: 4 "register_operand")] "TARGET_AVX512IFMA" { - emit_insn (gen_vpamdd52luq_maskz_1 ( + emit_insn (gen_vpmadd52luq_maskz_1 ( operands[0], operands[1], operands[2], operands[3], CONST0_RTX (mode), operands[4])); DONE; }) -(define_insn "vpamdd52" - [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") - (unspec:VI8_AVX512VL - [(match_operand:VI8_AVX512VL 1 "register_operand" "0") - (match_operand:VI8_AVX512VL 2 "register_operand" "v") - (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] +(define_insn "vpmadd52v8di" + [(set (match_operand:V8DI 0 "register_operand" "=v") + (unspec:V8DI + [(match_operand:V8DI 1 "register_operand" "0") + (match_operand:V8DI 2 "register_operand" "v") + (match_operand:V8DI 3 "nonimmediate_operand" "vm")] VPMADD52))] "TARGET_AVX512IFMA" - "vpmadd52\t{%3, %2, %0|%0, %2, %3}" + "vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_insn "vpmadd52" + [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,v") + (unspec:VI8_AVX2 + [(match_operand:VI8_AVX2 1 "register_operand" "0,0") + (match_operand:VI8_AVX2 2 "register_operand" "x,v") + (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")] + VPMADD52))] + "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)" + "@ + %{vex%} vpmadd52\t{%3, %2, %0|%0, %2, %3} + vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "isa" "avxifma,avx512ifmavl") + (set_attr "type" "ssemuladd") + (set_attr "prefix" "vex,evex") + (set_attr "mode" "")]) + +(define_insn "vpmadd52_maskz_1" + [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI8_AVX512VL + (unspec:VI8_AVX512VL + [(match_operand:VI8_AVX512VL 1 "register_operand" "0") + (match_operand:VI8_AVX512VL 2 "register_operand" "v") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] + VPMADD52) + (match_operand:VI8_AVX512VL 4 "const0_operand" "C") + (match_operand: 5 "register_operand" "Yk")))] + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "vpamdd52_mask" +(define_insn "vpmadd52_mask" [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") (vec_merge:VI8_AVX512VL (unspec:VI8_AVX512VL diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 04af0584d82c..f205542dae91 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7060,6 +7060,11 @@ Enable/disable the generation of the WIDEKL instructions. @cindex @code{target("avxvnni")} function attribute, x86 Enable/disable the generation of the AVXVNNI instructions. +@item avxifma +@itemx no-avxifma +@cindex @code{target("avxifma")} function attribute, x86 +Enable/disable the generation of the AVXIFMA instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index cd4d3c1d72ce..a4850dd9123e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options. -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol --mavx512fp16 @gol +-mavx512fp16 -mavxifma @gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -32904,6 +32904,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mwidekl @opindex mwidekl +@need 200 +@itemx -mavxifma +@opindex mavxifma These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, @@ -32913,8 +32916,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16 -or CLDEMOTE extended instruction sets. Each has a corresponding +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16, +AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding @option{-mno-} option to disable use of these instructions. These extensions are also available as built-in functions: see diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index c81e2ffd43a3..0173acf4a65d 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2490,6 +2490,9 @@ Target supports the execution of @code{avx512f} instructions. @item avx512vp2intersect Target supports the execution of @code{avx512vp2intersect} instructions. +@item avxifma +Target supports the execution of @code{avxifma} instructions. + @item amx_tile Target supports the execution of @code{amx-tile} instructions. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index fba3d1ac684c..5388606779b7 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 5cc0fa83457c..86cedd3d32fb 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h b/gcc/testsuite/gcc.target/i386/avx-check.h index 7ddca9d7b809..24ee6ab4efde 100644 --- a/gcc/testsuite/gcc.target/i386/avx-check.h +++ b/gcc/testsuite/gcc.target/i386/avx-check.h @@ -22,7 +22,11 @@ main () /* Run AVX test only if host has AVX support. */ if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE)) - && avx_os_support ()) + && avx_os_support () +#ifdef AVXIFMA + && __builtin_cpu_supports ("avxifma") +#endif + ) { do_test (); #ifdef DEBUG diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-1.c b/gcc/testsuite/gcc.target/i386/avx-ifma-1.c new file mode 100644 index 000000000000..a0cfc446e4d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxifma_test (void) +{ + x = _mm256_madd52hi_epu64 (x, y, z); + x = _mm256_madd52lo_epu64 (x, y, z); + x_ = _mm_madd52hi_epu64 (x_, y_, z_); + x_ = _mm_madd52lo_epu64 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-2.c b/gcc/testsuite/gcc.target/i386/avx-ifma-2.c new file mode 100644 index 000000000000..5f82ffec3e2f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +__attribute__((target("avxifma"))) +void +avxifma_test (void) +{ + x = _mm256_madd52hi_epu64 (x, y, z); + x = _mm256_madd52lo_epu64 (x, y, z); + x_ = _mm_madd52hi_epu64 (x_, y_, z_); + x_ = _mm_madd52lo_epu64 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-3.c b/gcc/testsuite/gcc.target/i386/avx-ifma-3.c new file mode 100644 index 000000000000..536c1de96c5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avxifma"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avx512ifma,avx512vl"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-4.c b/gcc/testsuite/gcc.target/i386/avx-ifma-4.c new file mode 100644 index 000000000000..62d264975106 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-4.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avx512ifma,avx512vl"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avxifma"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-5.c b/gcc/testsuite/gcc.target/i386/avx-ifma-5.c new file mode 100644 index 000000000000..b6110e5f7f0d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-5.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavxifma -mavx512ifma -mavx512vl" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +#include + +#include "avx-ifma-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-6.c b/gcc/testsuite/gcc.target/i386/avx-ifma-6.c new file mode 100644 index 000000000000..6388373123ca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-6.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxifma_test (void) +{ + x = _mm256_madd52hi_avx_epu64 (x, y, z); + x = _mm256_madd52lo_avx_epu64 (x, y, z); + x_ = _mm_madd52hi_avx_epu64 (x_, y_, z_); + x_ = _mm_madd52lo_avx_epu64 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c new file mode 100644 index 000000000000..c9efee330915 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxifma" } */ +/* { dg-require-effective-target avxifma } */ +#define AVXIFMA +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +void +CALC (long long *r, long long *s1, long long *s2, long long *s3, int size) +{ + int i; + long long a,b; + + for (i = 0; i < size; i++) + { + /* Simulate higher 52 bits out of 104 bit, + by shifting opernads with 0 in lower 26 bits. */ + a = s2[i] >> 26; + b = s3[i] >> 26; + r[i] = a * b + s1[i]; + } +} + +void +TEST (void) +{ + union256i_q src1_256, src2_256, dst_256; + union128i_q src1_128, src2_128, dst_128; + long long dst_ref_256[4], dst_ref_128[2]; + int i; + + for (i = 0; i < 4; i++) + { + src1_256.a[i] = 15 + 3467 * i; + src2_256.a[i] = 9217 + i; + src1_256.a[i] = src1_256.a[i] << 26; + src2_256.a[i] = src2_256.a[i] << 26; + src1_256.a[i] &= ((1LL << 52) - 1); + src2_256.a[i] &= ((1LL << 52) - 1); + dst_256.a[i] = -1; + } + + for (i = 0; i < 2; i++) + { + src1_128.a[i] = 16 + 3467 * i; + src2_128.a[i] = 9127 + i; + src1_128.a[i] = src1_128.a[i] << 26; + src2_128.a[i] = src2_128.a[i] << 26; + src1_128.a[i] &= ((1LL << 52) - 1); + src2_128.a[i] &= ((1LL << 52) - 1); + dst_128.a[i] = -1; + } + + CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4); + dst_256.x = _mm256_madd52hi_avx_epu64 (dst_256.x, src1_256.x, src2_256.x); + if (check_union256i_q (dst_256, dst_ref_256)) + abort (); + + CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2); + dst_128.x = _mm_madd52hi_avx_epu64 (dst_128.x, src1_128.x, src2_128.x); + if (check_union128i_q (dst_128, dst_ref_128)) + abort (); + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c new file mode 100644 index 000000000000..600978ea9ad5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxifma" } */ +/* { dg-require-effective-target avxifma } */ +#define AVXIFMA +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +void +CALC (unsigned long long *r, unsigned long long *s1, + unsigned long long *s2, unsigned long long *s3, + int size) +{ + int i; + + for (i = 0; i < size; i++) + { + r[i] = s2[i] * s3[i] + s1[i]; + } +} + +void +TEST (void) +{ + union256i_q src1_256, src2_256, dst_256; + union128i_q src1_128, src2_128, dst_128; + unsigned long long dst_ref_256[4], dst_ref_128[2]; + int i; + + for (i = 0; i < 4; i++) + { + src1_256.a[i] = 3450 * i; + src2_256.a[i] = 7863 * i; + dst_256.a[i] = 117; + } + + for (i = 0; i < 2; i++) + { + src1_128.a[i] = 3540 * i; + src2_128.a[i] = 7683 * i; + dst_128.a[i] = 117; + } + + CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4); + dst_256.x = _mm256_madd52lo_avx_epu64 (dst_256.x, src1_256.x, src2_256.x); + if (check_union256i_q (dst_256, dst_ref_256)) + abort (); + + CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2); + dst_128.x = _mm_madd52lo_avx_epu64 (dst_128.x, src1_128.x, src2_128.x); + if (check_union128i_q (dst_128, dst_ref_128)) + abort (); + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c new file mode 100644 index 000000000000..67e94baa01b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52hi_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52hi_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52hi_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52hi_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52hi_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52hi_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52hi_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52hi_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52hi_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c new file mode 100644 index 000000000000..4b8ea27f403b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52lo_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52lo_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52lo_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52lo_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52lo_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52lo_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52lo_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52lo_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52lo_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index b76dddb86a2c..466555c0d064 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl"))); extern void test_widekl (void) __attribute__((__target__("widekl"))); extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); +extern void test_avxifma (void) __attribute__((__target__("avxifma"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); +extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 375d4d1b4ded..fde56261d8f4 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index e285c307d008..bb29555babe1 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index f41493b93f3d..f2701ddaaf97 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 31492ef36970..3d196975b1e0 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f71a7b291573..d3a233f90fcb 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -843,6 +843,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #include diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 8b5995a2f414..220fbeedc1d0 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9522,6 +9522,18 @@ proc check_effective_target_avxvnni { } { } "-mavxvnni" ] } +# Return 1 if avxifma instructions can be compiled. +proc check_effective_target_avxifma { } { + return [check_no_compiler_messages avxifma object { + typedef long long __v4di __attribute__ ((__vector_size__ (32))); + __v4di + _mm256_maddlo_epu64 (__v4di __A, __v4di __B, __v4di __C) + { + return __builtin_ia32_vpmadd52luq256 (__A, __B, __C); + } + } "-O0 -mavxifma" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.47.2