]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512bf16intrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512bf16intrin.h
CommitLineData
a945c346 1/* Copyright (C) 2019-2024 Free Software Foundation, Inc.
4f0e90fa
HL
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512BF16INTRIN_H_INCLUDED
29#define _AVX512BF16INTRIN_H_INCLUDED
30
fd514717 31#if !defined (__AVX512BF16__) || defined (__EVEX512__)
4f0e90fa 32#pragma GCC push_options
fd514717 33#pragma GCC target("avx512bf16,no-evex512")
4f0e90fa
HL
34#define __DISABLE_AVX512BF16__
35#endif /* __AVX512BF16__ */
36
61e53698 37/* Convert One BF16 Data to One Single Float Data. */
38extern __inline float
39__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87235f1e 40_mm_cvtsbh_ss (__bf16 __A)
61e53698 41{
a1ecc560 42 return __builtin_ia32_cvtbf2sf (__A);
61e53698 43}
44
8108b22f
HJ
/* If the target options were switched for the non-EVEX512 section,
   clear the marker and restore the saved options.  */
#if defined (__DISABLE_AVX512BF16__)
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* defined (__DISABLE_AVX512BF16__) */
49
50#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
51#pragma GCC push_options
52#pragma GCC target("avx512bf16,evex512")
53#define __DISABLE_AVX512BF16_512__
54#endif /* __AVX512BF16_512__ */
55
56/* Internal data types for implementing the intrinsics. */
57typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
58
59/* The Intel API is flexible enough that we must allow aliasing with other
60 vector types, and their scalar components. */
61typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
62
4f0e90fa
HL
63/* vcvtne2ps2bf16 */
64
65extern __inline __m512bh
66__attribute__((__gnu_inline__, __always_inline__, __artificial__))
67_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
68{
87235f1e 69 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
4f0e90fa
HL
70}
71
72extern __inline __m512bh
73__attribute__((__gnu_inline__, __always_inline__, __artificial__))
74_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
75{
87235f1e 76 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B);
4f0e90fa
HL
77}
78
79extern __inline __m512bh
80__attribute__((__gnu_inline__, __always_inline__, __artificial__))
81_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
82{
87235f1e 83 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A);
4f0e90fa
HL
84}
85
86/* vcvtneps2bf16 */
87
88extern __inline __m256bh
89__attribute__((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_cvtneps_pbh (__m512 __A)
91{
92 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
93}
94
95extern __inline __m256bh
96__attribute__((__gnu_inline__, __always_inline__, __artificial__))
97_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
98{
99 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
100}
101
102extern __inline __m256bh
103__attribute__((__gnu_inline__, __always_inline__, __artificial__))
104_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
105{
106 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
107}
108
109/* vdpbf16ps */
110
111extern __inline __m512
112__attribute__((__gnu_inline__, __always_inline__, __artificial__))
113_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
114{
115 return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
116}
117
118extern __inline __m512
119__attribute__((__gnu_inline__, __always_inline__, __artificial__))
120_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
121{
122 return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
123}
124
125extern __inline __m512
126__attribute__((__gnu_inline__, __always_inline__, __artificial__))
127_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
128{
129 return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
130}
131
61e53698 132extern __inline __m512
133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
134_mm512_cvtpbh_ps (__m256bh __A)
135{
136 return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
137 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
138}
139
140extern __inline __m512
141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
142_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
143{
144 return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
145 (__m512i)_mm512_maskz_cvtepi16_epi32 (
146 (__mmask16)__U, (__m256i)__A), 16));
147}
148
149extern __inline __m512
150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
151_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
152{
153 return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
154 (__m512i)__S, (__mmask16)__U,
155 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
156}
157
8108b22f
HJ
/* If the target options were switched for the 512-bit section, clear
   the marker and restore the saved options.  */
#if defined (__DISABLE_AVX512BF16_512__)
#undef __DISABLE_AVX512BF16_512__
#pragma GCC pop_options
#endif /* defined (__DISABLE_AVX512BF16_512__) */
4f0e90fa
HL
162
163#endif /* _AVX512BF16INTRIN_H_INCLUDED */