]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512bf16vlintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512bf16vlintrin.h
CommitLineData
83ffe9cd 1/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
4f0e90fa
HL
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
29#define _AVX512BF16VLINTRIN_H_INCLUDED
30
/* Unless both AVX512VL and AVX512BF16 are already enabled, save the current
   target options, enable both for the rest of this header, and record that
   fact in __DISABLE_AVX512BF16VL__.  */
#if !(defined (__AVX512VL__) && defined (__AVX512BF16__))
#pragma GCC push_options
#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* !(__AVX512VL__ && __AVX512BF16__) */
36
37/* Internal data types for implementing the intrinsics. */
87235f1e 38typedef __bf16 __v16bf __attribute__ ((__vector_size__ (32)));
39typedef __bf16 __v8bf __attribute__ ((__vector_size__ (16)));
4f0e90fa
HL
40
41/* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
87235f1e 43typedef __bf16 __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
44typedef __bf16 __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
45
46typedef __bf16 __bfloat16;
4f0e90fa 47
/* Unmasked vcvtneps2bf16 on a 256-bit float vector: forwards A to
   __builtin_ia32_cvtneps2bf16_v8sf and views the result as __m128bh.
   The whole expansion is parenthesised so that the cast stays attached to
   the builtin call when the macro is used inside a larger expression
   (for example when followed by a subscript).  */
#define _mm256_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v8sf (A))
/* Unmasked vcvtneps2bf16 on a 128-bit float vector: forwards A to
   __builtin_ia32_cvtneps2bf16_v4sf and views the result as __m128bh.
   The whole expansion is parenthesised so that the cast stays attached to
   the builtin call when the macro is used inside a larger expression
   (for example when followed by a subscript).  */
#define _mm_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v4sf (A))
52
4f0e90fa
HL
53/* vcvtne2ps2bf16 */
54
55extern __inline __m256bh
56__attribute__((__gnu_inline__, __always_inline__, __artificial__))
57_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
58{
87235f1e 59 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf(__A, __B);
4f0e90fa
HL
60}
61
62extern __inline __m256bh
63__attribute__((__gnu_inline__, __always_inline__, __artificial__))
64_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
65{
87235f1e 66 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_mask(__C, __D, __A, __B);
4f0e90fa
HL
67}
68
69extern __inline __m256bh
70__attribute__((__gnu_inline__, __always_inline__, __artificial__))
71_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
72{
87235f1e 73 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_maskz(__B, __C, __A);
4f0e90fa
HL
74}
75
76extern __inline __m128bh
77__attribute__((__gnu_inline__, __always_inline__, __artificial__))
78_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
79{
87235f1e 80 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf(__A, __B);
4f0e90fa
HL
81}
82
83extern __inline __m128bh
84__attribute__((__gnu_inline__, __always_inline__, __artificial__))
85_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
86{
87235f1e 87 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_mask(__C, __D, __A, __B);
4f0e90fa
HL
88}
89
90extern __inline __m128bh
91__attribute__((__gnu_inline__, __always_inline__, __artificial__))
92_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
93{
87235f1e 94 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_maskz(__B, __C, __A);
4f0e90fa
HL
95}
96
97/* vcvtneps2bf16 */
98
4f0e90fa
HL
99extern __inline __m128bh
100__attribute__((__gnu_inline__, __always_inline__, __artificial__))
101_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
102{
103 return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
104}
105
106extern __inline __m128bh
107__attribute__((__gnu_inline__, __always_inline__, __artificial__))
108_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
109{
110 return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
111}
112
4f0e90fa
HL
113extern __inline __m128bh
114__attribute__((__gnu_inline__, __always_inline__, __artificial__))
115_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
116{
117 return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
118}
119
120extern __inline __m128bh
121__attribute__((__gnu_inline__, __always_inline__, __artificial__))
122_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
123{
124 return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
125}
126
127/* vdpbf16ps */
128
129extern __inline __m256
130__attribute__((__gnu_inline__, __always_inline__, __artificial__))
131_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
132{
133 return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
134}
135
136extern __inline __m256
137__attribute__((__gnu_inline__, __always_inline__, __artificial__))
138_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
139{
140 return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
141}
142
143extern __inline __m256
144__attribute__((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
146{
147 return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
148}
149
150extern __inline __m128
151__attribute__((__gnu_inline__, __always_inline__, __artificial__))
152_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
153{
154 return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
155}
156
157extern __inline __m128
158__attribute__((__gnu_inline__, __always_inline__, __artificial__))
159_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
160{
161 return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
162}
163
164extern __inline __m128
165__attribute__((__gnu_inline__, __always_inline__, __artificial__))
166_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
167{
168 return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
169}
170
87235f1e 171extern __inline __bf16
61e53698 172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173_mm_cvtness_sbh (float __A)
174{
175 __v4sf __V = {__A, 0, 0, 0};
87235f1e 176 __v8bf __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
177 (__v8bf)_mm_undefined_si128 (), (__mmask8)-1);
61e53698 178 return __R[0];
179}
180
181extern __inline __m128
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm_cvtpbh_ps (__m128bh __A)
184{
185 return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
186 (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
187}
188
189extern __inline __m256
190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191_mm256_cvtpbh_ps (__m128bh __A)
192{
193 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
194 (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
195}
196
197extern __inline __m128
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
200{
201 return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
202 (__m128i)_mm_maskz_cvtepi16_epi32 (
203 (__mmask8)__U, (__m128i)__A), 16));
204}
205
206extern __inline __m256
207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
209{
210 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
211 (__m256i)_mm256_maskz_cvtepi16_epi32 (
212 (__mmask8)__U, (__m128i)__A), 16));
213}
214
215extern __inline __m128
216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
217_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
218{
219 return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
220 (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
221 (__m128i)__A), 16));
222}
223
224extern __inline __m256
225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
226_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
227{
228 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
229 (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
230 (__m128i)__A), 16));
231}
232
4f0e90fa
HL
/* If this header pushed target options earlier (signalled by
   __DISABLE_AVX512BF16VL__), clear the marker and restore them.  */
#if defined (__DISABLE_AVX512BF16VL__)
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16VL__ */
237
238#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */