]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512bf16vlintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512bf16vlintrin.h
CommitLineData
7adcbafe 1/* Copyright (C) 2019-2022 Free Software Foundation, Inc.
4f0e90fa
HL
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
29#define _AVX512BF16VLINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
32#pragma GCC push_options
33#pragma GCC target("avx512bf16,avx512vl")
34#define __DISABLE_AVX512BF16VL__
35#endif /* !__AVX512VL__ || !__AVX512BF16__ */
36
37/* Internal data types for implementing the intrinsics. */
/* __v16bh is a 32-byte vector of short elements and __v8bh a 16-byte
   vector of short elements.  Neither carries __may_alias__.  */
38typedef short __v16bh __attribute__ ((__vector_size__ (32)));
39typedef short __v8bh __attribute__ ((__vector_size__ (16)));
40
41/* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
/* __m256bh / __m128bh have the same element type and sizes as the internal
   types above, with __may_alias__ added.  */
43typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
44typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
45
/* Scalar type used as the return type of _mm_cvtness_sbh; it is a plain
   alias for unsigned short.  */
61e53698 46typedef unsigned short __bfloat16;
4f0e90fa
HL
47/* vcvtne2ps2bf16 */
48
49extern __inline __m256bh
50__attribute__((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
52{
53 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
54}
55
56extern __inline __m256bh
57__attribute__((__gnu_inline__, __always_inline__, __artificial__))
58_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
59{
60 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
61}
62
63extern __inline __m256bh
64__attribute__((__gnu_inline__, __always_inline__, __artificial__))
65_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
66{
67 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
68}
69
70extern __inline __m128bh
71__attribute__((__gnu_inline__, __always_inline__, __artificial__))
72_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
73{
74 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
75}
76
77extern __inline __m128bh
78__attribute__((__gnu_inline__, __always_inline__, __artificial__))
79_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
80{
81 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
82}
83
84extern __inline __m128bh
85__attribute__((__gnu_inline__, __always_inline__, __artificial__))
86_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
87{
88 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
89}
90
91/* vcvtneps2bf16 */
92
93extern __inline __m128bh
94__attribute__((__gnu_inline__, __always_inline__, __artificial__))
95_mm256_cvtneps_pbh (__m256 __A)
96{
97 return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
98}
99
100extern __inline __m128bh
101__attribute__((__gnu_inline__, __always_inline__, __artificial__))
102_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
103{
104 return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
105}
106
107extern __inline __m128bh
108__attribute__((__gnu_inline__, __always_inline__, __artificial__))
109_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
110{
111 return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
112}
113
114extern __inline __m128bh
115__attribute__((__gnu_inline__, __always_inline__, __artificial__))
116_mm_cvtneps_pbh (__m128 __A)
117{
118 return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
119}
120
121extern __inline __m128bh
122__attribute__((__gnu_inline__, __always_inline__, __artificial__))
123_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
124{
125 return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
126}
127
128extern __inline __m128bh
129__attribute__((__gnu_inline__, __always_inline__, __artificial__))
130_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
131{
132 return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
133}
134
135/* vdpbf16ps */
136
137extern __inline __m256
138__attribute__((__gnu_inline__, __always_inline__, __artificial__))
139_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
140{
141 return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
142}
143
144extern __inline __m256
145__attribute__((__gnu_inline__, __always_inline__, __artificial__))
146_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
147{
148 return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
149}
150
151extern __inline __m256
152__attribute__((__gnu_inline__, __always_inline__, __artificial__))
153_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
154{
155 return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
156}
157
158extern __inline __m128
159__attribute__((__gnu_inline__, __always_inline__, __artificial__))
160_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
161{
162 return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
163}
164
165extern __inline __m128
166__attribute__((__gnu_inline__, __always_inline__, __artificial__))
167_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
168{
169 return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
170}
171
172extern __inline __m128
173__attribute__((__gnu_inline__, __always_inline__, __artificial__))
174_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
175{
176 return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
177}
178
61e53698 179extern __inline __bfloat16
180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
181_mm_cvtness_sbh (float __A)
182{
183 __v4sf __V = {__A, 0, 0, 0};
184 __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
185 (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
186 return __R[0];
187}
188
189extern __inline __m128
190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191_mm_cvtpbh_ps (__m128bh __A)
192{
193 return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
194 (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
195}
196
197extern __inline __m256
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm256_cvtpbh_ps (__m128bh __A)
200{
201 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
202 (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
203}
204
205extern __inline __m128
206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
207_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
208{
209 return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
210 (__m128i)_mm_maskz_cvtepi16_epi32 (
211 (__mmask8)__U, (__m128i)__A), 16));
212}
213
214extern __inline __m256
215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
217{
218 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
219 (__m256i)_mm256_maskz_cvtepi16_epi32 (
220 (__mmask8)__U, (__m128i)__A), 16));
221}
222
223extern __inline __m128
224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
225_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
226{
227 return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
228 (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
229 (__m128i)__A), 16));
230}
231
232extern __inline __m256
233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
234_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
235{
236 return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
237 (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
238 (__m128i)__A), 16));
239}
240
4f0e90fa
HL
241#ifdef __DISABLE_AVX512BF16VL__
242#undef __DISABLE_AVX512BF16VL__
243#pragma GCC pop_options
244#endif /* __DISABLE_AVX512BF16VL__ */
245
246#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */