]>
Commit | Line | Data |
---|---|---|
a945c346 | 1 | /* Copyright (C) 2019-2024 Free Software Foundation, Inc. |
4f0e90fa HL |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | #ifndef _IMMINTRIN_H_INCLUDED | |
25 | #error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead." | |
26 | #endif | |
27 | ||
28 | #ifndef _AVX512BF16INTRIN_H_INCLUDED | |
29 | #define _AVX512BF16INTRIN_H_INCLUDED | |
30 | ||
fd514717 | 31 | #if !defined (__AVX512BF16__) || defined (__EVEX512__) |
4f0e90fa | 32 | #pragma GCC push_options |
fd514717 | 33 | #pragma GCC target("avx512bf16,no-evex512") |
4f0e90fa HL |
34 | #define __DISABLE_AVX512BF16__ |
35 | #endif /* !__AVX512BF16__ || __EVEX512__ */ | |
36 | ||
61e53698 | 37 | /* Convert One BF16 Data to One Single Float Data. */ |
38 | extern __inline float | |
39 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
87235f1e | 40 | _mm_cvtsbh_ss (__bf16 __A) |
61e53698 | 41 | { |
a1ecc560 | 42 | return __builtin_ia32_cvtbf2sf (__A); |
61e53698 | 43 | } |
44 | ||
8108b22f HJ |
45 | #ifdef __DISABLE_AVX512BF16__ |
46 | #undef __DISABLE_AVX512BF16__ | |
47 | #pragma GCC pop_options | |
48 | #endif /* __DISABLE_AVX512BF16__ */ | |
49 | ||
50 | #if !defined (__AVX512BF16__) || !defined (__EVEX512__) | |
51 | #pragma GCC push_options | |
52 | #pragma GCC target("avx512bf16,evex512") | |
53 | #define __DISABLE_AVX512BF16_512__ | |
54 | #endif /* !__AVX512BF16__ || !__EVEX512__ */ | |
55 | ||
56 | /* Internal data types for implementing the intrinsics.  __v32bf is a 64-byte vector whose elements are __bf16. */ | |
57 | typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); | |
58 | ||
59 | /* The Intel API is flexible enough that we must allow aliasing with other | |
60 | vector types, and their scalar components.  __m512bh is therefore a 64-byte __bf16 vector that additionally carries __may_alias__. */ | |
61 | typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); | |
62 | ||
4f0e90fa HL |
63 | /* vcvtne2ps2bf16 */ |
64 | ||
65 | extern __inline __m512bh | |
66 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
67 | _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) | |
68 | { | |
87235f1e | 69 | return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B); |
4f0e90fa HL |
70 | } |
71 | ||
72 | extern __inline __m512bh | |
73 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
74 | _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D) | |
75 | { | |
87235f1e | 76 | return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B); |
4f0e90fa HL |
77 | } |
78 | ||
79 | extern __inline __m512bh | |
80 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
81 | _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C) | |
82 | { | |
87235f1e | 83 | return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A); |
4f0e90fa HL |
84 | } |
85 | ||
86 | /* vcvtneps2bf16 */ | |
87 | ||
88 | extern __inline __m256bh | |
89 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
90 | _mm512_cvtneps_pbh (__m512 __A) | |
91 | { | |
92 | return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A); | |
93 | } | |
94 | ||
95 | extern __inline __m256bh | |
96 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
97 | _mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C) | |
98 | { | |
99 | return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B); | |
100 | } | |
101 | ||
102 | extern __inline __m256bh | |
103 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
104 | _mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B) | |
105 | { | |
106 | return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A); | |
107 | } | |
108 | ||
109 | /* vdpbf16ps */ | |
110 | ||
111 | extern __inline __m512 | |
112 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
113 | _mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C) | |
114 | { | |
115 | return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C); | |
116 | } | |
117 | ||
118 | extern __inline __m512 | |
119 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
120 | _mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D) | |
121 | { | |
122 | return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B); | |
123 | } | |
124 | ||
125 | extern __inline __m512 | |
126 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
127 | _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D) | |
128 | { | |
129 | return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A); | |
130 | } | |
131 | ||
61e53698 | 132 | extern __inline __m512 |
133 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
134 | _mm512_cvtpbh_ps (__m256bh __A) | |
135 | { | |
136 | return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 ( | |
137 | (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)); | |
138 | } | |
139 | ||
140 | extern __inline __m512 | |
141 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
142 | _mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A) | |
143 | { | |
144 | return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 ( | |
145 | (__m512i)_mm512_maskz_cvtepi16_epi32 ( | |
146 | (__mmask16)__U, (__m256i)__A), 16)); | |
147 | } | |
148 | ||
149 | extern __inline __m512 | |
150 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
151 | _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) | |
152 | { | |
153 | return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 ( | |
154 | (__m512i)__S, (__mmask16)__U, | |
155 | (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16))); | |
156 | } | |
157 | ||
8108b22f HJ |
158 | #ifdef __DISABLE_AVX512BF16_512__ |
159 | #undef __DISABLE_AVX512BF16_512__ | |
4f0e90fa | 160 | #pragma GCC pop_options |
8108b22f | 161 | #endif /* __DISABLE_AVX512BF16_512__ */ |
4f0e90fa HL |
162 | |
163 | #endif /* _AVX512BF16INTRIN_H_INCLUDED */ |