]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512bf16intrin.h
Enable V4BFmode and V2BFmode.
[thirdparty/gcc.git] / gcc / config / i386 / avx512bf16intrin.h
CommitLineData
7adcbafe 1/* Copyright (C) 2019-2022 Free Software Foundation, Inc.
4f0e90fa
HL
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512BF16INTRIN_H_INCLUDED
29#define _AVX512BF16INTRIN_H_INCLUDED
30
31#ifndef __AVX512BF16__
32#pragma GCC push_options
33#pragma GCC target("avx512bf16")
34#define __DISABLE_AVX512BF16__
35#endif /* __AVX512BF16__ */
36
37/* Internal data types for implementing the intrinsics.  __v32bh is a 64-byte vector of short elements. */
38typedef short __v32bh __attribute__ ((__vector_size__ (64)));
39
40/* The Intel API is flexible enough that we must allow aliasing with other
41 vector types, and their scalar components.  __m512bh is therefore declared as a 64-byte vector of short carrying the __may_alias__ attribute; it is the type the 512-bit BF16 intrinsics in this header take and return. */
42typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
43
61e53698 44/* Convert One BF16 Data to One Single Float Data. */
45extern __inline float
46__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47_mm_cvtsbh_ss (__bfloat16 __A)
48{
49 union{ float a; unsigned int b;} __tmp;
50 __tmp.b = ((unsigned int)(__A)) << 16;
51 return __tmp.a;
52}
53
4f0e90fa
HL
54/* vcvtne2ps2bf16 */
55
56extern __inline __m512bh
57__attribute__((__gnu_inline__, __always_inline__, __artificial__))
58_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
59{
60 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
61}
62
63extern __inline __m512bh
64__attribute__((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
66{
67 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
68}
69
70extern __inline __m512bh
71__attribute__((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
73{
74 return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
75}
76
77/* vcvtneps2bf16 */
78
79extern __inline __m256bh
80__attribute__((__gnu_inline__, __always_inline__, __artificial__))
81_mm512_cvtneps_pbh (__m512 __A)
82{
83 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
84}
85
86extern __inline __m256bh
87__attribute__((__gnu_inline__, __always_inline__, __artificial__))
88_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
89{
90 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
91}
92
93extern __inline __m256bh
94__attribute__((__gnu_inline__, __always_inline__, __artificial__))
95_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
96{
97 return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
98}
99
100/* vdpbf16ps */
101
102extern __inline __m512
103__attribute__((__gnu_inline__, __always_inline__, __artificial__))
104_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
105{
106 return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
107}
108
109extern __inline __m512
110__attribute__((__gnu_inline__, __always_inline__, __artificial__))
111_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
112{
113 return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
114}
115
116extern __inline __m512
117__attribute__((__gnu_inline__, __always_inline__, __artificial__))
118_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
119{
120 return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
121}
122
61e53698 123extern __inline __m512
124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125_mm512_cvtpbh_ps (__m256bh __A)
126{
127 return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
128 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
129}
130
131extern __inline __m512
132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
134{
135 return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
136 (__m512i)_mm512_maskz_cvtepi16_epi32 (
137 (__mmask16)__U, (__m256i)__A), 16));
138}
139
140extern __inline __m512
141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
142_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
143{
144 return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
145 (__m512i)__S, (__mmask16)__U,
146 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
147}
148
4f0e90fa
HL
149#ifdef __DISABLE_AVX512BF16__
150#undef __DISABLE_AVX512BF16__
151#pragma GCC pop_options
152#endif /* __DISABLE_AVX512BF16__ */
153
154#endif /* _AVX512BF16INTRIN_H_INCLUDED */