]>
Commit | Line | Data |
---|---|---|
2bcceb6f MK |
1 | // Simd PowerPC specific implementations -*- C++ -*- |
2 | ||
7adcbafe | 3 | // Copyright (C) 2020-2022 Free Software Foundation, Inc. |
2bcceb6f MK |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_ | |
26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_ | |
27 | ||
28 | #if __cplusplus >= 201703L | |
29 | ||
30 | #ifndef __ALTIVEC__ | |
31 | #error "simd_ppc.h may only be included when AltiVec/VMX is available" | |
32 | #endif | |
81c2c32d | 33 | #include <altivec.h> |
2bcceb6f MK |
34 | |
35 | _GLIBCXX_SIMD_BEGIN_NAMESPACE | |
36 | ||
37 | // _SimdImplPpc {{{ | |
38 | template <typename _Abi> | |
39 | struct _SimdImplPpc : _SimdImplBuiltin<_Abi> | |
40 | { | |
41 | using _Base = _SimdImplBuiltin<_Abi>; | |
42 | ||
43 | // Byte and halfword shift instructions on PPC only consider the low 3 or 4 | |
44 | // bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more) | |
45 | // is UB without extra measures. To match scalar behavior, byte and halfword | |
46 | // shifts need an extra fixup step. | |
47 | ||
48 | // _S_bit_shift_left {{{ | |
49 | template <typename _Tp, size_t _Np> | |
50 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> | |
51 | _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) | |
52 | { | |
53 | __x = _Base::_S_bit_shift_left(__x, __y); | |
54 | if constexpr (sizeof(_Tp) < sizeof(int)) | |
55 | __x._M_data | |
56 | = (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data; | |
57 | return __x; | |
58 | } | |
59 | ||
60 | template <typename _Tp, size_t _Np> | |
61 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> | |
62 | _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y) | |
63 | { | |
64 | __x = _Base::_S_bit_shift_left(__x, __y); | |
65 | if constexpr (sizeof(_Tp) < sizeof(int)) | |
66 | { | |
67 | if (__y >= sizeof(_Tp) * __CHAR_BIT__) | |
68 | return {}; | |
69 | } | |
70 | return __x; | |
71 | } | |
72 | ||
73 | // }}} | |
74 | // _S_bit_shift_right {{{ | |
75 | template <typename _Tp, size_t _Np> | |
76 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> | |
77 | _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) | |
78 | { | |
79 | if constexpr (sizeof(_Tp) < sizeof(int)) | |
80 | { | |
81 | constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__; | |
82 | if constexpr (is_unsigned_v<_Tp>) | |
83 | return (__y._M_data < __nbits) | |
84 | & _Base::_S_bit_shift_right(__x, __y)._M_data; | |
85 | else | |
86 | { | |
87 | _Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data | |
88 | >= __nbits), | |
89 | __y, __nbits - 1); | |
90 | return _Base::_S_bit_shift_right(__x, __y); | |
91 | } | |
92 | } | |
93 | else | |
94 | return _Base::_S_bit_shift_right(__x, __y); | |
95 | } | |
96 | ||
97 | template <typename _Tp, size_t _Np> | |
98 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> | |
99 | _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y) | |
100 | { | |
101 | if constexpr (sizeof(_Tp) < sizeof(int)) | |
102 | { | |
103 | constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__; | |
104 | if (__y >= __nbits) | |
105 | { | |
106 | if constexpr (is_unsigned_v<_Tp>) | |
107 | return {}; | |
108 | else | |
109 | return _Base::_S_bit_shift_right(__x, __nbits - 1); | |
110 | } | |
111 | } | |
112 | return _Base::_S_bit_shift_right(__x, __y); | |
113 | } | |
114 | ||
115 | // }}} | |
116 | }; | |
117 | ||
81c2c32d MK |
118 | // }}} |
119 | // _MaskImplPpc {{{ | |
120 | template <typename _Abi> | |
121 | struct _MaskImplPpc : _MaskImplBuiltin<_Abi> | |
122 | { | |
123 | using _Base = _MaskImplBuiltin<_Abi>; | |
124 | ||
125 | // _S_popcount {{{ | |
126 | template <typename _Tp> | |
127 | _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k) | |
128 | { | |
129 | const auto __kv = __as_vector(__k); | |
130 | if constexpr (__have_power10vec) | |
131 | { | |
132 | return vec_cntm(__to_intrin(__kv), 1); | |
133 | } | |
134 | else if constexpr (sizeof(_Tp) >= sizeof(int)) | |
135 | { | |
136 | using _Intrin = __intrinsic_type16_t<int>; | |
137 | const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3]; | |
138 | return __sum / (sizeof(_Tp) / sizeof(int)); | |
139 | } | |
140 | else | |
141 | { | |
142 | const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>()); | |
143 | return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3]; | |
144 | } | |
145 | } | |
146 | ||
147 | // }}} | |
148 | }; | |
149 | ||
2bcceb6f MK |
150 | // }}} |
151 | ||
152 | _GLIBCXX_SIMD_END_NAMESPACE | |
153 | #endif // __cplusplus >= 201703L | |
154 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_ | |
155 | ||
81c2c32d | 156 | // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 |