using _Ip = typename _VecType::value_type;
_VecType __v0 = _Ip(__val);
constexpr int __bits_per_element = sizeof(_Ip) * __CHAR_BIT__;
- constexpr _VecType __pow2 = _VecType(1) << (__iota<_VecType> % __bits_per_element);
+ constexpr _VecType __pow2 = _VecType(cw<1>)
+ << (__iota<_VecType> % cw<__bits_per_element>);
if constexpr (_S_size < __bits_per_element)
- return ((__v0 & __pow2) > 0)._M_concat_data();
+ return ((__v0 & __pow2) > cw<0>)._M_concat_data();
else if constexpr (_S_size == __bits_per_element)
- return ((__v0 & __pow2) != 0)._M_concat_data();
+ return ((__v0 & __pow2) != cw<0>)._M_concat_data();
else
{
static_assert(_Bytes == 1);
};
__v1 *= 0x0101'0101'0101'0101ull;
__v0 = __builtin_bit_cast(_VecType, __v1);
- return ((__v0 & __pow2) != 0)._M_data;
+ return ((__v0 & __pow2) != cw<0>)._M_data;
}
else
{
__v0 = _VecType::_S_static_permute(__v1, [](int __i) {
return __i / __CHAR_BIT__;
});
- return ((__v0 & __pow2) != 0)._M_data;
+ return ((__v0 & __pow2) != cw<0>)._M_data;
}
}
}
else
{
using _UV = basic_vec<_Up, _UAbi>;
- return __select_impl(static_cast<_UV::mask_type>(*this), _UV(1), _UV(0));
+ return __select_impl(static_cast<_UV::mask_type>(*this), _Up(1), _UV());
}
}
constexpr int __n = _IV::size();
if constexpr (_Bytes * __CHAR_BIT__ >= __n) // '1 << __iota' cannot overflow
{ // reduce(select(k, powers_of_2, 0))
- constexpr _IV __pow2 = _IV(1) << __iota<_IV>;
+ constexpr _IV __pow2 = _IV(cw<1>) << __iota<_IV>;
return _Ur(_U0(__select_impl(__k, __pow2, _IV())
._M_reduce(bit_or<>()))) << _Offset;
}
}
else
{ // limit powers_of_2 to 1, 2, 4, ..., 128
- constexpr _IV __pow2 = _IV(1) << (__iota<_IV> % _IV(__CHAR_BIT__));
+ constexpr _IV __pow2 = _IV(cw<1>) << (__iota<_IV> % _IV(cw<__CHAR_BIT__>));
_IV __x = __select_impl(__k, __pow2, _IV());
// partial reductions of 8 neighboring elements
__x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>());
*
* @note The constructor is implicit if the conversion (if any) is value-preserving.
*/
- template <__explicitly_convertible_to<value_type> _Up>
+ template <__broadcast_constructible<value_type> _Up>
[[__gnu__::__always_inline__]]
- constexpr explicit(!__broadcast_constructible<_Up, value_type>)
+ constexpr
basic_vec(_Up&& __x) noexcept
: _M_data(_DataType() == _DataType() ? static_cast<value_type>(__x) : value_type())
{}
- template <__simd_vec_bcast_consteval<value_type> _Up>
- consteval
- basic_vec(_Up&& __x)
- : _M_data(_DataType() == _DataType()
- ? __value_preserving_cast<value_type>(__x) : value_type())
- {}
-
// [simd.ctor] conversion constructor -----------------------------------
template <typename _Up, typename _UAbi, _TargetTraits _Traits = {}>
requires (_S_size == _UAbi::_S_size)
{ return _M_concat_data(); }
// [simd.ctor] broadcast constructor ------------------------------------
- template <__explicitly_convertible_to<value_type> _Up>
+ template <__broadcast_constructible<value_type> _Up>
[[__gnu__::__always_inline__]]
- constexpr explicit(!__broadcast_constructible<_Up, value_type>)
+ constexpr
basic_vec(_Up&& __x) noexcept
: _M_data0(static_cast<value_type>(__x)), _M_data1(static_cast<value_type>(__x))
{}
- template <__simd_vec_bcast_consteval<value_type> _Up>
- consteval
- basic_vec(_Up&& __x)
- : _M_data0(__value_preserving_cast<value_type>(__x)),
- _M_data1(__value_preserving_cast<value_type>(__x))
- {}
-
// [simd.ctor] conversion constructor -----------------------------------
template <typename _Up, typename _UAbi>
requires (_S_size == _UAbi::_S_size)
};
ADD_TEST(multiplication) {
- std::tuple {V(), V(RealV(1), RealV()), V(RealV(), RealV(1)), init_vec<V, C(0, 2), C(2, 0), C(-1, 2)>},
+ std::tuple {V(), V(RealV(Real(1)), RealV()), V(RealV(), RealV(Real(1))),
+ init_vec<V, C(0, 2), C(2, 0), C(-1, 2)>},
[](auto& t, V x, V one, V I, V z) {
t.verify_equal(x * x, x);
t.verify_equal(x * z, x);
t.verify_equal(y, x - T(1));
t.verify_equal(x - x, y);
t.verify_equal(x = z - x, init_vec<V, 0, 1, 2, 3, 4, 5, 6>);
- t.verify_equal(x = z - x, V(1));
+ t.verify_equal(x = z - x, T(1));
t.verify_equal(z -= x, init_vec<V, 0, 1, 2, 3, 4, 5, 6>);
t.verify_equal(z, init_vec<V, 0, 1, 2, 3, 4, 5, 6>);
- t.verify_equal(z -= z, V(0));
- t.verify_equal(z, V(0));
+ t.verify_equal(z -= z, V());
+ t.verify_equal(z, V());
}
};
ADD_TEST(divide0, std::is_floating_point_v<T> && !is_iec559) {
std::tuple{T(2), init_vec<V, 1, 2, 3, 4, 5, 6, 7>},
[](auto& t, V x, V y) {
- t.verify_equal_to_ulp(x / x, V(T(1)), 1);
- t.verify_equal_to_ulp(T(3) / x, V(T(3) / T(2)), 1);
- t.verify_equal_to_ulp(x / T(3), V(T(2) / T(3)), 1);
- t.verify_equal_to_ulp(y / x, init_vec<V, .5, 1, 1.5, 2, 2.5, 3, 3.5>, 1);
+ t.verify_equal_to_ulp(x / x, V(T(1)), std::cw<1>);
+ t.verify_equal_to_ulp(T(3) / x, V(T(3) / T(2)), std::cw<1>);
+ t.verify_equal_to_ulp(x / T(3), V(T(2) / T(3)), std::cw<1>);
+ t.verify_equal_to_ulp(y / x, init_vec<V, .5, 1, 1.5, 2, 2.5, 3, 3.5>, std::cw<1>);
}
};
[](auto& t, V a) {
V b = std::cw<2>;
V ref([&](int i) { return a[i] / 2; });
- t.verify_equal_to_ulp(a / b, ref, 1);
+ t.verify_equal_to_ulp(a / b, ref, std::cw<1>);
a = select(a == std::cw<0>, T(1), a);
// -freciprocal-math together with flush-to-zero makes
// the following range restriction necessary (i.e.
// 1/|a| must be >= min). Intel vrcpps and vrcp14ps
// need some extra slack (use 1.1 instead of 1).
a = select(fabs(a) >= T(1.1) / norm_min, T(1), a);
- t.verify_equal_to_ulp(a / a, V(1), 1)("\na = ", a);
+ t.verify_equal_to_ulp(a / a, V(std::cw<1>), std::cw<1>)("\na = ", a);
ref = V([&](int i) { return 2 / a[i]; });
- t.verify_equal_to_ulp(b / a, ref, 1)("\na = ", a);
- t.verify_equal_to_ulp(b /= a, ref, 1);
- t.verify_equal_to_ulp(b, ref, 1);
+ t.verify_equal_to_ulp(b / a, ref, std::cw<1>)("\na = ", a);
+ t.verify_equal_to_ulp(b /= a, ref, std::cw<1>);
+ t.verify_equal_to_ulp(b, ref, std::cw<1>);
}
};
std::tuple{T(2), init_vec<V, 1, 2, 3, 4, 5, 6, 7>, init_vec<V, T(max), T(norm_min)>,
init_vec<V, T(norm_min), T(max)>, init_vec<V, T(max), T(norm_min) + 1>},
[](auto& t, V x, V y, V z, V a, V b) {
- t.verify_equal(x / x, V(1));
- t.verify_equal(T(3) / x, V(T(3) / T(2)));
- t.verify_equal(x / T(3), V(T(2) / T(3)));
+ t.verify_equal(x / x, T(1));
+ t.verify_equal(T(3) / x, T(T(3) / T(2)));
+ t.verify_equal(x / T(3), T(T(2) / T(3)));
t.verify_equal(y / x, init_vec<V, .5, 1, 1.5, 2, 2.5, 3, 3.5>);
V ref = init_vec<V, T(max / 2), T(norm_min / 2)>;
t.verify_equal(z / x, ref);
ref = init_vec<V, T(norm_min / 2), T(max / 2)>;
t.verify_equal(a / x, ref);
- t.verify_equal(b / b, V(1));
+ t.verify_equal(b / b, T(1));
ref = init_vec<V, T(2 / max), T(2 / (norm_min + 1))>;
t.verify_equal(x / b, ref);
t.verify_equal(x /= b, ref);