// Returns a 16-byte vector with every bit set: 0xFF is broadcast to all
// sixteen 8-bit lanes.
template<>
really_inline SuperVector<16> SuperVector<16>::Ones()
{
- return {_mm_set1_epi8(0xFF)};
+ return SuperVector<16>(_mm_set1_epi8(0xFF));
}
// Returns a 16-byte vector with every bit cleared: 0 is broadcast to all
// sixteen 8-bit lanes.
template<>
really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
{
- return {_mm_set1_epi8(0)};
+ return SuperVector<16>(_mm_set1_epi8(0));
}
// Methods
// Bitwise AND of the 128-bit payloads of *this and b.
template <>
really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const
{
- return {_mm_and_si128(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_and_si128(u.v128[0], b.u.v128[0]));
}
// Bitwise OR of the 128-bit payloads of *this and b.
template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const
{
- return {_mm_or_si128(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_or_si128(u.v128[0], b.u.v128[0]));
}
// Bitwise XOR of the 128-bit payloads of *this and b.
template <>
really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const
{
- return {_mm_xor_si128(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_xor_si128(u.v128[0], b.u.v128[0]));
}
// Bitwise complement of the 128-bit payload: every bit of *this is flipped
// by XOR-ing with an all-ones vector.
// The earlier form XOR-ed the value with itself, which yields all-zero bits
// for every possible input and therefore ignored the operand entirely.
template <>
really_inline SuperVector<16> SuperVector<16>::operator!() const
{
- return {_mm_xor_si128(u.v128[0], u.v128[0])};
+ return SuperVector<16>(_mm_xor_si128(u.v128[0], _mm_set1_epi8(0xFF)));
}
// AND-NOT: computes (~*this) & b. Note the operand order: it is the
// receiver (*this) that is complemented, not the argument b.
template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
{
- return {_mm_andnot_si128(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_andnot_si128(u.v128[0], b.u.v128[0]));
}
// Byte-wise equality compare. The result is a vector mask, not a bool:
// each byte is 0xFF where the corresponding bytes of *this and b are equal
// and 0x00 where they differ.
template <>
really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const
{
- return {_mm_cmpeq_epi8(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_cmpeq_epi8(u.v128[0], b.u.v128[0]));
}
template <>
// Byte-wise greater-than compare returning a vector mask (0xFF / 0x00 per
// byte). _mm_cmpgt_epi8 interprets the bytes as signed 8-bit integers, so
// values >= 0x80 compare as negative.
template <>
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
{
- return {_mm_cmpgt_epi8(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_cmpgt_epi8(u.v128[0], b.u.v128[0]));
}
// Byte-wise less-than compare returning a vector mask (0xFF / 0x00 per
// byte). _mm_cmplt_epi8 interprets the bytes as signed 8-bit integers.
template <>
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
{
- return {_mm_cmplt_epi8(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_cmplt_epi8(u.v128[0], b.u.v128[0]));
}
// Shifts each 16-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const
{
- return {_mm_slli_epi16(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi16(u.v128[0], N));
}
// Shifts each 32-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const
{
- return {_mm_slli_epi32(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi32(u.v128[0], N));
}
// Shifts each 64-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{
- return {_mm_slli_epi64(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi64(u.v128[0], N));
}
// Shifts the whole 128-bit value left, filling with zeros.
// Unlike the 16/32/64 variants, _mm_slli_si128 counts N in BYTES, not bits.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const
{
- return {_mm_slli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
}
// Logical (zero-filling) right shift of each 16-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const
{
- return {_mm_srli_epi16(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi16(u.v128[0], N));
}
// Logical (zero-filling) right shift of each 32-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
{
- return {_mm_srli_epi32(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi32(u.v128[0], N));
}
// Logical (zero-filling) right shift of each 64-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{
- return {_mm_srli_epi64(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi64(u.v128[0], N));
}
// Shifts the whole 128-bit value right, filling with zeros.
// Unlike the 16/32/64 variants, _mm_srli_si128 counts N in BYTES, not bits.
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const
{
- return {_mm_srli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
}
template <>
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_slli_epi16(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi16(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi16(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi16(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_slli_epi32(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi32(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi32(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi32(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_slli_epi64(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_epi64(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi64(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi64(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
- return {_mm_slli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_si128(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_si128(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_srli_epi16(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi16(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi16(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi16(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_srli_epi32(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi32(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi32(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi32(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) {
- return {_mm_srli_epi64(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_epi64(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi64(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi64(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
- return {_mm_srli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
}
#endif
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_si128(v->u.v128[0], n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_si128(v->u.v128[0], n))}; });
return result;
}
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
- return {_mm_srli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
}
#endif
return vshr_128(N);
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
if (__builtin_constant_p(N)) {
- return {_mm_slli_si128(u.v128[0], N)};
+ return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
}
#endif
return vshl_128(N);
// Loads 16 bytes starting at ptr into a vector. Uses the unaligned load
// intrinsic, so ptr carries no alignment requirement; a full 16 bytes are
// read.
template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{
- return _mm_loadu_si128((const m128 *)ptr);
+ return SuperVector<16>(_mm_loadu_si128((const m128 *)ptr));
}
template <>
{
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
ptr = vectorscan_assume_aligned(ptr, SuperVector::size);
- return _mm_load_si128((const m128 *)ptr);
+ return SuperVector<16>(_mm_load_si128((const m128 *)ptr));
}
// Unaligned load of 16 bytes from ptr, then ANDs the loaded vector with a
// mask derived from len and returns the result.
// The mask comes from Ones_vshr(16 - len), which is defined outside this
// view; presumably it keeps only the low `len` bytes set -- confirm.
// NOTE(review): the load itself is a full 16-byte _mm_loadu_si128 regardless
// of len, so the masking zeroes bytes after they have been read. Callers
// presumably must guarantee 16 readable bytes at ptr -- verify.
template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{
SuperVector mask = Ones_vshr(16 -len);
- SuperVector v = _mm_loadu_si128((const m128 *)ptr);
+ SuperVector v = SuperVector<16>(_mm_loadu_si128((const m128 *)ptr));
return mask & v;
}
if (offset == 16) {
return *this;
} else {
- return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)};
+ return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset));
}
}
#endif
switch(offset) {
case 0: return other; break;
- case 1: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)}; break;
- case 2: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)}; break;
- case 3: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)}; break;
- case 4: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)}; break;
- case 5: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)}; break;
- case 6: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)}; break;
- case 7: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)}; break;
- case 8: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)}; break;
- case 9: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)}; break;
- case 10: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)}; break;
- case 11: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)}; break;
- case 12: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)}; break;
- case 13: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)}; break;
- case 14: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)}; break;
- case 15: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)}; break;
+ case 1: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)); break;
+ case 2: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)); break;
+ case 3: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)); break;
+ case 4: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)); break;
+ case 5: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)); break;
+ case 6: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)); break;
+ case 7: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)); break;
+ case 8: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)); break;
+ case 9: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)); break;
+ case 10: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)); break;
+ case 11: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)); break;
+ case 12: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)); break;
+ case 13: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)); break;
+ case 14: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)); break;
+ case 15: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)); break;
default: break;
}
return *this;
// Byte shuffle: builds a vector whose bytes are picked from *this using the
// per-byte indices in b (_mm_shuffle_epi8). For each byte of b, the low
// four bits select the source byte; if bit 7 is set the output byte is zero.
// NOTE(review): the meaning of the <true> template argument is not visible
// in this view -- confirm against the class declaration.
template<>
really_inline SuperVector<16> SuperVector<16>::pshufb<true>(SuperVector<16> b)
{
- return {_mm_shuffle_epi8(u.v128[0], b.u.v128[0])};
+ return SuperVector<16>(_mm_shuffle_epi8(u.v128[0], b.u.v128[0]));
}
template<>
// Returns a 32-byte vector with every bit set: 0xFF is broadcast to all
// thirty-two 8-bit lanes.
template<>
really_inline SuperVector<32> SuperVector<32>::Ones(void)
{
- return {_mm256_set1_epi8(0xFF)};
+ return SuperVector<32>(_mm256_set1_epi8(0xFF));
}
// Returns a 32-byte vector with every bit cleared: 0 is broadcast to all
// thirty-two 8-bit lanes.
template<>
really_inline SuperVector<32> SuperVector<32>::Zeroes(void)
{
- return {_mm256_set1_epi8(0)};
+ return SuperVector<32>(_mm256_set1_epi8(0));
}
template <>
// Bitwise AND of the 256-bit payloads of *this and b.
template <>
really_inline SuperVector<32> SuperVector<32>::operator&(SuperVector<32> const &b) const
{
- return {_mm256_and_si256(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_and_si256(u.v256[0], b.u.v256[0]));
}
// Bitwise OR of the 256-bit payloads of *this and b.
template <>
really_inline SuperVector<32> SuperVector<32>::operator|(SuperVector<32> const &b) const
{
- return {_mm256_or_si256(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_or_si256(u.v256[0], b.u.v256[0]));
}
// Bitwise XOR of the 256-bit payloads of *this and b.
template <>
really_inline SuperVector<32> SuperVector<32>::operator^(SuperVector<32> const &b) const
{
- return {_mm256_xor_si256(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_xor_si256(u.v256[0], b.u.v256[0]));
}
// Bitwise complement of the 256-bit payload: every bit of *this is flipped
// by XOR-ing with an all-ones vector.
// The earlier form XOR-ed the value with itself, which yields all-zero bits
// for every possible input and therefore ignored the operand entirely.
template <>
really_inline SuperVector<32> SuperVector<32>::operator!() const
{
- return {_mm256_xor_si256(u.v256[0], u.v256[0])};
+ return SuperVector<32>(_mm256_xor_si256(u.v256[0], _mm256_set1_epi8(0xFF)));
}
// AND-NOT: computes (~*this) & b. Note the operand order: it is the
// receiver (*this) that is complemented, not the argument b.
template <>
really_inline SuperVector<32> SuperVector<32>::opandnot(SuperVector<32> const &b) const
{
- return {_mm256_andnot_si256(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_andnot_si256(u.v256[0], b.u.v256[0]));
}
// Byte-wise equality compare. The result is a vector mask, not a bool:
// each byte is 0xFF where the corresponding bytes of *this and b are equal
// and 0x00 where they differ.
template <>
really_inline SuperVector<32> SuperVector<32>::operator==(SuperVector<32> const &b) const
{
- return {_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0]));
}
template <>
// Byte-wise greater-than compare returning a vector mask (0xFF / 0x00 per
// byte). _mm256_cmpgt_epi8 interprets the bytes as signed 8-bit integers,
// so values >= 0x80 compare as negative.
template <>
really_inline SuperVector<32> SuperVector<32>::operator>(SuperVector<32> const &b) const
{
- return {_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0])};
+ return SuperVector<32>(_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0]));
}
// Shifts each 16-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_16_imm() const
{
- return {_mm256_slli_epi16(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_slli_epi16(u.v256[0], N))};
}
// Shifts each 32-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_32_imm() const
{
- return {_mm256_slli_epi32(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_slli_epi32(u.v256[0], N))};
}
// Shifts each 64-bit lane left by N bits, filling with zeros.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_64_imm() const
{
- return {_mm256_slli_epi64(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_slli_epi64(u.v256[0], N))};
}
// Byte-granular left shift applied to each 128-bit half independently:
// _mm256_slli_si256 shifts each 128-bit lane left by N BYTES with zero
// fill, and no bytes cross from the low half into the high half.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const
{
- return {_mm256_slli_si256(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_slli_si256(u.v256[0], N))};
}
// Logical (zero-filling) right shift of each 16-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_16_imm() const
{
- return {_mm256_srli_epi16(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_srli_epi16(u.v256[0], N))};
}
// Logical (zero-filling) right shift of each 32-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_32_imm() const
{
- return {_mm256_srli_epi32(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_srli_epi32(u.v256[0], N))};
}
// Logical (zero-filling) right shift of each 64-bit lane by N bits.
// N is a template argument so it reaches the intrinsic as an immediate.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_64_imm() const
{
- return {_mm256_srli_epi64(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_srli_epi64(u.v256[0], N))};
}
// Byte-granular right shift applied to each 128-bit half independently:
// _mm256_srli_si256 shifts each 128-bit lane right by N BYTES with zero
// fill, and no bytes cross from the high half into the low half.
template <>
template<uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_128_imm() const
{
- return {_mm256_srli_si256(u.v256[0], N)};
+ return {SuperVector<32>(_mm256_srli_si256(u.v256[0], N))};
}
template <>
// Shifts the whole 256-bit vector right by N bytes, zero-filling from the
// top. N is presumably the template<uint8_t N> parameter declared just
// above this view -- confirm.
//   N == 0   : returns *this unchanged.
//   N == 16  : moves the high 128-bit half into the low half and zeroes the
//              high half (permute2x128 with _MM_SHUFFLE(2, 0, 0, 1)).
//   N == 32  : returns Zeroes().
//   N < 16   : per-lane alignr of (high-half-moved-down, original) by N, so
//              bytes from the high half flow into the low half.
//   otherwise: moves the high half down, then shifts it right by N - 16.
// The N < 16 branch previously used
//   alignr(u, permute2x128(u, u, _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)
// which is the LEFT-shift emulation; it now uses the same right-shift
// expression as the other right-shift paths in this file.
really_inline SuperVector<32> SuperVector<32>::vshr_256_imm() const
{
if (N == 0) return *this;
- if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+ if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
if (N == 32) return Zeroes();
if (N < 16) {
- return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
+ return {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N))};
} else {
- return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
+ return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
}
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi16(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi16(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi32(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi32(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi64(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi64(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_si256(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_si256(v->u.v256[0], n))}; });
return result;
}
// Shifts the whole 256-bit vector left by a run-time byte count N.
//   N == 0  : returns *this unchanged.
//   N == 16 : permute2x128 with _MM_SHUFFLE(0, 0, 2, 0) -- the low 128-bit
//             half moves to the high half and the low half is zeroed.
//   N == 32 : returns Zeroes().
// Other values are dispatched through Unroller so that the intrinsics get a
// compile-time constant shift count; each lambda assigns `result` only when
// its constant n equals N. Unroller is defined outside this view --
// presumably it invokes the lambda once per integer in its range; confirm
// whether the upper bound is inclusive.
// NOTE(review): if no lambda matches (e.g. N > 32), `result` is returned as
// default-constructed; what that contains depends on SuperVector's default
// constructor, which is not visible here.
// NOTE(review): the first lambda reads `u.v256[0]` for one operand and
// `v->u.v256[0]` for the other; both refer to the same object, and the
// statement ends in a redundant `;;`.
really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const
{
if (N == 0) return *this;
- if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
+ if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))};
if (N == 32) return Zeroes();
SuperVector result;
// Counts below 16: per-lane alignr against the half-swapped copy carries
// bytes from the low half up into the high half.
Unroller<1, 16>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
- if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
+ if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n))};;
});
// Counts above 16: move the low half up first, then shift by the remainder.
Unroller<17, 32>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
- if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
+ if (N == n) result = {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16))};
});
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi16(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi16(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi32(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi32(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi64(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi64(v->u.v256[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 32) return Zeroes();
SuperVector result;
- Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_si256(v->u.v256[0], n)}; });
+ Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_si256(v->u.v256[0], n))}; });
return result;
}
// Shifts the whole 256-bit vector right by a run-time byte count N.
//   N == 0  : returns *this unchanged.
//   N == 16 : permute2x128 with _MM_SHUFFLE(2, 0, 0, 1) -- the high 128-bit
//             half moves to the low half and the high half is zeroed.
//   N == 32 : returns Zeroes().
// Other values are dispatched through Unroller so that the intrinsics get a
// compile-time constant shift count; each lambda assigns `result` only when
// its constant n equals N. Unroller is defined outside this view --
// presumably it invokes the lambda once per integer in its range; confirm
// whether the upper bound is inclusive.
// NOTE(review): if no lambda matches (e.g. N > 32), `result` is returned as
// default-constructed; what that contains depends on SuperVector's default
// constructor, which is not visible here.
really_inline SuperVector<32> SuperVector<32>::vshr_256(uint8_t const N) const
{
if (N == 0) return *this;
- if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+ if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
if (N == 32) return Zeroes();
SuperVector result;
// Counts below 16: per-lane alignr of (high-half-moved-down, original)
// carries bytes from the high half down into the low half.
Unroller<1, 16>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
- if (N == n) result = {_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n)};
+ if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n))};
});
// Counts above 16: move the high half down first, then shift by the
// remainder.
Unroller<17, 32>::iterator([&,v=this](auto const i) {
constexpr uint8_t n = i.value;
- if (N == n) result = {_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16)};
+ if (N == n) result = {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16))};
});
return result;
}
if (__builtin_constant_p(N)) {
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
if (N < 16) {
- return {_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N)};
+ return {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N))};
} else if (N == 16) {
- return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+ return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
} else {
- return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
+ return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
}
}
#endif
if (__builtin_constant_p(N)) {
// As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
if (N < 16) {
- return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
+ return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
} else if (N == 16) {
- return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
+ return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))};
} else {
- return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)};
+ return {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16))};
}
}
#endif
// Loads 32 bytes starting at ptr into a vector. Uses the unaligned load
// intrinsic, so ptr carries no alignment requirement; a full 32 bytes are
// read.
template <>
really_inline SuperVector<32> SuperVector<32>::loadu(void const *ptr)
{
- return {_mm256_loadu_si256((const m256 *)ptr)};
+ return {SuperVector<32>(_mm256_loadu_si256((const m256 *)ptr))};
}
template <>
{
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
ptr = vectorscan_assume_aligned(ptr, SuperVector::size);
- return {_mm256_load_si256((const m256 *)ptr)};
+ return {SuperVector<32>(_mm256_load_si256((const m256 *)ptr))};
}
template <>
{
#ifdef HAVE_AVX512
u32 mask = (~0ULL) >> (32 - len);
- SuperVector<32> v = _mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr);
+ SuperVector<32> v = SuperVector<32>(_mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr));
v.print8("v");
return v;
#else
#endif
// As found here: https://stackoverflow.com/questions/8517970/mm-alignr-epi8-palignr-equivalent-in-avx2#8637458
switch (offset){
- case 0 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0)); break;
- case 1 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1)); break;
- case 2 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2)); break;
- case 3 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3)); break;
- case 4 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4)); break;
- case 5 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5)); break;
- case 6 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6)); break;
- case 7 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7)); break;
- case 8 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8)); break;
- case 9 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9)); break;
- case 10 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10)); break;
- case 11 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11)); break;
- case 12 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 12)); break;
- case 13 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13)); break;
- case 14 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14)); break;
- case 15 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15)); break;
- case 16 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0)); break;
- case 17 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 1)); break;
- case 18 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2)); break;
- case 19 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3)); break;
- case 20 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4)); break;
- case 21 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5)); break;
- case 22 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6)); break;
- case 23 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7)); break;
- case 24 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8)); break;
- case 25 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 9)); break;
- case 26 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10)); break;
- case 27 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11)); break;
- case 28 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12)); break;
- case 29 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13)); break;
- case 30 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14)); break;
- case 31 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15)); break;
+ case 0 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0))); break;
+ case 1 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1))); break;
+ case 2 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2))); break;
+ case 3 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3))); break;
+ case 4 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4))); break;
+ case 5 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5))); break;
+ case 6 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6))); break;
+ case 7 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7))); break;
+ case 8 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8))); break;
+ case 9 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9))); break;
+ case 10 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10))); break;
+ case 11 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11))); break;
+ case 12 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 12))); break;
+ case 13 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13))); break;
+ case 14 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14))); break;
+ case 15 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15))); break;
+ case 16 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0))); break;
+ case 17 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 1))); break;
+ case 18 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2))); break;
+ case 19 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3))); break;
+ case 20 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4))); break;
+ case 21 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5))); break;
+ case 22 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6))); break;
+ case 23 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7))); break;
+ case 24 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8))); break;
+ case 25 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 9))); break;
+ case 26 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10))); break;
+ case 27 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11))); break;
+ case 28 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12))); break;
+ case 29 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13))); break;
+ case 30 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14))); break;
+ case 31 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15))); break;
default: break;
}
return *this;
template<>
really_inline SuperVector<32> SuperVector<32>::pshufb<true>(SuperVector<32> b)
{
- return {_mm256_shuffle_epi8(u.v256[0], b.u.v256[0])};
+ // Byte shuffle of this vector with b as the control vector (_mm256_shuffle_epi8).
+ // Built by direct construction, without an extra brace layer, like the SuperVector<16> specializations.
+ return SuperVector<32>(_mm256_shuffle_epi8(u.v256[0], b.u.v256[0]));
}
template<>
template<>
really_inline SuperVector<64> SuperVector<64>::Ones(void)
{
- return {_mm512_set1_epi8(0xFF)};
+ // Every byte of the 512-bit vector is set to 0xFF.
+ // Direct construction (no brace wrapper), consistent with SuperVector<16>::Ones().
+ return SuperVector<64>(_mm512_set1_epi8(0xFF));
}
template<>
really_inline SuperVector<64> SuperVector<64>::Zeroes(void)
{
- return {_mm512_set1_epi8(0)};
+ // Every byte of the 512-bit vector is set to 0.
+ // Direct construction (no brace wrapper), consistent with SuperVector<16>::Zeroes().
+ return SuperVector<64>(_mm512_set1_epi8(0));
}
// Methods
template <>
really_inline SuperVector<64> SuperVector<64>::operator&(SuperVector<64> const &b) const
{
- return {_mm512_and_si512(u.v512[0], b.u.v512[0])};
+ // Bitwise AND of the two 512-bit operands; constructed directly like the SuperVector<16> variant.
+ return SuperVector<64>(_mm512_and_si512(u.v512[0], b.u.v512[0]));
}
template <>
really_inline SuperVector<64> SuperVector<64>::operator|(SuperVector<64> const &b) const
{
- return {_mm512_or_si512(u.v512[0], b.u.v512[0])};
+ // Bitwise OR of the two 512-bit operands; constructed directly like the SuperVector<16> variant.
+ return SuperVector<64>(_mm512_or_si512(u.v512[0], b.u.v512[0]));
}
template <>
really_inline SuperVector<64> SuperVector<64>::operator^(SuperVector<64> const &b) const
{
- return {_mm512_xor_si512(u.v512[0], b.u.v512[0])};
+ // Bitwise XOR of the two 512-bit operands; constructed directly like the SuperVector<16> variant.
+ return SuperVector<64>(_mm512_xor_si512(u.v512[0], b.u.v512[0]));
}
template <>
really_inline SuperVector<64> SuperVector<64>::operator!() const
{
- return {_mm512_xor_si512(u.v512[0], u.v512[0])};
+ // Bitwise complement: XOR against an all-ones vector flips every bit.
+ // XOR-ing the vector with itself (the previous form) yields zero for every input.
+ // NOTE(review): assumes operator! is meant as a bitwise NOT - confirm against the class declaration.
+ return SuperVector<64>(_mm512_xor_si512(u.v512[0], _mm512_set1_epi8(0xFF)));
}
template <>
really_inline SuperVector<64> SuperVector<64>::opandnot(SuperVector<64> const &b) const
{
- return {_mm512_andnot_si512(u.v512[0], b.u.v512[0])};
+ // _mm512_andnot_si512 computes (~this) & b; constructed directly like the SuperVector<16> variant.
+ return SuperVector<64>(_mm512_andnot_si512(u.v512[0], b.u.v512[0]));
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmpeq_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmpneq_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmpgt_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmplt_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmpge_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
{
SuperVector<64>::comparemask_type mask =
_mm512_cmple_epi8_mask(u.v512[0], b.u.v512[0]);
- return {_mm512_movm_epi8(mask)};
+ return {SuperVector<64>(_mm512_movm_epi8(mask))};
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_16_imm() const
{
- return {_mm512_slli_epi16(u.v512[0], N)};
+ // Left shift of every 16-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_slli_epi16(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_32_imm() const
{
- return {_mm512_slli_epi32(u.v512[0], N)};
+ // Left shift of every 32-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_slli_epi32(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_64_imm() const
{
- return {_mm512_slli_epi64(u.v512[0], N)};
+ // Left shift of every 64-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_slli_epi64(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_128_imm() const
{
- return {_mm512_bslli_epi128(u.v512[0], N)};
+ // Byte-wise left shift by N bytes, applied independently to each 128-bit lane.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_bslli_epi128(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_16_imm() const
{
- return {_mm512_srli_epi16(u.v512[0], N)};
+ // Logical right shift of every 16-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_srli_epi16(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_32_imm() const
{
- return {_mm512_srli_epi32(u.v512[0], N)};
+ // Logical right shift of every 32-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_srli_epi32(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_64_imm() const
{
- return {_mm512_srli_epi64(u.v512[0], N)};
+ // Logical right shift of every 64-bit lane by the compile-time count N.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_srli_epi64(u.v512[0], N));
}
template <>
template<uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_128_imm() const
{
- return {_mm512_bsrli_epi128(u.v512[0], N)};
+ // Byte-wise right shift by N bytes, applied independently to each 128-bit lane.
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_bsrli_epi128(u.v512[0], N));
}
template <>
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi16(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi16(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi32(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi32(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi64(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi64(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bslli_epi128(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bslli_epi128(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi16(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi16(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi32(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi32(v->u.v512[0], n))}; });
return result;
}
if (N == 0) return *this;
- if (N == 16) return Zeroes();
+ // 64-bit lanes are only fully cleared by a 64-bit shift. The old `N == 16` guard
+ // returned zero for a legitimate 16-bit shift; the sibling runtime shifts guard on 64.
+ // NOTE(review): presumably Unroller<1, 64> stops before 64, so N == 64 needs this guard - confirm.
+ if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi64(v->u.v512[0], n)}; });
+ // Dispatch the runtime count N to the matching compile-time immediate n.
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<64>(_mm512_srli_epi64(v->u.v512[0], n)); });
return result;
}
if (N == 0) return *this;
if (N == 64) return Zeroes();
SuperVector result;
- Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bsrli_epi128(v->u.v512[0], n)}; });
+ Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bsrli_epi128(v->u.v512[0], n))}; });
return result;
}
if (N == 0) {
return *this;
} else if (N < 32) {
- SuperVector<32> lo256 = u.v256[0];
- SuperVector<32> hi256 = u.v256[1];
+ SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
SuperVector<32> carry = hi256 << (32 - N);
hi256 = hi256 >> N;
lo256 = (lo256 >> N) | carry;
- return SuperVector(lo256, hi256);
+ return SuperVector<64>(lo256, hi256);
} else if (N == 32) {
- SuperVector<32> hi256 = u.v256[1];
- return SuperVector(hi256, SuperVector<32>::Zeroes());
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+ return SuperVector<64>(hi256, SuperVector<32>::Zeroes());
} else if (N < 64) {
- SuperVector<32> hi256 = u.v256[1];
- return SuperVector(hi256 >> (N - 32), SuperVector<32>::Zeroes());
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+ return SuperVector<64>(hi256 >> (N - 32), SuperVector<32>::Zeroes());
} else {
return Zeroes();
}
if (N == 0) {
return *this;
} else if (N < 32) {
- SuperVector<32> lo256 = u.v256[0];
- SuperVector<32> hi256 = u.v256[1];
+ SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
SuperVector<32> carry = lo256 >> (32 - N);
hi256 = (hi256 << N) | carry;
lo256 = lo256 << N;
- return SuperVector(lo256, hi256);
+ return SuperVector<64>(lo256, hi256);
} else if (N == 32) {
- SuperVector<32> lo256 = u.v256[0];
- return SuperVector(SuperVector<32>::Zeroes(), lo256);
+ SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+ return SuperVector<64>(SuperVector<32>::Zeroes(), lo256);
} else if (N < 64) {
- SuperVector<32> lo256 = u.v256[0];
- return SuperVector(SuperVector<32>::Zeroes(), lo256 << (N - 32));
+ SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+ return SuperVector<64>(SuperVector<32>::Zeroes(), lo256 << (N - 32));
} else {
return Zeroes();
}
template <>
really_inline SuperVector<64> SuperVector<64>::loadu(void const *ptr)
{
- return {_mm512_loadu_si512((const m512 *)ptr)};
+ // Unaligned 512-bit load from ptr (_mm512_loadu_si512 has no alignment requirement).
+ // Direct construction, without the redundant brace layer.
+ return SuperVector<64>(_mm512_loadu_si512((const m512 *)ptr));
}
template <>
{
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
ptr = vectorscan_assume_aligned(ptr, SuperVector::size);
- return {_mm512_load_si512((const m512 *)ptr)};
+ return {SuperVector<64>(_mm512_load_si512((const m512 *)ptr))};
}
template <>
{
u64a mask = (~0ULL) >> (64 - len);
DEBUG_PRINTF("mask = %016llx\n", mask);
- SuperVector<64> v = _mm512_mask_loadu_epi8(Zeroes().u.v512[0], mask, (const m512 *)ptr);
+ SuperVector<64> v = SuperVector<64>(_mm512_mask_loadu_epi8(Zeroes().u.v512[0], mask, (const m512 *)ptr));
v.print8("v");
return v;
}
template<>
really_inline SuperVector<64> SuperVector<64>::pshufb<true>(SuperVector<64> b)
{
- return {_mm512_shuffle_epi8(u.v512[0], b.u.v512[0])};
+ // Byte shuffle of this vector with b as the control vector (_mm512_shuffle_epi8).
+ // Built by direct construction, without an extra brace layer, like the SuperVector<16> specializations.
+ return SuperVector<64>(_mm512_shuffle_epi8(u.v512[0], b.u.v512[0]));
}
template<>
{
u64a mask = (~0ULL) >> (64 - len);
DEBUG_PRINTF("mask = %016llx\n", mask);
- return {_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0])};
+ return {SuperVector<64>(_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0]))};
}
template<>
if (offset == 16) {
return *this;
} else {
- return {_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset)};
+ return {SuperVector<64>(_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset))};
}
}
#endif
if(offset == 0) {
return *this;
} else if (offset < 32){
- SuperVector<32> lo256 = u.v256[0];
- SuperVector<32> hi256 = u.v256[1];
- SuperVector<32> o_lo256 = l.u.v256[0];
- SuperVector<32> carry1 = hi256.alignr(lo256,offset);
- SuperVector<32> carry2 = o_lo256.alignr(hi256,offset);
- return SuperVector(carry1, carry2);
+ SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+ SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]);
+ SuperVector<32> carry1 = SuperVector<32>(hi256.alignr(lo256,offset));
+ SuperVector<32> carry2 = SuperVector<32>(o_lo256.alignr(hi256,offset));
+ return SuperVector<64>(carry1, carry2);
} else if (offset <= 64){
- SuperVector<32> hi256 = u.v256[1];
- SuperVector<32> o_lo256 = l.u.v256[0];
- SuperVector<32> o_hi256 = l.u.v256[1];
- SuperVector<32> carry1 = o_lo256.alignr(hi256, offset - 32);
- SuperVector<32> carry2 = o_hi256.alignr(o_lo256,offset -32);
- return SuperVector(carry1, carry2);
+ SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+ SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]);
+ SuperVector<32> o_hi256 = SuperVector<32>(l.u.v256[1]);
+ SuperVector<32> carry1 = SuperVector<32>(o_lo256.alignr(hi256, offset - 32));
+ SuperVector<32> carry2 = SuperVector<32>(o_hi256.alignr(o_lo256,offset -32));
+ return SuperVector<64>(carry1, carry2);
} else {
return *this;
}