}
template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const UNUSED N) const
{
//if (N == 0) return *this;
//if (N == 16) return Zeroes();
}
template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const UNUSED N) const
{
//if (N == 0) return *this;
//if (N == 16) return Zeroes();
}
template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const UNUSED N) const
{
//if (N == 0) return *this;
//if (N == 16) return Zeroes();
}
template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const UNUSED N) const
{
//if (N == 0) return *this;
//if (N == 16) return Zeroes();
}
template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N) const
{
//if (N == 0) return *this;
//if (N == 16) return Zeroes();
return mask & v;
}
-template<>
-really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
-{
- return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
-}
-
template<>
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
{
return *this;
}
+template<>
+template<>
+really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
+{ /* Byte shuffle that uses the control bytes of b exactly as given
+ (pshufb<true> is the variant that masks b first and then delegates here).
+ Returns vec_permxor(all-zero vector, this->u.v128[0], b.u.v128[0]), cast to m128.
+ NOTE(review): presumably XOR-ing with the all-zero operand leaves only the
+ bytes selected from u.v128[0]; confirm vec_permxor's index handling and its
+ behaviour for control bytes >= 16 against the intrinsic reference. */
+ return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
+}
+
+template<>
+template<>
+really_inline SuperVector<16> SuperVector<16>::pshufb<true>(SuperVector<16> b)
+{
+ /* Intel-compatible byte shuffle: translates the control vector b, then
+ delegates to pshufb<false>.
+ On Intel, a control byte with bit 0x80 set produces zero; otherwise the
+ selected lane is (control & 0xf).
+ On NEON, a control byte >= 16 produces zero; otherwise it selects that lane.
+ Masking b with 0x8f keeps only the zeroing bit (0x80) and the lane index
+ (0x0f), so btranslated is either a lane index 0..15 or a value >= 0x80.
+ NOTE(review): this wording refers to NEON, but pshufb<false> here is built
+ on vec_permxor - confirm that it really yields zero for control bytes
+ >= 16, otherwise the Intel zeroing behaviour is not reproduced. */
+ SuperVector<16> btranslated = b & SuperVector<16>::dup_s8(0x8f);
+ return pshufb<false>(btranslated);
+}
+
template<>
really_inline SuperVector<16> SuperVector<16>::pshufb_maskz(SuperVector<16> b, uint8_t const len)
}
+
+/*
template<>
really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const N)
{
{
return *this >> N;
}
+*/
#endif