#include <string.h> // for memcpy
// Element-typed aliases for the `__vector` types used by this PowerPC code.
// They are spelled with built-in integer keywords (for example
// `unsigned long long int`) rather than the <stdint.h> names used by the
// commented-out definitions that follow.
+typedef __vector unsigned long long int uint64x2_t;
+typedef __vector signed long long int int64x2_t;
+typedef __vector unsigned int uint32x4_t;
+typedef __vector signed int int32x4_t;
+typedef __vector unsigned short int uint16x8_t;
+typedef __vector signed short int int16x8_t;
+typedef __vector unsigned char uint8x16_t;
+typedef __vector signed char int8x16_t;
+
// Scalar types matching the element types of uint64x2_t / int64x2_t; used in
// casts on values handed to vec_splats() and on pointers handed to vec_xl().
+typedef unsigned long long int ulong64_t;
+typedef signed long long int long64_t;
+/*
typedef __vector uint64_t uint64x2_t;
typedef __vector int64_t int64x2_t;
typedef __vector uint32_t uint32x4_t;
typedef __vector uint16_t uint16x8_t;
typedef __vector int16_t int16x8_t;
typedef __vector uint8_t uint8x16_t;
-typedef __vector int8_t int8x16_t;
+typedef __vector int8_t int8x16_t;*/
#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
// Shift each 64-bit element of `a` left by `b` bits.
//
// The scalar count `b` is replicated into both elements of a uint64x2_t with
// vec_splats(), `a` is reinterpreted as int64x2_t, and the vec_sl() result is
// cast back to m128.
//
// Patch note: the cast feeding vec_splats() changes from uint64_t to
// ulong64_t (unsigned long long int), i.e. the element type of uint64x2_t.
//
// NOTE(review): the Power vector intrinsic reference describes vec_sl as
// taking the count modulo the element width, so b >= 64 would not produce
// zero -- confirm callers never pass such a count.
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
- uint64x2_t shift_indices = vec_splats((uint64_t)b);
+ uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sl((int64x2_t)a, shift_indices);
}
// Shift each 64-bit element of `a` right by `b` bits.
//
// The scalar count `b` is replicated into both elements of a uint64x2_t with
// vec_splats(), `a` is reinterpreted as int64x2_t, and the vec_sr() result is
// cast back to m128.
//
// Patch note: the cast feeding vec_splats() changes from uint64_t to
// ulong64_t (unsigned long long int), i.e. the element type of uint64x2_t.
//
// NOTE(review): `a` is cast to the signed vector type; per the AltiVec/VSX
// intrinsic documentation vec_sr is a logical (zero-filling) shift and
// vec_sra is the arithmetic one -- confirm that zero fill is what is wanted.
static really_really_inline
m128 rshift64_m128(m128 a, unsigned b) {
- uint64x2_t shift_indices = vec_splats((uint64_t)b);
+ uint64x2_t shift_indices = vec_splats((ulong64_t)b);
return (m128) vec_sr((int64x2_t)a, shift_indices);
}
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
- uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
+ uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9);
- uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
+ uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0];
#include "util/supervector/supervector.hpp"
#include <iostream>
-
-typedef __vector uint64_t uint64x2_t;
-typedef __vector int64_t int64x2_t;
-typedef __vector uint32_t uint32x4_t;
-typedef __vector int32_t int32x4_t;
-typedef __vector uint16_t uint16x8_t;
-typedef __vector int16_t int16x8_t;
-typedef __vector uint8_t uint8x16_t;
-typedef __vector int8_t int8x16_t;
-
// 128-bit Powerpc64le implementation
template<>
// Broadcast constructor for int8_t: vec_splats() replicates `other` into every
// element of a vector, which is cast to m128 and stored in u.v128[0].
// Patch note: the added declarator drops the explicit <int8_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<int8_t>(int8_t const other)
+really_inline SuperVector<16>::SuperVector(int8_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
// Broadcast constructor for uint8_t: vec_splats() replicates `other` into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// The static_cast names the parameter's own type (uint8_t).
// Patch note: the added declarator drops the explicit <uint8_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<uint8_t>(uint8_t const other)
+really_inline SuperVector<16>::SuperVector(uint8_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint8_t>(other));
}
// Broadcast constructor for int16_t: vec_splats() replicates `other` into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// Patch note: the added declarator drops the explicit <int16_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<int16_t>(int16_t const other)
+really_inline SuperVector<16>::SuperVector(int16_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
// Broadcast constructor for uint16_t: vec_splats() replicates `other` into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// The static_cast names the parameter's own type (uint16_t).
// Patch note: the added declarator drops the explicit <uint16_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<uint16_t>(uint16_t const other)
+really_inline SuperVector<16>::SuperVector(uint16_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint16_t>(other));
}
// Broadcast constructor for int32_t: vec_splats() replicates `other` into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// Patch note: the added declarator drops the explicit <int32_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<int32_t>(int32_t const other)
+really_inline SuperVector<16>::SuperVector(int32_t const other)
{
u.v128[0] = (m128) vec_splats(other);
}
// Broadcast constructor for uint32_t: vec_splats() replicates `other` into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// The static_cast names the parameter's own type (uint32_t).
// Patch note: the added declarator drops the explicit <uint32_t> template
// argument that followed the constructor name; the body is unchanged.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<uint32_t>(uint32_t const other)
+really_inline SuperVector<16>::SuperVector(uint32_t const other)
{
u.v128[0] = (m128) vec_splats(static_cast<uint32_t>(other));
}
// Broadcast constructor for int64_t: vec_splats() replicates the value into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// Patch note: the added declarator drops the explicit <int64_t> template
// argument, and the added body converts `other` to ulong64_t
// (unsigned long long int) before splatting, where the removed line passed
// the int64_t directly.
// NOTE(review): the parameter is signed but the cast targets the unsigned
// typedef. This presumably leaves the stored bit pattern unchanged, but
// long64_t would mirror the signedness -- confirm this is intentional.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<int64_t>(int64_t const other)
+really_inline SuperVector<16>::SuperVector(int64_t const other)
{
- u.v128[0] = (m128) vec_splats(other);
+ u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
}
// Broadcast constructor for uint64_t: vec_splats() replicates the value into
// every element of a vector, which is cast to m128 and stored in u.v128[0].
// Patch note: the added declarator drops the explicit <uint64_t> template
// argument, and the static_cast target changes from uint64_t to ulong64_t
// (unsigned long long int), the element type of uint64x2_t.
template<>
template<>
-really_inline SuperVector<16>::SuperVector<uint64_t>(uint64_t const other)
+really_inline SuperVector<16>::SuperVector(uint64_t const other)
{
- u.v128[0] = (m128) vec_splats(static_cast<uint64_t>(other));
+ u.v128[0] = (m128) vec_splats(static_cast<ulong64_t>(other));
}
// Constants
uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2);
uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28));
- uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff));
+ uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff));
uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3);
uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9);
- uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff));
+ uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff));
uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4);
return s5[0];
// Left-shift the elements of u.s64x2[0] by the compile-time count N.
// N is splatted into a vector with vec_splats() and applied with vec_sl();
// the result is cast to m128 and used to brace-initialize the returned
// SuperVector<16>.
// Patch note: the cast on N changes from uint64_t to ulong64_t
// (unsigned long long int).
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{
- return { (m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)N)) };
+ return { (m128) vec_sl(u.s64x2[0], vec_splats((ulong64_t)N)) };
}
// Right-shift the elements of u.s64x2[0] by the compile-time count N.
// N is splatted into a vector with vec_splats() and applied with vec_sr();
// the result is cast to m128 and used to brace-initialize the returned
// SuperVector<16>.
// Patch note: the cast on N changes from uint64_t to ulong64_t
// (unsigned long long int).
template <>
template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{
- return { (m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)N)) };
+ return { (m128) vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) };
}
template <>
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s8x16[0], vec_splats((uint8_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s16x8[0], vec_splats((uint16_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s32x4[0], vec_splats((uint32_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(v->u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s8x16[0], vec_splats((uint8_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s8x16[0], vec_splats((uint8_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s16x8[0], vec_splats((uint16_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s16x8[0], vec_splats((uint16_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s32x4[0], vec_splats((uint32_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s32x4[0], vec_splats((uint32_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)n))}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s64x2[0], vec_splats((ulong64_t)n))}; });
return result;
}
if (N == 0) return *this;
if (N == 16) return Zeroes();
SuperVector result;
- Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), u.s8x16[0], 16 - n)}; });
+ Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), v->u.s8x16[0], 16 - n)}; });
return result;
}
// Load a SuperVector<16> from `ptr`; no alignment check is performed here.
// vec_xl() is called with a displacement of 0 and the result is cast to m128,
// from which the return value is constructed.
// Patch note: the pointer cast changes from `const int64_t*` to
// `const long64_t*` (signed long long int).
template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{
- return (m128) vec_xl(0, (const int64_t*)ptr);
+ return (m128) vec_xl(0, (const long64_t*)ptr);
}
// Load a SuperVector<16> from `ptr` after asserting its alignment.
// The load is the same vec_xl(0, ...) call used by loadu(); the only
// difference visible here is the ISALIGNED_N assertion.
// Patch note: the pointer cast changes from `const int64_t*` to
// `const long64_t*` (signed long long int).
template <>
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
{
// NOTE(review): alignof is applied to SuperVector::size, which reads like a
// value rather than a type. If so, the assert checks the alignment of that
// member's type, not a 16-byte vector alignment -- confirm against the class
// declaration.
assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
- return (m128) vec_xl(0, (const int64_t*)ptr);
+ return (m128) vec_xl(0, (const long64_t*)ptr);
}
template <>