static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb_m128(s, permute);
- //int8x16_t res = (int8x16_t) pshufb_m128(s, permute);
- //printf("shufled:");
- //for(int i=15; i>=0; i--) {printf("%02x ", res[i]);}
- //printf("\n");
+ print_m128_16x8("shufled", shuffled);
m128 compared = and128(shuffled, compare);
u16 rv = ~movemask128(eq128(compared, shuffled));
return (u32)rv;
// Byte shuffle of a 128-bit vector, implemented with the vec_perm intrinsic.
//
// a: vector supplying the source bytes; it is passed as BOTH vec_perm sources.
// b: vector of per-byte selection indices.
//
// Returns the vec_perm result cast back to m128.
//
// NOTE(review): because both sources are `a`, an index that reaches past the
// first 16 bytes presumably selects the same byte as (index % 16); the unit
// test accompanying this change asserts res[i] == vec[idx[i] % 16] for index
// values up to 30. Confirm against the vec_perm reference.
// NOTE(review): nothing here zeroes lanes whose index has the top bit set
// (x86 PSHUFB does) -- verify that no caller depends on that behaviour.
static really_inline
m128 pshufb_m128(m128 a, m128 b) {
// Same vector for both permute sources; only `b` chooses the bytes.
return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
- //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)a, (uint8x16_t)b);;
- //uint8x16_t btransparent = vec_and((uint8x16_t)b, (uint8x16_t)vec_splats(0x8f));
- //return (m128) vec_perm(a, a, btransparent);
- //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)b, (uint8x16_t)a);
-
- //return (m128) vec_perm((int8x16_t)a, (int8x16_t)b, (uint8x16_t)vec_splat_s8(0));
- //return (m128) vec_perm((int8x16_t)b, (int8x16_t)a, (uint8x16_t)vec_splat_s8(0));
-
}
static really_inline
// pshufb<false> specialisation for the 16-byte SuperVector.
//
// b: SuperVector whose b.u.v128[0] is used as the per-byte index operand.
//
// Returns the permuted 128-bit value; the m128 result is returned as a
// SuperVector<16> (presumably through a converting constructor declared
// elsewhere -- not visible here).
//
// The removed (`-`) line called vec_permxor with a zero splat as first
// operand; the added (`+`) line calls vec_perm with this vector's u.v128[0]
// as both permute sources and b.u.v128[0] as the index vector, which is the
// same call shape used by pshufb_m128.
// NOTE(review): the meaning of the <false> template argument is not visible
// in this fragment -- confirm it matches the no-special-casing behaviour of
// a plain vec_perm.
template<>
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
{
- return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
+ return (m128) vec_perm((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
}
template<>
}
u8 vec2[16];
for (int i=0; i<16; i++) {
- vec2[i]=i;
- }
+ vec2[i]=i + (rand() % 16 + 0);
+ }
m128 v1 = loadu128(vec);
m128 v2 = loadu128(vec2);
m128 vres = pshufb_m128(v1, v2);
u8 res[16];
store128(res, vres);
for (int i=0; i<16; i++) {
- ASSERT_EQ(vec[vec2[i]], res[i]);
+ ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
}
}
}
u8 vec2[16];
for (int i=0; i<16; i++) {
- vec2[i]=i;
+ vec2[i]=i + (rand() % 15 + 0);
}
auto SP1 = SuperVector<16>::loadu(vec);
auto SP2 = SuperVector<16>::loadu(vec2);
auto SResult = SP1.template pshufb<true>(SP2);
for (int i=0; i<16; i++) {
- ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);
+ ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]);
}
}