--- /dev/null
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Naive dynamic shuffles.
+ *
+ * These are written with the assumption that the provided masks are sparsely
+ * populated and never contain more than 32 on bits. Other implementations will
+ * be faster and actually correct if these assumptions don't hold true.
+ */
+
+#ifndef LIMEX_SHUFFLE_HPP
+#define LIMEX_SHUFFLE_HPP
+
+#include "ue2common.h"
+#include "util/arch.h"
+#include "util/bitutils.h"
+#include "util/unaligned.h"
+#include "util/supervector/supervector.hpp"
+
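+/*
+ * Gather the bytes selected by the permute mask, AND them with the compare
+ * mask and pack the per-byte results into the low bits of the return value:
+ * a result bit is set for every gathered byte that has bits which compare
+ * clears. The unit tests build the mask pair with build_pshufb_masks_onebit(),
+ * so a one-bit mask yields 1 when the selected bit is set and 0 otherwise.
+ */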
+template <u16 S>
+u32 packedExtract(SuperVector<S> s, const SuperVector<S> permute, const SuperVector<S> compare);
+
+
+template <>
+really_really_inline
+u32 packedExtract<16>(SuperVector<16> s, const SuperVector<16> permute, const SuperVector<16> compare) {
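+ // A shuffled byte differs from (shuffled & compare) exactly when it has bits
+ // set outside the compare mask; ~eqmask() packs that per-byte test into a
+ // 16-bit result.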
+ SuperVector<16> shuffled = s.pshufb(permute);
+ SuperVector<16> compared = shuffled & compare;
+ u16 rv = ~compared.eqmask(shuffled);
+ return (u32)rv;
+}
+
+template <>
+really_really_inline
+u32 packedExtract<32>(SuperVector<32> s, const SuperVector<32> permute, const SuperVector<32> compare) {
+ SuperVector<32> shuffled = s.pshufb(permute);
+ SuperVector<32> compared = shuffled & compare;
+ u32 rv = ~compared.eqmask(shuffled);
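+ // eqmask() gives one bit per byte across both 128-bit lanes; fold the two
+ // 16-bit halves together so the result matches the 128-bit variant's shape.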
+ return (u32)((rv >> 16) | (rv & 0xffffU));
+}
+
+template <>
+really_really_inline
+u32 packedExtract<64>(SuperVector<64> s, const SuperVector<64> permute, const SuperVector<64> compare) {
+ SuperVector<64> shuffled = s.pshufb(permute);
+ SuperVector<64> compared = shuffled & compare;
+ u64a rv = ~compared.eqmask(shuffled);
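+ // Fold the four 128-bit lanes' 16-bit results down to a single 16-bit value.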
+ rv = rv >> 32 | rv;
+ return (u32)(((rv >> 16) | rv) & 0xffffU);
+}
+
+
+#endif // LIMEX_SHUFFLE_HPP
\ No newline at end of file
#include "util/arch.h"
#include "util/simd_utils.h"
#include "nfa/limex_shuffle.h"
+#include"util/supervector/supervector.hpp"
+#include "nfa/limex_shuffle.hpp"
+
namespace {
}
}
+TEST(Shuffle, PackedExtract_templatized_128_1) {
+ // Try all possible one-bit masks
+ for (unsigned int i = 0; i < 128; i++) {
+ // shuffle a single 1 bit to the front
+ SuperVector<16> permute = SuperVector<16>::Zeroes();
+ SuperVector<16> compare = SuperVector<16>::Zeroes();
+ build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]);
+ EXPECT_EQ(1U, packedExtract<16>(setbit<m128>(i), permute, compare));
+ EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare));
+ // we should get zero out of these cases
+ EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare));
+ EXPECT_EQ(0U, packedExtract<16>(not128(setbit<m128>(i)), permute, compare));
+ // we should get zero out of all the other bit positions
+ for (unsigned int j = 0; (j != i && j < 128); j++) {
+ EXPECT_EQ(0U, packedExtract<16>(setbit<m128>(j), permute, compare));
+ }
+ }
+}
+
+
#if defined(HAVE_AVX2)
TEST(Shuffle, PackedExtract256_1) {
// Try all possible one-bit masks
}
}
}
+
+
+TEST(Shuffle, PackedExtract_templatized_256_1) {
+ // Try all possible one-bit masks
+ for (unsigned int i = 0; i < 256; i++) {
+ // shuffle a single 1 bit to the front
+ SuperVector<32> permute = SuperVector<32>::Zeroes();
+ SuperVector<32> compare = SuperVector<32>::Zeroes();
+ build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]);
+ EXPECT_EQ(1U, packedExtract<32>(setbit<m256>(i), permute, compare));
+ EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare));
+ // we should get zero out of these cases
+ EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare));
+ EXPECT_EQ(0U, packedExtract<32>(not256(setbit<m256>(i)), permute, compare));
+ // we should get zero out of all the other bit positions
+ for (unsigned int j = 0; (j != i && j < 256); j++) {
+ EXPECT_EQ(0U, packedExtract<32>(setbit<m256>(j), permute, compare));
+ }
+ }
+}
+
#endif
#if defined(HAVE_AVX512)
}
}
}
+
+TEST(Shuffle, PackedExtract_templatized_512_1) {
+ // Try all possible one-bit masks
+ for (unsigned int i = 0; i < 512; i++) {
+ // shuffle a single 1 bit to the front
+ SuperVector<64> permute = SuperVector<64>::Zeroes();
+ SuperVector<64> compare = SuperVector<64>::Zeroes();
+ build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]);
+ EXPECT_EQ(1U, packedExtract<64>(setbit<m512>(i), permute, compare));
+ EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare));
+ // we should get zero out of these cases
+ EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare));
+ EXPECT_EQ(0U, packedExtract<64>(not512(setbit<m512>(i)), permute, compare));
+ // we should get zero out of all the other bit positions
+ for (unsigned int j = 0; (j != i && j < 512); j++) {
+ EXPECT_EQ(0U, packedExtract<64>(setbit<m512>(j), permute, compare));
+ }
+ }
+}
+
#endif
} // namespace
}
}
+
+/*Define LSHIFT128_128 macro*/
+#define TEST_LSHIFT128_128(buf, vec, v, l) { \
+ auto v_shifted = v.lshift128(l); \
+ for (int i=15; i>= l; --i) { \
+ buf[i] = vec[i-l]; \
+ } \
+ for (int i=0; i<l; i++) { \
+ buf[i] = 0; \
+ } \
+ for(int i=0; i<16; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+
+TEST(SuperVectorUtilsTest,LShift128_128c){
+ u8 vec[16];
+ for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
+ auto SP = SuperVector<16>::loadu(vec);
+ u8 buf[16];
+ for (int j = 0; j<16; j++) {
+ TEST_LSHIFT128_128(buf, vec, SP, j);
+ }
+}
+
+/*Define RSHIFT128_128 macro*/
+#define TEST_RSHIFT128_128(buf, vec, v, l) { \
+ auto v_shifted = v.rshift128(l); \
+ for (int i=0; i<16-l; i++) { \
+ buf[i] = vec[i+l]; \
+ } \
+ for (int i=16-l; i<16; i++) { \
+ buf[i] = 0; \
+ } \
+ for(int i=0; i<16; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+
+TEST(SuperVectorUtilsTest,RShift128_128c){
+ u8 vec[16];
+ for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
+ auto SP = SuperVector<16>::loadu(vec);
+ u8 buf[16];
+ for (int j = 0; j<16; j++) {
+ TEST_RSHIFT128_128(buf, vec, SP, j);
+ }
+}
+
/*Define ALIGNR128 macro*/
#define TEST_ALIGNR128(v1, v2, buf, l) { \
auto v_aligned = v2.alignr(v1, l); \
}
}
-/*
+
TEST(SuperVectorUtilsTest,LShift64_256c){
u64a vec[4] = {128, 512, 256, 1024};
auto SP = SuperVector<32>::loadu(vec);
}
}
}
-*/
+
/*Define RSHIFT256 macro*/
#define TEST_RSHIFT256(buf, vec, v, l) { \
}
+
+/*Define LSHIFT128_256 macro*/
+#define TEST_LSHIFT128_256(buf, vec, v, l) { \
+ auto v_shifted = v.lshift128(l); \
+ for (int i=15; i>= l; --i) { \
+ buf[i] = vec[i-l]; \
+ buf[i+16] = vec[(16+i)-l]; \
+ } \
+ for (int i=0; i<l; i++) { \
+ buf[i] = 0; \
+ buf[i+16]= 0; \
+ } \
+ for(int i=0; i<32; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+
+TEST(SuperVectorUtilsTest,LShift128_256c){
+ u8 vec[32];
+ for (int i = 0; i<32; i++) { vec[i]= i+1;}
+ auto SP = SuperVector<32>::loadu(vec);
+ u8 buf[32];
+ for (int j=0; j<16; j++) {
+ TEST_LSHIFT128_256(buf, vec, SP, j);
+ }
+}
+
+/*Define RSHIFT128_256 macro*/
+#define TEST_RSHIFT128_256(buf, vec, v, l) { \
+ auto v_shifted = v.rshift128(l); \
+ for (int i=0; i<16-l; i++) { \
+ buf[i] = vec[i+l]; \
+ buf[i+16] = vec[(i+16)+l]; \
+ } \
+ for (int i=16-l; i<16; i++) { \
+ buf[i] = 0; \
+ buf[i+16] = 0; \
+ } \
+ for(int i=0; i<32; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+
+TEST(SuperVectorUtilsTest,RShift128_256c){
+ u8 vec[32];
+ for (int i = 0; i<32; i++ ){ vec[i] = i+1; }
+ auto SP = SuperVector<32>::loadu(vec);
+ u8 buf[32];
+ for(int j=0; j<16; j++) {
+ TEST_RSHIFT128_256(buf, vec, SP, j);
+ }
+}
+
+
/*Define ALIGNR256 macro*/
/*
#define TEST_ALIGNR256(v1, v2, buf, l) { \
}
}
-/*
+
TEST(SuperVectorUtilsTest,Movemask512c){
srand (time(NULL));
u8 vec[64] = {0};
u64a r = rand() % 100 + 1;
for(int i=0; i<64; i++) {
- if (r & (1 << i)) {
+ if (r & (1ULL << i)) {
vec[i] = 0xff;
}
}
u8 vec2[64] = {0};
u64a mask = SP.movemask();
for(int i=0; i<64; i++) {
- if (mask & (1 << i)) {
+ if (mask & (1ULL << i)) {
vec2[i] = 0xff;
}
}
for (int i=0; i<64; i++){
- printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
- //ASSERT_EQ(vec[i],vec2[i]);
+ //printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
+ ASSERT_EQ(vec[i],vec2[i]);
}
}
-*/
+
TEST(SuperVectorUtilsTest,Eqmask512c){
srand (time(NULL));
}
}
-/*
+
TEST(SuperVectorUtilsTest,LShift64_512c){
u64a vec[8] = {32, 64, 128, 256, 512, 512, 256, 1024};
auto SP = SuperVector<64>::loadu(vec);
}
}
}
-*/
+
/*Define RSHIFT512 macro*/
#define TEST_RSHIFT512(buf, vec, v, l) { \
}
}
+
+/*Define RSHIFT128_512 macro*/
+#define TEST_RSHIFT128_512(buf, vec, v, l) { \
+ auto v_shifted = v.rshift128(l); \
+ for (int i=0; i<16-l; i++) { \
+ buf[i] = vec[i+l]; \
+ buf[i+16] = vec[(i+16)+l]; \
+ buf[i+32] = vec[(i+32)+l]; \
+ buf[i+48] = vec[(i+48)+l]; \
+ } \
+ for (int i=16-l; i<16; i++) { \
+ buf[i] = 0; \
+ buf[i+16] = 0; \
+ buf[i+32] = 0; \
+ buf[i+48] = 0; \
+ } \
+ for(int i=0; i<64; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+TEST(SuperVectorUtilsTest,RShift128_512c){
+ u8 vec[64];
+ for (int i = 0; i<64; i++ ){ vec[i] = i+1; }
+ auto SP = SuperVector<64>::loadu(vec);
+ u8 buf[64] = {1};
+ for(int j=0; j<16; j++){
+ TEST_RSHIFT128_512(buf, vec, SP, j);
+ }
+}
+
+/*Define LSHIFT128_512 macro*/
+#define TEST_LSHIFT128_512(buf, vec, v, l) { \
+ auto v_shifted = v.lshift128(l); \
+ for (int i=15; i>=l; --i) { \
+ buf[i] = vec[i-l]; \
+ buf[i+16] = vec[(i+16)-l]; \
+ buf[i+32] = vec[(i+32)-l]; \
+ buf[i+48] = vec[(i+48)-l]; \
+ } \
+ for (int i=0; i<l; i++) { \
+ buf[i] = 0; \
+ buf[i+16] = 0; \
+ buf[i+32] = 0; \
+ buf[i+48] = 0; \
+ } \
+ for(int i=0; i<64; i++) { \
+ ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
+ } \
+ }
+
+TEST(SuperVectorUtilsTest,LShift128_512c){
+ u8 vec[64];
+ for (int i = 0; i<64; i++) { vec[i]= i+1;}
+ auto SP = SuperVector<64>::loadu(vec);
+ u8 buf[64] = {1};
+ for(int j=0; j<16;j++){
+ TEST_LSHIFT128_512(buf, vec, SP, j);
+ }
+}
+
+
/*Define ALIGNR512 macro*/
/*
#define TEST_ALIGNR512(v1, v2, buf, l) { \