--- /dev/null
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+template <>
+really_really_inline
+const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
+ SuperVector<16>::movemask_type z = v.movemask();
+ DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+ DEBUG_PRINTF("z %08x\n", z);
+ if (unlikely(z != 0xffff)) {
+ u32 pos = ctz32(~z & 0xffff);
+ DEBUG_PRINTF("~z %08x\n", ~z);
+ DEBUG_PRINTF("match @ pos %u\n", pos);
+ assert(pos < 16);
+ return buf + pos;
+ } else {
+ return NULL; // no match
+ }
+}
+
+template <>
+really_really_inline
+const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
+ SuperVector<16>::movemask_type z = v.movemask();
+ DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+ DEBUG_PRINTF("z %08x\n", z);
+ if (unlikely(z != 0xffff)) {
+ u32 pos = clz32(~z & 0xffff);
+ DEBUG_PRINTF("~z %08x\n", ~z);
+ DEBUG_PRINTF("match @ pos %u\n", pos);
+ assert(pos >= 16 && pos < 32);
+ return buf + (31 - pos);
+ } else {
+ return NULL; // no match
+ }
+}
+
+
typedef __vector int8_t int8x16_t;
+#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0
+#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0
+#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8
+
+/** \brief LUT for the mask1bit functions. */
+ALIGN_CL_DIRECTIVE static const u8 simd_onebit_masks[] = {
+ ZEROES_32, ZEROES_32,
+ ZEROES_31, 0x01, ZEROES_32,
+ ZEROES_31, 0x02, ZEROES_32,
+ ZEROES_31, 0x04, ZEROES_32,
+ ZEROES_31, 0x08, ZEROES_32,
+ ZEROES_31, 0x10, ZEROES_32,
+ ZEROES_31, 0x20, ZEROES_32,
+ ZEROES_31, 0x40, ZEROES_32,
+ ZEROES_31, 0x80, ZEROES_32,
+ ZEROES_32, ZEROES_32,
+};
+
static really_inline m128 ones128(void) {
return (m128) vec_splat_s8(-1);
}
}
}
-#ifdef __cplusplus
-extern "C" {
-#endif
-extern const u8 simd_onebit_masks[];
-#ifdef __cplusplus
-}
-#endif
-
static really_inline
m128 mask1bit128(unsigned int n) {
assert(n < sizeof(m128) * 8);