#include "teddy.h"
#include "teddy_internal.h"
#include "util/arch.h"
+#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/uniform_ops.h"
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
};
-/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
- * so we force its generation.
- */
-static really_inline
-u64a andn(const u32 a, const u8 *b) {
-    u64a r;
-#if defined(HAVE_BMI) && !defined(NO_ASM)
-    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
-#else
-    r = unaligned_load_u32(b) & ~a;
-#endif
-    return r;
-}
-
/* generates an initial state mask based on the last byte-ish of history rather
* than being all accepting. If there is no history to consider, the state is
* generated based on the minimum length of each bucket in order to prevent
#define BITUTILS_ARCH_COMMON_H
#include "util/popcount.h"
+#include "util/unaligned.h"
static really_inline
u32 clz32_impl_c(u32 x) {
return result;
}
+/* Portable fallback for 32-bit ANDN: load 32 bits from b and clear the bits
+ * that are set in a.
+ */
+static really_inline
+u64a andn_impl_c(const u32 a, const u8 *b) {
+    return unaligned_load_u32(b) & ~a;
+}
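+/* Worked example (assuming a little-endian load): if b points at the bytes
+ * { 0x78, 0x56, 0x34, 0x12 }, unaligned_load_u32(b) yields 0x12345678, so
+ * andn_impl_c(0x0000ffff, b) == 0x12340000.
+ */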
+
#endif // BITUTILS_ARCH_COMMON_H
}
#endif
+/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
+ * so we force its generation.
+ */
+static really_inline
+u64a andn_impl(const u32 a, const u8 *b) {
+#if defined(HAVE_BMI) && !defined(NO_ASM)
+    u64a r;
+    __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
+    return r;
+#else
+    return andn_impl_c(a, b);
+#endif
+}
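+/* Note: the BMI1 ANDN instruction accepts a memory source operand, so the
+ * unaligned 32-bit load from b and the AND-NOT against a collapse into a
+ * single instruction in the asm form above.
+ */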
+
#endif // BITUTILS_ARCH_X86_H
return pext64_impl(x, mask);
}
+/* 32-bit ANDN: load 32 bits from b and clear the bits that are set in a,
+ * dispatching to the arch-specific implementation (BMI asm where available).
+ */
+static really_inline
+u64a andn(const u32 a, const u8 *b) {
+    return andn_impl(a, b);
+}
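+/* Usage sketch (mask and ptr are hypothetical names): callers such as
+ * teddy.c, which previously defined andn locally, can write
+ *     u64a v = andn(mask, ptr);
+ * to load 32 bits from ptr with the bits set in mask cleared.
+ */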
+
#endif // BITUTILS_H