* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef ARM_NEON_ADLER32
-#ifdef _M_ARM64
-# include <arm64_neon.h>
-#else
-# include <arm_neon.h>
-#endif
+#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../../adler32_p.h"
-#include "../../fallback_builtins.h"
static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
static const uint16_t ALIGNED_(16) taps[64] = {
*/
#ifdef ARM_NEON_CHUNKSET
-#ifdef _M_ARM64
-# include <arm64_neon.h>
-#else
-# include <arm_neon.h>
-#endif
+#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../generic/chunk_permute_table.h"
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
-#ifdef _M_ARM64
-# include <arm64_neon.h>
-#else
-# include <arm_neon.h>
-#endif
#include "../../zbuild.h"
+#include "fallback_builtins.h"
+
+#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
+#include "neon_intrins.h"
+
static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
--- /dev/null
+#ifndef ARM_NEON_INTRINS_H
+#define ARM_NEON_INTRINS_H
+
+#ifdef _M_ARM64
+# include <arm64_neon.h>
+#else
+# include <arm_neon.h>
+#endif
+
+#if defined(ARM_NEON_ADLER32) && !defined(__aarch64__) && !defined(_M_ARM64)
+/* Compatibility shim for the _high family of functions */
+#define vmull_high_u8(a, b) vmull_u8(vget_high_u8(a), vget_high_u8(b))
+#define vmlal_high_u8(a, b, c) vmlal_u8(a, vget_high_u8(b), vget_high_u8(c))
+#define vmlal_high_u16(a, b, c) vmlal_u16(a, vget_high_u16(b), vget_high_u16(c))
+#define vaddw_high_u8(a, b) vaddw_u8(a, vget_high_u8(b))
+#endif
+
+#ifdef ARM_NEON_SLIDEHASH
+
+#define vqsubq_u16_x4_x1(out, a, b) do { \
+ out.val[0] = vqsubq_u16(a.val[0], b); \
+ out.val[1] = vqsubq_u16(a.val[1], b); \
+ out.val[2] = vqsubq_u16(a.val[2], b); \
+ out.val[3] = vqsubq_u16(a.val[3], b); \
+} while (0)
+
+/* Have to check for hard float ABI on GCC/clang, but not
+ * on MSVC (we don't compile for the soft float ABI on windows)
+ */
+#if !defined(ARM_NEON_HASLD4) && (defined(__ARM_FP) || defined(_MSC_VER))
+
+static inline uint16x8x4_t vld1q_u16_x4(uint16_t *a) {
+ uint16x8x4_t ret = (uint16x8x4_t) {{
+ vld1q_u16(a),
+ vld1q_u16(a+8),
+ vld1q_u16(a+16),
+ vld1q_u16(a+24)}};
+ return ret;
+}
+
+static inline uint8x16x4_t vld1q_u8_x4(uint8_t *a) {
+ uint8x16x4_t ret = (uint8x16x4_t) {{
+ vld1q_u8(a),
+ vld1q_u8(a+16),
+ vld1q_u8(a+32),
+ vld1q_u8(a+48)}};
+ return ret;
+}
+
+static inline void vst1q_u16_x4(uint16_t *p, uint16x8x4_t a) {
+ vst1q_u16(p, a.val[0]);
+ vst1q_u16(p + 8, a.val[1]);
+ vst1q_u16(p + 16, a.val[2]);
+ vst1q_u16(p + 24, a.val[3]);
+}
+#endif // HASLD4 check and hard float
+#endif // ARM_NEON_SLIDEHASH
+
+#endif // include guard ARM_NEON_INTRINS_H
*/
#if defined(ARM_NEON_SLIDEHASH)
-#ifdef _M_ARM64
-# include <arm64_neon.h>
-#else
-# include <arm_neon.h>
-#endif
+#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../../deflate.h"
-#include "../../fallback_builtins.h"
/* SIMD version of hash_chain rebase */
static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
#endif // __AVX2__
-#if defined(ARM_NEON_ADLER32) && !defined(__aarch64__) && !defined(_M_ARM64)
-/* Compatibility shim for the _high family of functions */
-#define vmull_high_u8(a, b) vmull_u8(vget_high_u8(a), vget_high_u8(b))
-#define vmlal_high_u8(a, b, c) vmlal_u8(a, vget_high_u8(b), vget_high_u8(c))
-#define vmlal_high_u16(a, b, c) vmlal_u16(a, vget_high_u16(b), vget_high_u16(c))
-#define vaddw_high_u8(a, b) vaddw_u8(a, vget_high_u8(b))
-#endif
-
-#ifdef ARM_NEON_SLIDEHASH
-
-#define vqsubq_u16_x4_x1(out, a, b) do { \
- out.val[0] = vqsubq_u16(a.val[0], b); \
- out.val[1] = vqsubq_u16(a.val[1], b); \
- out.val[2] = vqsubq_u16(a.val[2], b); \
- out.val[3] = vqsubq_u16(a.val[3], b); \
-} while (0)
-
-/* Have to check for hard float ABI on GCC/clang, but not
- * on MSVC (we don't compile for the soft float ABI on windows)
- */
-#if !defined(ARM_NEON_HASLD4) && (defined(__ARM_FP) || defined(_MSC_VER))
-
-#ifdef _M_ARM64
-# include <arm64_neon.h>
-#else
-# include <arm_neon.h>
-#endif
-
-static inline uint16x8x4_t vld1q_u16_x4(uint16_t *a) {
- uint16x8x4_t ret = (uint16x8x4_t) {{
- vld1q_u16(a),
- vld1q_u16(a+8),
- vld1q_u16(a+16),
- vld1q_u16(a+24)}};
- return ret;
-}
-
-static inline uint8x16x4_t vld1q_u8_x4(uint8_t *a) {
- uint8x16x4_t ret = (uint8x16x4_t) {{
- vld1q_u8(a),
- vld1q_u8(a+16),
- vld1q_u8(a+32),
- vld1q_u8(a+48)}};
- return ret;
-}
-
-static inline void vst1q_u16_x4(uint16_t *p, uint16x8x4_t a) {
- vst1q_u16(p, a.val[0]);
- vst1q_u16(p + 8, a.val[1]);
- vst1q_u16(p + 16, a.val[2]);
- vst1q_u16(p + 24, a.val[3]);
-}
-#endif // HASLD4 check and hard float
-#endif // ARM_NEON_SLIDEHASH
-
#endif // include guard FALLBACK_BUILTINS_H