From: Mika Lindqvist
Date: Tue, 13 Jan 2026 21:07:56 +0000 (-0800)
Subject: Add Windows ARM support for EOR3 feature detection and MSVC intrinsics
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1c20d159c300b96ddb4474632f554553f3935d0d;p=thirdparty%2Fzlib-ng.git

Add Windows ARM support for EOR3 feature detection and MSVC intrinsics
---

diff --git a/arch/arm/arm_features.c b/arch/arm/arm_features.c
index d5eece1a3..625144097 100644
--- a/arch/arm/arm_features.c
+++ b/arch/arm/arm_features.c
@@ -160,6 +160,10 @@ static int arm_has_eor3(void) {
         has_eor3 = sysctlbyname("hw.optional.armv8_2_sha3", &has_feat, &size, NULL, 0) == 0
                   && has_feat == 1;
     }
+#elif defined(_WIN32)
+#  ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
+    has_eor3 = IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE);
+#  endif
 #elif defined(__ARM_FEATURE_SHA3)
     /* Compile-time check */
     has_eor3 = 1;
diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c
index d7a150744..bc1df8cd3 100644
--- a/arch/arm/crc32_armv8_pmull_eor3.c
+++ b/arch/arm/crc32_armv8_pmull_eor3.c
@@ -16,9 +16,15 @@
 
 /* Carryless multiply low 64 bits: a[0] * b[0] */
 static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
+#ifdef _MSC_VER
+    return vreinterpretq_u64_p128(vmull_p64(
+        vget_low_p64(vreinterpret_p64_u64(a)),
+        vget_low_p64(vreinterpret_p64_u64(b))));
+#else
     return vreinterpretq_u64_p128(vmull_p64(
         vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(a)), 0),
         vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(b)), 0)));
+#endif
 }
 
 /* Carryless multiply high 64 bits: a[1] * b[1] */
@@ -28,7 +34,11 @@ static inline uint64x2_t clmul_hi(uint64x2_t a, uint64x2_t b) {
 
 /* Carryless multiply of two 32-bit scalars: a * b (returns 64-bit result in 128-bit vector) */
 static inline uint64x2_t clmul_scalar(uint32_t a, uint32_t b) {
-    return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#ifdef _MSC_VER
+    return vreinterpretq_u64_p128(vmull_p64(vdup_n_p64((poly64_t)a), vdup_n_p64((poly64_t)b)));
+#else
+    return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#endif
 }
 
 /* Compute x^n mod P (CRC-32 polynomial) in log(n) time, where P = 0x104c11db7 */
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index 510910d24..118bfbc2a 100644
--- a/cmake/detect-intrinsics.cmake
+++ b/cmake/detect-intrinsics.cmake
@@ -78,7 +78,11 @@ macro(check_armv8_pmull_eor3_compiler_flag)
         #else
         # include
         #endif
+        #ifdef _MSC_VER
+        __n128 f(__n64 a, __n64 b) {
+        #else
         poly128_t f(poly64_t a, poly64_t b) {
+        #endif
             return vmull_p64(a, b);
         }
         uint64x2_t g(uint64x2_t a, uint64x2_t b, uint64x2_t c) {