git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add Windows ARM support for EOR3 feature detection and MSVC intrinsics
author: Mika Lindqvist <postmaster@raasu.org>
Tue, 13 Jan 2026 21:07:56 +0000 (13:07 -0800)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 22 Jan 2026 00:16:07 +0000 (01:16 +0100)
arch/arm/arm_features.c
arch/arm/crc32_armv8_pmull_eor3.c
cmake/detect-intrinsics.cmake

index d5eece1a30bc1adeb22175f8b437973528730653..625144097c9ceebb28cb6693c19a34e5aab9d731 100644 (file)
@@ -160,6 +160,10 @@ static int arm_has_eor3(void) {
         has_eor3 = sysctlbyname("hw.optional.armv8_2_sha3", &has_feat, &size, NULL, 0) == 0
             && has_feat == 1;
     }
+#elif defined(_WIN32)
+#  ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
+    has_eor3 = IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE);
+#  endif
 #elif defined(__ARM_FEATURE_SHA3)
     /* Compile-time check */
     has_eor3 = 1;
index d7a1507442c01c92e92161d73d7223da3a73038b..bc1df8cd30301ca70b6fc723ccc512e2aa597e5a 100644 (file)
 
 /* Carryless multiply low 64 bits: a[0] * b[0] */
 static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
+#ifdef _MSC_VER
+    return vreinterpretq_u64_p128(vmull_p64(
+        vget_low_p64(vreinterpret_p64_u64(a)),
+        vget_low_p64(vreinterpret_p64_u64(b))));
+#else
     return vreinterpretq_u64_p128(vmull_p64(
         vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(a)), 0),
         vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(b)), 0)));
+#endif
 }
 
 /* Carryless multiply high 64 bits: a[1] * b[1] */
@@ -28,7 +34,11 @@ static inline uint64x2_t clmul_hi(uint64x2_t a, uint64x2_t b) {
 
 /* Carryless multiply of two 32-bit scalars: a * b (returns 64-bit result in 128-bit vector) */
 static inline uint64x2_t clmul_scalar(uint32_t a, uint32_t b) {
-  return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#ifdef _MSC_VER
+    return vreinterpretq_u64_p128(vmull_p64(vdup_n_p64((poly64_t)a), vdup_n_p64((poly64_t)b)));
+#else
+    return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#endif
 }
 
 /* Compute x^n mod P (CRC-32 polynomial) in log(n) time, where P = 0x104c11db7 */
index 510910d24b99a82661f905313a6fce0f326796a3..118bfbc2a53146df958f9a92f526c23effef797b 100644 (file)
@@ -78,7 +78,11 @@ macro(check_armv8_pmull_eor3_compiler_flag)
         #else
         #  include <arm_neon.h>
         #endif
+        #ifdef _MSC_VER
+        __n128 f(__n64 a, __n64 b) {
+        #else
         poly128_t f(poly64_t a, poly64_t b) {
+        #endif
             return vmull_p64(a, b);
         }
         uint64x2_t g(uint64x2_t a, uint64x2_t b, uint64x2_t c) {