has_eor3 = sysctlbyname("hw.optional.armv8_2_sha3", &has_feat, &size, NULL, 0) == 0
&& has_feat == 1;
}
+#elif defined(_WIN32)
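+/* PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE is a fairly recent addition to the Windows SDK headers, hence the guard below. */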
+# ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
+ has_eor3 = IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE);
+# endif
#elif defined(__ARM_FEATURE_SHA3)
/* Compile-time check */
has_eor3 = 1;
/* Carryless multiply low 64 bits: a[0] * b[0] */
static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
+#ifdef _MSC_VER
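+ /* MSVC's vmull_p64 takes 64-bit vector (poly64x1_t) operands rather than poly64_t scalars, so pass the low lanes as vectors. */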
+ return vreinterpretq_u64_p128(vmull_p64(
+ vget_low_p64(vreinterpretq_p64_u64(a)),
+ vget_low_p64(vreinterpretq_p64_u64(b))));
+#else
return vreinterpretq_u64_p128(vmull_p64(
vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(a)), 0),
vget_lane_p64(vreinterpret_p64_u64(vget_low_u64(b)), 0)));
+#endif
}
/* Carryless multiply high 64 bits: a[1] * b[1] */
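/* The body of this helper is not shown above; a sketch of what it presumably
   looks like, mirroring clmul_lo on the upper 64-bit lanes (vmull_high_p64
   would be an equivalent alternative): */
static inline uint64x2_t clmul_hi(uint64x2_t a, uint64x2_t b) {
#ifdef _MSC_VER
  return vreinterpretq_u64_p128(vmull_p64(
      vget_high_p64(vreinterpretq_p64_u64(a)),
      vget_high_p64(vreinterpretq_p64_u64(b))));
#else
  return vreinterpretq_u64_p128(vmull_p64(
      vget_lane_p64(vreinterpret_p64_u64(vget_high_u64(a)), 0),
      vget_lane_p64(vreinterpret_p64_u64(vget_high_u64(b)), 0)));
#endif
}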
/* Carryless multiply of two 32-bit scalars: a * b (returns 64-bit result in 128-bit vector) */
static inline uint64x2_t clmul_scalar(uint32_t a, uint32_t b) {
- return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#ifdef _MSC_VER
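+ /* vdup_n_p64 places each scalar in a one-lane poly64 vector, the operand form MSVC's vmull_p64 expects. */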
+ return vreinterpretq_u64_p128(vmull_p64(vdup_n_p64((poly64_t)a), vdup_n_p64((poly64_t)b)));
+#else
+ return vreinterpretq_u64_p128(vmull_p64((poly64_t)a, (poly64_t)b));
+#endif
}
/* Compute x^n mod P (CRC-32 polynomial) in log(n) time, where P = 0x104c11db7 */
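/* That routine is not reproduced here. As an illustration only (hypothetical
   helper names, and ignoring the bit-reflected representation real CRC-32 code
   usually works in), x^n mod P can be computed by square-and-multiply over GF(2): */
static uint64_t gf2_clmul32(uint32_t a, uint32_t b) {
  uint64_t r = 0;                      /* carryless 32x32 -> 63-bit product */
  for (int i = 0; i < 32; i++)
    if ((b >> i) & 1)
      r ^= (uint64_t)a << i;
  return r;
}
static uint32_t gf2_mod_p(uint64_t v) {
  for (int i = 62; i >= 32; i--)       /* reduce the product modulo P = 0x104c11db7 */
    if ((v >> i) & 1)
      v ^= 0x104c11db7ull << (i - 32);
  return (uint32_t)v;
}
static uint32_t xnmodp_sketch(uint64_t n) {
  uint32_t result = 1;                 /* x^0 */
  uint32_t base = 2;                   /* x^1 */
  while (n) {                          /* O(log n) square-and-multiply steps */
    if (n & 1)
      result = gf2_mod_p(gf2_clmul32(result, base));
    base = gf2_mod_p(gf2_clmul32(base, base));
    n >>= 1;
  }
  return result;
}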
#else
# include <arm_neon.h>
#endif
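+/* Under MSVC the prototype has to be spelled with the __n128/__n64 vector types that its vmull_p64 is declared with. */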
+#ifdef _MSC_VER
+__n128 f(__n64 a, __n64 b) {
+#else
poly128_t f(poly64_t a, poly64_t b) {
+#endif
return vmull_p64(a, b);
}
uint64x2_t g(uint64x2_t a, uint64x2_t b, uint64x2_t c) {