From: Cameron Cawley Date: Tue, 18 Jul 2023 10:17:03 +0000 (+0100) Subject: Move the AVX compatibility functions into a separate file X-Git-Tag: 2.1.4~56 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a339d85c80e0509018cd7e692969f11514ae2067;p=thirdparty%2Fzlib-ng.git Move the AVX compatibility functions into a separate file --- diff --git a/arch/x86/adler32_avx2_tpl.h b/arch/x86/adler32_avx2_tpl.h index a94f44b4f..2104b053e 100644 --- a/arch/x86/adler32_avx2_tpl.h +++ b/arch/x86/adler32_avx2_tpl.h @@ -7,7 +7,7 @@ #include #include "../../adler32_fold.h" #include "../../adler32_p.h" -#include "../../fallback_builtins.h" +#include "x86_intrins.h" #include "adler32_avx2_p.h" #ifdef X86_SSE42 diff --git a/arch/x86/adler32_avx512_tpl.h b/arch/x86/adler32_avx512_tpl.h index 7546afef5..d92ad7f66 100644 --- a/arch/x86/adler32_avx512_tpl.h +++ b/arch/x86/adler32_avx512_tpl.h @@ -7,8 +7,8 @@ #include "../../adler32_p.h" #include "../../adler32_fold.h" #include "../../cpu_features.h" -#include "../../fallback_builtins.h" #include +#include "x86_intrins.h" #include "adler32_avx512_p.h" #ifdef X86_AVX512 diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c index 8dcc93d05..771f7ebe0 100644 --- a/arch/x86/adler32_avx512_vnni.c +++ b/arch/x86/adler32_avx512_vnni.c @@ -12,9 +12,9 @@ #include "../../zbuild.h" #include "../../adler32_p.h" #include "../../cpu_features.h" -#include "../../fallback_builtins.h" #include #include "../../adler32_fold.h" +#include "x86_intrins.h" #include "adler32_avx512_p.h" #include "adler32_avx2_p.h" diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h index 0d66663cb..05d3b1525 100644 --- a/arch/x86/crc32_pclmulqdq_tpl.h +++ b/arch/x86/crc32_pclmulqdq_tpl.h @@ -28,7 +28,7 @@ #include "../../crc32_fold.h" #include "../../crc32_braid_p.h" -#include "../../fallback_builtins.h" +#include "x86_intrins.h" #include #ifdef X86_VPCLMULQDQ diff --git a/arch/x86/x86_intrins.h b/arch/x86/x86_intrins.h new file mode 100644 index 000000000..52e1085d6 --- /dev/null +++ b/arch/x86/x86_intrins.h @@ -0,0 +1,87 @@ +#ifndef X86_INTRINS_H +#define X86_INTRINS_H + +/* Unfortunately GCC didn't support these things until version 10. + * Similarly, AppleClang didn't support them in Xcode 9.2 but did in 9.3. + */ +#ifdef __AVX2__ +#include + +#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \ + || (defined(__apple_build_version__) && __apple_build_version__ < 9020039) +static inline __m256i _mm256_zextsi128_si256(__m128i a) { + __m128i r; + __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); + return _mm256_castsi128_si256(r); +} + +#ifdef __AVX512F__ +static inline __m512i _mm512_zextsi128_si512(__m128i a) { + __m128i r; + __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); + return _mm512_castsi128_si512(r); +} +#endif // __AVX512F__ +#endif // gcc/AppleClang version test + +#endif // __AVX2__ + +/* GCC <9 is missing some AVX512 intrinsics. + */ +#ifdef __AVX512F__ +#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9) +#include + +#define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \ + ((int)(unsigned char)(c2) << 8) | ((int)(unsigned char)(c3))) + +static inline __m512i _mm512_set_epi8(char __q63, char __q62, char __q61, char __q60, + char __q59, char __q58, char __q57, char __q56, + char __q55, char __q54, char __q53, char __q52, + char __q51, char __q50, char __q49, char __q48, + char __q47, char __q46, char __q45, char __q44, + char __q43, char __q42, char __q41, char __q40, + char __q39, char __q38, char __q37, char __q36, + char __q35, char __q34, char __q33, char __q32, + char __q31, char __q30, char __q29, char __q28, + char __q27, char __q26, char __q25, char __q24, + char __q23, char __q22, char __q21, char __q20, + char __q19, char __q18, char __q17, char __q16, + char __q15, char __q14, char __q13, char __q12, + char __q11, char __q10, char __q09, char __q08, + char __q07, char __q06, char __q05, char __q04, + char __q03, char __q02, char __q01, char __q00) { + return _mm512_set_epi32(PACK(__q63, __q62, __q61, __q60), PACK(__q59, __q58, __q57, __q56), + PACK(__q55, __q54, __q53, __q52), PACK(__q51, __q50, __q49, __q48), + PACK(__q47, __q46, __q45, __q44), PACK(__q43, __q42, __q41, __q40), + PACK(__q39, __q38, __q37, __q36), PACK(__q35, __q34, __q33, __q32), + PACK(__q31, __q30, __q29, __q28), PACK(__q27, __q26, __q25, __q24), + PACK(__q23, __q22, __q21, __q20), PACK(__q19, __q18, __q17, __q16), + PACK(__q15, __q14, __q13, __q12), PACK(__q11, __q10, __q09, __q08), + PACK(__q07, __q06, __q05, __q04), PACK(__q03, __q02, __q01, __q00)); +} + +#undef PACK + +#endif // gcc version test +#endif // __AVX512F__ + +/* Missing zero-extension AVX and AVX512 intrinsics. + * Fixed in Microsoft Visual Studio 2017 version 15.7 + * https://developercommunity.visualstudio.com/t/missing-zero-extension-avx-and-avx512-intrinsics/175737 + */ +#if defined(_MSC_VER) && _MSC_VER < 1914 +#ifdef __AVX2__ +static inline __m256i _mm256_zextsi128_si256(__m128i a) { + return _mm256_inserti128_si256(_mm256_setzero_si256(), a, 0); +} +#endif // __AVX2__ + +#ifdef __AVX512F__ +static inline __m512i _mm512_zextsi128_si512(__m128i a) { + return _mm512_inserti32x4(_mm512_setzero_si512(), a, 0); +} +#endif // __AVX512F__ +#endif // defined(_MSC_VER) && _MSC_VER < 1914 + +#endif // include guard X86_INTRINS_H diff --git a/fallback_builtins.h b/fallback_builtins.h index 6e8ed9f40..79072a102 100644 --- a/fallback_builtins.h +++ b/fallback_builtins.h @@ -47,87 +47,4 @@ static __forceinline int __builtin_ctzll(unsigned long long value) { #endif // Microsoft AMD64/IA64/x86/ARM/ARM64 test #endif // _MSC_VER & !clang -/* Unfortunately GCC didn't support these things until version 10. - * Similarly, AppleClang didn't support them in Xcode 9.2 but did in 9.3. - */ -#ifdef __AVX2__ -#include - -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \ - || (defined(__apple_build_version__) && __apple_build_version__ < 9020039) -static inline __m256i _mm256_zextsi128_si256(__m128i a) { - __m128i r; - __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); - return _mm256_castsi128_si256(r); -} - -#ifdef __AVX512F__ -static inline __m512i _mm512_zextsi128_si512(__m128i a) { - __m128i r; - __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); - return _mm512_castsi128_si512(r); -} -#endif // __AVX512F__ -#endif // gcc/AppleClang version test - -#endif // __AVX2__ - -/* GCC <9 is missing some AVX512 intrinsics. - */ -#ifdef __AVX512F__ -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9) -#include - -#define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \ - ((int)(unsigned char)(c2) << 8) | ((int)(unsigned char)(c3))) - -static inline __m512i _mm512_set_epi8(char __q63, char __q62, char __q61, char __q60, - char __q59, char __q58, char __q57, char __q56, - char __q55, char __q54, char __q53, char __q52, - char __q51, char __q50, char __q49, char __q48, - char __q47, char __q46, char __q45, char __q44, - char __q43, char __q42, char __q41, char __q40, - char __q39, char __q38, char __q37, char __q36, - char __q35, char __q34, char __q33, char __q32, - char __q31, char __q30, char __q29, char __q28, - char __q27, char __q26, char __q25, char __q24, - char __q23, char __q22, char __q21, char __q20, - char __q19, char __q18, char __q17, char __q16, - char __q15, char __q14, char __q13, char __q12, - char __q11, char __q10, char __q09, char __q08, - char __q07, char __q06, char __q05, char __q04, - char __q03, char __q02, char __q01, char __q00) { - return _mm512_set_epi32(PACK(__q63, __q62, __q61, __q60), PACK(__q59, __q58, __q57, __q56), - PACK(__q55, __q54, __q53, __q52), PACK(__q51, __q50, __q49, __q48), - PACK(__q47, __q46, __q45, __q44), PACK(__q43, __q42, __q41, __q40), - PACK(__q39, __q38, __q37, __q36), PACK(__q35, __q34, __q33, __q32), - PACK(__q31, __q30, __q29, __q28), PACK(__q27, __q26, __q25, __q24), - PACK(__q23, __q22, __q21, __q20), PACK(__q19, __q18, __q17, __q16), - PACK(__q15, __q14, __q13, __q12), PACK(__q11, __q10, __q09, __q08), - PACK(__q07, __q06, __q05, __q04), PACK(__q03, __q02, __q01, __q00)); -} - -#undef PACK - -#endif // gcc version test -#endif // __AVX512F__ - -/* Missing zero-extension AVX and AVX512 intrinsics. - * Fixed in Microsoft Visual Studio 2017 version 15.7 - * https://developercommunity.visualstudio.com/t/missing-zero-extension-avx-and-avx512-intrinsics/175737 - */ -#if defined(_MSC_VER) && _MSC_VER < 1914 -#ifdef __AVX2__ -static inline __m256i _mm256_zextsi128_si256(__m128i a) { - return _mm256_inserti128_si256(_mm256_setzero_si256(), a, 0); -} -#endif // __AVX2__ - -#ifdef __AVX512F__ -static inline __m512i _mm512_zextsi128_si512(__m128i a) { - return _mm512_inserti32x4(_mm512_setzero_si512(), a, 0); -} -#endif // __AVX512F__ -#endif // defined(_MSC_VER) && _MSC_VER < 1914 - #endif // include guard FALLBACK_BUILTINS_H