From: Tulio Magno Quites Machado Filho Date: Tue, 14 May 2024 14:06:46 +0000 (-0300) Subject: Fix illegal instruction usage in Xeon Phi x200 processors X-Git-Tag: 2.2.0~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1a15c4b20e6ff63251ada13acc9bcaaf2b2c9b37;p=thirdparty%2Fzlib-ng.git Fix illegal instruction usage in Xeon Phi x200 processors The Xeon Phi x200 family of processors (Knights Landing) supports AVX512 (F, CD, ER, PF) but does not support AVX512 (VL, DQ, BW). Because of processors like this, the Intel Software Developer's Manual suggests the bits AVX512 (DQ,BW,VL) are also tested in EBX together with AVX512F before deciding to run AVX512 (DQ,BW,VL) instructions. This also adds a new x86 feature called avx512_common that indicates that AVX512 (F,DQ,BW,VL) are all available and start using this for both adler32_avx512 and crc32_vpclmulqdq implementations because they are both built with -mavx512dq -mavx512bw -mavx512vl. This has been reported downstream as https://bugzilla.redhat.com/show_bug.cgi?id=2280347 . --- diff --git a/arch/x86/x86_features.c b/arch/x86/x86_features.c index 450429cb9..58cb4df34 100644 --- a/arch/x86/x86_features.c +++ b/arch/x86/x86_features.c @@ -99,7 +99,16 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { // check AVX512 bits if the OS supports saving ZMM registers if (features->has_os_save_zmm) { - features->has_avx512 = ebx & 0x00010000; + features->has_avx512f = ebx & 0x00010000; + if (features->has_avx512f) { + // According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable + // AVX512(DQ,BW,VL). + features->has_avx512dq = ebx & 0x00020000; + features->has_avx512bw = ebx & 0x40000000; + features->has_avx512vl = ebx & 0x80000000; + } + features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \ + && features->has_avx512vl; features->has_avx512vnni = ecx & 0x800; } } diff --git a/arch/x86/x86_features.h b/arch/x86/x86_features.h index fc56ae2f4..6daa5e382 100644 --- a/arch/x86/x86_features.h +++ b/arch/x86/x86_features.h @@ -8,7 +8,11 @@ struct x86_cpu_features { int has_avx2; - int has_avx512; + int has_avx512f; + int has_avx512dq; + int has_avx512bw; + int has_avx512vl; + int has_avx512_common; // Enabled when AVX512(F,DQ,BW,VL) are all enabled. int has_avx512vnni; int has_sse2; int has_ssse3; diff --git a/functable.c b/functable.c index 8012a40b1..495d11edd 100644 --- a/functable.c +++ b/functable.c @@ -125,7 +125,7 @@ static void init_functable(void) { #endif // X86 - AVX512 (F,DQ,BW,Vl) #ifdef X86_AVX512 - if (cf.x86.has_avx512) { + if (cf.x86.has_avx512_common) { ft.adler32 = &adler32_avx512; ft.adler32_fold_copy = &adler32_fold_copy_avx512; } @@ -138,7 +138,7 @@ static void init_functable(void) { #endif // X86 - VPCLMULQDQ #ifdef X86_VPCLMULQDQ_CRC - if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) { + if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) { ft.crc32 = &crc32_vpclmulqdq; ft.crc32_fold = &crc32_fold_vpclmulqdq; ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy; diff --git a/test/benchmarks/benchmark_adler32.cc b/test/benchmarks/benchmark_adler32.cc index 84f0d617c..b1278950d 100644 --- a/test/benchmarks/benchmark_adler32.cc +++ b/test/benchmarks/benchmark_adler32.cc @@ -91,7 +91,7 @@ BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3); BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2); #endif #ifdef X86_AVX512 -BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512); +BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common); #endif #ifdef X86_AVX512VNNI BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc index e052ee76d..50e6333c4 100644 --- a/test/benchmarks/benchmark_adler32_copy.cc +++ b/test/benchmarks/benchmark_adler32_copy.cc @@ -119,8 +119,8 @@ BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2); #endif #ifdef X86_AVX512 -BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512); -BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512); +BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512_common); +BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512_common); #endif #ifdef X86_AVX512VNNI BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc index 8611b2805..7aba7c336 100644 --- a/test/benchmarks/benchmark_crc32.cc +++ b/test/benchmarks/benchmark_crc32.cc @@ -77,7 +77,7 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) #endif #ifdef X86_VPCLMULQDQ_CRC /* CRC32 fold does a memory copy while hashing */ -BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq)); +BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq)); #endif #endif diff --git a/test/test_adler32.cc b/test/test_adler32.cc index 85c4c78bb..b3d03021e 100644 --- a/test/test_adler32.cc +++ b/test/test_adler32.cc @@ -386,7 +386,7 @@ TEST_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3) TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2) #endif #ifdef X86_AVX512 -TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512) +TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common) #endif #ifdef X86_AVX512VNNI TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni) diff --git a/test/test_crc32.cc b/test/test_crc32.cc index f8322085e..245a92fb9 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -225,7 +225,7 @@ TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx) TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) #endif #ifdef X86_VPCLMULQDQ_CRC -TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq)) +TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq)) #endif #endif