From: Hans Kristian Rosbach Date: Thu, 24 Aug 2017 10:32:18 +0000 (+0200) Subject: Revert "x86: use TZCNT (#113)" X-Git-Tag: 1.9.9-b1~660^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=79a262f7a4da3b1ebb2d543df65ca8420c623a3f;p=thirdparty%2Fzlib-ng.git Revert "x86: use TZCNT (#113)" Reverted after objections to its inclusion. This reverts commit a7271104bf9a2d82dc6a69090c12442eacd2fd71. --- diff --git a/arch/x86/ctzl.h b/arch/x86/ctzl.h index 7482788bf..bc9e9bd5b 100644 --- a/arch/x86/ctzl.h +++ b/arch/x86/ctzl.h @@ -2,32 +2,24 @@ #define X86_CTZL_H #include <intrin.h> +#ifdef X86_CPUID +# include "x86.h" +#endif #if defined(_MSC_VER) && !defined(__clang__) -/* __builtin_ctzl - * - For 0, the result is undefined - * - On the x86 architecture, it is typically implemented using BSF - * - the equivalent intrinsic on MSC is _BitScanForward - * - * _tzcnt_u32 - * - For 0, the result is the size of the operand - * - On processors that do not support TZCNT, the instruction byte encoding is executed as BSF. In this case the result for 0 - * is undefined. - * - Performance: - * + AMD: The reciprocal throughput for TZCNT is 2 vs 3 for BSF - * + Intel: On modern Intel CPUs (Haswell), the performance of TZCNT is equivalent to BSF - * Reference: http://www.agner.org/optimize/instruction_tables.pdf -*/ -#if defined(_M_IX86) || defined(_M_AMD64) -#define __builtin_ctzl _tzcnt_u32 -#else +/* This is not a general purpose replacement for __builtin_ctzl. 
The function expects that value is != 0 + * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked + */ static __forceinline unsigned long __builtin_ctzl(unsigned long value) { - unsigned long trailing_zero; - _BitScanForward(&trailing_zero, value); - return trailing_zero; -} +#ifdef X86_CPUID + if (x86_cpu_has_tzcnt) + return _tzcnt_u32(value); #endif + unsigned long trailing_zero; + _BitScanForward(&trailing_zero, value); + return trailing_zero; +} #endif #endif