From: Joel Rosdahl Date: Sat, 28 May 2022 17:48:20 +0000 (+0200) Subject: bump: Upgrade xxh_x86dispatch.c to xxHash 0.8.1 X-Git-Tag: v4.7~215 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8c2da59c3ed363d606049dd516ce3f02bc325d53;p=thirdparty%2Fccache.git bump: Upgrade xxh_x86dispatch.c to xxHash 0.8.1 xxh_x86dispatch.c was by mistake not included in 6eb7eb340bf163b807cb9267935e39fa619dde0b. See #1078. --- diff --git a/src/third_party/xxh_x86dispatch.c b/src/third_party/xxh_x86dispatch.c index a618ae898..399bad904 100644 --- a/src/third_party/xxh_x86dispatch.c +++ b/src/third_party/xxh_x86dispatch.c @@ -33,23 +33,164 @@ */ +/*! + * @file xxh_x86dispatch.c + * + * Automatic dispatcher code for the @ref xxh3_family on x86-based targets. + * + * Optional add-on. + * + * **Compile this file with the default flags for your target.** Do not compile + * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be + * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details. + * + * @defgroup dispatch x86 Dispatcher + * @{ + */ + #if defined (__cplusplus) extern "C" { #endif -/* - * Dispatcher code for XXH3 on x86-based targets. - */ #if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)) # error "Dispatching is currently only supported on x86 and x86_64." #endif -#ifndef __GNUC__ -# error "Dispatching requires __attribute__((__target__)) capability" +/*! + * @def XXH_X86DISPATCH_ALLOW_AVX + * @brief Disables the AVX sanity check. + * + * Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`, + * or `/arch:AVX*`. It is intended to be compiled for the minimum target, and + * it selectively enables SSE2, AVX2, and AVX512 when it is needed. + * + * Using this option _globally_ allows this feature, and therefore makes it + * undefined behavior to execute on any CPU without said feature. + * + * Even if the source code isn't directly using AVX intrinsics in a function, + * the compiler can still generate AVX code from autovectorization and by + * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128). + * + * Use the same flags that you use to compile the rest of the program; this + * file will safely generate SSE2, AVX2, and AVX512 without these flags. + * + * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open + * an issue if there is a target in the future where AVX is a default feature. + */ +#ifdef XXH_DOXYGEN +# define XXH_X86DISPATCH_ALLOW_AVX +#endif + +#if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX) +# error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above." +#endif + +#ifdef __has_include +# define XXH_HAS_INCLUDE(header) __has_include(header) +#else +# define XXH_HAS_INCLUDE(header) 0 #endif -#define XXH_DISPATCH_AVX2 /* enable dispatch towards AVX2 */ -#define XXH_DISPATCH_AVX512 /* enable dispatch towards AVX512 */ +/*! + * @def XXH_DISPATCH_SCALAR + * @brief Enables/dispatching the scalar code path. + * + * If this is defined to 0, SSE2 support is assumed. This reduces code size + * when the scalar path is not needed. + * + * This is automatically defined to 0 when... + * - SSE2 support is enabled in the compiler + * - Targeting x86_64 + * - Targeting Android x86 + * - Targeting macOS + */ +#ifndef XXH_DISPATCH_SCALAR +# if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \ + || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \ + || defined(__ANDROID__) || defined(__APPLEv__) /* Android or macOS */ +# define XXH_DISPATCH_SCALAR 0 /* disable */ +# else +# define XXH_DISPATCH_SCALAR 1 +# endif +#endif +/*! + * @def XXH_DISPATCH_AVX2 + * @brief Enables/disables dispatching for AVX2. + * + * This is automatically detected if it is not defined. + * - GCC 4.7 and later are known to support AVX2, but >4.9 is required for + * to get the AVX2 intrinsics and typedefs without -mavx -mavx2. + * - Visual Studio 2013 Update 2 and later are known to support AVX2. + * - The GCC/Clang internal header `` is detected. While this is + * not allowed to be included directly, it still appears in the builtin + * include path and is detectable with `__has_include`. + * + * @see XXH_AVX2 + */ +#ifndef XXH_DISPATCH_AVX2 +# if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \ + || (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \ + || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \ + || XXH_HAS_INCLUDE() /* GCC/Clang internal header */ +# define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */ +# else +# define XXH_DISPATCH_AVX2 0 +# endif +#endif /* XXH_DISPATCH_AVX2 */ + +/*! + * @def XXH_DISPATCH_AVX512 + * @brief Enables/disables dispatching for AVX512. + * + * Automatically detected if one of the following conditions is met: + * - GCC 4.9 and later are known to support AVX512. + * - Visual Studio 2017 and later are known to support AVX2. + * - The GCC/Clang internal header `` is detected. While this + * is not allowed to be included directly, it still appears in the builtin + * include path and is detectable with `__has_include`. + * + * @see XXH_AVX512 + */ +#ifndef XXH_DISPATCH_AVX512 +# if (defined(__GNUC__) \ + && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \ + || (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \ + || XXH_HAS_INCLUDE() /* GCC/Clang internal header */ +# define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */ +# else +# define XXH_DISPATCH_AVX512 0 +# endif +#endif /* XXH_DISPATCH_AVX512 */ + +/*! + * @def XXH_TARGET_SSE2 + * @brief Allows a function to be compiled with SSE2 intrinsics. + * + * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used + * even with `-mno-sse2`. + * + * @def XXH_TARGET_AVX2 + * @brief Like @ref XXH_TARGET_SSE2, but for AVX2. + * + * @def XXH_TARGET_AVX512 + * @brief Like @ref XXH_TARGET_SSE2, but for AVX512. + */ +#if defined(__GNUC__) +# include /* SSE2 */ +# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 +# include /* AVX2, AVX512F */ +# endif +# define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) +# define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) +# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) +#elif defined(_MSC_VER) +# include +# define XXH_TARGET_SSE2 +# define XXH_TARGET_AVX2 +# define XXH_TARGET_AVX512 +#else +# error "Dispatching is currently not supported for your compiler." +#endif #ifdef XXH_DISPATCH_DEBUG /* debug logging */ @@ -62,28 +203,10 @@ extern "C" { #endif #include -#if defined(__GNUC__) -# include /* sse2 */ -# include /* avx2 */ -#elif defined(_MSC_VER) -# include -#endif - #define XXH_INLINE_ALL #define XXH_X86DISPATCH -#define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) -#define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) -#define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) #include "xxhash.h" -/* - * Modified version of Intel's guide - * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family - */ -#if defined(_MSC_VER) -# include -#endif - /* * Support both AT&T and Intel dialects * @@ -97,12 +220,18 @@ extern "C" { * Note: Comments are written in the inline assembly itself. */ #ifdef __clang__ -# define I_ATT(intel, att) att "\n\t" +# define XXH_I_ATT(intel, att) att "\n\t" #else -# define I_ATT(intel, att) "{" att "|" intel "}\n\t" +# define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t" #endif - +/*! + * @internal + * @brief Runs CPUID. + * + * @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively. + * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }` + */ static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) { #if defined(_MSC_VER) @@ -115,14 +244,14 @@ static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) "#\n\t" "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t" "# EBX, so we use EDI instead.\n\t" - I_ATT("mov edi, ebx", "movl %%ebx, %%edi") - I_ATT("cpuid", "cpuid" ) - I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi") + XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi") + XXH_I_ATT("cpuid", "cpuid" ) + XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi") : "=D" (ebx), # else __asm__( "# Call CPUID\n\t" - I_ATT("cpuid", "cpuid") + XXH_I_ATT("cpuid", "cpuid") : "=b" (ebx), # endif "+a" (eax), "+c" (ecx), "=d" (edx)); @@ -133,8 +262,16 @@ static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) #endif } -#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) /* + * Modified version of Intel's guide + * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ + +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 +/*! + * @internal + * @brief Runs `XGETBV`. + * * While the CPU may support AVX2, the operating system might not properly save * the full YMM/ZMM registers. * @@ -166,20 +303,28 @@ static xxh_u64 XXH_xgetbv(void) } #endif -#define SSE2_CPUID_MASK (1 << 26) -#define OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) -#define AVX2_CPUID_MASK (1 << 5) -#define AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) -#define AVX512F_CPUID_MASK (1 << 16) -#define AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) +#define XXH_SSE2_CPUID_MASK (1 << 26) +#define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) +#define XXH_AVX2_CPUID_MASK (1 << 5) +#define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) +#define XXH_AVX512F_CPUID_MASK (1 << 16) +#define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) -/* Returns the best XXH3 implementation */ +/*! + * @internal + * @brief Returns the best XXH3 implementation. + * + * Runs various CPUID/XGETBV tests to try and determine the best implementation. + * + * @ret The best @ref XXH_VECTOR implementation. + * @see XXH_VECTOR_TYPES + */ static int XXH_featureTest(void) { xxh_u32 abcd[4]; xxh_u32 max_leaves; int best = XXH_SCALAR; -#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 xxh_u64 xgetbv_val; #endif #if defined(__GNUC__) && defined(__i386__) @@ -197,23 +342,23 @@ static int XXH_featureTest(void) "# Routine is from .\n\t" "# Save EFLAGS\n\t" - I_ATT("pushfd", "pushfl" ) + XXH_I_ATT("pushfd", "pushfl" ) "# Store EFLAGS\n\t" - I_ATT("pushfd", "pushfl" ) + XXH_I_ATT("pushfd", "pushfl" ) "# Invert the ID bit in stored EFLAGS\n\t" - I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") + XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") "# Load stored EFLAGS (with ID bit inverted)\n\t" - I_ATT("popfd", "popfl" ) + XXH_I_ATT("popfd", "popfl" ) "# Store EFLAGS again (ID bit may or not be inverted)\n\t" - I_ATT("pushfd", "pushfl" ) + XXH_I_ATT("pushfd", "pushfl" ) "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t" - I_ATT("pop eax", "popl %%eax" ) + XXH_I_ATT("pop eax", "popl %%eax" ) "# eax = whichever bits were changed\n\t" - I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) + XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) "# Restore original EFLAGS\n\t" - I_ATT("popfd", "popfl" ) + XXH_I_ATT("popfd", "popfl" ) "# eax = zero if ID bit can't be changed, else non-zero\n\t" - I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) + XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) : "=a" (cpuid_supported) :: "cc"); if (XXH_unlikely(!cpuid_supported)) { @@ -238,32 +383,32 @@ static int XXH_featureTest(void) /* * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt. */ - if (XXH_unlikely((abcd[3] & SSE2_CPUID_MASK) != SSE2_CPUID_MASK)) + if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK)) return best; XXH_debugPrint("SSE2 support detected."); best = XXH_SSE2; -#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 /* Make sure we have enough leaves */ if (XXH_unlikely(max_leaves < 7)) return best; /* Test for OSXSAVE and XGETBV */ - if ((abcd[2] & OSXSAVE_CPUID_MASK) != OSXSAVE_CPUID_MASK) + if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK) return best; /* CPUID check for AVX features */ XXH_cpuid(7, 0, abcd); xgetbv_val = XXH_xgetbv(); -#if defined(XXH_DISPATCH_AVX2) +#if XXH_DISPATCH_AVX2 /* Validate that AVX2 is supported by the CPU */ - if ((abcd[1] & AVX2_CPUID_MASK) != AVX2_CPUID_MASK) + if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK) return best; /* Validate that the OS supports YMM registers */ - if ((xgetbv_val & AVX2_XGETBV_MASK) != AVX2_XGETBV_MASK) { + if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) { XXH_debugPrint("AVX2 supported by the CPU, but not the OS."); return best; } @@ -272,15 +417,15 @@ static int XXH_featureTest(void) XXH_debugPrint("AVX2 support detected."); best = XXH_AVX2; #endif -#if defined(XXH_DISPATCH_AVX512) +#if XXH_DISPATCH_AVX512 /* Check if AVX512F is supported by the CPU */ - if ((abcd[1] & AVX512F_CPUID_MASK) != AVX512F_CPUID_MASK) { + if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) { XXH_debugPrint("AVX512F not supported by CPU"); return best; } /* Validate that the OS supports ZMM registers */ - if ((xgetbv_val & AVX512F_XGETBV_MASK) != AVX512F_XGETBV_MASK) { + if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) { XXH_debugPrint("AVX512F supported by the CPU, but not the OS."); return best; } @@ -296,269 +441,107 @@ static int XXH_featureTest(void) /* === Vector implementations === */ -/* === XXH3, default variants === */ - -XXH_NO_INLINE XXH64_hash_t -XXHL64_default_scalar(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t -XXHL64_default_sse2(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t -XXHL64_default_avx2(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t -XXHL64_default_avx512(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif - -/* === XXH3, Seeded variants === */ - -XXH_NO_INLINE XXH64_hash_t -XXHL64_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t -XXHL64_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t -XXHL64_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t -XXHL64_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); -} -#endif - -/* === XXH3, Secret variants === */ - -XXH_NO_INLINE XXH64_hash_t -XXHL64_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) -{ - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t -XXHL64_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) -{ - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t -XXHL64_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) -{ - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t -XXHL64_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) -{ - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif - -/* === XXH3 update variants === */ - -XXH_NO_INLINE XXH_errorcode -XXH3_64bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode -XXH3_64bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode -XXH3_64bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode -XXH3_64bits_update_avx512(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif - -/* === XXH128 default variants === */ - -XXH_NO_INLINE XXH128_hash_t -XXHL128_default_scalar(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t -XXHL128_default_sse2(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t -XXHL128_default_avx2(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t -XXHL128_default_avx512(const void* XXH_RESTRICT input, size_t len) -{ - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif - -/* === XXH128 Secret variants === */ - -XXH_NO_INLINE XXH128_hash_t -XXHL128_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) -{ - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t -XXHL128_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) -{ - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t -XXHL128_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) -{ - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t -XXHL128_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) -{ - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif - -/* === XXH128 Seeded variants === */ - -XXH_NO_INLINE XXH128_hash_t -XXHL128_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_128b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t -XXHL128_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_128b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t -XXHL128_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_128b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t -XXHL128_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) -{ - return XXH3_hashLong_128b_withSeed_internal(input, len, seed, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); -} -#endif - -/* === XXH128 update variants === */ - -XXH_NO_INLINE XXH_errorcode -XXH3_128bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); -} - -XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode -XXH3_128bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); -} - -#ifdef XXH_DISPATCH_AVX2 -XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode -XXH3_128bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); -} -#endif - -#ifdef XXH_DISPATCH_AVX512 -XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode -XXH3_128bits_update_avx512(XXH3_state_t* state, const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); -} -#endif +/*! + * @internal + * @brief Defines the various dispatch functions. + * + * TODO: Consolidate? + * + * @param suffix The suffix for the functions, e.g. sse2 or scalar + * @param target XXH_TARGET_* or empty. + */ +#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \ + \ +/* === XXH3, default variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_default_##suffix(const void* XXH_RESTRICT input, size_t len) \ +{ \ + return XXH3_hashLong_64b_internal( \ + input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH3, Seeded variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \ + XXH64_hash_t seed) \ +{ \ + return XXH3_hashLong_64b_withSeed_internal( \ + input, len, seed, XXH3_accumulate_512_##suffix, \ + XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \ + ); \ +} \ + \ +/* === XXH3, Secret variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \ + const void* secret, size_t secretLen) \ +{ \ + return XXH3_hashLong_64b_internal( \ + input, len, secret, secretLen, \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH3 update variants === */ \ + \ +XXH_NO_INLINE target XXH_errorcode \ +XXH3_update_##suffix(XXH3_state_t* state, const void* input, size_t len) \ +{ \ + return XXH3_update(state, (const xxh_u8*)input, len, \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \ +} \ + \ +/* === XXH128 default variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_default_##suffix(const void* XXH_RESTRICT input, size_t len) \ +{ \ + return XXH3_hashLong_128b_internal( \ + input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH128 Secret variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \ + const void* XXH_RESTRICT secret, size_t secretLen) \ +{ \ + return XXH3_hashLong_128b_internal( \ + input, len, (const xxh_u8*)secret, secretLen, \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \ +} \ + \ +/* === XXH128 Seeded variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \ + XXH64_hash_t seed) \ +{ \ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \ + XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix, \ + XXH3_initCustomSecret_##suffix); \ +} + +/* End XXH_DEFINE_DISPATCH_FUNCS */ + +#if XXH_DISPATCH_SCALAR +XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */) +#endif +XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2) +#if XXH_DISPATCH_AVX2 +XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2) +#endif +#if XXH_DISPATCH_AVX512 +XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512) +#endif +#undef XXH_DEFINE_DISPATCH_FUNCS /* ==== Dispatchers ==== */ @@ -575,25 +558,40 @@ typedef struct { XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed; XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret; XXH3_dispatchx86_update update; -} dispatchFunctions_s; +} XXH_dispatchFunctions_s; -static dispatchFunctions_s g_dispatch = { NULL, NULL, NULL, NULL}; +#define XXH_NB_DISPATCHES 4 -#define NB_DISPATCHES 4 -static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = { - /* scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_64bits_update_scalar }, - /* sse2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_64bits_update_sse2 }, -#ifdef XXH_DISPATCH_AVX2 - /* avx2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_64bits_update_avx2 }, +/*! + * @internal + * @brief Table of dispatchers for @ref XXH3_64bits(). + * + * @pre The indices must match @ref XXH_VECTOR_TYPE. + */ +static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = { +#if XXH_DISPATCH_SCALAR + /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar }, +#else + /* Scalar */ { NULL, NULL, NULL, NULL }, +#endif + /* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 }, +#if XXH_DISPATCH_AVX2 + /* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 }, #else - /* avx2 */ { NULL, NULL, NULL, NULL }, + /* AVX2 */ { NULL, NULL, NULL, NULL }, #endif -#ifdef XXH_DISPATCH_AVX512 - /* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_64bits_update_avx512 } +#if XXH_DISPATCH_AVX512 + /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 } #else - /* avx512 */ { NULL, NULL, NULL, NULL } + /* AVX512 */ { NULL, NULL, NULL, NULL } #endif }; +/*! + * @internal + * @brief The selected dispatch table for @ref XXH3_64bits(). + */ +static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL }; + typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t); @@ -606,38 +604,60 @@ typedef struct { XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed; XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret; XXH3_dispatchx86_update update; -} dispatch128Functions_s; +} XXH_dispatch128Functions_s; -static dispatch128Functions_s g_dispatch128 = { NULL, NULL, NULL, NULL }; -static const dispatch128Functions_s k_dispatch128[NB_DISPATCHES] = { - /* scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_128bits_update_scalar }, - /* sse2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_128bits_update_sse2 }, -#ifdef XXH_DISPATCH_AVX2 - /* avx2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_128bits_update_avx2 }, +/*! + * @internal + * @brief Table of dispatchers for @ref XXH3_128bits(). + * + * @pre The indices must match @ref XXH_VECTOR_TYPE. + */ +static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = { +#if XXH_DISPATCH_SCALAR + /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar }, +#else + /* Scalar */ { NULL, NULL, NULL, NULL }, +#endif + /* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 }, +#if XXH_DISPATCH_AVX2 + /* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 }, #else - /* avx2 */ { NULL, NULL, NULL, NULL }, + /* AVX2 */ { NULL, NULL, NULL, NULL }, #endif -#ifdef XXH_DISPATCH_AVX512 - /* avx512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_128bits_update_avx512 } +#if XXH_DISPATCH_AVX512 + /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 } #else - /* avx512 */ { NULL, NULL, NULL, NULL } + /* AVX512 */ { NULL, NULL, NULL, NULL } #endif }; -static void setDispatch(void) +/*! + * @internal + * @brief The selected dispatch table for @ref XXH3_64bits(). + */ +static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL }; + +/*! + * @internal + * @brief Runs a CPUID check and sets the correct dispatch tables. + */ +static void XXH_setDispatch(void) { int vecID = XXH_featureTest(); - XXH_STATIC_ASSERT(XXH_AVX512 == NB_DISPATCHES-1); + XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1); assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); -#ifndef XXH_DISPATCH_AVX512 +#if !XXH_DISPATCH_SCALAR + assert(vecID != XXH_SCALAR); +#endif +#if !XXH_DISPATCH_AVX512 assert(vecID != XXH_AVX512); #endif -#ifndef XXH_DISPATCH_AVX2 +#if !XXH_DISPATCH_AVX2 assert(vecID != XXH_AVX2); #endif - g_dispatch = k_dispatch[vecID]; - g_dispatch128 = k_dispatch128[vecID]; + XXH_g_dispatch = XXH_kDispatch[vecID]; + XXH_g_dispatch128 = XXH_kDispatch128[vecID]; } @@ -648,8 +668,8 @@ XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) { (void)seed64; (void)secret; (void)secretLen; - if (g_dispatch.hashLong64_default == NULL) setDispatch(); - return g_dispatch.hashLong64_default(input, len); + if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_default(input, len); } XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len) @@ -662,8 +682,8 @@ XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) { (void)secret; (void)secretLen; - if (g_dispatch.hashLong64_seed == NULL) setDispatch(); - return g_dispatch.hashLong64_seed(input, len, seed64); + if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_seed(input, len, seed64); } XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) @@ -676,8 +696,8 @@ XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) { (void)seed64; - if (g_dispatch.hashLong64_secret == NULL) setDispatch(); - return g_dispatch.hashLong64_secret(input, len, secret, secretLen); + if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen); } XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) @@ -688,8 +708,8 @@ XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, cons XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) { - if (g_dispatch.update == NULL) setDispatch(); - return g_dispatch.update(state, (const xxh_u8*)input, len); + if (XXH_g_dispatch.update == NULL) XXH_setDispatch(); + return XXH_g_dispatch.update(state, (const xxh_u8*)input, len); } @@ -700,8 +720,8 @@ XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen) { (void)seed64; (void)secret; (void)secretLen; - if (g_dispatch128.hashLong128_default == NULL) setDispatch(); - return g_dispatch128.hashLong128_default(input, len); + if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_default(input, len); } XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len) @@ -714,8 +734,8 @@ XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen) { (void)secret; (void)secretLen; - if (g_dispatch128.hashLong128_seed == NULL) setDispatch(); - return g_dispatch128.hashLong128_seed(input, len, seed64); + if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_seed(input, len, seed64); } XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) @@ -728,8 +748,8 @@ XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen) { (void)seed64; - if (g_dispatch128.hashLong128_secret == NULL) setDispatch(); - return g_dispatch128.hashLong128_secret(input, len, secret, secretLen); + if (XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen); } XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) @@ -740,10 +760,11 @@ XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, co XXH_errorcode XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) { - if (g_dispatch128.update == NULL) setDispatch(); - return g_dispatch128.update(state, (const xxh_u8*)input, len); + if (XXH_g_dispatch128.update == NULL) XXH_setDispatch(); + return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len); } #if defined (__cplusplus) } #endif +/*! @} */