src/liblzma/check/check.c
src/liblzma/check/check.h
src/liblzma/check/crc_common.h
+ src/liblzma/check/crc_x86_clmul.h
src/liblzma/common/block_util.c
src/liblzma/common/common.c
src/liblzma/common/common.h
int main(void) { return 0; }
"
HAVE_USABLE_CLMUL)
-
- if(HAVE_USABLE_CLMUL)
- target_sources(liblzma PRIVATE src/liblzma/check/crc_clmul.c)
- target_compile_definitions(liblzma PRIVATE HAVE_USABLE_CLMUL)
- endif()
+ tuklib_add_definition_if(liblzma HAVE_USABLE_CLMUL)
endif()
endif()
])
AC_MSG_RESULT([$enable_clmul_crc])
])
-AM_CONDITIONAL([COND_CRC_CLMUL], [test "x$enable_clmul_crc" = xyes])
# Check for sandbox support. If one is found, set enable_sandbox=found.
#
liblzma_la_SOURCES += \
check/check.c \
check/check.h \
- check/crc_common.h
+ check/crc_common.h \
+ check/crc_x86_clmul.h
if COND_SMALL
liblzma_la_SOURCES += check/crc32_small.c
liblzma_la_SOURCES += check/crc32_x86.S
else
liblzma_la_SOURCES += check/crc32_fast.c
-if COND_CRC_CLMUL
-liblzma_la_SOURCES += check/crc_clmul.c
-endif
endif
endif
#include "check.h"
#include "crc_common.h"
+#ifdef CRC_CLMUL
+# define BUILDING_CRC32_CLMUL
+# include "crc_x86_clmul.h"
+#endif
+
#ifdef CRC_GENERIC
static crc32_func_type
crc32_resolve(void)
{
- return is_clmul_supported() ? &lzma_crc32_clmul : &crc32_generic;
+ return is_clmul_supported() ? &crc32_clmul : &crc32_generic;
}
#if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__)
return crc32_func(buf, size, crc);
#elif defined(CRC_CLMUL)
- return lzma_crc32_clmul(buf, size, crc);
+ return crc32_clmul(buf, size, crc);
#else
return crc32_generic(buf, size, crc);
#include "check.h"
#include "crc_common.h"
+#ifdef CRC_CLMUL
+# define BUILDING_CRC64_CLMUL
+# include "crc_x86_clmul.h"
+#endif
+
#ifdef CRC_GENERIC
static crc64_func_type
crc64_resolve(void)
{
- return is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic;
+ return is_clmul_supported() ? &crc64_clmul : &crc64_generic;
}
#if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__)
//
// FIXME: Lookup table isn't currently omitted on 32-bit x86,
// see crc64_table.c.
- return lzma_crc64_clmul(buf, size, crc);
+ return crc64_clmul(buf, size, crc);
#else
return crc64_generic(buf, size, crc);
# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
# endif
*/
-
-# if defined(_MSC_VER)
-# include <intrin.h>
-# elif defined(HAVE_CPUID_H)
-# include <cpuid.h>
-# endif
-
-// is_clmul_supported() must be inlined in this header file because the
-// ifunc resolver function may not support calling a function in another
-// translation unit. Depending on compiler-toolchain and flags, a call to
-// a function defined in another translation unit could result in a
-// reference to the PLT, which is unsafe to do in an ifunc resolver. The
-// ifunc resolver runs very early when loading a shared library, so the PLT
-// entries may not be setup at that time. Inlining this function duplicates
-// the function body in crc32_resolve() and crc64_resolve(), but this is
-// acceptable because the function results in very few instructions.
-static inline bool
-is_clmul_supported(void)
-{
- int success = 1;
- uint32_t r[4]; // eax, ebx, ecx, edx
-
-#if defined(_MSC_VER)
- // This needs <intrin.h> with MSVC. ICC has it as a built-in
- // on all platforms.
- __cpuid(r, 1);
-#elif defined(HAVE_CPUID_H)
- // Compared to just using __asm__ to run CPUID, this also checks
- // that CPUID is supported and saves and restores ebx as that is
- // needed with GCC < 5 with position-independent code (PIC).
- success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]);
-#else
- // Just a fallback that shouldn't be needed.
- __asm__("cpuid\n\t"
- : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
- : "a"(1), "c"(0));
#endif
- // Returns true if these are supported:
- // CLMUL (bit 1 in ecx)
- // SSSE3 (bit 9 in ecx)
- // SSE4.1 (bit 19 in ecx)
- const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19);
- return success && (r[2] & ecx_mask) == ecx_mask;
-
- // Alternative methods that weren't used:
- // - ICC's _may_i_use_cpu_feature: the other methods should work too.
- // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul")
- //
- // CPUID decding is needed with MSVC anyway and older GCC. This keeps
- // the feature checks in the build system simpler too. The nice thing
- // about __builtin_cpu_supports would be that it generates very short
- // code as is it only reads a variable set at startup but a few bytes
- // doesn't matter here.
-}
-
-#endif
-
-/// CRC32 implemented with the x86 CLMUL instruction.
-extern uint32_t lzma_crc32_clmul(const uint8_t *buf, size_t size,
- uint32_t crc);
-
-/// CRC64 implemented with the x86 CLMUL instruction.
-extern uint64_t lzma_crc64_clmul(const uint8_t *buf, size_t size,
- uint64_t crc);
-
#endif
///////////////////////////////////////////////////////////////////////////////
//
-/// \file crc_clmul.c
+/// \file crc_x86_clmul.h
/// \brief CRC32 and CRC64 implementations using CLMUL instructions.
///
-/// lzma_crc32_clmul() and lzma_crc64_clmul() use 32/64-bit x86
-/// SSSE3, SSE4.1, and CLMUL instructions. This is compatible with
-/// Elbrus 2000 (E2K) too.
+/// crc32_clmul() and crc64_clmul() use 32/64-bit x86 SSSE3, SSE4.1, and
+/// CLMUL instructions. This is compatible with Elbrus 2000 (E2K) too.
///
/// They were derived from
/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation
//
///////////////////////////////////////////////////////////////////////////////
-#include "crc_common.h"
+// This file must not be included more than once.
+#ifdef LZMA_CRC_X86_CLMUL_H
+# error crc_x86_clmul.h was included twice.
+#endif
+#define LZMA_CRC_X86_CLMUL_H
+
#include <immintrin.h>
+#if defined(_MSC_VER)
+# include <intrin.h>
+#elif defined(HAVE_CPUID_H)
+# include <cpuid.h>
+#endif
+
// EDG-based compilers (Intel's classic compiler and compiler for E2K) can
// define __GNUC__ but the attribute must not be used with them.
}
*/
-#ifdef HAVE_CHECK_CRC32
+#ifdef BUILDING_CRC32_CLMUL
crc_attr_target
crc_attr_no_sanitize_address
-extern uint32_t
-lzma_crc32_clmul(const uint8_t *buf, size_t size, uint32_t crc)
+static uint32_t
+crc32_clmul(const uint8_t *buf, size_t size, uint32_t crc)
{
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
// The code assumes that there is at least one byte of input.
v0 = _mm_xor_si128(v0, v2); // [2]
return ~(uint32_t)_mm_extract_epi32(v0, 2);
}
-#endif // HAVE_CHECK_CRC32
+#endif // BUILDING_CRC32_CLMUL
/////////////////////
}
*/
-#ifdef HAVE_CHECK_CRC64
+#ifdef BUILDING_CRC64_CLMUL
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
// code when optimizations are enabled (release build). According to the bug
crc_attr_target
crc_attr_no_sanitize_address
-extern uint64_t
-lzma_crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc)
+static uint64_t
+crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc)
{
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
// The code assumes that there is at least one byte of input.
# pragma optimize("", on)
#endif
-#endif // HAVE_CHECK_CRC64
+#endif // BUILDING_CRC64_CLMUL
+
+
+// is_clmul_supported() must be inlined in this header file because the
+// ifunc resolver function may not support calling a function in another
+// translation unit. Depending on compiler-toolchain and flags, a call to
+// a function defined in another translation unit could result in a
+// reference to the PLT, which is unsafe to do in an ifunc resolver. The
+// ifunc resolver runs very early when loading a shared library, so the PLT
+// entries may not be setup at that time. Inlining this function duplicates
+// the function body in crc32_resolve() and crc64_resolve(), but this is
+// acceptable because the function results in very few instructions.
+static inline bool
+is_clmul_supported(void)
+{
+ int success = 1;
+ uint32_t r[4]; // eax, ebx, ecx, edx
+
+#if defined(_MSC_VER)
+ // This needs <intrin.h> with MSVC. ICC has it as a built-in
+ // on all platforms.
+ __cpuid(r, 1);
+#elif defined(HAVE_CPUID_H)
+ // Compared to just using __asm__ to run CPUID, this also checks
+ // that CPUID is supported and saves and restores ebx as that is
+ // needed with GCC < 5 with position-independent code (PIC).
+ success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]);
+#else
+ // Just a fallback that shouldn't be needed.
+ __asm__("cpuid\n\t"
+ : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
+ : "a"(1), "c"(0));
+#endif
+
+ // Returns true if these are supported:
+ // CLMUL (bit 1 in ecx)
+ // SSSE3 (bit 9 in ecx)
+ // SSE4.1 (bit 19 in ecx)
+ const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19);
+ return success && (r[2] & ecx_mask) == ecx_mask;
+
+ // Alternative methods that weren't used:
+ // - ICC's _may_i_use_cpu_feature: the other methods should work too.
+ // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul")
+ //
+ // CPUID decding is needed with MSVC anyway and older GCC. This keeps
+ // the feature checks in the build system simpler too. The nice thing
+ // about __builtin_cpu_supports would be that it generates very short
+ // code as is it only reads a variable set at startup but a few bytes
+ // doesn't matter here.
+}