From: Hans Kristian Rosbach Date: Thu, 25 Jan 2024 12:28:10 +0000 (+0100) Subject: Speed up crc32_[v]pclmulqdq on small strings. X-Git-Tag: 2.2.0~104 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=34e18dbfc77936b74cf56d06dfb6f7b482e10a96;p=thirdparty%2Fzlib-ng.git Speed up crc32_[v]pclmulqdq on small strings. --- diff --git a/arch/generic/crc32_braid_c.c b/arch/generic/crc32_braid_c.c index 7bf83f17..7d8028f6 100644 --- a/arch/generic/crc32_braid_c.c +++ b/arch/generic/crc32_braid_c.c @@ -11,8 +11,6 @@ #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" -/* ========================================================================= */ - /* A CRC of a message is computed on N braids of words in the message, where each word consists of W bytes (4 or 8). If N is 3, for example, then three @@ -34,24 +32,6 @@ level. Your mileage may vary. */ -/* ========================================================================= */ - -#if BYTE_ORDER == LITTLE_ENDIAN -# define ZSWAPWORD(word) (word) -# define BRAID_TABLE crc_braid_table -#elif BYTE_ORDER == BIG_ENDIAN -# if W == 8 -# define ZSWAPWORD(word) ZSWAP64(word) -# elif W == 4 -# define ZSWAPWORD(word) ZSWAP32(word) -# endif -# define BRAID_TABLE crc_braid_big_table -#else -# error "No endian defined" -#endif -#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - /* ========================================================================= */ #ifdef W /* @@ -80,7 +60,7 @@ static z_word_t crc_word(z_word_t data) { /* ========================================================================= */ Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { - Z_REGISTER uint32_t c; + uint32_t c; /* Pre-condition the CRC */ c = (~crc) & 0xffffffff; diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h index 8c06a60e..80a35b03 100644 --- a/arch/x86/crc32_pclmulqdq_tpl.h +++ 
b/arch/x86/crc32_pclmulqdq_tpl.h @@ -28,6 +28,7 @@ #include "crc32.h" #include "crc32_braid_p.h" +#include "crc32_braid_tbl.h" #include "x86_intrins.h" #include <assert.h> @@ -350,11 +351,22 @@ Z_INTERNAL uint32_t CRC32_FOLD_FINAL(crc32_fold *crc) { return crc->value; } +static inline uint32_t crc32_small(uint32_t crc, const uint8_t *buf, size_t len) { + uint32_t c = (~crc) & 0xffffffff; + + while (len) { + len--; + DO1; + } + + return c ^ 0xffffffff; +} + Z_INTERNAL uint32_t CRC32(uint32_t crc32, const uint8_t *buf, size_t len) { - /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for - * these short lengths might also prove to be effective */ - if (len < 64) - return PREFIX(crc32_braid)(crc32, buf, len); + /* For lens smaller than ~12, crc32_small method is faster. + * But there are also minimum requirements for the pclmul functions due to alignment */ + if (len < 32) + return crc32_small(crc32, buf, len); crc32_fold ALIGNED_(16) crc_state; CRC32_FOLD_RESET(&crc_state); diff --git a/crc32_braid_p.h b/crc32_braid_p.h index 65a535dc..4bfe37f5 100644 --- a/crc32_braid_p.h +++ b/crc32_braid_p.h @@ -41,6 +41,23 @@ # endif #endif +#if BYTE_ORDER == LITTLE_ENDIAN +# define ZSWAPWORD(word) (word) +# define BRAID_TABLE crc_braid_table +#elif BYTE_ORDER == BIG_ENDIAN +# if W == 8 +# define ZSWAPWORD(word) ZSWAP64(word) +# elif W == 4 +# define ZSWAPWORD(word) ZSWAP32(word) +# endif +# define BRAID_TABLE crc_braid_big_table +#else +# error "No endian defined" +#endif + +#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + /* CRC polynomial. */ #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */