From: Nathan Moinvaziri Date: Sun, 20 Jun 2021 00:24:00 +0000 (-0700) Subject: Remove deflate_state dependency from crc_folding. X-Git-Tag: 2.1.0-beta1~536 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f5d8abb47eadb60b514f067da754f3fd374f5356;p=thirdparty%2Fzlib-ng.git Remove deflate_state dependency from crc_folding. --- diff --git a/arch/x86/crc_folding.c b/arch/x86/crc_folding.c index 4030db598..2f38f6332 100644 --- a/arch/x86/crc_folding.c +++ b/arch/x86/crc_folding.c @@ -18,21 +18,20 @@ #ifdef X86_PCLMULQDQ_CRC -#include "../../zbuild.h" #include #include #include #include "crc_folding.h" -Z_INTERNAL void crc_fold_init(deflate_state *const s) { +Z_INTERNAL uint32_t crc_fold_init(unsigned int crc0[4 * 5]) { /* CRC_SAVE */ - _mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487)); - _mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128()); - _mm_storeu_si128((__m128i *)s->crc0 + 2, _mm_setzero_si128()); - _mm_storeu_si128((__m128i *)s->crc0 + 3, _mm_setzero_si128()); + _mm_storeu_si128((__m128i *)crc0 + 0, _mm_cvtsi32_si128(0x9db42487)); + _mm_storeu_si128((__m128i *)crc0 + 1, _mm_setzero_si128()); + _mm_storeu_si128((__m128i *)crc0 + 2, _mm_setzero_si128()); + _mm_storeu_si128((__m128i *)crc0 + 3, _mm_setzero_si128()); - s->strm->adler = 0; + return 0; } static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) { @@ -227,16 +226,16 @@ static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, *xmm_crc3 = _mm_castps_si128(ps_res); } -Z_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) { +Z_INTERNAL void crc_fold_copy(unsigned int crc0[4 * 5], unsigned char *dst, const unsigned char *src, long len) { unsigned long algn_diff; __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; char ALIGNED_(16) partial_buf[16] = { 0 }; /* CRC_LOAD */ - __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0); - __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1); - __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2); - __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3); + __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)crc0 + 0); + __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)crc0 + 1); + __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)crc0 + 2); + __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)crc0 + 3); __m128i xmm_crc_part; if (len < 16) { @@ -361,11 +360,11 @@ partial: partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part); done: /* CRC_SAVE */ - _mm_storeu_si128((__m128i *)s->crc0 + 0, xmm_crc0); - _mm_storeu_si128((__m128i *)s->crc0 + 1, xmm_crc1); - _mm_storeu_si128((__m128i *)s->crc0 + 2, xmm_crc2); - _mm_storeu_si128((__m128i *)s->crc0 + 3, xmm_crc3); - _mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part); + _mm_storeu_si128((__m128i *)crc0 + 0, xmm_crc0); + _mm_storeu_si128((__m128i *)crc0 + 1, xmm_crc1); + _mm_storeu_si128((__m128i *)crc0 + 2, xmm_crc2); + _mm_storeu_si128((__m128i *)crc0 + 3, xmm_crc3); + _mm_storeu_si128((__m128i *)crc0 + 4, xmm_crc_part); } static const unsigned ALIGNED_(16) crc_k[] = { @@ -385,7 +384,7 @@ static const unsigned ALIGNED_(16) crc_mask2[4] = { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; -uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) { +uint32_t Z_INTERNAL crc_fold_512to32(unsigned int crc0[4 * 5]) { const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask); const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2); @@ -393,10 +392,10 @@ uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) { __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; /* CRC_LOAD */ - __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0); - __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1); - __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2); - __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3); + __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)crc0 + 0); + __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)crc0 + 1); + __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)crc0 + 2); + __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)crc0 + 3); /* * k1 diff --git a/arch/x86/crc_folding.h b/arch/x86/crc_folding.h index 0d3c24b29..a9ed41f3c 100644 --- a/arch/x86/crc_folding.h +++ b/arch/x86/crc_folding.h @@ -10,10 +10,10 @@ #ifndef CRC_FOLDING_H_ #define CRC_FOLDING_H_ -#include "../../deflate.h" +#include "../../zutil.h" -Z_INTERNAL void crc_fold_init(deflate_state *const); -Z_INTERNAL uint32_t crc_fold_512to32(deflate_state *const); -Z_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long); +Z_INTERNAL uint32_t crc_fold_init(unsigned int crc0[4 * 5]); +Z_INTERNAL uint32_t crc_fold_512to32(unsigned int crc0[4 * 5]); +Z_INTERNAL void crc_fold_copy(unsigned int crc0[4 * 5], unsigned char *, const unsigned char *, long); #endif diff --git a/crc32.c b/crc32.c index 4b488e617..74d9c414c 100644 --- a/crc32.c +++ b/crc32.c @@ -175,7 +175,7 @@ Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t l Z_INTERNAL void crc_finalize(deflate_state *const s) { if (x86_cpu_has_pclmulqdq) - s->strm->adler = crc_fold_512to32(s); + s->strm->adler = crc_fold_512to32(s->crc0); } #endif @@ -183,7 +183,7 @@ Z_INTERNAL void crc_reset(deflate_state *const s) { #ifdef X86_PCLMULQDQ_CRC x86_check_features(); if (x86_cpu_has_pclmulqdq) { - crc_fold_init(s); + s->strm->adler = crc_fold_init(s->crc0); return; } #endif @@ -193,7 +193,7 @@ Z_INTERNAL void crc_reset(deflate_state *const s) { Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) { #ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) { - crc_fold_copy(strm->state, dst, strm->next_in, size); + crc_fold_copy(strm->state->crc0, dst, strm->next_in, size); return; } #endif