]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Remove deflate_state dependency from crc_folding.
authorNathan Moinvaziri <nathan@nathanm.com>
Sun, 20 Jun 2021 00:24:00 +0000 (17:24 -0700)
committerHans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 26 Jun 2021 06:37:19 +0000 (08:37 +0200)
arch/x86/crc_folding.c
arch/x86/crc_folding.h
crc32.c

index 4030db59888702c4cb18ff18228981abc5fe9700..2f38f6332e0692e6f4d9146fec9b33a3cefab106 100644 (file)
 
 #ifdef X86_PCLMULQDQ_CRC
 
-#include "../../zbuild.h"
 #include <inttypes.h>
 #include <immintrin.h>
 #include <wmmintrin.h>
 
 #include "crc_folding.h"
 
-Z_INTERNAL void crc_fold_init(deflate_state *const s) {
+Z_INTERNAL uint32_t crc_fold_init(unsigned int crc0[4 * 5]) {
     /* CRC_SAVE */
-    _mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
-    _mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128());
-    _mm_storeu_si128((__m128i *)s->crc0 + 2, _mm_setzero_si128());
-    _mm_storeu_si128((__m128i *)s->crc0 + 3, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
+    _mm_storeu_si128((__m128i *)crc0 + 1, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)crc0 + 2, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)crc0 + 3, _mm_setzero_si128());
 
-    s->strm->adler = 0;
+    return 0;
 }
 
 static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
@@ -227,16 +226,16 @@ static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1,
     *xmm_crc3 = _mm_castps_si128(ps_res);
 }
 
-Z_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
+Z_INTERNAL void crc_fold_copy(unsigned int crc0[4 * 5], unsigned char *dst, const unsigned char *src, long len) {
     unsigned long algn_diff;
     __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
     char ALIGNED_(16) partial_buf[16] = { 0 };
 
     /* CRC_LOAD */
-    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
-    __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
-    __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
-    __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
+    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)crc0 + 0);
+    __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)crc0 + 1);
+    __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)crc0 + 2);
+    __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)crc0 + 3);
     __m128i xmm_crc_part;
 
     if (len < 16) {
@@ -361,11 +360,11 @@ partial:
     partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
 done:
     /* CRC_SAVE */
-    _mm_storeu_si128((__m128i *)s->crc0 + 0, xmm_crc0);
-    _mm_storeu_si128((__m128i *)s->crc0 + 1, xmm_crc1);
-    _mm_storeu_si128((__m128i *)s->crc0 + 2, xmm_crc2);
-    _mm_storeu_si128((__m128i *)s->crc0 + 3, xmm_crc3);
-    _mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);
+    _mm_storeu_si128((__m128i *)crc0 + 0, xmm_crc0);
+    _mm_storeu_si128((__m128i *)crc0 + 1, xmm_crc1);
+    _mm_storeu_si128((__m128i *)crc0 + 2, xmm_crc2);
+    _mm_storeu_si128((__m128i *)crc0 + 3, xmm_crc3);
+    _mm_storeu_si128((__m128i *)crc0 + 4, xmm_crc_part);
 }
 
 static const unsigned ALIGNED_(16) crc_k[] = {
@@ -385,7 +384,7 @@ static const unsigned ALIGNED_(16) crc_mask2[4] = {
     0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
 };
 
-uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) {
+uint32_t Z_INTERNAL crc_fold_512to32(unsigned int crc0[4 * 5]) {
     const __m128i xmm_mask  = _mm_load_si128((__m128i *)crc_mask);
     const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);
 
@@ -393,10 +392,10 @@ uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) {
     __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
 
     /* CRC_LOAD */
-    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
-    __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
-    __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
-    __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
+    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)crc0 + 0);
+    __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)crc0 + 1);
+    __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)crc0 + 2);
+    __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)crc0 + 3);
 
     /*
      * k1
index 0d3c24b29ffde375290b2739ba226c289af8313a..a9ed41f3c05b6ba55b51920a19b10592f2eb4ac0 100644 (file)
 #ifndef CRC_FOLDING_H_
 #define CRC_FOLDING_H_
 
-#include "../../deflate.h"
+#include "../../zutil.h"
 
-Z_INTERNAL void crc_fold_init(deflate_state *const);
-Z_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
-Z_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
+Z_INTERNAL uint32_t crc_fold_init(unsigned int crc0[4 * 5]);
+Z_INTERNAL uint32_t crc_fold_512to32(unsigned int crc0[4 * 5]);
+Z_INTERNAL void crc_fold_copy(unsigned int crc0[4 * 5], unsigned char *, const unsigned char *, long);
 
 #endif
diff --git a/crc32.c b/crc32.c
index 4b488e617e9d0ec7b441ef5c0936e8cc9d4296b8..74d9c414c2a71ea01ec72bae1feab318a38e7a4f 100644 (file)
--- a/crc32.c
+++ b/crc32.c
@@ -175,7 +175,7 @@ Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t l
 
 Z_INTERNAL void crc_finalize(deflate_state *const s) {
     if (x86_cpu_has_pclmulqdq)
-        s->strm->adler = crc_fold_512to32(s);
+        s->strm->adler = crc_fold_512to32(s->crc0);
 }
 #endif
 
@@ -183,7 +183,7 @@ Z_INTERNAL void crc_reset(deflate_state *const s) {
 #ifdef X86_PCLMULQDQ_CRC
     x86_check_features();
     if (x86_cpu_has_pclmulqdq) {
-        crc_fold_init(s);
+        s->strm->adler = crc_fold_init(s->crc0);
         return;
     }
 #endif
@@ -193,7 +193,7 @@ Z_INTERNAL void crc_reset(deflate_state *const s) {
 Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
 #ifdef X86_PCLMULQDQ_CRC
     if (x86_cpu_has_pclmulqdq) {
-        crc_fold_copy(strm->state, dst, strm->next_in, size);
+        crc_fold_copy(strm->state->crc0, dst, strm->next_in, size);
         return;
     }
 #endif