s->strm->adler = 0;
}
-static void fold_1(deflate_state *const s, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
- const __m128i xmm_fold4 = _mm_set_epi32(
- 0x00000001, 0x54442bd4,
- 0x00000001, 0xc6e41596);
-
+static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
+ const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
__m128i x_tmp3;
__m128 ps_crc0, ps_crc3, ps_res;
*xmm_crc3 = _mm_castps_si128(ps_res);
}
-static void fold_2(deflate_state *const s, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
- const __m128i xmm_fold4 = _mm_set_epi32(
- 0x00000001, 0x54442bd4,
- 0x00000001, 0xc6e41596);
-
+static void fold_2(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
+ const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
__m128i x_tmp3, x_tmp2;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20;
*xmm_crc3 = _mm_castps_si128(ps_res31);
}
-static void fold_3(deflate_state *const s, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
- const __m128i xmm_fold4 = _mm_set_epi32(
- 0x00000001, 0x54442bd4,
- 0x00000001, 0xc6e41596);
-
+static void fold_3(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
+ const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
__m128i x_tmp3;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10;
*xmm_crc3 = _mm_castps_si128(ps_res32);
}
-static void fold_4(deflate_state *const s, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
- const __m128i xmm_fold4 = _mm_set_epi32(
- 0x00000001, 0x54442bd4,
- 0x00000001, 0xc6e41596);
-
+static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
+ const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
__m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3;
__m128 ps_t0, ps_t1, ps_t2, ps_t3;
0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
};
-static void partial_fold(deflate_state *const s, const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1,
- __m128i *xmm_crc2, __m128i *xmm_crc3, __m128i *xmm_crc_part) {
+static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
+ __m128i *xmm_crc3, __m128i *xmm_crc_part) {
- const __m128i xmm_fold4 = _mm_set_epi32(
- 0x00000001, 0x54442bd4,
- 0x00000001, 0xc6e41596);
+ const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080);
__m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3;
src += algn_diff;
len -= algn_diff;
- partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3,
- &xmm_crc_part);
+ partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
}
while ((len -= 64) >= 0) {
xmm_t2 = _mm_load_si128((__m128i *)src + 2);
xmm_t3 = _mm_load_si128((__m128i *)src + 3);
- fold_4(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
+ fold_4(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
xmm_t1 = _mm_load_si128((__m128i *)src + 1);
xmm_t2 = _mm_load_si128((__m128i *)src + 2);
- fold_3(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
+ fold_3(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
xmm_t0 = _mm_load_si128((__m128i *)src);
xmm_t1 = _mm_load_si128((__m128i *)src + 1);
- fold_2(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
+ fold_2(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
xmm_t0 = _mm_load_si128((__m128i *)src);
- fold_1(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
+ fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
partial:
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
- partial_fold(s, len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3,
- &xmm_crc_part);
+ partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
done:
CRC_SAVE(s)
}