From: Nathan Moinvaziri Date: Sun, 13 Jun 2021 22:57:28 +0000 (-0700) Subject: Reduce number of branches in partial chunk copy based on chunk size. X-Git-Tag: 2.1.0-beta1~565 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=815faea92c3d32c71fb68d08e8cb227ed3364357;p=thirdparty%2Fzlib-ng.git Reduce number of branches in partial chunk copy based on chunk size. --- diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c index b15329827..22c3785c1 100644 --- a/arch/arm/chunkset_neon.c +++ b/arch/arm/chunkset_neon.c @@ -13,6 +13,8 @@ typedef uint8x16_t chunk_t; +#define CHUNK_SIZE 16 + #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c index eb76c0db9..7a9a56a09 100644 --- a/arch/x86/chunkset_avx.c +++ b/arch/x86/chunkset_avx.c @@ -9,6 +9,8 @@ typedef __m256i chunk_t; +#define CHUNK_SIZE 32 + #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 diff --git a/arch/x86/chunkset_sse.c b/arch/x86/chunkset_sse.c index 1d5a0faa9..d38e99dad 100644 --- a/arch/x86/chunkset_sse.c +++ b/arch/x86/chunkset_sse.c @@ -10,6 +10,8 @@ typedef __m128i chunk_t; +#define CHUNK_SIZE 16 + #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 diff --git a/chunkset.c b/chunkset.c index 2aa8d4e47..b07e6f482 100644 --- a/chunkset.c +++ b/chunkset.c @@ -14,6 +14,8 @@ typedef struct chunk_t { uint32_t u32[2]; } chunk_t; typedef struct chunk_t { uint8_t u8[8]; } chunk_t; #endif +#define CHUNK_SIZE 8 + #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 diff --git a/chunkset_tpl.h b/chunkset_tpl.h index 62cd4aa78..2026ff37c 100644 --- a/chunkset_tpl.h +++ b/chunkset_tpl.h @@ -40,17 +40,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) { len = MIN(len, safe - out + 1); if (len < sizeof(chunk_t)) { - int32_t use_chunk16 = sizeof(chunk_t) > 16 && (len & 16); - if (use_chunk16) { +#if CHUNK_SIZE > 16 + if (len & 16) { memcpy(out, from, 16); out += 16; from += 16; } +#endif +#if CHUNK_SIZE > 8 if (len & 8) { memcpy(out, from, 8); out += 8; from += 8; } +#endif if (len & 4) { memcpy(out, from, 4); out += 4;