From: Nathan Moinvaziri
Date: Sun, 4 Jan 2026 07:52:27 +0000 (-0800)
Subject: Add ALIGN_DIFF to perform alignment needed to next boundary
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9d916b327868283f218ff62f586b181a6adaf96b;p=thirdparty%2Fzlib-ng.git

Add ALIGN_DIFF to perform alignment needed to next boundary
---

diff --git a/arch/generic/crc32_braid_c.c b/arch/generic/crc32_braid_c.c
index 71f2ac43f..3b447ed9c 100644
--- a/arch/generic/crc32_braid_c.c
+++ b/arch/generic/crc32_braid_c.c
@@ -70,10 +70,10 @@ Z_INTERNAL uint32_t crc32_braid_internal(uint32_t c, const uint8_t *buf, size_t
         int k;

         /* Compute the CRC up to a z_word_t boundary. */
-        while (len && ((uintptr_t)buf & (BRAID_W - 1)) != 0) {
-            len--;
+        size_t align_diff = (size_t)MIN(ALIGN_DIFF(buf, BRAID_W), len);
+        len -= align_diff;
+        while (align_diff--)
             CRC_DO1;
-        }

         /* Compute the CRC on as many BRAID_N z_word_t blocks as are available. */
         blks = len / (BRAID_N * BRAID_W);
diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c
index 303a516a0..9455b55ee 100644
--- a/arch/generic/crc32_chorba_c.c
+++ b/arch/generic/crc32_chorba_c.c
@@ -1448,27 +1448,27 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const
 #endif // OPTIMAL_CMP == 64

 Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
-    uint64_t* aligned_buf;
+    uint64_t *aligned_buf;
     uint32_t c = (~crc) & 0xffffffff;
-    uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
+    uintptr_t align_diff = ALIGN_DIFF(buf, 8);

-    if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
-        if (algn_diff) {
-            c = crc32_braid_internal(c, buf, algn_diff);
-            len -= algn_diff;
+    if (len > align_diff + CHORBA_SMALL_THRESHOLD) {
+        if (align_diff) {
+            c = crc32_braid_internal(c, buf, align_diff);
+            len -= align_diff;
         }
-        aligned_buf = (uint64_t*) (buf + algn_diff);
+        aligned_buf = (uint64_t*)(buf + align_diff);
         if(len > CHORBA_LARGE_THRESHOLD) {
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
 # if OPTIMAL_CMP == 64
         } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
-            c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
+            c = crc32_chorba_32768_nondestructive(c, (uint64_t*)aligned_buf, len);
 # endif
         } else {
 # if OPTIMAL_CMP == 64
-            c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
+            c = crc32_chorba_small_nondestructive(c, (uint64_t*)aligned_buf, len);
 # else
-            c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
+            c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*)aligned_buf, len);
 # endif
         }
     } else {
diff --git a/arch/power/adler32_vmx.c b/arch/power/adler32_vmx.c
index 004d3fce6..0478d052d 100644
--- a/arch/power/adler32_vmx.c
+++ b/arch/power/adler32_vmx.c
@@ -123,7 +123,7 @@ static inline uint32_t adler32_impl(uint32_t adler, const uint8_t *buf, size_t l
     uint32_t pair[16] ALIGNED_(16);
     memset(&pair[2], 0, 14);
     int n = NMAX;
-    unsigned int done = 0, i;
+    unsigned int done = 0;

     /* Split Adler-32 into component sums, it can be supplied by
      * the caller sites (e.g. in a PNG file).
@@ -146,23 +146,17 @@ static inline uint32_t adler32_impl(uint32_t adler, const uint8_t *buf, size_t l
         return adler32_copy_len_16(adler, NULL, buf, len, sum2, 0);

     // Align buffer
-    unsigned int al = 0;
-    if ((uintptr_t)buf & 0xf) {
-        al = 16-((uintptr_t)buf & 0xf);
-        if (al > len) {
-            al=len;
-        }
-        vmx_handle_head_or_tail(pair, buf, al);
-
-        done += al;
+    size_t align_len = (size_t)MIN(ALIGN_DIFF(buf, 16), len);
+    if (align_len) {
+        vmx_handle_head_or_tail(pair, buf, align_len);
+        done += align_len;

         /* Rather than rebasing, we can reduce the max sums for the
          * first round only */
-        n -= al;
+        n -= align_len;
     }
-    for (i = al; i < len; i += n) {
+    for (size_t i = align_len; i < len; i += n) {
         int remaining = (int)(len-i);
-        n = MIN(remaining, (i == al) ? n : NMAX);
-
+        n = MIN(remaining, (i == align_len) ? n : NMAX);
         if (n < 16)
             break;
diff --git a/arch/power/crc32_power8.c b/arch/power/crc32_power8.c
index 20ea89ed2..914cca8ab 100644
--- a/arch/power/crc32_power8.c
+++ b/arch/power/crc32_power8.c
@@ -63,7 +63,7 @@ Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, size_t _l
     }

     if ((unsigned long)p & VMX_ALIGN_MASK) {
-        prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+        prealign = (unsigned int)ALIGN_DIFF(p, VMX_ALIGN);
         crc = crc32_align(crc, p, prealign);
         len -= prealign;
         p += prealign;
diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c
index 10105990d..c1de8b6f8 100644
--- a/arch/s390/crc32-vx.c
+++ b/arch/s390/crc32-vx.c
@@ -205,7 +205,7 @@ uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t
         return crc32_braid(crc, buf, len);

     if ((uintptr_t)buf & VX_ALIGN_MASK) {
-        prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
+        prealign = (size_t)ALIGN_DIFF(buf, VX_ALIGNMENT);
         len -= prealign;
         crc = crc32_braid(crc, buf, prealign);
         buf += prealign;
diff --git a/arch/x86/chorba_sse2.c b/arch/x86/chorba_sse2.c
index 95da8e99b..a5567faf3 100644
--- a/arch/x86/chorba_sse2.c
+++ b/arch/x86/chorba_sse2.c
@@ -847,19 +847,19 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
 }

 Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
-    uint64_t* aligned_buf;
+    uint64_t *aligned_buf;
     uint32_t c = (~crc) & 0xffffffff;
-    uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
+    uintptr_t align_diff = ALIGN_DIFF(buf, 16);

-    if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
-        if (algn_diff) {
-            c = crc32_braid_internal(c, buf, algn_diff);
-            len -= algn_diff;
+    if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
+        if (align_diff) {
+            c = crc32_braid_internal(c, buf, align_diff);
+            len -= align_diff;
         }
-        aligned_buf = (uint64_t*) (buf + algn_diff);
+        aligned_buf = (uint64_t*)(buf + align_diff);
 #if !defined(WITHOUT_CHORBA)
-        if(len > CHORBA_LARGE_THRESHOLD) {
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+        if (len > CHORBA_LARGE_THRESHOLD) {
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
         } else
 #endif
         {
diff --git a/arch/x86/chorba_sse41.c b/arch/x86/chorba_sse41.c
index 9f2b65a3e..07daf7094 100644
--- a/arch/x86/chorba_sse41.c
+++ b/arch/x86/chorba_sse41.c
@@ -305,19 +305,19 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
 }

 Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
-    uint64_t* aligned_buf;
+    uint64_t *aligned_buf;
     uint32_t c = (~crc) & 0xffffffff;
-    uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
+    uintptr_t align_diff = ALIGN_DIFF(buf, 16);

-    if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
-        if (algn_diff) {
-            c = crc32_braid_internal(c, buf, algn_diff);
-            len -= algn_diff;
+    if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
+        if (align_diff) {
+            c = crc32_braid_internal(c, buf, align_diff);
+            len -= align_diff;
         }
-        aligned_buf = (uint64_t*) (buf + algn_diff);
+        aligned_buf = (uint64_t*)(buf + align_diff);
 #if !defined(WITHOUT_CHORBA)
-        if(len > CHORBA_LARGE_THRESHOLD) {
-            c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+        if (len > CHORBA_LARGE_THRESHOLD) {
+            c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
         } else
 #endif
         if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h
index 079c22e05..fc35b449c 100644
--- a/arch/x86/crc32_pclmulqdq_tpl.h
+++ b/arch/x86/crc32_pclmulqdq_tpl.h
@@ -337,11 +337,11 @@ Z_FORCEINLINE static uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const
     size_t copy_len = len;
     if (len >= 16) {
         /* Calculate 16-byte alignment offset */
-        unsigned algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
+        uintptr_t align_diff = ALIGN_DIFF(src, 16);

         /* If total length is less than (alignment bytes + 16), use the faster small method.
          * Handles both initially small buffers and cases where alignment would leave < 16 bytes */
-        copy_len = len < algn_diff + 16 ? len : algn_diff;
+        copy_len = len < align_diff + 16 ? len : align_diff;
     }

     if (copy_len > 0) {
diff --git a/zbuild.h b/zbuild.h
index 7db64e5b4..1354cc945 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -245,6 +245,10 @@
 #define HINT_ALIGNED_64(p) HINT_ALIGNED((p),64)
 #define HINT_ALIGNED_4096(p) HINT_ALIGNED((p),4096)

+/* Number of bytes needed to align ptr to the next alignment boundary */
+#define ALIGN_DIFF(ptr, align) \
+    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
+
 /* PADSZ returns needed bytes to pad bpos to pad size
  * PAD_NN calculates pad size and adds it to bpos, returning the result.
  * All take an integer or a pointer as bpos input.
diff --git a/zutil.c b/zutil.c
index e77bf1aea..937c83da4 100644
--- a/zutil.c
+++ b/zutil.c
@@ -114,8 +114,8 @@ void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr) {

 /* Provide aligned allocations, only used by gz* code */
 void Z_INTERNAL *zng_alloc_aligned(unsigned size, unsigned align) {
-    uintptr_t return_ptr, original_ptr;
-    uint32_t alloc_size, align_diff;
+    uintptr_t return_ptr, original_ptr, align_diff;
+    uint32_t alloc_size;
     void *ptr;

     /* Allocate enough memory for proper alignment and to store the original memory pointer */
@@ -125,7 +125,7 @@ void Z_INTERNAL *zng_alloc_aligned(unsigned size, unsigned align) {
         return NULL;

     /* Calculate return pointer address with space enough to store original pointer */
-    align_diff = align - ((uintptr_t)ptr % align);
+    align_diff = ALIGN_DIFF(ptr, align);
     return_ptr = (uintptr_t)ptr + align_diff;
     if (align_diff < sizeof(void *))
         return_ptr += align;
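
The following standalone sketch is not part of the commit; it copies the ALIGN_DIFF() definition from the zbuild.h hunk above and shows what it computes. The demo program, its buffer/base names, and the sample offsets are illustrative only, and it assumes a power-of-two alignment, which holds for every alignment used in this patch (8, 16, BRAID_W, VMX_ALIGN, VX_ALIGNMENT).

/* Minimal demo of the ALIGN_DIFF() macro added in zbuild.h above:
 * print the number of bytes needed to reach the next 16-byte boundary
 * for a few sample pointers. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))

int main(void) {
    unsigned char buffer[64];
    /* Derive a 16-byte aligned base inside the buffer so the output below is
     * deterministic no matter where the compiler places the array. */
    unsigned char *base = buffer + ALIGN_DIFF(buffer, 16);

    for (size_t off = 0; off < 5; off++) {
        size_t diff = (size_t)ALIGN_DIFF(base + off, 16);
        printf("base+%zu: %zu byte(s) to the next 16-byte boundary\n", off, diff);
    }
    /* Prints 0, 15, 14, 13, 12: an already aligned pointer yields 0. */
    return 0;
}

Because a 0 result means no prologue bytes are needed, call sites in the patch can either clamp the value directly with MIN(..., len), as in crc32_braid_internal() and the VMX adler32 code, or keep a simple "if (align_diff)" / mask check as a fast path, as in crc32_power8() and crc32_s390_vx().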