From: Nathan Moinvaziri
Date: Thu, 8 Jan 2026 02:23:23 +0000 (-0800)
Subject: Add ALIGN_UP and ALIGN_DOWN macros for readability
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8cf6fc32d0d23d6f4a1b0c4dc03689e4bf6472ca;p=thirdparty%2Fzlib-ng.git

Add ALIGN_UP and ALIGN_DOWN macros for readability
---

diff --git a/arch/loongarch/adler32_lasx.c b/arch/loongarch/adler32_lasx.c
index d93f41b0e..92b942ea4 100644
--- a/arch/loongarch/adler32_lasx.c
+++ b/arch/loongarch/adler32_lasx.c
@@ -67,8 +67,7 @@ rem_peel:
         __m256i vs3 = __lasx_xvldi(0);
         vs2_0 = vs3;
 
-        size_t k = MIN(len, NMAX);
-        k -= k % 32;
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
         len -= k;
 
         while (k >= 64) {
diff --git a/arch/loongarch/adler32_lsx.c b/arch/loongarch/adler32_lsx.c
index e0c8ca969..4c3603193 100644
--- a/arch/loongarch/adler32_lsx.c
+++ b/arch/loongarch/adler32_lsx.c
@@ -49,8 +49,7 @@ rem_peel:
 
     while (len >= 16) {
 
-        k = MIN(len, NMAX);
-        k -= k % 16;
+        k = ALIGN_DOWN(MIN(len, NMAX), 16);
         len -= k;
 
         vs1 = __lsx_vinsgr2vr_w(zero, adler0, 0);
diff --git a/arch/power/crc32_power8.c b/arch/power/crc32_power8.c
index 2d118a9c0..a7a2fb743 100644
--- a/arch/power/crc32_power8.c
+++ b/arch/power/crc32_power8.c
@@ -67,11 +67,11 @@ Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, size_t _l
         p += prealign;
     }
 
-    crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+    crc = __crc32_vpmsum(crc, p, ALIGN_DOWN(len, VMX_ALIGN));
 
     tail = len & VMX_ALIGN_MASK;
     if (tail) {
-        p += len & ~VMX_ALIGN_MASK;
+        p += ALIGN_DOWN(len, VMX_ALIGN);
         crc = crc32_align(crc, p, tail);
     }
 
diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c
index 7d0de8629..ba00f9a37 100644
--- a/arch/s390/crc32-vx.c
+++ b/arch/s390/crc32-vx.c
@@ -212,7 +212,7 @@ uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t
         crc = crc32_braid(crc, buf, prealign);
         buf += prealign;
     }
-    aligned = len & ~VX_ALIGN_MASK;
+    aligned = ALIGN_DOWN(len, VX_ALIGNMENT);
     remaining = len & VX_ALIGN_MASK;
 
     crc = ~crc32_le_vgfm_16(~crc, buf, aligned);
diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h
index ae6001ba3..b367f42f3 100644
--- a/arch/s390/dfltcc_detail.h
+++ b/arch/s390/dfltcc_detail.h
@@ -214,8 +214,6 @@ static inline dfltcc_cc dfltcc(int fn, void *param,
     return (cc >> 28) & 3;
 }
 
-#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
-
 static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) {
     /* Initialize available functions */
     if (is_dfltcc_enabled()) {
diff --git a/arch/x86/adler32_avx2.c b/arch/x86/adler32_avx2.c
index bf748eb8f..4b1f0dac9 100644
--- a/arch/x86/adler32_avx2.c
+++ b/arch/x86/adler32_avx2.c
@@ -50,8 +50,7 @@ rem_peel:
         __m256i vs3 = _mm256_setzero_si256();
         vs2_0 = vs3;
 
-        size_t k = MIN(len, NMAX);
-        k -= k % 32;
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
         len -= k;
 
        while (k >= 64) {
diff --git a/arch/x86/adler32_avx512.c b/arch/x86/adler32_avx512.c
index 5fffbd360..8a8e165bb 100644
--- a/arch/x86/adler32_avx512.c
+++ b/arch/x86/adler32_avx512.c
@@ -40,7 +40,6 @@ rem_peel:
                                           56, 57, 58, 59, 60, 61, 62, 63, 64);
     const __m512i dot3v = _mm512_set1_epi16(1);
     const __m512i zero = _mm512_setzero_si512();
-    size_t k;
 
     while (len >= 64) {
         __m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
@@ -48,8 +47,7 @@
         vs1_0 = vs1;
         vs3 = _mm512_setzero_si512();
 
-        k = MIN(len, NMAX);
-        k -= k % 64;
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
         len -= k;
 
         while (k >= 64) {
diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c
index 741176cac..8bebffbf8 100644
--- a/arch/x86/adler32_avx512_vnni.c
+++ b/arch/x86/adler32_avx512_vnni.c
@@ -40,8 +40,7 @@ rem_peel:
     while (len >= 64) {
         vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
         vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
-        size_t k = MIN(len, NMAX);
-        k -= k % 64;
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
         len -= k;
         __m512i vs1_0 = vs1;
         __m512i vs3 = _mm512_setzero_si512();
@@ -131,9 +130,10 @@ rem_peel_copy:
     while (len >= 32) {
         vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
         vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
-        size_t k = MIN(len, NMAX);
-        k -= k % 32;
+
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
         len -= k;
+
         __m256i vs1_0 = vs1;
         __m256i vs3 = _mm256_setzero_si256();
         /* We might get a tad bit more ILP here if we sum to a second register in the loop */
diff --git a/arch/x86/adler32_sse42.c b/arch/x86/adler32_sse42.c
index c611034ab..ea1d37037 100644
--- a/arch/x86/adler32_sse42.c
+++ b/arch/x86/adler32_sse42.c
@@ -30,12 +30,9 @@ rem_peel:
     const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
     const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
     const __m128i dot3v = _mm_set1_epi16(1);
-    size_t k;
 
     while (len >= 16) {
-
-        k = MIN(len, NMAX);
-        k -= k % 16;
+        size_t k = ALIGN_DOWN(MIN(len, NMAX), 16);
         len -= k;
 
         vs1 = _mm_cvtsi32_si128(adler0);
diff --git a/arch/x86/adler32_ssse3.c b/arch/x86/adler32_ssse3.c
index 91003fab1..4f9ee22f9 100644
--- a/arch/x86/adler32_ssse3.c
+++ b/arch/x86/adler32_ssse3.c
@@ -79,8 +79,7 @@ Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len
         vs2_0 = _mm_setzero_si128();
         vs1_0 = vs1;
 
-        k = (len < max_iters ? len : max_iters);
-        k -= k % 16;
+        k = ALIGN_DOWN(MIN(len, max_iters), 16);
         len -= k;
 
         while (k >= 32) {
diff --git a/deflate.h b/deflate.h
index b108b7e59..5235ce9d1 100644
--- a/deflate.h
+++ b/deflate.h
@@ -439,9 +439,9 @@ void Z_INTERNAL PREFIX(flush_pending)(PREFIX3(streamp) strm);
 /* Bit buffer and compress bits calculation debugging */
 #ifdef ZLIB_DEBUG
 # define cmpr_bits_add(s, len)  s->compressed_len += (len)
-# define cmpr_bits_align(s)     s->compressed_len = (s->compressed_len + 7) & ~7L
+# define cmpr_bits_align(s)     s->compressed_len = ALIGN_UP(s->compressed_len, 8)
 # define sent_bits_add(s, bits) s->bits_sent += (bits)
-# define sent_bits_align(s)     s->bits_sent = (s->bits_sent + 7) & ~7L
+# define sent_bits_align(s)     s->bits_sent = ALIGN_UP(s->bits_sent, 8)
 #else
 # define cmpr_bits_add(s, len)  Z_UNUSED(len)
 # define cmpr_bits_align(s)
diff --git a/zbuild.h b/zbuild.h
index b17362767..1d5b6286b 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -261,6 +261,14 @@
 #define ALIGN_DIFF(ptr, align) \
     (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
 
+/* Round up value to the nearest multiple of align (align must be power of 2) */
+#define ALIGN_UP(value, align) \
+    (((value) + ((align) - 1)) & ~((align) - 1))
+
+/* Round down value to the nearest multiple of align (align must be power of 2) */
+#define ALIGN_DOWN(value, align) \
+    ((value) & ~((align) - 1))
+
 /* PADSZ returns needed bytes to pad bpos to pad size
  * PAD_NN calculates pad size and adds it to bpos, returning the result.
  * All take an integer or a pointer as bpos input.
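
As an illustration (not part of the commit), the standalone C sketch below uses local copies of the ALIGN_UP/ALIGN_DOWN definitions added to zbuild.h above and checks that each new form matches the expression it replaces in the diff; the 32/16/8 alignments and the uint64_t test range are arbitrary choices for the demo. Both macros rely on align being a power of two, since masking with ~(align - 1) only clears the low bits in that case.

/* Illustration only, not part of the patch: local copies of the macros added
 * to zbuild.h above, plus checks that each new form matches the expression it
 * replaces. Both macros require align to be a power of 2. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(value, align)   (((value) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN(value, align) ((value) & ~((align) - 1))

int main(void) {
    for (uint64_t len = 0; len < 4096; len++) {
        /* adler32 idiom: k = len; k -= k % 32;  is  ALIGN_DOWN(len, 32) */
        uint64_t k = len;
        k -= k % 32;
        assert(k == ALIGN_DOWN(len, 32));

        /* crc32 idiom: len & ~mask, with mask == align - 1,  is  ALIGN_DOWN(len, 16) */
        assert((len & ~(uint64_t)15) == ALIGN_DOWN(len, 16));

        /* deflate.h idiom: (x + 7) & ~7L  is  ALIGN_UP(x, 8) */
        assert(((len + 7) & ~(uint64_t)7) == ALIGN_UP(len, 8));
    }

    /* ALIGN_UP rounds to the next multiple of align, ALIGN_DOWN to the previous one. */
    printf("ALIGN_UP(13, 8) = %d, ALIGN_DOWN(13, 8) = %d\n",
           (int)ALIGN_UP(13, 8), (int)ALIGN_DOWN(13, 8));   /* prints 16 and 8 */
    return 0;
}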