__m256i vs3 = __lasx_xvldi(0);
vs2_0 = vs3;
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
while (k >= 64) {
while (len >= 16) {
- k = MIN(len, NMAX);
- k -= k % 16;
+ k = ALIGN_DOWN(MIN(len, NMAX), 16);
len -= k;
vs1 = __lsx_vinsgr2vr_w(zero, adler0, 0);
p += prealign;
}
- crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+ crc = __crc32_vpmsum(crc, p, ALIGN_DOWN(len, VMX_ALIGN));
tail = len & VMX_ALIGN_MASK;
if (tail) {
- p += len & ~VMX_ALIGN_MASK;
+ p += ALIGN_DOWN(len, VMX_ALIGN);
crc = crc32_align(crc, p, tail);
}
crc = crc32_braid(crc, buf, prealign);
buf += prealign;
}
- aligned = len & ~VX_ALIGN_MASK;
+ aligned = ALIGN_DOWN(len, VX_ALIGNMENT);
remaining = len & VX_ALIGN_MASK;
crc = ~crc32_le_vgfm_16(~crc, buf, aligned);
return (cc >> 28) & 3;
}
-#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
-
static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) {
/* Initialize available functions */
if (is_dfltcc_enabled()) {
__m256i vs3 = _mm256_setzero_si256();
vs2_0 = vs3;
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
while (k >= 64) {
56, 57, 58, 59, 60, 61, 62, 63, 64);
const __m512i dot3v = _mm512_set1_epi16(1);
const __m512i zero = _mm512_setzero_si512();
- size_t k;
while (len >= 64) {
__m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs1_0 = vs1;
vs3 = _mm512_setzero_si512();
- k = MIN(len, NMAX);
- k -= k % 64;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
len -= k;
while (k >= 64) {
while (len >= 64) {
vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
- k -= k % 64;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
len -= k;
__m512i vs1_0 = vs1;
__m512i vs3 = _mm512_setzero_si512();
while (len >= 32) {
vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
+
__m256i vs1_0 = vs1;
__m256i vs3 = _mm256_setzero_si256();
/* We might get a tad bit more ILP here if we sum to a second register in the loop */
const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
const __m128i dot3v = _mm_set1_epi16(1);
- size_t k;
while (len >= 16) {
-
- k = MIN(len, NMAX);
- k -= k % 16;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 16);
len -= k;
vs1 = _mm_cvtsi32_si128(adler0);
vs2_0 = _mm_setzero_si128();
vs1_0 = vs1;
- k = (len < max_iters ? len : max_iters);
- k -= k % 16;
+ k = ALIGN_DOWN(MIN(len, max_iters), 16);
len -= k;
while (k >= 32) {
/* Bit buffer and compress bits calculation debugging */
#ifdef ZLIB_DEBUG
# define cmpr_bits_add(s, len) s->compressed_len += (len)
-# define cmpr_bits_align(s) s->compressed_len = (s->compressed_len + 7) & ~7L
+# define cmpr_bits_align(s) s->compressed_len = ALIGN_UP(s->compressed_len, 8)
# define sent_bits_add(s, bits) s->bits_sent += (bits)
-# define sent_bits_align(s) s->bits_sent = (s->bits_sent + 7) & ~7L
+# define sent_bits_align(s) s->bits_sent = ALIGN_UP(s->bits_sent, 8)
#else
# define cmpr_bits_add(s, len) Z_UNUSED(len)
# define cmpr_bits_align(s)
#define ALIGN_DIFF(ptr, align) \
(((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
+/* Round up value to the nearest multiple of align (align must be a power of 2) */
+#define ALIGN_UP(value, align) \
+ (((value) + ((align) - 1)) & ~((align) - 1))
+
+/* Round down value to the nearest multiple of align (align must be a power of 2) */
+#define ALIGN_DOWN(value, align) \
+ ((value) & ~((align) - 1))
+
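/*
 * Reviewer note: a minimal standalone sketch (not part of the patch) exercising
 * the alignment helpers above. The definitions are repeated verbatim so the
 * sketch compiles on its own; the concrete length, buffer, and main() wrapper
 * are illustrative only, and align is assumed to be a power of two.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
#define ALIGN_UP(value, align)   (((value) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN(value, align) ((value) & ~((align) - 1))

int main(void) {
    size_t len = 1000;                   /* hypothetical buffer length */

    /* ALIGN_DOWN replaces the old "k = MIN(len, NMAX); k -= k % 32;" pattern. */
    assert(ALIGN_DOWN(len, 32) == 992);  /* largest multiple of 32 <= 1000  */
    assert(ALIGN_UP(len, 32) == 1024);   /* smallest multiple of 32 >= 1000 */

    /* ALIGN_DIFF gives the byte count needed to advance a pointer to the
     * next alignment boundary (0 if it is already aligned). */
    unsigned char buf[64];
    unsigned char *p = buf + 5;
    assert(((uintptr_t)(p + ALIGN_DIFF(p, 16)) & 15) == 0);
    return 0;
}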
/* PADSZ returns needed bytes to pad bpos to pad size
* PAD_NN calculates pad size and adds it to bpos, returning the result.
* All take an integer or a pointer as bpos input.