__m256i vs3 = __lasx_xvldi(0);
vs2_0 = vs3;
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
while (k >= 64) {
while (len >= 16) {
- k = MIN(len, NMAX);
- k -= k % 16;
+ k = ALIGN_DOWN(MIN(len, NMAX), 16);
len -= k;
vs1 = __lsx_vinsgr2vr_w(zero, adler0, 0);
p += prealign;
}
- crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+ crc = __crc32_vpmsum(crc, p, ALIGN_DOWN(len, VMX_ALIGN));
tail = len & VMX_ALIGN_MASK;
if (tail) {
- p += len & ~VMX_ALIGN_MASK;
+ p += ALIGN_DOWN(len, VMX_ALIGN);
crc = crc32_align(crc, p, tail);
}
crc = crc32_braid(crc, buf, prealign);
buf += prealign;
}
- aligned = len & ~VX_ALIGN_MASK;
+ aligned = ALIGN_DOWN(len, VX_ALIGNMENT);
remaining = len & VX_ALIGN_MASK;
crc = ~crc32_le_vgfm_16(~crc, buf, aligned);
return (cc >> 28) & 3;
}
-#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
-
static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) {
/* Initialize available functions */
if (is_dfltcc_enabled()) {
__m256i vs3 = _mm256_setzero_si256();
vs2_0 = vs3;
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
while (k >= 64) {
56, 57, 58, 59, 60, 61, 62, 63, 64);
const __m512i dot3v = _mm512_set1_epi16(1);
const __m512i zero = _mm512_setzero_si512();
- size_t k;
while (len >= 64) {
__m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs1_0 = vs1;
vs3 = _mm512_setzero_si512();
- k = MIN(len, NMAX);
- k -= k % 64;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
len -= k;
while (k >= 64) {
while (len >= 64) {
vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
- k -= k % 64;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 64);
len -= k;
__m512i vs1_0 = vs1;
__m512i vs3 = _mm512_setzero_si512();
while (len >= 32) {
vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
- size_t k = MIN(len, NMAX);
- k -= k % 32;
+
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 32);
len -= k;
+
__m256i vs1_0 = vs1;
__m256i vs3 = _mm256_setzero_si256();
/* We might get a tad bit more ILP here if we sum to a second register in the loop */
const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
const __m128i dot3v = _mm_set1_epi16(1);
- size_t k;
while (len >= 16) {
-
- k = MIN(len, NMAX);
- k -= k % 16;
+ size_t k = ALIGN_DOWN(MIN(len, NMAX), 16);
len -= k;
vs1 = _mm_cvtsi32_si128(adler0);
vs2_0 = _mm_setzero_si128();
vs1_0 = vs1;
- k = (len < max_iters ? len : max_iters);
- k -= k % 16;
+ k = ALIGN_DOWN(MIN(len, max_iters), 16);
len -= k;
while (k >= 32) {
/* Bit buffer and compress bits calculation debugging */
#ifdef ZLIB_DEBUG
# define cmpr_bits_add(s, len) s->compressed_len += (len)
-# define cmpr_bits_align(s) s->compressed_len = (s->compressed_len + 7) & ~7L
+# define cmpr_bits_align(s) s->compressed_len = ALIGN_UP(s->compressed_len, 8)
# define sent_bits_add(s, bits) s->bits_sent += (bits)
-# define sent_bits_align(s) s->bits_sent = (s->bits_sent + 7) & ~7L
+# define sent_bits_align(s) s->bits_sent = ALIGN_UP(s->bits_sent, 8)
#else
# define cmpr_bits_add(s, len) Z_UNUSED(len)
# define cmpr_bits_align(s)
#define ALIGN_DIFF(ptr, align) \
(((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
+/* Round up value to the nearest multiple of align (align must be a power of 2) */
+#define ALIGN_UP(value, align) \
+ (((value) + ((align) - 1)) & ~((align) - 1))
+
+/* Round down value to the nearest multiple of align (align must be a power of 2) */
+#define ALIGN_DOWN(value, align) \
+ ((value) & ~((align) - 1))
+
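/*
 * Reviewer note: a minimal standalone sketch (not part of the patch) exercising
 * the alignment helpers above. The definitions are repeated verbatim so the
 * sketch compiles on its own; the concrete length, buffer, and main() wrapper
 * are illustrative only, and align is assumed to be a power of two.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
#define ALIGN_UP(value, align)   (((value) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN(value, align) ((value) & ~((align) - 1))

int main(void) {
    size_t len = 1000;                   /* hypothetical buffer length */

    /* ALIGN_DOWN replaces the old "k = MIN(len, NMAX); k -= k % 32;" pattern. */
    assert(ALIGN_DOWN(len, 32) == 992);  /* largest multiple of 32 <= 1000  */
    assert(ALIGN_UP(len, 32) == 1024);   /* smallest multiple of 32 >= 1000 */

    /* ALIGN_DIFF gives the byte count needed to advance a pointer to the
     * next alignment boundary (0 if it is already aligned). */
    unsigned char buf[64];
    unsigned char *p = buf + 5;
    assert(((uintptr_t)(p + ALIGN_DIFF(p, 16)) & 15) == 0);
    return 0;
}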
/* PADSZ returns needed bytes to pad bpos to pad size
* PAD_NN calculates pad size and adds it to bpos, returning the result.
* All take an integer or a pointer as bpos input.