int k;
/* Compute the CRC up to a z_word_t boundary. */
- while (len && ((uintptr_t)buf & (BRAID_W - 1)) != 0) {
- len--;
+ size_t align_diff = (size_t)MIN(ALIGN_DIFF(buf, BRAID_W), len);
+ len -= align_diff;
+ while (align_diff--)
CRC_DO1;
- }
/* Compute the CRC on as many BRAID_N z_word_t blocks as are available. */
blks = len / (BRAID_N * BRAID_W);
#endif // OPTIMAL_CMP == 64
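/* Illustrative sketch (not part of the patch): the prologue pattern the hunk
 * above adopts. Clamping ALIGN_DIFF with MIN guarantees the byte loop never
 * consumes more than len bytes on a short buffer. MIN and ALIGN_DIFF are
 * assumed to match the zbuild.h definitions later in this patch;
 * crc_update_byte is a hypothetical stand-in for CRC_DO1, here a plain
 * bit-at-a-time step for the reflected CRC-32 polynomial. */
#include <stddef.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))

static uint32_t crc_update_byte(uint32_t c, uint8_t b) {
    c ^= b;
    for (int k = 0; k < 8; k++)
        c = (c >> 1) ^ (0xEDB88320u & (0u - (c & 1)));
    return c;
}

/* Consume bytes until buf is word-aligned (or len runs out). */
static uint32_t crc_head(uint32_t c, const uint8_t **buf, size_t *len, size_t word_size) {
    size_t align_diff = (size_t)MIN(ALIGN_DIFF(*buf, word_size), *len);
    *len -= align_diff;
    while (align_diff--)
        c = crc_update_byte(c, *(*buf)++);
    return c;
}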
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t* aligned_buf;
+ uint64_t *aligned_buf;
uint32_t c = (~crc) & 0xffffffff;
- uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
+ uintptr_t align_diff = ALIGN_DIFF(buf, 8);
- if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
- if (algn_diff) {
- c = crc32_braid_internal(c, buf, algn_diff);
- len -= algn_diff;
+ if (len > align_diff + CHORBA_SMALL_THRESHOLD) {
+ if (align_diff) {
+ c = crc32_braid_internal(c, buf, align_diff);
+ len -= align_diff;
}
- aligned_buf = (uint64_t*) (buf + algn_diff);
+ aligned_buf = (uint64_t*)(buf + align_diff);
- if(len > CHORBA_LARGE_THRESHOLD) {
+ if (len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
# if OPTIMAL_CMP == 64
} else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
- c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
+ c = crc32_chorba_32768_nondestructive(c, (uint64_t*)aligned_buf, len);
# endif
} else {
# if OPTIMAL_CMP == 64
- c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
+ c = crc32_chorba_small_nondestructive(c, (uint64_t*)aligned_buf, len);
# else
- c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
+ c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*)aligned_buf, len);
# endif
}
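/* Illustrative sketch (not part of the patch): why the dispatch above tests
 * `len > align_diff + CHORBA_SMALL_THRESHOLD` before consuming the unaligned
 * head. The check guarantees that `len -= align_diff` cannot underflow and
 * that the chorba kernels always receive more than a threshold's worth of
 * aligned bytes. Names here are local to the sketch. */
#include <stddef.h>

static int chorba_path_taken(size_t len, size_t align_diff, size_t threshold) {
    if (len <= align_diff + threshold)
        return 0;               /* too short: fall back to the braid path */
    len -= align_diff;          /* consume the unaligned head, no underflow */
    return len > threshold;     /* always 1 here: the kernel gets enough data */
}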
} else {
uint32_t pair[16] ALIGNED_(16);
- memset(&pair[2], 0, 14);
+ memset(&pair[2], 0, 14 * sizeof(uint32_t));
int n = NMAX;
- unsigned int done = 0, i;
+ unsigned int done = 0;
/* Split Adler-32 into its component sums; the checksum can be supplied
 * by caller sites (e.g. from a PNG file). */
return adler32_copy_len_16(adler, NULL, buf, len, sum2, 0);
// Align buffer
- unsigned int al = 0;
- if ((uintptr_t)buf & 0xf) {
- al = 16-((uintptr_t)buf & 0xf);
- if (al > len) {
- al=len;
- }
- vmx_handle_head_or_tail(pair, buf, al);
-
- done += al;
+ size_t align_len = (size_t)MIN(ALIGN_DIFF(buf, 16), len);
+ if (align_len) {
+ vmx_handle_head_or_tail(pair, buf, align_len);
+ done += align_len;
/* Rather than rebasing, we can reduce the max sums for the
* first round only */
- n -= al;
+ n -= align_len;
}
- for (i = al; i < len; i += n) {
+ for (size_t i = align_len; i < len; i += n) {
int remaining = (int)(len-i);
- n = MIN(remaining, (i == al) ? n : NMAX);
-
+ n = MIN(remaining, (i == align_len) ? n : NMAX);
if (n < 16)
break;
}
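/* Illustrative sketch (not part of the patch): the loop structure above.
 * Rather than rebasing i to 0 after the aligned head, the first round's
 * block size n is shortened by align_len, so no round ever accumulates
 * more than NMAX bytes before Adler-32's modulo reduction is due. The
 * inner byte sum stands in for the VMX block sum; NMAX's value matches
 * zlib's bound for 32-bit overflow safety. */
#include <stddef.h>

#define NMAX 5552
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static unsigned sum_rounds(const unsigned char *buf, size_t len, size_t align_len) {
    unsigned s = 0;
    int n = NMAX - (int)align_len;              /* shortened first round */
    for (size_t i = align_len; i < len; i += n) {
        int remaining = (int)(len - i);
        n = MIN(remaining, (i == align_len) ? n : NMAX);
        if (n < 16)
            break;                              /* short tail handled elsewhere */
        for (int j = 0; j < n; j++)
            s += buf[i + j];
    }
    return s;
}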
if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+ prealign = (unsigned int)ALIGN_DIFF(p, VMX_ALIGN);
crc = crc32_align(crc, p, prealign);
len -= prealign;
p += prealign;
return crc32_braid(crc, buf, len);
if ((uintptr_t)buf & VX_ALIGN_MASK) {
- prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
+ prealign = (size_t)ALIGN_DIFF(buf, VX_ALIGNMENT);
len -= prealign;
crc = crc32_braid(crc, buf, prealign);
buf += prealign;
}
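/* Illustrative sketch (not part of the patch): the POWER and s390 VX hunks
 * above share one shape, which ALIGN_DIFF now expresses directly: measure
 * the distance to the vector boundary, CRC that head with the scalar
 * routine, then run the vector loop on an aligned pointer. This helper is
 * hypothetical; it performs only the pointer/length bookkeeping and returns
 * how many head bytes the caller must feed to the scalar CRC. */
#include <stddef.h>
#include <stdint.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))

static size_t consume_prealign(const unsigned char **p, size_t *len, size_t vec_align) {
    size_t prealign = (size_t)ALIGN_DIFF(*p, vec_align);
    if (prealign > *len)
        prealign = *len;        /* never consume past the end of the buffer */
    *p += prealign;
    *len -= prealign;
    return prealign;
}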
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t* aligned_buf;
+ uint64_t *aligned_buf;
uint32_t c = (~crc) & 0xffffffff;
- uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
+ uintptr_t align_diff = ALIGN_DIFF(buf, 16);
- if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
- if (algn_diff) {
- c = crc32_braid_internal(c, buf, algn_diff);
- len -= algn_diff;
+ if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
+ if (align_diff) {
+ c = crc32_braid_internal(c, buf, align_diff);
+ len -= align_diff;
}
- aligned_buf = (uint64_t*) (buf + algn_diff);
+ aligned_buf = (uint64_t*)(buf + align_diff);
#if !defined(WITHOUT_CHORBA)
- if(len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+ if (len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
} else
#endif
{
}
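/* Illustrative note (not part of the patch): the `} else` immediately before
 * the #endif above is deliberate. With chorba enabled the compiler sees a
 * normal if/else; when WITHOUT_CHORBA is defined, the `if (...) { ... } else`
 * text disappears entirely and only the bare compound block remains, so the
 * non-chorba fallback always executes:
 *
 *   #if !defined(WITHOUT_CHORBA)
 *       if (len > CHORBA_LARGE_THRESHOLD) {
 *           ...
 *       } else
 *   #endif
 *       {
 *           ...fallback...
 *       }
 */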
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
- uint64_t* aligned_buf;
+ uint64_t *aligned_buf;
uint32_t c = (~crc) & 0xffffffff;
- uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
+ uintptr_t align_diff = ALIGN_DIFF(buf, 16);
- if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
- if (algn_diff) {
- c = crc32_braid_internal(c, buf, algn_diff);
- len -= algn_diff;
+ if (len > align_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
+ if (align_diff) {
+ c = crc32_braid_internal(c, buf, align_diff);
+ len -= align_diff;
}
- aligned_buf = (uint64_t*) (buf + algn_diff);
+ aligned_buf = (uint64_t*)(buf + align_diff);
#if !defined(WITHOUT_CHORBA)
- if(len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+ if (len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*)aligned_buf, len);
} else
#endif
if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
size_t copy_len = len;
if (len >= 16) {
/* Calculate 16-byte alignment offset */
- unsigned algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
+ uintptr_t align_diff = ALIGN_DIFF(src, 16);
/* If total length is less than (alignment bytes + 16), use the faster small method.
* Handles both initially small buffers and cases where alignment would leave < 16 bytes */
- copy_len = len < algn_diff + 16 ? len : algn_diff;
+ copy_len = len < align_diff + 16 ? len : align_diff;
}
if (copy_len > 0) {
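/* Illustrative sketch (not part of the patch): how copy_len above selects
 * between copying only the unaligned head and handing the whole buffer to
 * the small path. Worked examples with a 16-byte vector width:
 *   src % 16 == 12, len == 100: align_diff == 4, copy_len == 4
 *     (copy a 4-byte head, leaving 96 bytes for the aligned loop);
 *   src % 16 == 12, len == 18:  align_diff == 4 and 18 < 4 + 16, so
 *     copy_len == 18 (aligning would leave under 16 bytes, so the small
 *     path takes everything). */
#include <stddef.h>
#include <stdint.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))

static size_t head_copy_len(const void *src, size_t len) {
    size_t copy_len = len;
    if (len >= 16) {
        size_t align_diff = (size_t)ALIGN_DIFF(src, 16);
        copy_len = len < align_diff + 16 ? len : align_diff;
    }
    return copy_len;
}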
#define HINT_ALIGNED_64(p) HINT_ALIGNED((p),64)
#define HINT_ALIGNED_4096(p) HINT_ALIGNED((p),4096)
+/* Number of bytes needed to advance ptr to the next align-byte boundary;
+ * evaluates to 0 when ptr is already aligned. align must be a power of two. */
+#define ALIGN_DIFF(ptr, align) \
+    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))
+
/* PADSZ returns the number of bytes needed to pad bpos to the given pad size.
 * PAD_NN calculates the pad size and adds it to bpos, returning the result.
 * All take either an integer or a pointer as the bpos input.
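/* Illustrative sketch (not part of the patch): ALIGN_DIFF behavior on a few
 * hypothetical addresses. align must be a power of two; the outer mask makes
 * the result 0 (rather than `align`) for already-aligned pointers. */
#include <assert.h>
#include <stdint.h>

#define ALIGN_DIFF(ptr, align) \
    (((uintptr_t)(align) - ((uintptr_t)(ptr) & ((align) - 1))) & ((align) - 1))

int main(void) {
    assert(ALIGN_DIFF((void *)16, 16) == 0);   /* already aligned */
    assert(ALIGN_DIFF((void *)17, 16) == 15);  /* 17 + 15 == 32 */
    assert(ALIGN_DIFF((void *)31, 16) == 1);   /* 31 + 1  == 32 */
    assert(ALIGN_DIFF((void *)7,  8)  == 1);   /* 7  + 1  == 8  */
    return 0;
}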
/* Provide aligned allocations; only used by gz* code */
void Z_INTERNAL *zng_alloc_aligned(unsigned size, unsigned align) {
- uintptr_t return_ptr, original_ptr;
- uint32_t alloc_size, align_diff;
+ uintptr_t return_ptr, original_ptr, align_diff;
+ uint32_t alloc_size;
void *ptr;
/* Allocate enough memory for proper alignment and to store the original memory pointer */
return NULL;
/* Calculate the return pointer address, leaving enough space to store the original pointer */
- align_diff = align - ((uintptr_t)ptr % align);
+ align_diff = ALIGN_DIFF(ptr, align);
return_ptr = (uintptr_t)ptr + align_diff;
if (align_diff < sizeof(void *))
return_ptr += align;
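/* Illustrative sketch (not part of the patch): the release side of the
 * layout zng_alloc_aligned builds. The `align_diff < sizeof(void *)` bump
 * above reserves room to stash the original malloc pointer immediately
 * below the returned address, so a matching free can recover it. This
 * zng_free_aligned body is an assumption about that scheme, not zlib-ng's
 * actual code. */
#include <stdlib.h>

static void zng_free_aligned(void *aligned_ptr) {
    if (aligned_ptr != NULL)
        free(((void **)aligned_ptr)[-1]);  /* original pointer stored just below */
}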