// Barrett reduction constant for the final 64 -> 32 bit reduction below.
#define CONSTANT_RU 0x1F7011641ULL
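
// Carry-less multiply via the RISC-V Zbc extension: returns the low 64 bits
// of the 128-bit carry-less product of a and b.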
static inline uint64_t clmul(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}
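
// Carry-less multiply, upper half: returns the high 64 bits of the 128-bit
// carry-less product of a and b (Zbc clmulh).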
static inline uint64_t clmulh(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}
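
// CLMUL-based CRC-32 folding in the style of Gopal et al., "Fast CRC
// Computation for Generic Polynomials Using PCLMULQDQ Instruction" (Intel,
// 2009): a 128-bit remainder (low/high) is carried through the buffer, each
// 16-byte block is folded in with two carry-less multiplies, and the result
// is reduced to 32 bits at the end. Callers must supply at least one full
// 16-byte block.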
Z_FORCEINLINE static uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf, uint64_t len) {
    const uint64_t *buf64 = (const uint64_t *)buf;
    uint64_t low = buf64[0] ^ crc;
    uint64_t high = buf64[1];

    if (len < 16)
        goto finish_fold;
    len -= 16;
    buf64 += 2;
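
    // CONSTANT_R3 and CONSTANT_R4 are precomputed fold constants (per the
    // standard folding scheme, powers of x modulo the CRC-32 polynomial,
    // defined elsewhere in this file); each clmul/clmulh pair advances the
    // running remainder past one 16-byte block.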
    // process each 16-byte block
    while (len >= 16) {
        uint64_t t2 = clmul(CONSTANT_R4, high);
        uint64_t t3 = clmulh(CONSTANT_R4, high);

        uint64_t t0_new = clmul(CONSTANT_R3, low);
        uint64_t t1_new = clmulh(CONSTANT_R3, low);

        // Combine the results and XOR with new data
        low = t0_new ^ t2;
        high = t1_new ^ t3;
        low ^= buf64[0];
        high ^= buf64[1];

        buf64 += 2;
        len -= 16;
    }
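
    // At this point low/high hold the folded 128-bit remainder; the steps
    // below reduce it to 64 bits and then, via Barrett reduction, to the
    // final 32-bit CRC.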
finish_fold:
    ;  // null statement: a declaration may not directly follow a label before C23

    // Fold the 128-bit result into 64 bits
    uint64_t fold_t3 = clmulh(low, CONSTANT_R4);
    uint64_t fold_t2 = clmul(low, CONSTANT_R4);
    low = high ^ fold_t2;
    high = fold_t3;

    // Combine the low and high parts and perform polynomial reduction
    uint64_t combined = (low >> 32) | ((high & MASK32) << 32);
    uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined;

    // Barrett reduction step
    uint64_t barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32;
    barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL);
    uint64_t final = barrett ^ reduced_low;

    // Return the high 32 bits as the final CRC
    return (uint32_t)(final >> 32);
}
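
// Dispatch: short inputs go to the portable braid implementation, and any
// residue modulo CLMUL_CHUNK_LEN is consumed by braid up front so the clmul
// kernel only ever sees whole chunks. The CRC is pre- and post-conditioned
// with the customary XOR against 0xFFFFFFFF.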
Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len) {
    if (len < CLMUL_MIN_LEN) {
        return crc32_braid(crc, buf, len);
    }

    uint64_t unaligned_length = len % CLMUL_CHUNK_LEN;
    if (unaligned_length) {
        crc = crc32_braid(crc, buf, unaligned_length);
        buf += unaligned_length;
        len -= unaligned_length;
    }
    crc ^= 0xFFFFFFFF;
    crc = crc32_clmul_impl(crc, buf, len);
    return crc ^ 0xFFFFFFFF;
}
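
// Checksum-and-copy variant: computes the CRC of src, then copies it to dst.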
Z_INTERNAL uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
    crc = crc32_riscv64_zbc(crc, src, len);
    memcpy(dst, src, len);
    return crc;
}
#endif