Forcing this to be inline has a significant speed improvement at the
cost of a few repeated instructions. The compilers tested on did not
inline this function since it is large and is used twice in the same
translation unit.
#if lzma_has_attribute(__no_sanitize_address__)
__attribute__((__no_sanitize_address__))
#endif
-static inline void
+static crc_always_inline void
crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1,
const __m128i vfold16, const __m128i initial_crc)
{