git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
x86/crc32: optimize tail handling for crc32c short inputs
author: Eric Biggers <ebiggers@google.com>
Tue, 4 Mar 2025 21:32:16 +0000 (13:32 -0800)
committer: Eric Biggers <ebiggers@google.com>
Mon, 10 Mar 2025 16:29:29 +0000 (09:29 -0700)
For handling the 0 <= len < sizeof(unsigned long) bytes left at the end,
do a 4-2-1 step-down instead of a byte-at-a-time loop.  This allows
taking advantage of wider CRC instructions.  Note that crc32c-3way.S
already uses this same optimization too.

crc_kunit shows an improvement of about 25% for len=127.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Uros Bizjak <ubizjak@gmail.com>
Link: https://lore.kernel.org/r/20250304213216.108925-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
arch/x86/lib/crc32-glue.c

index 4b4721176799a9a89e9e8265f4d2f56df468be07..e3f93b17ac3f160eb1e88efcdad6791c052e2a23 100644 (file)
@@ -57,7 +57,15 @@ u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
             num_longs != 0; num_longs--, p += sizeof(unsigned long))
                asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
 
-       for (len %= sizeof(unsigned long); len; len--, p++)
+       if (sizeof(unsigned long) > 4 && (len & 4)) {
+               asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
+               p += 4;
+       }
+       if (len & 2) {
+               asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
+               p += 2;
+       }
+       if (len & 1)
                asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
 
        return crc;