summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@google.com>2025-03-04 13:32:16 -0800
committerEric Biggers <ebiggers@google.com>2025-03-10 09:29:29 -0700
commit5aebe00b2f7215d996926517cc9710a1d2d8b7f9 (patch)
treef870143ade76edc79af3573ba248d5f85d19fbbc
parent511484fa881e8ce32fda63c5c3d3492394dbddda (diff)
x86/crc32: optimize tail handling for crc32c short inputs
For handling the 0 <= len < sizeof(unsigned long) bytes left at the end, do a 4-2-1 step-down instead of a byte-at-a-time loop. This allows taking advantage of wider CRC instructions. Note that crc32c-3way.S already uses this same optimization too. crc_kunit shows an improvement of about 25% for len=127. Suggested-by: "H. Peter Anvin" <hpa@zytor.com> Acked-by: Uros Bizjak <ubizjak@gmail.com> Link: https://lore.kernel.org/r/20250304213216.108925-1-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@google.com>
-rw-r--r--arch/x86/lib/crc32-glue.c10
1 files changed, 9 insertions, 1 deletions
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index 4b4721176799..e3f93b17ac3f 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -57,7 +57,15 @@ u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
num_longs != 0; num_longs--, p += sizeof(unsigned long))
asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
- for (len %= sizeof(unsigned long); len; len--, p++)
+ if (sizeof(unsigned long) > 4 && (len & 4)) {
+ asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
+ p += 4;
+ }
+ if (len & 2) {
+ asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
+ p += 2;
+ }
+ if (len & 1)
asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
return crc;