author     Linus Torvalds <torvalds@linux-foundation.org>  2025-03-25 18:33:04 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-03-25 18:33:04 -0700
commit     ee6740fd34eb53c5c76be01201c15310f461b69f (patch)
tree       09a51108245a2e8f644d1956a9e5b9f974bc23b8 /arch/x86/lib/crc32-glue.c
parent     a86c6d0b2ad12f6ce6560f735f4799cf1f631ab2 (diff)
parent     acf9f8da5e19fc1cbf26f2ecb749369e13e7cd85 (diff)
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:
"Another set of improvements to the kernel's CRC (cyclic redundancy
check) code:
- Rework the CRC64 library functions to be directly optimized, like
what I did last cycle for the CRC32 and CRC-T10DIF library
functions
- Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
support and acceleration for crc64_be and crc64_nvme
- Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
crc_t10dif, crc64_be, and crc64_nvme
- Remove crc_t10dif and crc64_rocksoft from the crypto API, since
they are no longer needed there
- Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect
- Add kunit test cases for crc64_nvme and crc7
- Eliminate redundant functions for calculating the Castagnoli CRC32,
settling on just crc32c()
- Remove unnecessary prompts from some of the CRC kconfig options
- Further optimize the x86 crc32c code"
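
The "directly optimized" library model described in the message above comes down to one exported entry point per CRC that prefers an arch-specific body and falls back to a portable implementation. The following is a minimal userspace sketch of that dispatch pattern only, not the kernel's code; crc32c_base(), crc32c_arch() and have_arch_crc32c are illustrative stand-in names.

/*
 * Sketch of the library dispatch pattern: a single crc32c() entry point
 * that uses an arch-optimized body when available and otherwise falls
 * back to a portable bit-at-a-time Castagnoli CRC32.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int have_arch_crc32c;	/* stands in for a kernel static key */

/* Portable fallback: bit-at-a-time CRC32C (reflected polynomial 0x82f63b78). */
static uint32_t crc32c_base(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
	}
	return crc;
}

/* Stand-in for an arch-optimized body (e.g. SSE4.2 crc32 instructions). */
static uint32_t crc32c_arch(uint32_t crc, const uint8_t *p, size_t len)
{
	return crc32c_base(crc, p, len);	/* sketch: reuse the fallback */
}

/* Library entry point: dispatch to the arch body when the CPU supports it. */
static uint32_t crc32c(uint32_t crc, const void *p, size_t len)
{
	if (have_arch_crc32c)
		return crc32c_arch(crc, p, len);
	return crc32c_base(crc, p, len);
}

int main(void)
{
	const char msg[] = "123456789";

	/* CRC32C of "123456789" with init/xorout of ~0 is the standard check value e3069283. */
	printf("%08x\n", ~crc32c(~0u, msg, sizeof(msg) - 1));
	return 0;
}

In the kernel, the role of have_arch_crc32c is played by a static key flipped at boot from an arch initcall, as the crc32_x86_init() hunk in the diff below shows.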
* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits)
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
lib/crc: remove unnecessary prompt for CONFIG_CRC64
lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C
lib/crc: remove unnecessary prompt for CONFIG_CRC8
lib/crc: remove unnecessary prompt for CONFIG_CRC7
lib/crc: remove unnecessary prompt for CONFIG_CRC4
lib/crc7: unexport crc7_be_syndrome_table
lib/crc_kunit.c: update comment in crc_benchmark()
lib/crc_kunit.c: add test and benchmark for crc7_be()
x86/crc32: optimize tail handling for crc32c short inputs
riscv/crc64: add Zbc optimized CRC64 functions
riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function
riscv/crc32: reimplement the CRC32 functions using new template
riscv/crc: add "template" for Zbc optimized CRC functions
x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings
x86/crc32: improve crc32c_arch() code generation with clang
x86/crc64: implement crc64_be and crc64_nvme using new template
x86/crc-t10dif: implement crc_t10dif using new template
x86/crc32: implement crc32_le using new template
x86/crc: add "template" for [V]PCLMULQDQ based CRC functions
...
Diffstat (limited to 'arch/x86/lib/crc32-glue.c')
-rw-r--r--	arch/x86/lib/crc32-glue.c	57
1 file changed, 22 insertions(+), 35 deletions(-)
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index 2dd18a886ded..e3f93b17ac3f 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -7,43 +7,20 @@
  * Copyright 2024 Google LLC
  */
 
-#include <asm/cpufeatures.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
 #include <linux/crc32.h>
-#include <linux/linkage.h>
 #include <linux/module.h>
-
-/* minimum size of buffer for crc32_pclmul_le_16 */
-#define CRC32_PCLMUL_MIN_LEN	64
+#include "crc-pclmul-template.h"
 
 static DEFINE_STATIC_KEY_FALSE(have_crc32);
 static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
 
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
 
 u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
-	if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
-	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
-		size_t n = -(uintptr_t)p & 15;
-
-		/* align p to 16-byte boundary */
-		if (n) {
-			crc = crc32_le_base(crc, p, n);
-			p += n;
-			len -= n;
-		}
-		n = round_down(len, 16);
-		kernel_fpu_begin();
-		crc = crc32_pclmul_le_16(crc, p, n);
-		kernel_fpu_end();
-		p += n;
-		len -= n;
-	}
-	if (len)
-		crc = crc32_le_base(crc, p, len);
-	return crc;
+	CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
+		   have_pclmulqdq);
+	return crc32_le_base(crc, p, len);
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
@@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch);
 
 asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	size_t num_longs;
 
 	if (!static_branch_likely(&have_crc32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
 	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
@@ -78,14 +55,22 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 
 	for (num_longs = len / sizeof(unsigned long);
 	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
-		asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+		asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
 
-	for (len %= sizeof(unsigned long); len; len--, p++)
-		asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+	if (sizeof(unsigned long) > 4 && (len & 4)) {
+		asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
+		p += 4;
+	}
+	if (len & 2) {
+		asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
+		p += 2;
+	}
+	if (len & 1)
+		asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
 
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
@@ -97,8 +82,10 @@ static int __init crc32_x86_init(void)
 {
 	if (boot_cpu_has(X86_FEATURE_XMM4_2))
 		static_branch_enable(&have_crc32);
-	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
 		static_branch_enable(&have_pclmulqdq);
+		INIT_CRC_PCLMUL(crc32_lsb);
+	}
 	return 0;
 }
 arch_initcall(crc32_x86_init);
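
The tail handling added in the third hunk is what the "x86/crc32: optimize tail handling for crc32c short inputs" commit in the shortlog refers to: instead of finishing with a byte-at-a-time loop, the remaining 0-7 bytes are consumed with at most one 4-byte, one 2-byte, and one 1-byte CRC32 step. Below is a minimal userspace sketch of the same idea, assuming an x86-64 compiler with SSE4.2 and the _mm_crc32_* intrinsics from <nmmintrin.h> (build with -msse4.2); it is an illustration, not the kernel implementation above.

/*
 * Userspace sketch of crc32c with fixed-width tail handling:
 * 8 bytes at a time in the main loop, then at most one 4-, 2-, and
 * 1-byte step for whatever remains.
 */
#include <nmmintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c_sse42(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	/* main loop: one crc32q per 8 bytes */
	for (; len >= 8; len -= 8, p += 8) {
		uint64_t v;

		memcpy(&v, p, sizeof(v));
		crc = (uint32_t)_mm_crc32_u64(crc, v);
	}
	/* tail: branch on the remaining length bits instead of a byte loop */
	if (len & 4) {
		uint32_t v;

		memcpy(&v, p, sizeof(v));
		crc = _mm_crc32_u32(crc, v);
		p += 4;
	}
	if (len & 2) {
		uint16_t v;

		memcpy(&v, p, sizeof(v));
		crc = _mm_crc32_u16(crc, v);
		p += 2;
	}
	if (len & 1)
		crc = _mm_crc32_u8(crc, *p);
	return crc;
}

int main(void)
{
	const char msg[] = "123456789";

	/* expected CRC32C check value: e3069283 */
	printf("%08x\n", ~crc32c_sse42(~0u, msg, sizeof(msg) - 1));
	return 0;
}

The structural point is the same as in the kernel hunk: the cost of the tail becomes a fixed three branches rather than a loop whose trip count depends on len modulo the word size.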