summaryrefslogtreecommitdiff
path: root/arch/x86/lib/crc32-glue.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 18:33:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 18:33:04 -0700
commitee6740fd34eb53c5c76be01201c15310f461b69f (patch)
tree09a51108245a2e8f644d1956a9e5b9f974bc23b8 /arch/x86/lib/crc32-glue.c
parenta86c6d0b2ad12f6ce6560f735f4799cf1f631ab2 (diff)
parentacf9f8da5e19fc1cbf26f2ecb749369e13e7cd85 (diff)
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers: "Another set of improvements to the kernel's CRC (cyclic redundancy check) code: - Rework the CRC64 library functions to be directly optimized, like what I did last cycle for the CRC32 and CRC-T10DIF library functions - Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ support and acceleration for crc64_be and crc64_nvme - Rewrite the riscv Zbc-optimized CRC code, and add acceleration for crc_t10dif, crc64_be, and crc64_nvme - Remove crc_t10dif and crc64_rocksoft from the crypto API, since they are no longer needed there - Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect - Add kunit test cases for crc64_nvme and crc7 - Eliminate redundant functions for calculating the Castagnoli CRC32, settling on just crc32c() - Remove unnecessary prompts from some of the CRC kconfig options - Further optimize the x86 crc32c code" * tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits) x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512 lib/crc: remove unnecessary prompt for CONFIG_CRC64 lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C lib/crc: remove unnecessary prompt for CONFIG_CRC8 lib/crc: remove unnecessary prompt for CONFIG_CRC7 lib/crc: remove unnecessary prompt for CONFIG_CRC4 lib/crc7: unexport crc7_be_syndrome_table lib/crc_kunit.c: update comment in crc_benchmark() lib/crc_kunit.c: add test and benchmark for crc7_be() x86/crc32: optimize tail handling for crc32c short inputs riscv/crc64: add Zbc optimized CRC64 functions riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function riscv/crc32: reimplement the CRC32 functions using new template riscv/crc: add "template" for Zbc optimized CRC functions x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings x86/crc32: improve crc32c_arch() code generation with clang x86/crc64: implement crc64_be and crc64_nvme using new template x86/crc-t10dif: implement crc_t10dif using new template x86/crc32: implement crc32_le using new template x86/crc: add "template" for [V]PCLMULQDQ based CRC functions ...
Diffstat (limited to 'arch/x86/lib/crc32-glue.c')
-rw-r--r--arch/x86/lib/crc32-glue.c57
1 files changed, 22 insertions, 35 deletions
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index 2dd18a886ded..e3f93b17ac3f 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -7,43 +7,20 @@
* Copyright 2024 Google LLC
*/
-#include <asm/cpufeatures.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
#include <linux/crc32.h>
-#include <linux/linkage.h>
#include <linux/module.h>
-
-/* minimum size of buffer for crc32_pclmul_le_16 */
-#define CRC32_PCLMUL_MIN_LEN 64
+#include "crc-pclmul-template.h"
static DEFINE_STATIC_KEY_FALSE(have_crc32);
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
- if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
- static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
- size_t n = -(uintptr_t)p & 15;
-
- /* align p to 16-byte boundary */
- if (n) {
- crc = crc32_le_base(crc, p, n);
- p += n;
- len -= n;
- }
- n = round_down(len, 16);
- kernel_fpu_begin();
- crc = crc32_pclmul_le_16(crc, p, n);
- kernel_fpu_end();
- p += n;
- len -= n;
- }
- if (len)
- crc = crc32_le_base(crc, p, len);
- return crc;
+ CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
+ have_pclmulqdq);
+ return crc32_le_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_le_arch);
@@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch);
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
size_t num_longs;
if (!static_branch_likely(&have_crc32))
- return crc32c_le_base(crc, p, len);
+ return crc32c_base(crc, p, len);
if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
@@ -78,14 +55,22 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
for (num_longs = len / sizeof(unsigned long);
num_longs != 0; num_longs--, p += sizeof(unsigned long))
- asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+ asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
- for (len %= sizeof(unsigned long); len; len--, p++)
- asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+ if (sizeof(unsigned long) > 4 && (len & 4)) {
+ asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
+ p += 4;
+ }
+ if (len & 2) {
+ asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
+ p += 2;
+ }
+ if (len & 1)
+ asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
return crc;
}
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
@@ -97,8 +82,10 @@ static int __init crc32_x86_init(void)
{
if (boot_cpu_has(X86_FEATURE_XMM4_2))
static_branch_enable(&have_crc32);
- if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+ if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
static_branch_enable(&have_pclmulqdq);
+ INIT_CRC_PCLMUL(crc32_lsb);
+ }
return 0;
}
arch_initcall(crc32_x86_init);