Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu: "API: - Add speed testing on 1420-byte blocks for networking Algorithms: - Improve performance of chacha on ARM for network packets - Improve performance of aegis128 on ARM for network packets Drivers: - Add support for Keem Bay OCS AES/SM4 - Add support for QAT 4xxx devices - Enable crypto-engine retry mechanism in caam - Enable support for crypto engine on sdm845 in qce - Add HiSilicon PRNG driver support" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (161 commits) crypto: qat - add capability detection logic in qat_4xxx crypto: qat - add AES-XTS support for QAT GEN4 devices crypto: qat - add AES-CTR support for QAT GEN4 devices crypto: atmel-i2c - select CONFIG_BITREVERSE crypto: hisilicon/trng - replace atomic_add_return() crypto: keembay - Add support for Keem Bay OCS AES/SM4 dt-bindings: Add Keem Bay OCS AES bindings crypto: aegis128 - avoid spurious references crypto_aegis128_update_simd crypto: seed - remove trailing semicolon in macro definition crypto: x86/poly1305 - Use TEST %reg,%reg instead of CMP $0,%reg crypto: x86/sha512 - Use TEST %reg,%reg instead of CMP $0,%reg crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg crypto: cpt - Fix sparse warnings in cptpf hwrng: ks-sa - Add dependency on IOMEM and OF crypto: lib/blake2s - Move selftest prototype into header file crypto: arm/aes-ce - work around Cortex-A57/A72 silicon errata crypto: ecdh - avoid unaligned accesses in ecdh_set_secret() crypto: ccree - rework cache parameters handling crypto: cavium - Use dma_set_mask_and_coherent to simplify code crypto: marvell/octeontx - Use dma_set_mask_and_coherent to simplify code ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-14 12:18:19 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-14 12:18:19 -0800
commit: 9e4b0d55d84a66dbfede56890501dc96e696059c (patch)
tree: db60e36510c170109f0fe28003d6959cd4264c72 /arch
parent: 51895d58c7c0c65afac21570cc14a7189942959a (diff)
parent: 93cebeb1c21a65b92636aaa278a32fbc0415ec67 (diff)
58 files changed, 326 insertions, 257 deletions
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 4d1707388d94..312428d83eed 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt)
 .Lctrloop4x:
 	subs		r4, r4, #4
 	bmi		.Lctr1x
-	add		r6, r6, #1
+
+	/*
+	 * NOTE: the sequence below has been carefully tweaked to avoid
+	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
+	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
+	 * may produce an incorrect result if they take their input from a
+	 * register of which a single 32-bit lane has been updated the last
+	 * time it was modified. To work around this, the lanes of registers
+	 * q0-q3 below are not manipulated individually, and the different
+	 * counter values are prepared by successive manipulations of q7.
+	 */
+	add		ip, r6, #1
 	vmov		q0, q7
+	rev		ip, ip
+	add		lr, r6, #2
+	vmov		s31, ip			@ set lane 3 of q1 via q7
+	add		ip, r6, #3
+	rev		lr, lr
 	vmov		q1, q7
-	rev		ip, r6
-	add		r6, r6, #1
+	vmov		s31, lr			@ set lane 3 of q2 via q7
+	rev		ip, ip
 	vmov		q2, q7
-	vmov		s7, ip
-	rev		ip, r6
-	add		r6, r6, #1
+	vmov		s31, ip			@ set lane 3 of q3 via q7
+	add		r6, r6, #4
 	vmov		q3, q7
-	vmov		s11, ip
-	rev		ip, r6
-	add		r6, r6, #1
-	vmov		s15, ip
+
 	vld1.8		{q4-q5}, [r1]!
 	vld1.8		{q6}, [r1]!
 	vld1.8		{q15}, [r1]!
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index bda8bf17631e..f70af1d0514b 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
 MODULE_ALIAS_CRYPTO("ecb(aes)");
-MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)-all");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
 
@@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm)
 	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	unsigned int reqsize;
 
-	ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+	ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->enc_tfm))
 		return PTR_ERR(ctx->enc_tfm);
 
@@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_blocksize	= AES_BLOCK_SIZE,
 	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
 	.base.cra_module	= THIS_MODULE,
-	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL |
+				  CRYPTO_ALG_NEED_FALLBACK,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
 	.max_keysize		= AES_MAX_KEY_SIZE,
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 59da6c0b63b6..7b5cf8430c6d 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -23,7 +23,7 @@
 asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
 				      int nrounds);
 asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				       int nrounds);
+				       int nrounds, unsigned int nbytes);
 asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
 asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
 
@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 {
 	u8 buf[CHACHA_BLOCK_SIZE];
 
-	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
-		chacha_4block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE * 4;
-		src += CHACHA_BLOCK_SIZE * 4;
-		dst += CHACHA_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha_block_xor_neon(state, dst, src, nrounds);
-		bytes -= CHACHA_BLOCK_SIZE;
-		src += CHACHA_BLOCK_SIZE;
-		dst += CHACHA_BLOCK_SIZE;
-		state[12]++;
+	while (bytes > CHACHA_BLOCK_SIZE) {
+		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
+
+		chacha_4block_xor_neon(state, dst, src, nrounds, l);
+		bytes -= l;
+		src += l;
+		dst += l;
+		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
 	}
 	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha_block_xor_neon(state, buf, buf, nrounds);
-		memcpy(dst, buf, bytes);
+		const u8 *s = src;
+		u8 *d = dst;
+
+		if (bytes != CHACHA_BLOCK_SIZE)
+			s = d = memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, d, s, nrounds);
+		if (d != dst)
+			memcpy(dst, buf, bytes);
 	}
 }
 
diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S
index eb22926d4912..13d12f672656 100644
--- a/arch/arm/crypto/chacha-neon-core.S
+++ b/arch/arm/crypto/chacha-neon-core.S
@@ -47,6 +47,7 @@
   */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
 	.text
 	.fpu		neon
@@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)
 
 	.align		5
 ENTRY(chacha_4block_xor_neon)
-	push		{r4-r5}
+	push		{r4, lr}
 	mov		r4, sp			// preserve the stack pointer
 	sub		ip, sp, #0x20		// allocate a 32 byte buffer
 	bic		ip, ip, #0x1f		// aligned to 32 bytes
@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
 	vld1.32		{q0-q1}, [r0]
 	vld1.32		{q2-q3}, [ip]
 
-	adr		r5, .Lctrinc
+	adr		lr, .Lctrinc
 	vdup.32		q15, d7[1]
 	vdup.32		q14, d7[0]
-	vld1.32		{q4}, [r5, :128]
+	vld1.32		{q4}, [lr, :128]
 	vdup.32		q13, d6[1]
 	vdup.32		q12, d6[0]
 	vdup.32		q11, d5[1]
@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)
 
 	// Re-interleave the words in the first two rows of each block (x0..7).
 	// Also add the counter values 0-3 to x12[0-3].
-	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
+	  vld1.32	{q8}, [lr, :128]	// load counter values 0-3
 	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
 	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
 	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)
 
 	// Re-interleave the words in the last two rows of each block (x8..15).
 	vld1.32		{q8-q9}, [sp, :256]
+	  mov		sp, r4		// restore original stack pointer
+	  ldr		r4, [r4, #8]	// load number of bytes
 	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
 	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
 	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
 	// XOR the rest of the data with the keystream
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #96
 	veor		q0, q0, q8
 	veor		q1, q1, q12
+	ble		.Lle96
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #32
 	veor		q0, q0, q2
 	veor		q1, q1, q6
+	ble		.Lle128
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #32
 	veor		q0, q0, q10
 	veor		q1, q1, q14
+	ble		.Lle160
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #32
 	veor		q0, q0, q4
 	veor		q1, q1, q5
+	ble		.Lle192
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #32
 	veor		q0, q0, q9
 	veor		q1, q1, q13
+	ble		.Lle224
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]!
+	subs		r4, r4, #32
 	veor		q0, q0, q3
 	veor		q1, q1, q7
+	blt		.Llt256
+.Lout:
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]
-	  mov		sp, r4		// restore original stack pointer
 	veor		q0, q0, q11
 	veor		q1, q1, q15
 	vst1.8		{q0-q1}, [r1]
 
-	pop		{r4-r5}
-	bx		lr
+	pop		{r4, pc}
+
+.Lle192:
+	vmov		q4, q9
+	vmov		q5, q13
+
+.Lle160:
+	// nothing to do
+
+.Lfinalblock:
+	// Process the final block if processing less than 4 full blocks.
+	// Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
+	// previous 32 byte output block that still needs to be written at
+	// [r1] in q0-q1.
+	beq		.Lfullblock
+
+.Lpartialblock:
+	adr		lr, .Lpermute + 32
+	add		r2, r2, r4
+	add		lr, lr, r4
+	add		r4, r4, r1
+
+	vld1.8		{q2-q3}, [lr]
+	vld1.8		{q6-q7}, [r2]
+
+	add		r4, r4, #32
+
+	vtbl.8		d4, {q4-q5}, d4
+	vtbl.8		d5, {q4-q5}, d5
+	vtbl.8		d6, {q4-q5}, d6
+	vtbl.8		d7, {q4-q5}, d7
+
+	veor		q6, q6, q2
+	veor		q7, q7, q3
+
+	vst1.8		{q6-q7}, [r4]	// overlapping stores
+	vst1.8		{q0-q1}, [r1]
+	pop		{r4, pc}
+
+.Lfullblock:
+	vmov		q11, q4
+	vmov		q15, q5
+	b		.Lout
+.Lle96:
+	vmov		q4, q2
+	vmov		q5, q6
+	b		.Lfinalblock
+.Lle128:
+	vmov		q4, q10
+	vmov		q5, q14
+	b		.Lfinalblock
+.Lle224:
+	vmov		q4, q3
+	vmov		q5, q7
+	b		.Lfinalblock
+.Llt256:
+	vmov		q4, q11
+	vmov		q5, q15
+	b		.Lpartialblock
 ENDPROC(chacha_4block_xor_neon)
+
+	.align		L1_CACHE_SHIFT
+.Lpermute:
+	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+	.byte		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+	.byte		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index e79b1fb4b4dc..de9100c67b37 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -7,7 +7,7 @@
 
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 758db3e9ff0a..b1b7e21da2c3 100644
--- a/arch/arm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -3,7 +3,7 @@
 #define ASM_ARM_CRYPTO_SHA1_H
 
 #include <linux/crypto.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 
 extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
 			   unsigned int len);
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 4e954b3f7ecd..6c2b849e459d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <asm/byteorder.h>
 
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0071e5e4411a..cfe36ae0f3f5 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 87f0b62386c6..c62ce89dd3e0 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -7,7 +7,7 @@
 
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index b8a4f79020cf..433ee4ddce6c 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -17,7 +17,7 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <asm/simd.h>
 #include <asm/neon.h>
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 79820b9e2541..701706262ef3 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -13,7 +13,7 @@
 #include <crypto/internal/simd.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <asm/byteorder.h>
 #include <asm/simd.h>
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 8775aa42bbbe..0635a65aa488 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -6,7 +6,7 @@
  */
 
 #include <crypto/internal/hash.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index 96cb94403540..c879ad32db51 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -7,7 +7,7 @@
 
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 5cfe3cf6f2ac..5e7d86cf5dfa 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1082,6 +1082,7 @@ CONFIG_CRYPTO_DEV_CCREE=m
 CONFIG_CRYPTO_DEV_HISI_SEC2=m
 CONFIG_CRYPTO_DEV_HISI_ZIP=m
 CONFIG_CRYPTO_DEV_HISI_HPRE=m
+CONFIG_CRYPTO_DEV_HISI_TRNG=m
 CONFIG_CMA_SIZE_MBYTES=32
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 395bbf64b2ab..34b8a89197be 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -10,7 +10,7 @@
 #include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/ctr.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index e90386a7db8e..b70ac76f2610 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -195,7 +195,6 @@ SYM_FUNC_START(chacha_4block_xor_neon)
 	adr_l		x10, .Lpermute
 	and		x5, x4, #63
 	add		x10, x10, x5
-	add		x11, x10, #64
 
 	//
 	// This function encrypts four consecutive ChaCha blocks by loading
@@ -645,11 +644,11 @@ CPU_BE(	  rev		a15, a15	)
 	zip2		v31.4s, v14.4s, v15.4s
 	  eor		a15, a15, w9
 
-	mov		x3, #64
+	add		x3, x2, x4
+	sub		x3, x3, #128		// start of last block
+
 	subs		x5, x4, #128
-	add		x6, x5, x2
-	csel		x3, x3, xzr, ge
-	csel		x2, x2, x6, ge
+	csel		x2, x2, x3, ge
 
 	// interleave 64-bit words in state n, n+2
 	zip1		v0.2d, v16.2d, v18.2d
@@ -658,13 +657,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v8.2d, v17.2d, v19.2d
 	zip2		v12.2d, v17.2d, v19.2d
 	  stp		a2, a3, [x1, #-56]
-	ld1		{v16.16b-v19.16b}, [x2], x3
 
 	subs		x6, x4, #192
-	ccmp		x3, xzr, #4, lt
-	add		x7, x6, x2
-	csel		x3, x3, xzr, eq
-	csel		x2, x2, x7, eq
+	ld1		{v16.16b-v19.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v1.2d, v20.2d, v22.2d
 	zip2		v5.2d, v20.2d, v22.2d
@@ -672,13 +668,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v9.2d, v21.2d, v23.2d
 	zip2		v13.2d, v21.2d, v23.2d
 	  stp		a6, a7, [x1, #-40]
-	ld1		{v20.16b-v23.16b}, [x2], x3
 
 	subs		x7, x4, #256
-	ccmp		x3, xzr, #4, lt
-	add		x8, x7, x2
-	csel		x3, x3, xzr, eq
-	csel		x2, x2, x8, eq
+	ld1		{v20.16b-v23.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v2.2d, v24.2d, v26.2d
 	zip2		v6.2d, v24.2d, v26.2d
@@ -686,12 +679,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v10.2d, v25.2d, v27.2d
 	zip2		v14.2d, v25.2d, v27.2d
 	  stp		a10, a11, [x1, #-24]
-	ld1		{v24.16b-v27.16b}, [x2], x3
 
 	subs		x8, x4, #320
-	ccmp		x3, xzr, #4, lt
-	add		x9, x8, x2
-	csel		x2, x2, x9, eq
+	ld1		{v24.16b-v27.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v3.2d, v28.2d, v30.2d
 	zip2		v7.2d, v28.2d, v30.2d
@@ -699,151 +690,105 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v11.2d, v29.2d, v31.2d
 	zip2		v15.2d, v29.2d, v31.2d
 	  stp		a14, a15, [x1, #-8]
+
+	tbnz		x5, #63, .Lt128
 	ld1		{v28.16b-v31.16b}, [x2]
 
 	// xor with corresponding input, write to output
-	tbnz		x5, #63, 0f
 	eor		v16.16b, v16.16b, v0.16b
 	eor		v17.16b, v17.16b, v1.16b
 	eor		v18.16b, v18.16b, v2.16b
 	eor		v19.16b, v19.16b, v3.16b
-	st1		{v16.16b-v19.16b}, [x1], #64
-	cbz		x5, .Lout
 
-	tbnz		x6, #63, 1f
+	tbnz		x6, #63, .Lt192
+
 	eor		v20.16b, v20.16b, v4.16b
 	eor		v21.16b, v21.16b, v5.16b
 	eor		v22.16b, v22.16b, v6.16b
 	eor		v23.16b, v23.16b, v7.16b
-	st1		{v20.16b-v23.16b}, [x1], #64
-	cbz		x6, .Lout
 
-	tbnz		x7, #63, 2f
+	st1		{v16.16b-v19.16b}, [x1], #64
+	tbnz		x7, #63, .Lt256
+
 	eor		v24.16b, v24.16b, v8.16b
 	eor		v25.16b, v25.16b, v9.16b
 	eor		v26.16b, v26.16b, v10.16b
 	eor		v27.16b, v27.16b, v11.16b
-	st1		{v24.16b-v27.16b}, [x1], #64
-	cbz		x7, .Lout
 
-	tbnz		x8, #63, 3f
+	st1		{v20.16b-v23.16b}, [x1], #64
+	tbnz		x8, #63, .Lt320
+
 	eor		v28.16b, v28.16b, v12.16b
 	eor		v29.16b, v29.16b, v13.16b
 	eor		v30.16b, v30.16b, v14.16b
 	eor		v31.16b, v31.16b, v15.16b
+
+	st1		{v24.16b-v27.16b}, [x1], #64
 	st1		{v28.16b-v31.16b}, [x1]
 
 .Lout:	frame_pop
 	ret
 
-	// fewer than 128 bytes of in/output
-0:	ld1		{v8.16b}, [x10]
-	ld1		{v9.16b}, [x11]
-	movi		v10.16b, #16
-	sub		x2, x1, #64
-	add		x1, x1, x5
-	ld1		{v16.16b-v19.16b}, [x2]
-	tbl		v4.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v20.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v5.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v21.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v6.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v22.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v7.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v23.16b, {v16.16b-v19.16b}, v9.16b
-
-	eor		v20.16b, v20.16b, v4.16b
-	eor		v21.16b, v21.16b, v5.16b
-	eor		v22.16b, v22.16b, v6.16b
-	eor		v23.16b, v23.16b, v7.16b
-	st1		{v20.16b-v23.16b}, [x1]
-	b		.Lout
-
 	// fewer than 192 bytes of in/output
-1:	ld1		{v8.16b}, [x10]
-	ld1		{v9.16b}, [x11]
-	movi		v10.16b, #16
-	add		x1, x1, x6
-	tbl		v0.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v20.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v1.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v21.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v2.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v22.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v3.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v23.16b, {v16.16b-v19.16b}, v9.16b
-
-	eor		v20.16b, v20.16b, v0.16b
-	eor		v21.16b, v21.16b, v1.16b
-	eor		v22.16b, v22.16b, v2.16b
-	eor		v23.16b, v23.16b, v3.16b
-	st1		{v20.16b-v23.16b}, [x1]
+.Lt192:	cbz		x5, 1f				// exactly 128 bytes?
+	ld1		{v28.16b-v31.16b}, [x10]
+	add		x5, x5, x1
+	tbl		v28.16b, {v4.16b-v7.16b}, v28.16b
+	tbl		v29.16b, {v4.16b-v7.16b}, v29.16b
+	tbl		v30.16b, {v4.16b-v7.16b}, v30.16b
+	tbl		v31.16b, {v4.16b-v7.16b}, v31.16b
+
+0:	eor		v20.16b, v20.16b, v28.16b
+	eor		v21.16b, v21.16b, v29.16b
+	eor		v22.16b, v22.16b, v30.16b
+	eor		v23.16b, v23.16b, v31.16b
+	st1		{v20.16b-v23.16b}, [x5]		// overlapping stores
+1:	st1		{v16.16b-v19.16b}, [x1]
 	b		.Lout
 
+	// fewer than 128 bytes of in/output
+.Lt128:	ld1		{v28.16b-v31.16b}, [x10]
+	add		x5, x5, x1
+	sub		x1, x1, #64
+	tbl		v28.16b, {v0.16b-v3.16b}, v28.16b
+	tbl		v29.16b, {v0.16b-v3.16b}, v29.16b
+	tbl		v30.16b, {v0.16b-v3.16b}, v30.16b
+	tbl		v31.16b, {v0.16b-v3.16b}, v31.16b
+	ld1		{v16.16b-v19.16b}, [x1]		// reload first output block
+	b		0b
+
 	// fewer than 256 bytes of in/output
-2:	ld1		{v4.16b}, [x10]
-	ld1		{v5.16b}, [x11]
-	movi		v6.16b, #16
-	add		x1, x1, x7
+.Lt256:	cbz		x6, 2f				// exactly 192 bytes?
+	ld1		{v4.16b-v7.16b}, [x10]
+	add		x6, x6, x1
 	tbl		v0.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v24.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v1.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v25.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v2.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v26.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v3.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v27.16b, {v20.16b-v23.16b}, v5.16b
-
-	eor		v24.16b, v24.16b, v0.16b
-	eor		v25.16b, v25.16b, v1.16b
-	eor		v26.16b, v26.16b, v2.16b
-	eor		v27.16b, v27.16b, v3.16b
-	st1		{v24.16b-v27.16b}, [x1]
+	tbl		v1.16b, {v8.16b-v11.16b}, v5.16b
+	tbl		v2.16b, {v8.16b-v11.16b}, v6.16b
+	tbl		v3.16b, {v8.16b-v11.16b}, v7.16b
+
+	eor		v28.16b, v28.16b, v0.16b
+	eor		v29.16b, v29.16b, v1.16b
+	eor		v30.16b, v30.16b, v2.16b
+	eor		v31.16b, v31.16b, v3.16b
+	st1		{v28.16b-v31.16b}, [x6]		// overlapping stores
+2:	st1		{v20.16b-v23.16b}, [x1]
 	b		.Lout
 
 	// fewer than 320 bytes of in/output
-3:	ld1		{v4.16b}, [x10]
-	ld1		{v5.16b}, [x11]
-	movi		v6.16b, #16
-	add		x1, x1, x8
+.Lt320:	cbz		x7, 3f				// exactly 256 bytes?
+	ld1		{v4.16b-v7.16b}, [x10]
+	add		x7, x7, x1
 	tbl		v0.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v28.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v1.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v29.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v2.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v30.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v3.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v31.16b, {v24.16b-v27.16b}, v5.16b
+	tbl		v1.16b, {v12.16b-v15.16b}, v5.16b
+	tbl		v2.16b, {v12.16b-v15.16b}, v6.16b
+	tbl		v3.16b, {v12.16b-v15.16b}, v7.16b
 
 	eor		v28.16b, v28.16b, v0.16b
 	eor		v29.16b, v29.16b, v1.16b
 	eor		v30.16b, v30.16b, v2.16b
 	eor		v31.16b, v31.16b, v3.16b
-	st1		{v28.16b-v31.16b}, [x1]
+	st1		{v28.16b-v31.16b}, [x7]		// overlapping stores
+3:	st1		{v24.16b-v27.16b}, [x1]
 	b		.Lout
 SYM_FUNC_END(chacha_4block_xor_neon)
 
@@ -851,7 +796,7 @@ SYM_FUNC_END(chacha_4block_xor_neon)
 	.align		L1_CACHE_SHIFT
 .Lpermute:
 	.set		.Li, 0
-	.rept		192
+	.rept		128
 	.byte		(.Li - 64)
 	.set		.Li, .Li + 1
 	.endr
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 6b958dcdf136..7868330dd54e 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -544,7 +544,22 @@ CPU_LE(	rev		w8, w8		)
 	ext		XL.16b, XL.16b, XL.16b, #8
 	rev64		XL.16b, XL.16b
 	eor		XL.16b, XL.16b, KS0.16b
+
+	.if		\enc == 1
 	st1		{XL.16b}, [x10]			// store tag
+	.else
+	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
+	adr_l		x17, .Lpermute_table
+	ld1		{KS0.16b}, [x11]		// load supplied tag
+	add		x17, x17, x12
+	ld1		{KS1.16b}, [x17]		// load permute vector
+
+	cmeq		XL.16b, XL.16b, KS0.16b		// compare tags
+	mvn		XL.16b, XL.16b			// -1 for fail, 0 for pass
+	tbl		XL.16b, {XL.16b}, KS1.16b	// keep authsize bytes only
+	sminv		b0, XL.16b			// signed minimum across XL
+	smov		w0, v0.b[0]			// return b0
+	.endif
 
 4:	ldp		x29, x30, [sp], #32
 	ret
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 8536008e3e35..720cd3a58da3 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
 				  u64 const h[][2], u64 dg[], u8 ctr[],
 				  u32 const rk[], int rounds, u8 tag[]);
-
-asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-				  u64 const h[][2], u64 dg[], u8 ctr[],
-				  u32 const rk[], int rounds, u8 tag[]);
+asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+				 u64 const h[][2], u64 dg[], u8 ctr[],
+				 u32 const rk[], int rounds, const u8 l[],
+				 const u8 tag[], u64 authsize);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -168,7 +168,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
 
-	*ctx = (struct ghash_desc_ctx){};
+	memzero_explicit(ctx, sizeof(*ctx));
 	return 0;
 }
 
@@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req)
 	unsigned int authsize = crypto_aead_authsize(aead);
 	int nrounds = num_rounds(&ctx->aes_key);
 	struct skcipher_walk walk;
+	u8 otag[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u8 iv[AES_BLOCK_SIZE];
 	u64 dg[2] = {};
@@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req)
 	memcpy(iv, req->iv, GCM_IV_SIZE);
 	put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
+	scatterwalk_map_and_copy(otag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
 	err = skcipher_walk_aead_decrypt(&walk, req, false);
 
 	if (likely(crypto_simd_usable())) {
+		int ret;
+
 		do {
 			const u8 *src = walk.src.virt.addr;
 			u8 *dst = walk.dst.virt.addr;
@@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req)
 			}
 
 			kernel_neon_begin();
-			pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
-					  dg, iv, ctx->aes_key.key_enc, nrounds,
-					  tag);
+			ret = pmull_gcm_decrypt(nbytes, dst, src,
+						ctx->ghash_key.h,
+						dg, iv, ctx->aes_key.key_enc,
+						nrounds, tag, otag, authsize);
 			kernel_neon_end();
 
 			if (unlikely(!nbytes))
@@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req)
 
 			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 		} while (walk.nbytes);
+
+		if (err)
+			return err;
+		if (ret)
+			return -EBADMSG;
 	} else {
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req)
 			err = skcipher_walk_done(&walk, 0);
 		}
 
+		if (err)
+			return err;
+
 		put_unaligned_be64(dg[1], tag);
 		put_unaligned_be64(dg[0], tag + 8);
 		put_unaligned_be32(1, iv + GCM_IV_SIZE);
 		aes_encrypt(&ctx->aes_key, iv, iv);
 		crypto_xor(tag, iv, AES_BLOCK_SIZE);
-	}
-
-	if (err)
-		return err;
 
-	/* compare calculated auth tag with the stored one */
-	scatterwalk_map_and_copy(buf, req->src,
-				 req->assoclen + req->cryptlen - authsize,
-				 authsize, 0);
-
-	if (crypto_memneq(tag, buf, authsize))
-		return -EBADMSG;
+		if (crypto_memneq(tag, otag, authsize)) {
+			memzero_explicit(tag, AES_BLOCK_SIZE);
+			return -EBADMSG;
+		}
+	}
 	return 0;
 }
 
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
index 6e5576d19af8..cbc980fb02e3 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -840,7 +840,6 @@ poly1305_blocks_neon:
 	 ldp	d14,d15,[sp,#64]
 	addp	$ACC2,$ACC2,$ACC2
 	 ldr	x30,[sp,#8]
-	 .inst	0xd50323bf		// autiasp
 
 	////////////////////////////////////////////////////////////////
 	// lazy reduction, but without narrowing
@@ -882,6 +881,7 @@ poly1305_blocks_neon:
 	str	x4,[$ctx,#8]		// set is_base2_26
 
 	ldr	x29,[sp],#80
+	 .inst	0xd50323bf		// autiasp
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
 
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
index 8d1c4e420ccd..fb2822abf63a 100644
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -779,7 +779,6 @@ poly1305_blocks_neon:
 	 ldp	d14,d15,[sp,#64]
 	addp	v21.2d,v21.2d,v21.2d
 	 ldr	x30,[sp,#8]
-	 .inst	0xd50323bf		// autiasp
 
 	////////////////////////////////////////////////////////////////
 	// lazy reduction, but without narrowing
@@ -821,6 +820,7 @@ poly1305_blocks_neon:
 	str	x4,[x0,#8]		// set is_base2_26
 
 	ldr	x29,[sp],#80
+	 .inst	0xd50323bf		// autiasp
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
 
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index f33ada70c4ed..683de671741a 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -177,7 +177,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
 	}
 
 	poly1305_emit(&dctx->h, dst, dctx->s);
-	*dctx = (struct poly1305_desc_ctx){};
+	memzero_explicit(dctx, sizeof(*dctx));
 }
 EXPORT_SYMBOL(poly1305_final_arch);
 
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index c63b99211db3..c93121bcfdeb 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -10,7 +10,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 5e956d7582a5..31ba3da5e61b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -10,7 +10,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index 77bc6e72abae..9462f6088b3f 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -10,7 +10,7 @@
 #include <asm/simd.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 9a4bbfc45f40..e5a2936f0886 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -94,7 +94,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out)
 	if (digest_size & 4)
 		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
 
-	*sctx = (struct sha3_state){};
+	memzero_explicit(sctx, sizeof(*sctx));
 	return 0;
 }
 
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index dc890a719f54..faa83f6cf376 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -14,7 +14,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index 370ccb29602f..2acff1c7df5d 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -8,7 +8,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <asm/neon.h>
 
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
index 7315cc307397..cb68f9e284bb 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -41,7 +41,7 @@ do {							\
  */
 #define read_octeon_64bit_hash_dword(index)		\
 ({							\
-	u64 __value;					\
+	__be64 __value;					\
 							\
 	__asm__ __volatile__ (				\
 	"dmfc2 %[rt],0x0048+" STR(index)		\
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index 8c8ea139653e..5ee4ade99b99 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -68,10 +68,11 @@ static int octeon_md5_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(MD5_H0);
-	mctx->hash[1] = cpu_to_le32(MD5_H1);
-	mctx->hash[2] = cpu_to_le32(MD5_H2);
-	mctx->hash[3] = cpu_to_le32(MD5_H3);
+	mctx->hash[0] = MD5_H0;
+	mctx->hash[1] = MD5_H1;
+	mctx->hash[2] = MD5_H2;
+	mctx->hash[3] = MD5_H3;
+	cpu_to_le32_array(mctx->hash, 4);
 	mctx->byte_count = 0;
 
 	return 0;
@@ -139,8 +140,9 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
 	}
 
 	memset(p, 0, padding);
-	mctx->block[14] = cpu_to_le32(mctx->byte_count << 3);
-	mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29);
+	mctx->block[14] = mctx->byte_count << 3;
+	mctx->block[15] = mctx->byte_count >> 29;
+	cpu_to_le32_array(mctx->block + 14, 2);
 	octeon_md5_transform(mctx->block);
 
 	octeon_md5_read_hash(mctx);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
index 75e79b47abfe..30f1d75208a5 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/mm.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/module.h>
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
index a682ce76716a..36cb92895d72 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -15,7 +15,7 @@
  */
 
 #include <linux/mm.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/module.h>
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
index 50722a0cfb53..359f039820d8 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/mm.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/module.h>
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index cb57be4ada61..b1e577cbf00c 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <asm/byteorder.h>
 #include <asm/switch_to.h>
 #include <linux/hardirq.h>
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
index b40dc50a6908..7a55d790cdb1 100644
--- a/arch/powerpc/crypto/sha1.c
+++ b/arch/powerpc/crypto/sha1.c
@@ -17,7 +17,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <asm/byteorder.h>
 
 void powerpc_sha_transform(u32 *state, const u8 *src);
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
index ceb0b6c980b3..a6e650a97d8f 100644
--- a/arch/powerpc/crypto/sha256-spe-glue.c
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <asm/byteorder.h>
 #include <asm/switch_to.h>
 #include <linux/hardirq.h>
@@ -177,7 +177,7 @@ static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
 
 static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
 {
-	u32 D[SHA256_DIGEST_SIZE >> 2];
+	__be32 D[SHA256_DIGEST_SIZE >> 2];
 	__be32 *dst = (__be32 *)out;
 
 	ppc_spe_sha256_final(desc, (u8 *)D);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index ada2f98c27b7..65ea12fc87a1 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -11,7 +11,8 @@
 #define _CRYPTO_ARCH_S390_SHA_H
 
 #include <linux/crypto.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
 #include <crypto/sha3.h>
 
 /* must be big enough for the largest SHA variant */
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 698b1e6d3c14..a3fabf310a38 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <asm/cpacf.h>
 
 #include "sha.h"
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index b52c87e44939..24983f175676 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <asm/cpacf.h>
 
 #include "sha.h"
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 460cbbbaa44a..30ac49b635bf 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <crypto/sha.h>
 #include <crypto/sha3.h>
 #include <asm/cpacf.h>
 
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index 72cf460a53e5..e70d50f7620f 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -11,7 +11,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <crypto/sha.h>
 #include <crypto/sha3.h>
 #include <asm/cpacf.h>
 
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index ad29db085a18..29a6bd404c59 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -8,7 +8,7 @@
  * Author(s): Jan Glauber (jang@de.ibm.com)
  */
 #include <crypto/internal/hash.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 0a423bcf6746..030efda05dbe 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -9,7 +9,7 @@
 
 #include <linux/kexec.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <asm/purgatory.h>
 
 int verify_sha256_digest(void)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index 4e9323229e71..82efb7f81c28 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -35,7 +35,7 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
 
 	if (keylen != sizeof(u32))
 		return -EINVAL;
-	*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
+	*mctx = le32_to_cpup((__le32 *)key);
 	return 0;
 }
 
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index 111283fe837e..511db98d590a 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -33,10 +33,11 @@ static int md5_sparc64_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(MD5_H0);
-	mctx->hash[1] = cpu_to_le32(MD5_H1);
-	mctx->hash[2] = cpu_to_le32(MD5_H2);
-	mctx->hash[3] = cpu_to_le32(MD5_H3);
+	mctx->hash[0] = MD5_H0;
+	mctx->hash[1] = MD5_H1;
+	mctx->hash[2] = MD5_H2;
+	mctx->hash[3] = MD5_H3;
+	le32_to_cpu_array(mctx->hash, 4);
 	mctx->byte_count = 0;
 
 	return 0;
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
index dc017782be52..86a654cce5ab 100644
--- a/arch/sparc/crypto/sha1_glue.c
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 
 #include <asm/pstate.h>
 #include <asm/elf.h>
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
index ca2547df9652..60ec524cf9ca 100644
--- a/arch/sparc/crypto/sha256_glue.c
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 
 #include <asm/pstate.h>
 #include <asm/elf.h>
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
index 3b2ca732ff7a..273ce21918c1 100644
--- a/arch/sparc/crypto/sha512_glue.c
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -15,7 +15,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 
 #include <asm/pstate.h>
 #include <asm/elf.h>
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
deleted file mode 100644
index 7b7dc05fa1a4..000000000000
--- a/arch/x86/crypto/aes_glue.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 1852b19a73a0..d1436c37008b 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -318,7 +318,7 @@ _initial_blocks_\@:
 
 	# Main loop - Encrypt/Decrypt remaining blocks
 
-	cmp	$0, %r13
+	test	%r13, %r13
 	je	_zero_cipher_left_\@
 	sub	$64, %r13
 	je	_four_cipher_left_\@
@@ -437,7 +437,7 @@ _multiple_of_16_bytes_\@:
 
 	mov PBlockLen(%arg2), %r12
 
-	cmp $0, %r12
+	test %r12, %r12
 	je _partial_done\@
 
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
@@ -474,7 +474,7 @@ _T_8_\@:
 	add	$8, %r10
 	sub	$8, %r11
 	psrldq	$8, %xmm0
-	cmp	$0, %r11
+	test	%r11, %r11
 	je	_return_T_done_\@
 _T_4_\@:
 	movd	%xmm0, %eax
@@ -482,7 +482,7 @@ _T_4_\@:
 	add	$4, %r10
 	sub	$4, %r11
 	psrldq	$4, %xmm0
-	cmp	$0, %r11
+	test	%r11, %r11
 	je	_return_T_done_\@
 _T_123_\@:
 	movd	%xmm0, %eax
@@ -619,7 +619,7 @@ _get_AAD_blocks\@:
 
 	/* read the last <16B of AAD */
 _get_AAD_rest\@:
-	cmp	   $0, %r11
+	test	   %r11, %r11
 	je	   _get_AAD_done\@
 
 	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
@@ -640,7 +640,7 @@ _get_AAD_done\@:
 .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
 	AAD_HASH operation
 	mov 	PBlockLen(%arg2), %r13
-	cmp	$0, %r13
+	test	%r13, %r13
 	je	_partial_block_done_\@	# Leave Macro if no partial blocks
 	# Read in input data without over reading
 	cmp	$16, \PLAIN_CYPH_LEN
@@ -692,7 +692,7 @@ _no_extra_mask_1_\@:
 	pshufb	%xmm2, %xmm3
 	pxor	%xmm3, \AAD_HASH
 
-	cmp	$0, %r10
+	test	%r10, %r10
 	jl	_partial_incomplete_1_\@
 
 	# GHASH computation for the last <16 Byte block
@@ -727,7 +727,7 @@ _no_extra_mask_2_\@:
 	pshufb	%xmm2, %xmm9
 	pxor	%xmm9, \AAD_HASH
 
-	cmp	$0, %r10
+	test	%r10, %r10
 	jl	_partial_incomplete_2_\@
 
 	# GHASH computation for the last <16 Byte block
@@ -747,7 +747,7 @@ _encode_done_\@:
 	pshufb	%xmm2, %xmm9
 .endif
 	# output encrypted Bytes
-	cmp	$0, %r10
+	test	%r10, %r10
 	jl	_partial_fill_\@
 	mov	%r13, %r12
 	mov	$16, %r13
@@ -2720,7 +2720,7 @@ SYM_FUNC_END(aesni_ctr_enc)
  */
 SYM_FUNC_START(aesni_xts_crypt8)
 	FRAME_BEGIN
-	cmpb $0, %cl
+	testb %cl, %cl
 	movl $0, %ecx
 	movl $240, %r10d
 	leaq _aesni_enc4, %r11
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 5fee47956f3b..2cf8e94d986a 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -369,7 +369,7 @@ _initial_num_blocks_is_0\@:
 
 
 _initial_blocks_encrypted\@:
-        cmp     $0, %r13
+        test    %r13, %r13
         je      _zero_cipher_left\@
 
         sub     $128, %r13
@@ -528,7 +528,7 @@ _multiple_of_16_bytes\@:
         vmovdqu HashKey(arg2), %xmm13
 
         mov PBlockLen(arg2), %r12
-        cmp $0, %r12
+        test %r12, %r12
         je _partial_done\@
 
 	#GHASH computation for the last <16 Byte block
@@ -573,7 +573,7 @@ _T_8\@:
         add     $8, %r10
         sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
-        cmp     $0, %r11
+        test    %r11, %r11
         je     _return_T_done\@
 _T_4\@:
         vmovd   %xmm9, %eax
@@ -581,7 +581,7 @@ _T_4\@:
         add     $4, %r10
         sub     $4, %r11
         vpsrldq     $4, %xmm9, %xmm9
-        cmp     $0, %r11
+        test    %r11, %r11
         je     _return_T_done\@
 _T_123\@:
         vmovd     %xmm9, %eax
@@ -625,7 +625,7 @@ _get_AAD_blocks\@:
 	cmp     $16, %r11
 	jge     _get_AAD_blocks\@
 	vmovdqu \T8, \T7
-	cmp     $0, %r11
+	test    %r11, %r11
 	je      _get_AAD_done\@
 
 	vpxor   \T7, \T7, \T7
@@ -644,7 +644,7 @@ _get_AAD_rest8\@:
 	vpxor   \T1, \T7, \T7
 	jmp     _get_AAD_rest8\@
 _get_AAD_rest4\@:
-	cmp     $0, %r11
+	test    %r11, %r11
 	jle      _get_AAD_rest0\@
 	mov     (%r10), %eax
 	movq    %rax, \T1
@@ -749,7 +749,7 @@ _done_read_partial_block_\@:
 .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
         AAD_HASH ENC_DEC
         mov 	PBlockLen(arg2), %r13
-        cmp	$0, %r13
+        test	%r13, %r13
         je	_partial_block_done_\@	# Leave Macro if no partial blocks
         # Read in input data without over reading
         cmp	$16, \PLAIN_CYPH_LEN
@@ -801,7 +801,7 @@ _no_extra_mask_1_\@:
         vpshufb	%xmm2, %xmm3, %xmm3
         vpxor	%xmm3, \AAD_HASH, \AAD_HASH
 
-        cmp	$0, %r10
+        test	%r10, %r10
         jl	_partial_incomplete_1_\@
 
         # GHASH computation for the last <16 Byte block
@@ -836,7 +836,7 @@ _no_extra_mask_2_\@:
         vpshufb %xmm2, %xmm9, %xmm9
         vpxor	%xmm9, \AAD_HASH, \AAD_HASH
 
-        cmp	$0, %r10
+        test	%r10, %r10
         jl	_partial_incomplete_2_\@
 
         # GHASH computation for the last <16 Byte block
@@ -856,7 +856,7 @@ _encode_done_\@:
         vpshufb	%xmm2, %xmm9, %xmm9
 .endif
         # output encrypted Bytes
-        cmp	$0, %r10
+        test	%r10, %r10
         jl	_partial_fill_\@
         mov	%r13, %r12
         mov	$16, %r13
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 7d568012cc15..71fae5a09e56 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -251,7 +251,7 @@ $code.=<<___;
 	mov	%rax,8($ctx)
 	mov	%rax,16($ctx)
 
-	cmp	\$0,$inp
+	test	$inp,$inp
 	je	.Lno_key
 ___
 $code.=<<___ if (!$kernel);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index c44aba290fbb..646da46e8d10 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -210,7 +210,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
 	}
 
 	poly1305_simd_emit(&dctx->h, dst, dctx->s);
-	*dctx = (struct poly1305_desc_ctx){};
+	memzero_explicit(dctx, sizeof(*dctx));
 }
 EXPORT_SYMBOL(poly1305_final_arch);
 
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 18200135603f..44340a1139e0 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <asm/simd.h>
 
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index dd06249229e1..3a5f6be7dbba 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -35,7 +35,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/string.h>
 #include <asm/simd.h>
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 63470fd6ae32..684d58c8bc4f 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -278,7 +278,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 # "blocks" is the message length in SHA512 blocks
 ########################################################################
 SYM_FUNC_START(sha512_transform_avx)
-	cmp $0, msglen
+	test msglen, msglen
 	je nowork
 
 	# Allocate Stack Space
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 7946a1bee85b..50812af0b083 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -280,7 +280,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 ########################################################################
 SYM_FUNC_START(sha512_transform_ssse3)
 
-	cmp $0, msglen
+	test msglen, msglen
 	je nowork
 
 	# Allocate Stack Space
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index b0b05c93409e..30e70f4fe2f7 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,7 +34,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <asm/simd.h>
 
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 7b37a412f829..f03b64d9cb51 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/bug.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <asm/purgatory.h>
 
 #include "../boot/string.h"
author	Linus Torvalds <torvalds@linux-foundation.org>	2020-12-14 12:18:19 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-12-14 12:18:19 -0800
commit	9e4b0d55d84a66dbfede56890501dc96e696059c (patch)
tree	db60e36510c170109f0fe28003d6959cd4264c72 /arch
parent	51895d58c7c0c65afac21570cc14a7189942959a (diff)
parent	93cebeb1c21a65b92636aaa278a32fbc0415ec67 (diff)