Diffstat (limited to 'drivers/crypto')
109 files changed, 25959 insertions, 2968 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4d2b81f2b223..473d31288ad8 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -62,19 +62,32 @@ config CRYPTO_DEV_GEODE
 	  will be called geode-aes.
 
 config ZCRYPT
-	tristate "Support for PCI-attached cryptographic adapters"
+	tristate "Support for s390 cryptographic adapters"
 	depends on S390
 	select HW_RANDOM
 	help
-	  Select this option if you want to use a PCI-attached cryptographic
-	  adapter like:
-	  + PCI Cryptographic Accelerator (PCICA)
-	  + PCI Cryptographic Coprocessor (PCICC)
+	  Select this option if you want to enable support for
+	  s390 cryptographic adapters like:
 	  + PCI-X Cryptographic Coprocessor (PCIXCC)
-	  + Crypto Express2 Coprocessor (CEX2C)
-	  + Crypto Express2 Accelerator (CEX2A)
-	  + Crypto Express3 Coprocessor (CEX3C)
-	  + Crypto Express3 Accelerator (CEX3A)
+	  + Crypto Express 2,3,4 or 5 Coprocessor (CEXxC)
+	  + Crypto Express 2,3,4 or 5 Accelerator (CEXxA)
+	  + Crypto Express 4 or 5 EP11 Coprocessor (CEXxP)
+
+config PKEY
+	tristate "Kernel API for protected key handling"
+	depends on S390
+	depends on ZCRYPT
+	help
+	  With this option enabled the pkey kernel module provides an API
+	  for creation and handling of protected keys. Other parts of the
+	  kernel or userspace applications may use these functions.
+
+	  Select this option if you want to enable the kernel and userspace
+	  API for protected key handling.
+
+	  Please note that creation of protected keys from secure keys
+	  requires at least one CEX card in coprocessor mode to be
+	  available at runtime.
 
 config CRYPTO_SHA1_S390
 	tristate "SHA1 digest algorithm"
@@ -124,6 +137,7 @@ config CRYPTO_AES_S390
 	depends on S390
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
+	select PKEY
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197).
@@ -339,7 +353,7 @@ config CRYPTO_DEV_OMAP_DES
 
 config CRYPTO_DEV_PICOXCELL
 	tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
-	depends on ARCH_PICOXCELL && HAVE_CLK
+	depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK
 	select CRYPTO_AEAD
 	select CRYPTO_AES
 	select CRYPTO_AUTHENC
@@ -415,10 +429,23 @@ config CRYPTO_DEV_BFIN_CRC
 	  Newer Blackfin processors have CRC hardware. Select this if you
 	  want to use the Blackfin CRC module.
 
+config CRYPTO_DEV_ATMEL_AUTHENC
+	tristate "Support for Atmel IPSEC/SSL hw accelerator"
+	depends on HAS_DMA
+	depends on ARCH_AT91 || COMPILE_TEST
+	select CRYPTO_AUTHENC
+	select CRYPTO_DEV_ATMEL_AES
+	select CRYPTO_DEV_ATMEL_SHA
+	help
+	  Some Atmel processors can combine the AES and SHA hw accelerators
+	  to enhance support of IPSEC/SSL.
+	  Select this if you want to use the Atmel modules for
+	  authenc(hmac(shaX),Y(cbc)) algorithms.
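[Aside, not part of the diff: the CRYPTO_DEV_ATMEL_AUTHENC entry above exposes AEADs such as "authenc(hmac(sha1),cbc(aes))" (registered later in this diff) through the regular kernel crypto API. A minimal userspace sketch like the one below can check whether such an algorithm is actually instantiable on a running kernel; it assumes AF_ALG support (CONFIG_CRYPTO_USER_API_AEAD) is enabled and is purely illustrative.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	/* Algorithm name taken from the aead_alg entries added in this diff. */
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",	/* use "skcipher" to probe xts(aes) instead */
		.salg_name   = "authenc(hmac(sha1),cbc(aes))",
	};
	int fd = socket(AF_ALG, SOCK_SEQPACKET, 0);

	if (fd < 0) {
		perror("socket(AF_ALG)");	/* CRYPTO_USER_API_AEAD not enabled? */
		return 1;
	}
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("bind");			/* no implementation could be instantiated */
		close(fd);
		return 1;
	}
	printf("%s is available\n", (const char *)sa.salg_name);
	close(fd);
	return 0;
}

Note that bind() only confirms that some implementation (hardware or a software fallback) can be instantiated; which one actually gets used is decided by the priorities visible in /proc/crypto.]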
+
 config CRYPTO_DEV_ATMEL_AES
 	tristate "Support for Atmel AES hw accelerator"
 	depends on HAS_DMA
-	depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
+	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_AES
 	select CRYPTO_AEAD
 	select CRYPTO_BLKCIPHER
@@ -432,7 +459,8 @@ config CRYPTO_DEV_ATMEL_AES
 
 config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
-	depends on ARCH_AT91
+	depends on HAS_DMA
+	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_DES
 	select CRYPTO_BLKCIPHER
 	help
@@ -445,7 +473,8 @@ config CRYPTO_DEV_ATMEL_TDES
 
 config CRYPTO_DEV_ATMEL_SHA
 	tristate "Support for Atmel SHA hw accelerator"
-	depends on ARCH_AT91
+	depends on HAS_DMA
+	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_HASH
 	help
 	  Some Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512
@@ -484,6 +513,7 @@ config CRYPTO_DEV_MXS_DCP
 	  will be called mxs-dcp.
 
 source "drivers/crypto/qat/Kconfig"
+source "drivers/crypto/cavium/cpt/Kconfig"
 
 config CRYPTO_DEV_QCE
 	tristate "Qualcomm crypto engine accelerator"
@@ -553,6 +583,40 @@ config CRYPTO_DEV_ROCKCHIP
 	  This driver interfaces with the hardware crypto accelerator.
 	  Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
 
+config CRYPTO_DEV_MEDIATEK
+	tristate "MediaTek's EIP97 Cryptographic Engine driver"
+	depends on HAS_DMA
+	depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
+	select CRYPTO_AES
+	select CRYPTO_AEAD
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_CTR
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_HMAC
+	help
+	  This driver allows you to utilize the hardware crypto accelerator
+	  EIP97 which can be found on the MT7623, MT2701, MT8521p, etc.
+	  Select this if you want to use it for AES/SHA1/SHA2 algorithms.
+
 source "drivers/crypto/chelsio/Kconfig"
+source "drivers/crypto/virtio/Kconfig"
+
+config CRYPTO_DEV_BCM_SPU
+	tristate "Broadcom symmetric crypto/hash acceleration support"
+	depends on ARCH_BCM_IPROC
+	depends on BCM_PDC_MBOX
+	default m
+	select CRYPTO_DES
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	help
+	  This driver provides support for Broadcom crypto acceleration using the
+	  Secure Processing Unit (SPU). The SPU driver registers ablkcipher,
+	  ahash, and aead algorithms with the kernel cryptographic API.
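[Aside, not part of the diff: the Makefile and driver changes that follow register concrete implementations (for example "atmel-xts-aes" and "atmel-authenc-hmac-sha1-cbc-aes", both added later in this diff) with the crypto API. A hypothetical helper for seeing which driver implementations ended up registered on a given system is to scan /proc/crypto for its "driver" lines; a sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/crypto", "r");
	char line[256];

	if (!f) {
		perror("/proc/crypto");
		return 1;
	}
	/* Every registered algorithm has a "driver       : <name>" line. */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "driver", 6))
			fputs(line, stdout);
	fclose(f);
	return 0;
}
]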
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index ad7250fa1348..739609471169 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ +obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o @@ -10,7 +12,9 @@ obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o +obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ @@ -21,14 +25,14 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ +obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ +obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ +obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o -obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o +obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ -obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ -obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ +obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ -obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ -obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ -obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ +obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index dae1e39139e9..d10b4ae5e0da 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size) ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4, &ctx->sa_out_dma_addr, GFP_ATOMIC); if (ctx->sa_out == NULL) { - dma_free_coherent(ctx->dev->core_dev->device, - ctx->sa_len * 4, + dma_free_coherent(ctx->dev->core_dev->device, size * 4, ctx->sa_in, ctx->sa_in_dma_addr); return -ENOMEM; } diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h index 6c2951bb70b1..7694679802b3 100644 --- a/drivers/crypto/atmel-aes-regs.h +++ b/drivers/crypto/atmel-aes-regs.h @@ -28,6 +28,7 @@ #define AES_MR_OPMOD_CFB (0x3 << 12) #define AES_MR_OPMOD_CTR (0x4 << 12) #define AES_MR_OPMOD_GCM (0x5 << 12) +#define AES_MR_OPMOD_XTS (0x6 << 12) #define AES_MR_LOD (0x1 << 15) #define AES_MR_CFBS_MASK (0x7 << 16) #define AES_MR_CFBS_128b (0x0 << 16) @@ -67,6 +68,25 @@ #define AES_CTRR 0x98 #define AES_GCMHR(x) (0x9c + ((x) * 0x04)) +#define AES_EMR 0xb0 +#define AES_EMR_APEN BIT(0) /* Auto Padding Enable */ +#define AES_EMR_APM BIT(1) /* Auto Padding Mode */ +#define AES_EMR_APM_IPSEC 0x0 +#define AES_EMR_APM_SSL BIT(1) +#define AES_EMR_PLIPEN BIT(4) /* PLIP Enable */ +#define AES_EMR_PLIPD BIT(5) /* PLIP Decipher */ +#define AES_EMR_PADLEN_MASK (0xFu << 8) +#define 
AES_EMR_PADLEN_OFFSET 8 +#define AES_EMR_PADLEN(padlen) (((padlen) << AES_EMR_PADLEN_OFFSET) &\ + AES_EMR_PADLEN_MASK) +#define AES_EMR_NHEAD_MASK (0xFu << 16) +#define AES_EMR_NHEAD_OFFSET 16 +#define AES_EMR_NHEAD(nhead) (((nhead) << AES_EMR_NHEAD_OFFSET) &\ + AES_EMR_NHEAD_MASK) + +#define AES_TWR(x) (0xc0 + ((x) * 0x04)) +#define AES_ALPHAR(x) (0xd0 + ((x) * 0x04)) + #define AES_HW_VERSION 0xFC #endif /* __ATMEL_AES_REGS_H__ */ diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index e3d40a8dfffb..29e20c37f3a6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -36,10 +36,12 @@ #include <crypto/scatterwalk.h> #include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/xts.h> #include <crypto/internal/aead.h> #include <linux/platform_data/crypto-atmel.h> #include <dt-bindings/dma/at91.h> #include "atmel-aes-regs.h" +#include "atmel-authenc.h" #define ATMEL_AES_PRIORITY 300 @@ -68,6 +70,7 @@ #define AES_FLAGS_CFB8 (AES_MR_OPMOD_CFB | AES_MR_CFBS_8b) #define AES_FLAGS_CTR AES_MR_OPMOD_CTR #define AES_FLAGS_GCM AES_MR_OPMOD_GCM +#define AES_FLAGS_XTS AES_MR_OPMOD_XTS #define AES_FLAGS_MODE_MASK (AES_FLAGS_OPMODE_MASK | \ AES_FLAGS_ENCRYPT | \ @@ -76,6 +79,7 @@ #define AES_FLAGS_INIT BIT(2) #define AES_FLAGS_BUSY BIT(3) #define AES_FLAGS_DUMP_REG BIT(4) +#define AES_FLAGS_OWN_SHA BIT(5) #define AES_FLAGS_PERSISTENT (AES_FLAGS_INIT | AES_FLAGS_BUSY) @@ -89,6 +93,8 @@ struct atmel_aes_caps { bool has_cfb64; bool has_ctr32; bool has_gcm; + bool has_xts; + bool has_authenc; u32 max_burst_size; }; @@ -135,10 +141,37 @@ struct atmel_aes_gcm_ctx { atmel_aes_fn_t ghash_resume; }; +struct atmel_aes_xts_ctx { + struct atmel_aes_base_ctx base; + + u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; +}; + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_ctx { + struct atmel_aes_base_ctx base; + struct atmel_sha_authenc_ctx *auth; +}; +#endif + struct atmel_aes_reqctx { unsigned long mode; }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_reqctx { + struct atmel_aes_reqctx base; + + struct scatterlist src[2]; + struct scatterlist dst[2]; + size_t textlen; + u32 digest[SHA512_DIGEST_SIZE / sizeof(u32)]; + + /* auth_req MUST be place last. 
*/ + struct ahash_request auth_req; +}; +#endif + struct atmel_aes_dma { struct dma_chan *chan; struct scatterlist *sg; @@ -282,6 +315,23 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz) snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2); break; + case AES_EMR: + return "EMR"; + + case AES_TWR(0): + case AES_TWR(1): + case AES_TWR(2): + case AES_TWR(3): + snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2); + break; + + case AES_ALPHAR(0): + case AES_ALPHAR(1): + case AES_ALPHAR(2): + case AES_ALPHAR(3): + snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2); + break; + default: snprintf(tmp, sz, "0x%02x", offset); break; @@ -317,7 +367,7 @@ static inline void atmel_aes_write(struct atmel_aes_dev *dd, char tmp[16]; dev_vdbg(dd->dev, "write 0x%08x into %s\n", value, - atmel_aes_reg_name(offset, tmp)); + atmel_aes_reg_name(offset, tmp, sizeof(tmp))); } #endif /* VERBOSE_DEBUG */ @@ -440,8 +490,16 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd) return (dd->flags & AES_FLAGS_ENCRYPT); } +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err); +#endif + static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) { +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + atmel_aes_authenc_complete(dd, err); +#endif + clk_disable(dd->iclk); dd->flags &= ~AES_FLAGS_BUSY; @@ -453,15 +511,15 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) return err; } -static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, - const u32 *iv) +static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma, + const u32 *iv, const u32 *key, int keylen) { u32 valmr = 0; /* MR register must be set before IV registers */ - if (dd->ctx->keylen == AES_KEYSIZE_128) + if (keylen == AES_KEYSIZE_128) valmr |= AES_MR_KEYSIZE_128; - else if (dd->ctx->keylen == AES_KEYSIZE_192) + else if (keylen == AES_KEYSIZE_192) valmr |= AES_MR_KEYSIZE_192; else valmr |= AES_MR_KEYSIZE_256; @@ -478,13 +536,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, atmel_aes_write(dd, AES_MR, valmr); - atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key, - SIZE_IN_WORDS(dd->ctx->keylen)); + atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen)); if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB) atmel_aes_write_block(dd, AES_IVR(0), iv); } +static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, + const u32 *iv) + +{ + atmel_aes_write_ctrl_key(dd, use_dma, iv, + dd->ctx->key, dd->ctx->keylen); +} /* CPU transfer */ @@ -850,6 +914,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, struct crypto_async_request *areq, *backlog; struct atmel_aes_base_ctx *ctx; unsigned long flags; + bool start_async; int err, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -875,10 +940,12 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, dd->areq = areq; dd->ctx = ctx; - dd->is_async = (areq != new_areq); + start_async = (areq != new_areq); + dd->is_async = start_async; + /* WARNING: ctx->start() MAY change dd->is_async. */ err = ctx->start(dd); - return (dd->is_async) ? ret : err; + return (start_async) ? 
ret : err; } @@ -1769,6 +1836,515 @@ static struct aead_alg aes_gcm_alg = { }; +/* xts functions */ + +static inline struct atmel_aes_xts_ctx * +atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx) +{ + return container_of(ctx, struct atmel_aes_xts_ctx, base); +} + +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd); + +static int atmel_aes_xts_start(struct atmel_aes_dev *dd) +{ + struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); + struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); + unsigned long flags; + int err; + + atmel_aes_set_mode(dd, rctx); + + err = atmel_aes_hw_init(dd); + if (err) + return atmel_aes_complete(dd, err); + + /* Compute the tweak value from req->info with ecb(aes). */ + flags = dd->flags; + dd->flags &= ~AES_FLAGS_MODE_MASK; + dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT); + atmel_aes_write_ctrl_key(dd, false, NULL, + ctx->key2, ctx->base.keylen); + dd->flags = flags; + + atmel_aes_write_block(dd, AES_IDATAR(0), req->info); + return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data); +} + +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); + bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD); + u32 tweak[AES_BLOCK_SIZE / sizeof(u32)]; + static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), }; + u8 *tweak_bytes = (u8 *)tweak; + int i; + + /* Read the computed ciphered tweak value. */ + atmel_aes_read_block(dd, AES_ODATAR(0), tweak); + /* + * Hardware quirk: + * the order of the ciphered tweak bytes need to be reversed before + * writing them into the ODATARx registers. + */ + for (i = 0; i < AES_BLOCK_SIZE/2; ++i) { + u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i]; + + tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i]; + tweak_bytes[i] = tmp; + } + + /* Process the data. 
*/ + atmel_aes_write_ctrl(dd, use_dma, NULL); + atmel_aes_write_block(dd, AES_TWR(0), tweak); + atmel_aes_write_block(dd, AES_ALPHAR(0), one); + if (use_dma) + return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes, + atmel_aes_transfer_complete); + + return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes, + atmel_aes_transfer_complete); +} + +static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm); + int err; + + err = xts_check_key(crypto_ablkcipher_tfm(tfm), key, keylen); + if (err) + return err; + + memcpy(ctx->base.key, key, keylen/2); + memcpy(ctx->key2, key + keylen/2, keylen/2); + ctx->base.keylen = keylen/2; + + return 0; +} + +static int atmel_aes_xts_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_xts_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, AES_FLAGS_XTS); +} + +static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm) +{ + struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx); + ctx->base.start = atmel_aes_xts_start; + + return 0; +} + +static struct crypto_alg aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "atmel-xts-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_xts_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_xts_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_xts_setkey, + .encrypt = atmel_aes_xts_encrypt, + .decrypt = atmel_aes_xts_decrypt, + } +}; + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc aead functions */ + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async); +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async); +static int atmel_aes_authenc_digest(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async); + +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (err && (dd->flags & AES_FLAGS_OWN_SHA)) + atmel_sha_authenc_abort(&rctx->auth_req); + dd->flags &= ~AES_FLAGS_OWN_SHA; +} + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + atmel_aes_set_mode(dd, &rctx->base); + + err = atmel_aes_hw_init(dd); + if (err) + return atmel_aes_complete(dd, err); + + return atmel_sha_authenc_schedule(&rctx->auth_req, ctx->auth, + atmel_aes_authenc_init, dd); +} + +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (is_async) + 
dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* If here, we've got the ownership of the SHA device. */ + dd->flags |= AES_FLAGS_OWN_SHA; + + /* Configure the SHA device. */ + return atmel_sha_authenc_init(&rctx->auth_req, + req->src, req->assoclen, + rctx->textlen, + atmel_aes_authenc_transfer, dd); +} + +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + bool enc = atmel_aes_is_encrypt(dd); + struct scatterlist *src, *dst; + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + u32 emr; + + if (is_async) + dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* Prepare src and dst scatter-lists to transfer cipher/plain texts. */ + src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen); + dst = src; + + if (req->src != req->dst) + dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); + + /* Configure the AES device. */ + memcpy(iv, req->iv, sizeof(iv)); + + /* + * Here we always set the 2nd parameter of atmel_aes_write_ctrl() to + * 'true' even if the data transfer is actually performed by the CPU (so + * not by the DMA) because we must force the AES_MR_SMOD bitfield to the + * value AES_MR_SMOD_IDATAR0. Indeed, both AES_MR_SMOD and SHA_MR_SMOD + * must be set to *_MR_SMOD_IDATAR0. + */ + atmel_aes_write_ctrl(dd, true, iv); + emr = AES_EMR_PLIPEN; + if (!enc) + emr |= AES_EMR_PLIPD; + atmel_aes_write(dd, AES_EMR, emr); + + /* Transfer data. */ + return atmel_aes_dma_start(dd, src, dst, rctx->textlen, + atmel_aes_authenc_digest); +} + +static int atmel_aes_authenc_digest(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + /* atmel_sha_authenc_final() releases the SHA device. */ + dd->flags &= ~AES_FLAGS_OWN_SHA; + return atmel_sha_authenc_final(&rctx->auth_req, + rctx->digest, sizeof(rctx->digest), + atmel_aes_authenc_final, dd); +} + +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + bool enc = atmel_aes_is_encrypt(dd); + u32 idigest[SHA512_DIGEST_SIZE / sizeof(u32)], *odigest = rctx->digest; + u32 offs, authsize; + + if (is_async) + dd->is_async = true; + if (err) + goto complete; + + offs = req->assoclen + rctx->textlen; + authsize = crypto_aead_authsize(tfm); + if (enc) { + scatterwalk_map_and_copy(odigest, req->dst, offs, authsize, 1); + } else { + scatterwalk_map_and_copy(idigest, req->src, offs, authsize, 0); + if (crypto_memneq(idigest, odigest, authsize)) + err = -EBADMSG; + } + +complete: + return atmel_aes_complete(dd, err); +} + +static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + struct crypto_authenc_keys keys; + u32 flags; + int err; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) + goto badkey; + + if (keys.enckeylen > sizeof(ctx->base.key)) + goto badkey; + + /* Save auth key. 
*/ + flags = crypto_aead_get_flags(tfm); + err = atmel_sha_authenc_setkey(ctx->auth, + keys.authkey, keys.authkeylen, + &flags); + crypto_aead_set_flags(tfm, flags & CRYPTO_TFM_RES_MASK); + if (err) { + memzero_explicit(&keys, sizeof(keys)); + return err; + } + + /* Save enc key. */ + ctx->base.keylen = keys.enckeylen; + memcpy(ctx->base.key, keys.enckey, keys.enckeylen); + + memzero_explicit(&keys, sizeof(keys)); + return 0; + +badkey: + crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + memzero_explicit(&key, sizeof(keys)); + return -EINVAL; +} + +static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, + unsigned long auth_mode) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize(); + + ctx->auth = atmel_sha_authenc_spawn(auth_mode); + if (IS_ERR(ctx->auth)) + return PTR_ERR(ctx->auth); + + crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) + + auth_reqsize)); + ctx->base.start = atmel_aes_authenc_start; + + return 0; +} + +static int atmel_aes_authenc_hmac_sha1_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA1); +} + +static int atmel_aes_authenc_hmac_sha224_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA224); +} + +static int atmel_aes_authenc_hmac_sha256_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA256); +} + +static int atmel_aes_authenc_hmac_sha384_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA384); +} + +static int atmel_aes_authenc_hmac_sha512_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA512); +} + +static void atmel_aes_authenc_exit_tfm(struct crypto_aead *tfm) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + + atmel_sha_authenc_free(ctx->auth); +} + +static int atmel_aes_authenc_crypt(struct aead_request *req, + unsigned long mode) +{ + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm); + u32 authsize = crypto_aead_authsize(tfm); + bool enc = (mode & AES_FLAGS_ENCRYPT); + struct atmel_aes_dev *dd; + + /* Compute text length. */ + if (!enc && req->cryptlen < authsize) + return -EINVAL; + rctx->textlen = req->cryptlen - (enc ? 0 : authsize); + + /* + * Currently, empty messages are not supported yet: + * the SHA auto-padding can be used only on non-empty messages. + * Hence a special case needs to be implemented for empty message. 
+ */ + if (!rctx->textlen && !req->assoclen) + return -EINVAL; + + rctx->base.mode = mode; + ctx->block_size = AES_BLOCK_SIZE; + + dd = atmel_aes_find_dev(ctx); + if (!dd) + return -ENODEV; + + return atmel_aes_handle_queue(dd, &req->base); +} + +static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC | AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_authenc_cbc_aes_decrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC); +} + +static struct aead_alg aes_authenc_algs[] = { +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha1_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha1-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha224_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha224-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha256_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha256-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha384_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha384-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha512_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha512-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + 
.cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +}; +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + /* Probe functions */ static int atmel_aes_buff_init(struct atmel_aes_dev *dd) @@ -1877,6 +2453,15 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) { int i; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) + crypto_unregister_aead(&aes_authenc_algs[i]); +#endif + + if (dd->caps.has_xts) + crypto_unregister_alg(&aes_xts_alg); + if (dd->caps.has_gcm) crypto_unregister_aead(&aes_gcm_alg); @@ -1909,8 +2494,33 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd) goto err_aes_gcm_alg; } + if (dd->caps.has_xts) { + err = crypto_register_alg(&aes_xts_alg); + if (err) + goto err_aes_xts_alg; + } + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) { + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) { + err = crypto_register_aead(&aes_authenc_algs[i]); + if (err) + goto err_aes_authenc_alg; + } + } +#endif + return 0; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + /* i = ARRAY_SIZE(aes_authenc_algs); */ +err_aes_authenc_alg: + for (j = 0; j < i; j++) + crypto_unregister_aead(&aes_authenc_algs[j]); + crypto_unregister_alg(&aes_xts_alg); +#endif +err_aes_xts_alg: + crypto_unregister_aead(&aes_gcm_alg); err_aes_gcm_alg: crypto_unregister_alg(&aes_cfb64_alg); err_aes_cfb64_alg: @@ -1928,6 +2538,8 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_cfb64 = 0; dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; + dd->caps.has_xts = 0; + dd->caps.has_authenc = 0; dd->caps.max_burst_size = 1; /* keep only major version number */ @@ -1937,6 +2549,8 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_cfb64 = 1; dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; + dd->caps.has_xts = 1; + dd->caps.has_authenc = 1; dd->caps.max_burst_size = 4; break; case 0x200: @@ -2095,6 +2709,13 @@ static int atmel_aes_probe(struct platform_device *pdev) atmel_aes_get_cap(aes_dd); +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (aes_dd->caps.has_authenc && !atmel_sha_authenc_is_ready()) { + err = -EPROBE_DEFER; + goto iclk_unprepare; + } +#endif + err = atmel_aes_buff_init(aes_dd); if (err) goto err_aes_buff; @@ -2131,14 +2752,15 @@ res_err: tasklet_kill(&aes_dd->done_task); tasklet_kill(&aes_dd->queue_task); aes_dd_err: - dev_err(dev, "initialization failed.\n"); + if (err != -EPROBE_DEFER) + dev_err(dev, "initialization failed.\n"); return err; } static int atmel_aes_remove(struct platform_device *pdev) { - static struct atmel_aes_dev *aes_dd; + struct atmel_aes_dev *aes_dd; aes_dd = platform_get_drvdata(pdev); if (!aes_dd) diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h new file mode 100644 index 000000000000..2a60d1224143 --- /dev/null +++ b/drivers/crypto/atmel-authenc.h @@ -0,0 +1,64 @@ +/* + * API for Atmel Secure Protocol Layers Improved Performances (SPLIP) + * + * Copyright (C) 2016 Atmel Corporation + * + * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale. + */ + +#ifndef __ATMEL_AUTHENC_H__ +#define __ATMEL_AUTHENC_H__ + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + +#include <crypto/authenc.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include "atmel-sha-regs.h" + +struct atmel_aes_dev; +typedef int (*atmel_aes_authenc_fn_t)(struct atmel_aes_dev *, int, bool); + +struct atmel_sha_authenc_ctx; + +bool atmel_sha_authenc_is_ready(void); +unsigned int atmel_sha_authenc_get_reqsize(void); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode); +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth); +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +void atmel_sha_authenc_abort(struct ahash_request *req); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + +#endif /* __ATMEL_AUTHENC_H__ */ diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index e08897109cab..1b0eba4a2706 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -16,16 +16,33 @@ #define SHA_MR_MODE_MANUAL 0x0 #define SHA_MR_MODE_AUTO 0x1 #define SHA_MR_MODE_PDC 0x2 +#define SHA_MR_MODE_IDATAR0 0x2 #define SHA_MR_PROCDLY (1 << 4) #define SHA_MR_UIHV (1 << 5) #define SHA_MR_UIEHV (1 << 6) +#define SHA_MR_ALGO_MASK GENMASK(10, 8) #define SHA_MR_ALGO_SHA1 (0 << 8) #define SHA_MR_ALGO_SHA256 (1 << 8) #define SHA_MR_ALGO_SHA384 (2 << 8) #define SHA_MR_ALGO_SHA512 (3 << 8) #define SHA_MR_ALGO_SHA224 (4 << 8) +#define SHA_MR_HMAC (1 << 11) #define SHA_MR_DUALBUFF (1 << 16) +#define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK +#define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 +#define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 +#define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 +#define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 +#define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 +#define SHA_FLAGS_HMAC SHA_MR_HMAC +#define SHA_FLAGS_HMAC_SHA1 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA1) +#define SHA_FLAGS_HMAC_SHA256 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA256) +#define SHA_FLAGS_HMAC_SHA384 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA384) +#define SHA_FLAGS_HMAC_SHA512 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA512) +#define SHA_FLAGS_HMAC_SHA224 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA224) +#define SHA_FLAGS_MODE_MASK (SHA_FLAGS_HMAC | SHA_FLAGS_ALGO_MASK) + #define SHA_IER 0x10 #define SHA_IDR 0x14 #define SHA_IMR 0x18 @@ -40,6 +57,9 @@ #define SHA_ISR_URAT_MR (0x2 << 12) #define SHA_ISR_URAT_WO (0x5 << 12) +#define SHA_MSR 0x20 +#define SHA_BCR 0x30 + #define SHA_HW_VERSION 0xFC #define SHA_TPR 0x108 diff --git a/drivers/crypto/atmel-sha.c 
b/drivers/crypto/atmel-sha.c index 97e34799e077..a9482023d7d3 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -41,6 +41,7 @@ #include <crypto/internal/hash.h> #include <linux/platform_data/crypto-atmel.h> #include "atmel-sha-regs.h" +#include "atmel-authenc.h" /* SHA flags */ #define SHA_FLAGS_BUSY BIT(0) @@ -50,21 +51,22 @@ #define SHA_FLAGS_INIT BIT(4) #define SHA_FLAGS_CPU BIT(5) #define SHA_FLAGS_DMA_READY BIT(6) +#define SHA_FLAGS_DUMP_REG BIT(7) + +/* bits[11:8] are reserved. */ #define SHA_FLAGS_FINUP BIT(16) #define SHA_FLAGS_SG BIT(17) -#define SHA_FLAGS_ALGO_MASK GENMASK(22, 18) -#define SHA_FLAGS_SHA1 BIT(18) -#define SHA_FLAGS_SHA224 BIT(19) -#define SHA_FLAGS_SHA256 BIT(20) -#define SHA_FLAGS_SHA384 BIT(21) -#define SHA_FLAGS_SHA512 BIT(22) #define SHA_FLAGS_ERROR BIT(23) #define SHA_FLAGS_PAD BIT(24) #define SHA_FLAGS_RESTORE BIT(25) +#define SHA_FLAGS_IDATAR0 BIT(26) +#define SHA_FLAGS_WAIT_DATARDY BIT(27) +#define SHA_OP_INIT 0 #define SHA_OP_UPDATE 1 #define SHA_OP_FINAL 2 +#define SHA_OP_DIGEST 3 #define SHA_BUFFER_LEN (PAGE_SIZE / 16) @@ -76,6 +78,7 @@ struct atmel_sha_caps { bool has_sha224; bool has_sha_384_512; bool has_uihv; + bool has_hmac; }; struct atmel_sha_dev; @@ -101,12 +104,16 @@ struct atmel_sha_reqctx { unsigned int total; /* total request */ size_t block_size; + size_t hash_size; u8 buffer[SHA_BUFFER_LEN + SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); }; +typedef int (*atmel_sha_fn_t)(struct atmel_sha_dev *); + struct atmel_sha_ctx { struct atmel_sha_dev *dd; + atmel_sha_fn_t start; unsigned long flags; }; @@ -116,6 +123,9 @@ struct atmel_sha_ctx { struct atmel_sha_dma { struct dma_chan *chan; struct dma_slave_config dma_conf; + struct scatterlist *sg; + int nents; + unsigned int last_sg_length; }; struct atmel_sha_dev { @@ -134,11 +144,17 @@ struct atmel_sha_dev { unsigned long flags; struct crypto_queue queue; struct ahash_request *req; + bool is_async; + bool force_complete; + atmel_sha_fn_t resume; + atmel_sha_fn_t cpu_transfer_complete; struct atmel_sha_dma dma_lch_in; struct atmel_sha_caps caps; + struct scatterlist tmp; + u32 hw_version; }; @@ -152,17 +168,140 @@ static struct atmel_sha_drv atmel_sha = { .lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock), }; +#ifdef VERBOSE_DEBUG +static const char *atmel_sha_reg_name(u32 offset, char *tmp, size_t sz, bool wr) +{ + switch (offset) { + case SHA_CR: + return "CR"; + + case SHA_MR: + return "MR"; + + case SHA_IER: + return "IER"; + + case SHA_IDR: + return "IDR"; + + case SHA_IMR: + return "IMR"; + + case SHA_ISR: + return "ISR"; + + case SHA_MSR: + return "MSR"; + + case SHA_BCR: + return "BCR"; + + case SHA_REG_DIN(0): + case SHA_REG_DIN(1): + case SHA_REG_DIN(2): + case SHA_REG_DIN(3): + case SHA_REG_DIN(4): + case SHA_REG_DIN(5): + case SHA_REG_DIN(6): + case SHA_REG_DIN(7): + case SHA_REG_DIN(8): + case SHA_REG_DIN(9): + case SHA_REG_DIN(10): + case SHA_REG_DIN(11): + case SHA_REG_DIN(12): + case SHA_REG_DIN(13): + case SHA_REG_DIN(14): + case SHA_REG_DIN(15): + snprintf(tmp, sz, "IDATAR[%u]", (offset - SHA_REG_DIN(0)) >> 2); + break; + + case SHA_REG_DIGEST(0): + case SHA_REG_DIGEST(1): + case SHA_REG_DIGEST(2): + case SHA_REG_DIGEST(3): + case SHA_REG_DIGEST(4): + case SHA_REG_DIGEST(5): + case SHA_REG_DIGEST(6): + case SHA_REG_DIGEST(7): + case SHA_REG_DIGEST(8): + case SHA_REG_DIGEST(9): + case SHA_REG_DIGEST(10): + case SHA_REG_DIGEST(11): + case SHA_REG_DIGEST(12): + case SHA_REG_DIGEST(13): + case SHA_REG_DIGEST(14): + case SHA_REG_DIGEST(15): + if (wr) + snprintf(tmp, 
sz, "IDATAR[%u]", + 16u + ((offset - SHA_REG_DIGEST(0)) >> 2)); + else + snprintf(tmp, sz, "ODATAR[%u]", + (offset - SHA_REG_DIGEST(0)) >> 2); + break; + + case SHA_HW_VERSION: + return "HWVER"; + + default: + snprintf(tmp, sz, "0x%02x", offset); + break; + } + + return tmp; +} + +#endif /* VERBOSE_DEBUG */ + static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset) { - return readl_relaxed(dd->io_base + offset); + u32 value = readl_relaxed(dd->io_base + offset); + +#ifdef VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, "read 0x%08x from %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), false)); + } +#endif /* VERBOSE_DEBUG */ + + return value; } static inline void atmel_sha_write(struct atmel_sha_dev *dd, u32 offset, u32 value) { +#ifdef VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, "write 0x%08x into %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), true)); + } +#endif /* VERBOSE_DEBUG */ + writel_relaxed(value, dd->io_base + offset); } +static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) +{ + struct ahash_request *req = dd->req; + + dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | + SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY | + SHA_FLAGS_DUMP_REG); + + clk_disable(dd->iclk); + + if ((dd->is_async || dd->force_complete) && req->base.complete) + req->base.complete(&req->base, err); + + /* handle new request */ + tasklet_schedule(&dd->queue_task); + + return err; +} + static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) { size_t count; @@ -241,7 +380,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) bits[1] = cpu_to_be64(size[0] << 3); bits[0] = cpu_to_be64(size[1] << 3 | size[0] >> 61); - if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA384: + case SHA_FLAGS_SHA512: index = ctx->bufcnt & 0x7f; padlen = (index < 112) ? (112 - index) : ((128+112) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -249,7 +390,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); ctx->bufcnt += padlen + 16; ctx->flags |= SHA_FLAGS_PAD; - } else { + break; + + default: index = ctx->bufcnt & 0x3f; padlen = (index < 56) ? 
(56 - index) : ((64+56) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -257,14 +400,12 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); ctx->bufcnt += padlen + 8; ctx->flags |= SHA_FLAGS_PAD; + break; } } -static int atmel_sha_init(struct ahash_request *req) +static struct atmel_sha_dev *atmel_sha_find_dev(struct atmel_sha_ctx *tctx) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); - struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); struct atmel_sha_dev *dd = NULL; struct atmel_sha_dev *tmp; @@ -281,6 +422,16 @@ static int atmel_sha_init(struct ahash_request *req) spin_unlock_bh(&atmel_sha.lock); + return dd; +} + +static int atmel_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct atmel_sha_dev *dd = atmel_sha_find_dev(tctx); + ctx->dd = dd; ctx->flags = 0; @@ -397,6 +548,19 @@ static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma) atmel_sha_write(dd, SHA_MR, valmr); } +static inline int atmel_sha_wait_for_data_ready(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + u32 isr = atmel_sha_read(dd, SHA_ISR); + + if (unlikely(isr & SHA_INT_DATARDY)) + return resume(dd); + + dd->resume = resume; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; +} + static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, size_t length, int final) { @@ -404,7 +568,7 @@ static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, int count, len32; const u32 *buffer = (const u32 *)buf; - dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length, final); atmel_sha_write_ctrl(dd, 0); @@ -433,7 +597,7 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); int len32; - dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length1, final); len32 = DIV_ROUND_UP(length1, sizeof(u32)); @@ -467,6 +631,8 @@ static void atmel_sha_dma_callback(void *data) { struct atmel_sha_dev *dd = data; + dd->is_async = true; + /* dma_lch_in - completed - wait DATRDY */ atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); } @@ -478,7 +644,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, struct dma_async_tx_descriptor *in_desc; struct scatterlist sg[2]; - dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length1, final); dd->dma_lch_in.dma_conf.src_maxburst = 16; @@ -502,7 +668,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); } if (!in_desc) - return -EINVAL; + return atmel_sha_complete(dd, -EINVAL); in_desc->callback = atmel_sha_dma_callback; in_desc->callback_param = dd; @@ -557,9 +723,9 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen + ctx->block_size, DMA_TO_DEVICE); if 
(dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + + dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + return atmel_sha_complete(dd, -EINVAL); } ctx->flags &= ~SHA_FLAGS_SG; @@ -578,7 +744,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; - dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: 0x%llx 0x%llx, final: %d\n", + dev_dbg(dd->dev, "slow: bufcnt: %zu, digcnt: 0x%llx 0x%llx, final: %d\n", ctx->bufcnt, ctx->digcnt[1], ctx->digcnt[0], final); if (final) @@ -606,7 +772,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (ctx->bufcnt || ctx->offset) return atmel_sha_update_dma_slow(dd); - dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %u, total: %u\n", + dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %zd, total: %u\n", ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt, ctx->total); sg = ctx->sg; @@ -648,9 +814,9 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen + ctx->block_size, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", + dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + return atmel_sha_complete(dd, -EINVAL); } if (length == 0) { @@ -664,7 +830,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + return atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -678,7 +844,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + return atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -796,16 +962,28 @@ static void atmel_sha_copy_ready_hash(struct ahash_request *req) if (!req->result) return; - if (ctx->flags & SHA_FLAGS_SHA1) + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + default: + case SHA_FLAGS_SHA1: memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA224) + break; + + case SHA_FLAGS_SHA224: memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA256) + break; + + case SHA_FLAGS_SHA256: memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA384) + break; + + case SHA_FLAGS_SHA384: memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE); - else + break; + + case SHA_FLAGS_SHA512: memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE); + break; + } } static int atmel_sha_finish(struct ahash_request *req) @@ -816,7 +994,7 @@ static int atmel_sha_finish(struct ahash_request *req) if (ctx->digcnt[0] || ctx->digcnt[1]) atmel_sha_copy_ready_hash(req); - dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %d\n", ctx->digcnt[1], + dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %zd\n", ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt); return 0; @@ -836,16 +1014,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) } /* atomic operation is not needed here */ - dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | - SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); - - clk_disable(dd->iclk); - - if (req->base.complete) - req->base.complete(&req->base, err); - - /* handle new request */ - 
tasklet_schedule(&dd->queue_task); + (void)atmel_sha_complete(dd, err); } static int atmel_sha_hw_init(struct atmel_sha_dev *dd) @@ -886,8 +1055,9 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, struct ahash_request *req) { struct crypto_async_request *async_req, *backlog; - struct atmel_sha_reqctx *ctx; + struct atmel_sha_ctx *ctx; unsigned long flags; + bool start_async; int err = 0, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -912,35 +1082,69 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, if (backlog) backlog->complete(backlog, -EINPROGRESS); - req = ahash_request_cast(async_req); - dd->req = req; - ctx = ahash_request_ctx(req); + ctx = crypto_tfm_ctx(async_req->tfm); + + dd->req = ahash_request_cast(async_req); + start_async = (dd->req != req); + dd->is_async = start_async; + dd->force_complete = false; + + /* WARNING: ctx->start() MAY change dd->is_async. */ + err = ctx->start(dd); + return (start_async) ? ret : err; +} + +static int atmel_sha_done(struct atmel_sha_dev *dd); + +static int atmel_sha_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", ctx->op, req->nbytes); err = atmel_sha_hw_init(dd); - if (err) - goto err1; + return atmel_sha_complete(dd, err); + /* + * atmel_sha_update_req() and atmel_sha_final_req() can return either: + * -EINPROGRESS: the hardware is busy and the SHA driver will resume + * its job later in the done_task. + * This is the main path. + * + * 0: the SHA driver can continue its job then release the hardware + * later, if needed, with atmel_sha_finish_req(). + * This is the alternate path. + * + * < 0: an error has occurred so atmel_sha_complete(dd, err) has already + * been called, hence the hardware has been released. + * The SHA driver must stop its job without calling + * atmel_sha_finish_req(), otherwise atmel_sha_complete() would be + * called a second time. + * + * Please note that currently, atmel_sha_final_req() never returns 0. 
+ */ + + dd->resume = atmel_sha_done; if (ctx->op == SHA_OP_UPDATE) { err = atmel_sha_update_req(dd); - if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + if (!err && (ctx->flags & SHA_FLAGS_FINUP)) /* no final() after finup() */ err = atmel_sha_final_req(dd); } else if (ctx->op == SHA_OP_FINAL) { err = atmel_sha_final_req(dd); } -err1: - if (err != -EINPROGRESS) + if (!err) /* done_task will not finish it, so do it here */ atmel_sha_finish_req(req, err); dev_dbg(dd->dev, "exit, err: %d\n", err); - return ret; + return err; } static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op) @@ -1036,8 +1240,11 @@ static int atmel_sha_import(struct ahash_request *req, const void *in) static int atmel_sha_cra_init(struct crypto_tfm *tfm) { + struct atmel_sha_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct atmel_sha_reqctx)); + ctx->start = atmel_sha_start; return 0; } @@ -1176,9 +1383,8 @@ static void atmel_sha_queue_task(unsigned long data) atmel_sha_handle_queue(dd, NULL); } -static void atmel_sha_done_task(unsigned long data) +static int atmel_sha_done(struct atmel_sha_dev *dd) { - struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; int err = 0; if (SHA_FLAGS_CPU & dd->flags) { @@ -1204,11 +1410,21 @@ static void atmel_sha_done_task(unsigned long data) goto finish; } } - return; + return err; finish: /* finish curent request */ atmel_sha_finish_req(dd->req, err); + + return err; +} + +static void atmel_sha_done_task(unsigned long data) +{ + struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; + + dd->is_async = true; + (void)dd->resume(dd); } static irqreturn_t atmel_sha_irq(int irq, void *dev_id) @@ -1233,10 +1449,1104 @@ static irqreturn_t atmel_sha_irq(int irq, void *dev_id) return IRQ_NONE; } + +/* DMA transfer functions */ + +static bool atmel_sha_dma_check_aligned(struct atmel_sha_dev *dd, + struct scatterlist *sg, + size_t len) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + int nents; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + /* + * This is the last sg, the only one that is allowed to + * have an unaligned length. + */ + if (len <= sg->length) { + dma->nents = nents + 1; + dma->last_sg_length = sg->length; + sg->length = ALIGN(len, sizeof(u32)); + return true; + } + + /* All other sg lengths MUST be aligned to the block size. 
*/ + if (!IS_ALIGNED(sg->length, bs)) + return false; + + len -= sg->length; + } + + return false; +} + +static void atmel_sha_dma_callback2(void *data) +{ + struct atmel_sha_dev *dd = data; + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct scatterlist *sg; + int nents; + + dmaengine_terminate_all(dma->chan); + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + + sg = dma->sg; + for (nents = 0; nents < dma->nents - 1; ++nents) + sg = sg_next(sg); + sg->length = dma->last_sg_length; + + dd->is_async = true; + (void)atmel_sha_wait_for_data_ready(dd, dd->resume); +} + +static int atmel_sha_dma_start(struct atmel_sha_dev *dd, + struct scatterlist *src, + size_t len, + atmel_sha_fn_t resume) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct dma_slave_config *config = &dma->dma_conf; + struct dma_chan *chan = dma->chan; + struct dma_async_tx_descriptor *desc; + dma_cookie_t cookie; + unsigned int sg_len; + int err; + + dd->resume = resume; + + /* + * dma->nents has already been initialized by + * atmel_sha_dma_check_aligned(). + */ + dma->sg = src; + sg_len = dma_map_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + if (!sg_len) { + err = -ENOMEM; + goto exit; + } + + config->src_maxburst = 16; + config->dst_maxburst = 16; + err = dmaengine_slave_config(chan, config); + if (err) + goto unmap_sg; + + desc = dmaengine_prep_slave_sg(chan, dma->sg, sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + err = -ENOMEM; + goto unmap_sg; + } + + desc->callback = atmel_sha_dma_callback2; + desc->callback_param = dd; + cookie = dmaengine_submit(desc); + err = dma_submit_error(cookie); + if (err) + goto unmap_sg; + + dma_async_issue_pending(chan); + + return -EINPROGRESS; + +unmap_sg: + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); +exit: + return atmel_sha_complete(dd, err); +} + + +/* CPU transfer functions */ + +static int atmel_sha_cpu_transfer(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + const u32 *words = (const u32 *)ctx->buffer; + size_t i, num_words; + u32 isr, din, din_inc; + + din_inc = (ctx->flags & SHA_FLAGS_IDATAR0) ? 0 : 1; + for (;;) { + /* Write data into the Input Data Registers. */ + num_words = DIV_ROUND_UP(ctx->bufcnt, sizeof(u32)); + for (i = 0, din = 0; i < num_words; ++i, din += din_inc) + atmel_sha_write(dd, SHA_REG_DIN(din), words[i]); + + ctx->offset += ctx->bufcnt; + ctx->total -= ctx->bufcnt; + + if (!ctx->total) + break; + + /* + * Prepare next block: + * Fill ctx->buffer now with the next data to be written into + * IDATARx: it gives time for the SHA hardware to process + * the current data so the SHA_INT_DATARDY flag might be set + * in SHA_ISR when polling this register at the beginning of + * the next loop. + */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + /* Wait for hardware to be ready again. */ + isr = atmel_sha_read(dd, SHA_ISR); + if (!(isr & SHA_INT_DATARDY)) { + /* Not ready yet. 
*/ + dd->resume = atmel_sha_cpu_transfer; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; + } + } + + if (unlikely(!(ctx->flags & SHA_FLAGS_WAIT_DATARDY))) + return dd->cpu_transfer_complete(dd); + + return atmel_sha_wait_for_data_ready(dd, dd->cpu_transfer_complete); +} + +static int atmel_sha_cpu_start(struct atmel_sha_dev *dd, + struct scatterlist *sg, + unsigned int len, + bool idatar0_only, + bool wait_data_ready, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + + if (!len) + return resume(dd); + + ctx->flags &= ~(SHA_FLAGS_IDATAR0 | SHA_FLAGS_WAIT_DATARDY); + + if (idatar0_only) + ctx->flags |= SHA_FLAGS_IDATAR0; + + if (wait_data_ready) + ctx->flags |= SHA_FLAGS_WAIT_DATARDY; + + ctx->sg = sg; + ctx->total = len; + ctx->offset = 0; + + /* Prepare the first block to be written. */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + dd->cpu_transfer_complete = resume; + return atmel_sha_cpu_transfer(dd); +} + +static int atmel_sha_cpu_hash(struct atmel_sha_dev *dd, + const void *data, unsigned int datalen, + bool auto_padding, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + u32 msglen = (auto_padding) ? datalen : 0; + u32 mr = SHA_MR_MODE_AUTO; + + if (!(IS_ALIGNED(datalen, ctx->block_size) || auto_padding)) + return atmel_sha_complete(dd, -EINVAL); + + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, msglen); + atmel_sha_write(dd, SHA_BCR, msglen); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, data, datalen); + return atmel_sha_cpu_start(dd, &dd->tmp, datalen, false, true, resume); +} + + +/* hmac functions */ + +struct atmel_sha_hmac_key { + bool valid; + unsigned int keylen; + u8 buffer[SHA512_BLOCK_SIZE]; + u8 *keydup; +}; + +static inline void atmel_sha_hmac_key_init(struct atmel_sha_hmac_key *hkey) +{ + memset(hkey, 0, sizeof(*hkey)); +} + +static inline void atmel_sha_hmac_key_release(struct atmel_sha_hmac_key *hkey) +{ + kfree(hkey->keydup); + memset(hkey, 0, sizeof(*hkey)); +} + +static inline int atmel_sha_hmac_key_set(struct atmel_sha_hmac_key *hkey, + const u8 *key, + unsigned int keylen) +{ + atmel_sha_hmac_key_release(hkey); + + if (keylen > sizeof(hkey->buffer)) { + hkey->keydup = kmemdup(key, keylen, GFP_KERNEL); + if (!hkey->keydup) + return -ENOMEM; + + } else { + memcpy(hkey->buffer, key, keylen); + } + + hkey->valid = true; + hkey->keylen = keylen; + return 0; +} + +static inline bool atmel_sha_hmac_key_get(const struct atmel_sha_hmac_key *hkey, + const u8 **key, + unsigned int *keylen) +{ + if (!hkey->valid) + return false; + + *keylen = hkey->keylen; + *key = (hkey->keydup) ? 
hkey->keydup : hkey->buffer; + return true; +} + + +struct atmel_sha_hmac_ctx { + struct atmel_sha_ctx base; + + struct atmel_sha_hmac_key hkey; + u32 ipad[SHA512_BLOCK_SIZE / sizeof(u32)]; + u32 opad[SHA512_BLOCK_SIZE / sizeof(u32)]; + atmel_sha_fn_t resume; +}; + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume); +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen); +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + unsigned int keylen; + const u8 *key; + size_t bs; + + hmac->resume = resume; + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + ctx->block_size = SHA1_BLOCK_SIZE; + ctx->hash_size = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + ctx->block_size = SHA224_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + ctx->block_size = SHA256_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + ctx->block_size = SHA384_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + ctx->block_size = SHA512_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + bs = ctx->block_size; + + if (likely(!atmel_sha_hmac_key_get(&hmac->hkey, &key, &keylen))) + return resume(dd); + + /* Compute K' from K. */ + if (unlikely(keylen > bs)) + return atmel_sha_hmac_prehash_key(dd, key, keylen); + + /* Prepare ipad. */ + memcpy((u8 *)hmac->ipad, key, keylen); + memset((u8 *)hmac->ipad + keylen, 0, bs - keylen); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen) +{ + return atmel_sha_cpu_hash(dd, key, keylen, true, + atmel_sha_hmac_prehash_key_done); +} + +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t ds = crypto_ahash_digestsize(tfm); + size_t bs = ctx->block_size; + size_t i, num_words = ds / sizeof(u32); + + /* Prepare ipad. 
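+ * Per RFC 2104, a key longer than the block size is first hashed and
+ * the digest, zero-padded to the block size, is used as K'; the digest
+ * words read back below provide that H(K). For reference only, a
+ * minimal software sketch of the equivalent pad derivation (not driver
+ * code; bs is the block size, and keys longer than bs are assumed to
+ * have been hashed already, as done here):
+ *
+ *	static void hmac_prepare_pads(u8 *ipad, u8 *opad, const u8 *key,
+ *				      size_t keylen, size_t bs)
+ *	{
+ *		size_t i;
+ *
+ *		memset(ipad, 0, bs);
+ *		memcpy(ipad, key, keylen);	// keylen <= bs at this point
+ *		memcpy(opad, ipad, bs);
+ *		for (i = 0; i < bs; i++) {
+ *			ipad[i] ^= 0x36;
+ *			opad[i] ^= 0x5c;
+ *		}
+ *	}
+ *
+ * The XOR step itself is applied by atmel_sha_hmac_compute_ipad_hash()
+ * below.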
*/ + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + memset((u8 *)hmac->ipad + ds, 0, bs - ds); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t i, num_words = bs / sizeof(u32); + + memcpy(hmac->opad, hmac->ipad, bs); + for (i = 0; i < num_words; ++i) { + hmac->ipad[i] ^= 0x36363636; + hmac->opad[i] ^= 0x5c5c5c5c; + } + + return atmel_sha_cpu_hash(dd, hmac->ipad, bs, false, + atmel_sha_hmac_compute_opad_hash); +} + +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + return atmel_sha_cpu_hash(dd, hmac->opad, bs, false, + atmel_sha_hmac_setup_done); +} + +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->opad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + atmel_sha_hmac_key_release(&hmac->hkey); + return hmac->resume(dd); +} + +static int atmel_sha_hmac_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; + + err = atmel_sha_hw_init(dd); + if (err) + return atmel_sha_complete(dd, err); + + switch (ctx->op) { + case SHA_OP_INIT: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_init_done); + break; + + case SHA_OP_UPDATE: + dd->resume = atmel_sha_done; + err = atmel_sha_update_req(dd); + break; + + case SHA_OP_FINAL: + dd->resume = atmel_sha_hmac_final; + err = atmel_sha_final_req(dd); + break; + + case SHA_OP_DIGEST: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_digest2); + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + + return err; +} + +static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + + if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return 0; +} + +static int atmel_sha_hmac_init(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_INIT); +} + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + + ctx->bufcnt = 0; + ctx->digcnt[0] = bs; + ctx->digcnt[1] = 0; + ctx->flags |= SHA_FLAGS_RESTORE; + 
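+	/*
+	 * Seed the request digest with the precomputed hash state of
+	 * (K' ^ ipad) so that the following update()/final() calls simply
+	 * continue the inner hash; digcnt was set to one block above to
+	 * account for the key block already consumed.
+	 */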
memcpy(ctx->digest, hmac->ipad, hs); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + u32 *digest = (u32 *)ctx->digest; + size_t ds = crypto_ahash_digestsize(tfm); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words; + u32 mr; + + /* Save d = SHA((K' + ipad) | msg). */ + num_words = ds / sizeof(u32); + for (i = 0; i < num_words; ++i) + digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + /* Restore context to finish computing SHA((K' + opad) | d). */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + num_words = hs / sizeof(u32); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = SHA_MR_MODE_AUTO | SHA_MR_UIHV; + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, bs + ds); + atmel_sha_write(dd, SHA_BCR, ds); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, digest, ds); + return atmel_sha_cpu_start(dd, &dd->tmp, ds, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd) +{ + /* + * req->result might not be sizeof(u32) aligned, so copy the + * digest into ctx->digest[] before memcpy() the data into + * req->result. + */ + atmel_sha_copy_hash(dd->req); + atmel_sha_copy_ready_hash(dd->req); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_digest(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_DIGEST); +} + +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + bool use_dma = false; + u32 mr; + + /* Special case for empty message. */ + if (!req->nbytes) + return atmel_sha_complete(dd, -EINVAL); // TODO: + + /* Check DMA threshold and alignment. */ + if (req->nbytes > ATMEL_SHA_DMA_THRESHOLD && + atmel_sha_dma_check_aligned(dd, req->src, req->nbytes)) + use_dma = true; + + /* Write both initial hash values to compute a HMAC. */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + /* Write the Mode, Message Size, Bytes Count then Control Registers. */ + mr = (SHA_MR_HMAC | SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + if (use_dma) + mr |= SHA_MR_MODE_IDATAR0; + else + mr |= SHA_MR_MODE_AUTO; + atmel_sha_write(dd, SHA_MR, mr); + + atmel_sha_write(dd, SHA_MSR, req->nbytes); + atmel_sha_write(dd, SHA_BCR, req->nbytes); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process data. 
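+ * Large requests with a suitably aligned scatterlist go through DMA in
+ * IDATAR0 mode; everything else is written by the CPU in auto mode.
+ * Both paths complete through atmel_sha_hmac_final_done().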
*/ + if (use_dma) + return atmel_sha_dma_start(dd, req->src, req->nbytes, + atmel_sha_hmac_final_done); + + return atmel_sha_cpu_start(dd, req->src, req->nbytes, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct atmel_sha_reqctx)); + hmac->base.start = atmel_sha_hmac_start; + atmel_sha_hmac_key_init(&hmac->hkey); + + return 0; +} + +static void atmel_sha_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + atmel_sha_hmac_key_release(&hmac->hkey); +} + +static struct ahash_alg sha_hmac_algs[] = { +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "atmel-hmac-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "atmel-hmac-sha224", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "atmel-hmac-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "atmel-hmac-sha384", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + 
.final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "atmel-hmac-sha512", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +}; + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc functions */ + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd); + + +struct atmel_sha_authenc_ctx { + struct crypto_ahash *tfm; +}; + +struct atmel_sha_authenc_reqctx { + struct atmel_sha_reqctx base; + + atmel_aes_authenc_fn_t cb; + struct atmel_aes_dev *aes_dev; + + /* _init() parameters. */ + struct scatterlist *assoc; + u32 assoclen; + u32 textlen; + + /* _final() parameters. */ + u32 *digest; + unsigned int digestlen; +}; + +static void atmel_sha_authenc_complete(struct crypto_async_request *areq, + int err) +{ + struct ahash_request *req = areq->data; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async); +} + +static int atmel_sha_authenc_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + int err; + + /* + * Force atmel_sha_complete() to call req->base.complete(), ie + * atmel_sha_authenc_complete(), which in turn calls authctx->cb(). 
+ */ + dd->force_complete = true; + + err = atmel_sha_hw_init(dd); + return authctx->cb(authctx->aes_dev, err, dd->is_async); +} + +bool atmel_sha_authenc_is_ready(void) +{ + struct atmel_sha_ctx dummy; + + dummy.dd = NULL; + return (atmel_sha_find_dev(&dummy) != NULL); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_is_ready); + +unsigned int atmel_sha_authenc_get_reqsize(void) +{ + return sizeof(struct atmel_sha_authenc_reqctx); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_get_reqsize); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode) +{ + struct atmel_sha_authenc_ctx *auth; + struct crypto_ahash *tfm; + struct atmel_sha_ctx *tctx; + const char *name; + int err = -EINVAL; + + switch (mode & SHA_FLAGS_MODE_MASK) { + case SHA_FLAGS_HMAC_SHA1: + name = "atmel-hmac-sha1"; + break; + + case SHA_FLAGS_HMAC_SHA224: + name = "atmel-hmac-sha224"; + break; + + case SHA_FLAGS_HMAC_SHA256: + name = "atmel-hmac-sha256"; + break; + + case SHA_FLAGS_HMAC_SHA384: + name = "atmel-hmac-sha384"; + break; + + case SHA_FLAGS_HMAC_SHA512: + name = "atmel-hmac-sha512"; + break; + + default: + goto error; + } + + tfm = crypto_alloc_ahash(name, + CRYPTO_ALG_TYPE_AHASH, + CRYPTO_ALG_TYPE_AHASH_MASK); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + goto error; + } + tctx = crypto_ahash_ctx(tfm); + tctx->start = atmel_sha_authenc_start; + tctx->flags = mode; + + auth = kzalloc(sizeof(*auth), GFP_KERNEL); + if (!auth) { + err = -ENOMEM; + goto err_free_ahash; + } + auth->tfm = tfm; + + return auth; + +err_free_ahash: + crypto_free_ahash(tfm); +error: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_spawn); + +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth) +{ + if (auth) + crypto_free_ahash(auth->tfm); + kfree(auth); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_free); + +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags) +{ + struct crypto_ahash *tfm = auth->tfm; + int err; + + crypto_ahash_clear_flags(tfm, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(tfm, *flags & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(tfm, key, keylen); + *flags = crypto_ahash_get_flags(tfm); + + return err; +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_setkey); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = auth->tfm; + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd; + + /* Reset request context (MUST be done first). */ + memset(authctx, 0, sizeof(*authctx)); + + /* Get SHA device. */ + dd = atmel_sha_find_dev(tctx); + if (!dd) + return cb(aes_dev, -ENODEV, false); + + /* Init request context. 
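+ * Expected sequence on the AES side: atmel_sha_authenc_spawn() at tfm
+ * init time, atmel_sha_authenc_setkey() from the aead setkey, then per
+ * request atmel_sha_authenc_schedule() followed by
+ * atmel_sha_authenc_init(), the payload being fed to the SHA input by
+ * the AES driver, and finally atmel_sha_authenc_final() or
+ * atmel_sha_authenc_abort(); each step reports back through the
+ * atmel_aes_authenc_fn_t callback.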
*/ + ctx->dd = dd; + ctx->buflen = SHA_BUFFER_LEN; + authctx->cb = cb; + authctx->aes_dev = aes_dev; + ahash_request_set_tfm(req, tfm); + ahash_request_set_callback(req, 0, atmel_sha_authenc_complete, req); + + return atmel_sha_handle_queue(dd, req); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_schedule); + +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd = ctx->dd; + + if (unlikely(!IS_ALIGNED(assoclen, sizeof(u32)))) + return atmel_sha_complete(dd, -EINVAL); + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->assoc = assoc; + authctx->assoclen = assoclen; + authctx->textlen = textlen; + + ctx->flags = hmac->base.flags; + return atmel_sha_hmac_setup(dd, atmel_sha_authenc_init2); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_init); + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + u32 mr, msg_size; + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = (SHA_MR_MODE_IDATAR0 | + SHA_MR_HMAC | + SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + atmel_sha_write(dd, SHA_MR, mr); + + msg_size = authctx->assoclen + authctx->textlen; + atmel_sha_write(dd, SHA_MSR, msg_size); + atmel_sha_write(dd, SHA_BCR, msg_size); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process assoc data. 
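+ * Only the associated data is hashed here; MSR/BCR written above
+ * already cover assoclen + textlen, so the remaining text is expected
+ * to reach IDATAR0 later under control of the AES driver.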
*/ + return atmel_sha_cpu_start(dd, authctx->assoc, authctx->assoclen, + true, false, + atmel_sha_authenc_init_done); +} + +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + return authctx->cb(authctx->aes_dev, 0, dd->is_async); +} + +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + authctx->digestlen = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + authctx->digestlen = SHA224_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + authctx->digestlen = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + authctx->digestlen = SHA384_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + authctx->digestlen = SHA512_DIGEST_SIZE; + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + if (authctx->digestlen > digestlen) + authctx->digestlen = digestlen; + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->digest = digest; + return atmel_sha_wait_for_data_ready(dd, + atmel_sha_authenc_final_done); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_final); + +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + size_t i, num_words = authctx->digestlen / sizeof(u32); + + for (i = 0; i < num_words; ++i) + authctx->digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + return atmel_sha_complete(dd, 0); +} + +void atmel_sha_authenc_abort(struct ahash_request *req) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + /* Prevent atmel_sha_complete() from calling req->base.complete(). 
*/ + dd->is_async = false; + dd->force_complete = false; + (void)atmel_sha_complete(dd, 0); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_abort); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + + static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) { int i; + if (dd->caps.has_hmac) + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) + crypto_unregister_ahash(&sha_hmac_algs[i]); + for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) crypto_unregister_ahash(&sha_1_256_algs[i]); @@ -1273,8 +2583,21 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd) } } + if (dd->caps.has_hmac) { + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) { + err = crypto_register_ahash(&sha_hmac_algs[i]); + if (err) + goto err_sha_hmac_algs; + } + } + return 0; + /*i = ARRAY_SIZE(sha_hmac_algs);*/ +err_sha_hmac_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&sha_hmac_algs[j]); + i = ARRAY_SIZE(sha_384_512_algs); err_sha_384_512_algs: for (j = 0; j < i; j++) crypto_unregister_ahash(&sha_384_512_algs[j]); @@ -1344,6 +2667,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) dd->caps.has_sha224 = 0; dd->caps.has_sha_384_512 = 0; dd->caps.has_uihv = 0; + dd->caps.has_hmac = 0; /* keep only major version number */ switch (dd->hw_version & 0xff0) { @@ -1353,6 +2677,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) dd->caps.has_sha224 = 1; dd->caps.has_sha_384_512 = 1; dd->caps.has_uihv = 1; + dd->caps.has_hmac = 1; break; case 0x420: dd->caps.has_dma = 1; diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index bf467d7be35c..b25f1b3c981f 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -150,7 +150,7 @@ static struct atmel_tdes_drv atmel_tdes = { static int atmel_tdes_sg_copy(struct scatterlist **sg, size_t *offset, void *buf, size_t buflen, size_t total, int out) { - unsigned int count, off = 0; + size_t count, off = 0; while (buflen && total) { count = min((*sg)->length - *offset, total); @@ -336,7 +336,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %u\n", count); + pr_err("not all data converted: %zu\n", count); } } @@ -361,7 +361,7 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, dd->buflen, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { - dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); err = -EINVAL; goto err_map_in; } @@ -369,7 +369,7 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, dd->buflen, DMA_FROM_DEVICE); if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { - dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); err = -EINVAL; goto err_map_out; } @@ -525,8 +525,8 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd) if (fast) { - count = min(dd->total, sg_dma_len(dd->in_sg)); - count = min(count, sg_dma_len(dd->out_sg)); + count = min_t(size_t, dd->total, sg_dma_len(dd->in_sg)); + count = min_t(size_t, count, sg_dma_len(dd->out_sg)); err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); if (!err) { @@ -661,7 +661,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: 
%u\n", count); + pr_err("not all data converted: %zu\n", count); } } } diff --git a/drivers/crypto/bcm/Makefile b/drivers/crypto/bcm/Makefile new file mode 100644 index 000000000000..13cb80eb2665 --- /dev/null +++ b/drivers/crypto/bcm/Makefile @@ -0,0 +1,15 @@ +# File: drivers/crypto/bcm/Makefile +# +# Makefile for crypto acceleration files for Broadcom SPU driver +# +# Uncomment to enable debug tracing in the SPU driver. +# CFLAGS_util.o := -DDEBUG +# CFLAGS_cipher.o := -DDEBUG +# CFLAGS_spu.o := -DDEBUG +# CFLAGS_spu2.o := -DDEBUG + +obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) := bcm_crypto_spu.o + +bcm_crypto_spu-objs := util.o spu.o spu2.o cipher.o + +ccflags-y += -I. -DBCMDRIVER diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c new file mode 100644 index 000000000000..cc0d5b98006e --- /dev/null +++ b/drivers/crypto/bcm/cipher.c @@ -0,0 +1,4963 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <linux/kthread.h> +#include <linux/rtnetlink.h> +#include <linux/sched.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/io.h> +#include <linux/bitops.h> + +#include <crypto/algapi.h> +#include <crypto/aead.h> +#include <crypto/internal/aead.h> +#include <crypto/aes.h> +#include <crypto/des.h> +#include <crypto/sha.h> +#include <crypto/md5.h> +#include <crypto/authenc.h> +#include <crypto/skcipher.h> +#include <crypto/hash.h> +#include <crypto/aes.h> +#include <crypto/sha3.h> + +#include "util.h" +#include "cipher.h" +#include "spu.h" +#include "spum.h" +#include "spu2.h" + +/* ================= Device Structure ================== */ + +struct device_private iproc_priv; + +/* ==================== Parameters ===================== */ + +int flow_debug_logging; +module_param(flow_debug_logging, int, 0644); +MODULE_PARM_DESC(flow_debug_logging, "Enable Flow Debug Logging"); + +int packet_debug_logging; +module_param(packet_debug_logging, int, 0644); +MODULE_PARM_DESC(packet_debug_logging, "Enable Packet Debug Logging"); + +int debug_logging_sleep; +module_param(debug_logging_sleep, int, 0644); +MODULE_PARM_DESC(debug_logging_sleep, "Packet Debug Logging Sleep"); + +/* + * The value of these module parameters is used to set the priority for each + * algo type when this driver registers algos with the kernel crypto API. + * To use a priority other than the default, set the priority in the insmod or + * modprobe. Changing the module priority after init time has no effect. + * + * The default priorities are chosen to be lower (less preferred) than ARMv8 CE + * algos, but more preferred than generic software algos. 
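+ *
+ * Example (hypothetical values):
+ *	modprobe bcm_crypto_spu cipher_pri=300 hash_pri=200 aead_pri=300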
+ */ +static int cipher_pri = 150; +module_param(cipher_pri, int, 0644); +MODULE_PARM_DESC(cipher_pri, "Priority for cipher algos"); + +static int hash_pri = 100; +module_param(hash_pri, int, 0644); +MODULE_PARM_DESC(hash_pri, "Priority for hash algos"); + +static int aead_pri = 150; +module_param(aead_pri, int, 0644); +MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos"); + +#define MAX_SPUS 16 + +/* A type 3 BCM header, expected to precede the SPU header for SPU-M. + * Bits 3 and 4 in the first byte encode the channel number (the dma ringset). + * 0x60 - ring 0 + * 0x68 - ring 1 + * 0x70 - ring 2 + * 0x78 - ring 3 + */ +char BCMHEADER[] = { 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28 }; +/* + * Some SPU hw does not use BCM header on SPU messages. So BCM_HDR_LEN + * is set dynamically after reading SPU type from device tree. + */ +#define BCM_HDR_LEN iproc_priv.bcm_hdr_len + +/* min and max time to sleep before retrying when mbox queue is full. usec */ +#define MBOX_SLEEP_MIN 800 +#define MBOX_SLEEP_MAX 1000 + +/** + * select_channel() - Select a SPU channel to handle a crypto request. Selects + * channel in round robin order. + * + * Return: channel index + */ +static u8 select_channel(void) +{ + u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan); + + return chan_idx % iproc_priv.spu.num_spu; +} + +/** + * spu_ablkcipher_rx_sg_create() - Build up the scatterlist of buffers used to + * receive a SPU response message for an ablkcipher request. Includes buffers to + * catch SPU message headers and the response data. + * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @chunksize: Number of bytes of response data expected + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Returns: + * 0 if successful + * < 0 if an error + */ +static int +spu_ablkcipher_rx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 rx_frag_num, + unsigned int chunksize, u32 stat_pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of response data expected */ + + mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.dst) + return -ENOMEM; + + sg = mssg->spu.dst; + sg_init_table(sg, rx_frag_num); + /* Space for SPU message header */ + sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len); + + /* If XTS tweak in payload, add buffer to receive encrypted tweak */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + sg_set_buf(sg++, rctx->msg_buf.c.supdt_tweak, + SPU_XTS_TWEAK_SIZE); + + /* Copy in each dst sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->dst_sg, &rctx->dst_skip, + rctx->dst_nents, chunksize); + if (datalen < chunksize) { + pr_err("%s(): failed to copy dst sg to mbox msg. 
chunksize %u, datalen %u", + __func__, chunksize, datalen); + return -EFAULT; + } + + if (ctx->cipher.alg == CIPHER_ALG_RC4) + /* Add buffer to catch 260-byte SUPDT field for RC4 */ + sg_set_buf(sg++, rctx->msg_buf.c.supdt_tweak, SPU_SUPDT_LEN); + + if (stat_pad_len) + sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len); + + memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN); + sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len()); + + return 0; +} + +/** + * spu_ablkcipher_tx_sg_create() - Build up the scatterlist of buffers used to + * send a SPU request message for an ablkcipher request. Includes SPU message + * headers and the request data. + * @mssg: mailbox message containing the transmit sg + * @rctx: crypto request context + * @tx_frag_num: number of scatterlist elements required to construct the + * SPU request message + * @chunksize: Number of bytes of request data + * @pad_len: Number of pad bytes + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Returns: + * 0 if successful + * < 0 if an error + */ +static int +spu_ablkcipher_tx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 tx_frag_num, unsigned int chunksize, u32 pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of response data expected */ + u32 stat_len; + + mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (unlikely(!mssg->spu.src)) + return -ENOMEM; + + sg = mssg->spu.src; + sg_init_table(sg, tx_frag_num); + + sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr, + BCM_HDR_LEN + ctx->spu_req_hdr_len); + + /* if XTS tweak in payload, copy from IV (where crypto API puts it) */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + sg_set_buf(sg++, rctx->msg_buf.iv_ctr, SPU_XTS_TWEAK_SIZE); + + /* Copy in each src sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip, + rctx->src_nents, chunksize); + if (unlikely(datalen < chunksize)) { + pr_err("%s(): failed to copy src sg to mbox msg", + __func__); + return -EFAULT; + } + + if (pad_len) + sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len); + + stat_len = spu->spu_tx_status_len(); + if (stat_len) { + memset(rctx->msg_buf.tx_stat, 0, stat_len); + sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len); + } + return 0; +} + +/** + * handle_ablkcipher_req() - Submit as much of a block cipher request as fits in + * a single SPU request message, starting at the current position in the request + * data. + * @rctx: Crypto request context + * + * This may be called on the crypto API thread, or, when a request is so large + * it must be broken into multiple SPU messages, on the thread used to invoke + * the response callback. When requests are broken into multiple SPU + * messages, we assume subsequent messages depend on previous results, and + * thus always wait for previous results before submitting the next message. + * Because requests are submitted in lock step like this, there is no need + * to synchronize access to request data structures. 
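+ * Requests larger than the SPU's maximum payload are split into
+ * max_payload sized chunks; for CBC and CTR the IV or counter used for
+ * each chunk is derived from the previous chunk, which is another
+ * reason the chunks must be serialized.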
+ * + * Return: -EINPROGRESS: request has been accepted and result will be returned + * asynchronously + * Any other value indicates an error + */ +static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ablkcipher_request *req = + container_of(areq, struct ablkcipher_request, base); + struct iproc_ctx_s *ctx = rctx->ctx; + struct spu_cipher_parms cipher_parms; + int err = 0; + unsigned int chunksize = 0; /* Num bytes of request to submit */ + int remaining = 0; /* Bytes of request still to process */ + int chunk_start; /* Beginning of data for current SPU msg */ + + /* IV or ctr value to use in this SPU msg */ + u8 local_iv_ctr[MAX_IV_SIZE]; + u32 stat_pad_len; /* num bytes to align status field */ + u32 pad_len; /* total length of all padding */ + bool update_key = false; + struct brcm_message *mssg; /* mailbox message */ + int retry_cnt = 0; + + /* number of entries in src and dst sg in mailbox message. */ + u8 rx_frag_num = 2; /* response header and STATUS */ + u8 tx_frag_num = 1; /* request header */ + + flow_log("%s\n", __func__); + + cipher_parms.alg = ctx->cipher.alg; + cipher_parms.mode = ctx->cipher.mode; + cipher_parms.type = ctx->cipher_type; + cipher_parms.key_len = ctx->enckeylen; + cipher_parms.key_buf = ctx->enckey; + cipher_parms.iv_buf = local_iv_ctr; + cipher_parms.iv_len = rctx->iv_ctr_len; + + mssg = &rctx->mb_mssg; + chunk_start = rctx->src_sent; + remaining = rctx->total_todo - chunk_start; + + /* determine the chunk we are breaking off and update the indexes */ + if ((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + (remaining > ctx->max_payload)) + chunksize = ctx->max_payload; + else + chunksize = remaining; + + rctx->src_sent += chunksize; + rctx->total_sent = rctx->src_sent; + + /* Count number of sg entries to be included in this request */ + rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, chunksize); + rctx->dst_nents = spu_sg_count(rctx->dst_sg, rctx->dst_skip, chunksize); + + if ((ctx->cipher.mode == CIPHER_MODE_CBC) && + rctx->is_encrypt && chunk_start) + /* + * Encrypting non-first first chunk. Copy last block of + * previous result to IV for this chunk. + */ + sg_copy_part_to_buf(req->dst, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len, + chunk_start - rctx->iv_ctr_len); + + if (rctx->iv_ctr_len) { + /* get our local copy of the iv */ + __builtin_memcpy(local_iv_ctr, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len); + + /* generate the next IV if possible */ + if ((ctx->cipher.mode == CIPHER_MODE_CBC) && + !rctx->is_encrypt) { + /* + * CBC Decrypt: next IV is the last ciphertext block in + * this chunk + */ + sg_copy_part_to_buf(req->src, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len, + rctx->src_sent - rctx->iv_ctr_len); + } else if (ctx->cipher.mode == CIPHER_MODE_CTR) { + /* + * The SPU hardware increments the counter once for + * each AES block of 16 bytes. So update the counter + * for the next chunk, if there is one. Note that for + * this chunk, the counter has already been copied to + * local_iv_ctr. We can assume a block size of 16, + * because we only support CTR mode for AES, not for + * any other cipher alg. + */ + add_to_ctr(rctx->msg_buf.iv_ctr, chunksize >> 4); + } + } + + if (ctx->cipher.alg == CIPHER_ALG_RC4) { + rx_frag_num++; + if (chunk_start) { + /* + * for non-first RC4 chunks, use SUPDT from previous + * response as key for this chunk. 
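+ * The SUPDT field returned with each response carries the updated RC4
+ * state, so the keystream simply continues across chunks.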
+ */ + cipher_parms.key_buf = rctx->msg_buf.c.supdt_tweak; + update_key = true; + cipher_parms.type = CIPHER_TYPE_UPDT; + } else if (!rctx->is_encrypt) { + /* + * First RC4 chunk. For decrypt, key in pre-built msg + * header may have been changed if encrypt required + * multiple chunks. So revert the key to the + * ctx->enckey value. + */ + update_key = true; + cipher_parms.type = CIPHER_TYPE_INIT; + } + } + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log("max_payload infinite\n"); + else + flow_log("max_payload %u\n", ctx->max_payload); + + flow_log("sent:%u start:%u remains:%u size:%u\n", + rctx->src_sent, chunk_start, remaining, chunksize); + + /* Copy SPU header template created at setkey time */ + memcpy(rctx->msg_buf.bcm_spu_req_hdr, ctx->bcm_spu_req_hdr, + sizeof(rctx->msg_buf.bcm_spu_req_hdr)); + + /* + * Pass SUPDT field as key. Key field in finish() call is only used + * when update_key has been set above for RC4. Will be ignored in + * all other cases. + */ + spu->spu_cipher_req_finish(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + ctx->spu_req_hdr_len, !(rctx->is_encrypt), + &cipher_parms, update_key, chunksize); + + atomic64_add(chunksize, &iproc_priv.bytes_out); + + stat_pad_len = spu->spu_wordalign_padlen(chunksize); + if (stat_pad_len) + rx_frag_num++; + pad_len = stat_pad_len; + if (pad_len) { + tx_frag_num++; + spu->spu_request_pad(rctx->msg_buf.spu_req_pad, 0, + 0, ctx->auth.alg, ctx->auth.mode, + rctx->total_sent, stat_pad_len); + } + + spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + ctx->spu_req_hdr_len); + packet_log("payload:\n"); + dump_sg(rctx->src_sg, rctx->src_skip, chunksize); + packet_dump(" pad: ", rctx->msg_buf.spu_req_pad, pad_len); + + /* + * Build mailbox message containing SPU request msg and rx buffers + * to catch response message + */ + memset(mssg, 0, sizeof(*mssg)); + mssg->type = BRCM_MESSAGE_SPU; + mssg->ctx = rctx; /* Will be returned in response */ + + /* Create rx scatterlist to catch result */ + rx_frag_num += rctx->dst_nents; + + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + rx_frag_num++; /* extra sg to insert tweak */ + + err = spu_ablkcipher_rx_sg_create(mssg, rctx, rx_frag_num, chunksize, + stat_pad_len); + if (err) + return err; + + /* Create tx scatterlist containing SPU request message */ + tx_frag_num += rctx->src_nents; + if (spu->spu_tx_status_len()) + tx_frag_num++; + + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + tx_frag_num++; /* extra sg to insert tweak */ + + err = spu_ablkcipher_tx_sg_create(mssg, rctx, tx_frag_num, chunksize, + pad_len); + if (err) + return err; + + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg); + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) { + while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) { + /* + * Mailbox queue is full. Since MAY_SLEEP is set, assume + * not in atomic context and we can wait and try again. + */ + retry_cnt++; + usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX); + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], + mssg); + atomic_inc(&iproc_priv.mb_no_spc); + } + } + if (unlikely(err < 0)) { + atomic_inc(&iproc_priv.mb_send_fail); + return err; + } + + return -EINPROGRESS; +} + +/** + * handle_ablkcipher_resp() - Process a block cipher SPU response. Updates the + * total received count for the request and updates global stats. 
+ * @rctx: Crypto request context + */ +static void handle_ablkcipher_resp(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; +#ifdef DEBUG + struct crypto_async_request *areq = rctx->parent; + struct ablkcipher_request *req = ablkcipher_request_cast(areq); +#endif + struct iproc_ctx_s *ctx = rctx->ctx; + u32 payload_len; + + /* See how much data was returned */ + payload_len = spu->spu_payload_length(rctx->msg_buf.spu_resp_hdr); + + /* + * In XTS mode, the first SPU_XTS_TWEAK_SIZE bytes may be the + * encrypted tweak ("i") value; we don't count those. + */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload() && + (payload_len >= SPU_XTS_TWEAK_SIZE)) + payload_len -= SPU_XTS_TWEAK_SIZE; + + atomic64_add(payload_len, &iproc_priv.bytes_in); + + flow_log("%s() offset: %u, bd_len: %u BD:\n", + __func__, rctx->total_received, payload_len); + + dump_sg(req->dst, rctx->total_received, payload_len); + if (ctx->cipher.alg == CIPHER_ALG_RC4) + packet_dump(" supdt ", rctx->msg_buf.c.supdt_tweak, + SPU_SUPDT_LEN); + + rctx->total_received += payload_len; + if (rctx->total_received == rctx->total_todo) { + atomic_inc(&iproc_priv.op_counts[SPU_OP_CIPHER]); + atomic_inc( + &iproc_priv.cipher_cnt[ctx->cipher.alg][ctx->cipher.mode]); + } +} + +/** + * spu_ahash_rx_sg_create() - Build up the scatterlist of buffers used to + * receive a SPU response message for an ahash request. + * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @digestsize: length of hash digest, in bytes + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Return: + * 0 if successful + * < 0 if an error + */ +static int +spu_ahash_rx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 rx_frag_num, unsigned int digestsize, + u32 stat_pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + + mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.dst) + return -ENOMEM; + + sg = mssg->spu.dst; + sg_init_table(sg, rx_frag_num); + /* Space for SPU message header */ + sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len); + + /* Space for digest */ + sg_set_buf(sg++, rctx->msg_buf.digest, digestsize); + + if (stat_pad_len) + sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len); + + memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN); + sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len()); + return 0; +} + +/** + * spu_ahash_tx_sg_create() - Build up the scatterlist of buffers used to send + * a SPU request message for an ahash request. Includes SPU message headers and + * the request data. 
+ * @mssg: mailbox message containing the transmit sg + * @rctx: crypto request context + * @tx_frag_num: number of scatterlist elements required to construct the + * SPU request message + * @spu_hdr_len: length in bytes of SPU message header + * @hash_carry_len: Number of bytes of data carried over from previous req + * @new_data_len: Number of bytes of new request data + * @pad_len: Number of pad bytes + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Return: + * 0 if successful + * < 0 if an error + */ +static int +spu_ahash_tx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 tx_frag_num, + u32 spu_hdr_len, + unsigned int hash_carry_len, + unsigned int new_data_len, u32 pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + u32 datalen; /* Number of bytes of response data expected */ + u32 stat_len; + + mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.src) + return -ENOMEM; + + sg = mssg->spu.src; + sg_init_table(sg, tx_frag_num); + + sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr, + BCM_HDR_LEN + spu_hdr_len); + + if (hash_carry_len) + sg_set_buf(sg++, rctx->hash_carry, hash_carry_len); + + if (new_data_len) { + /* Copy in each src sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip, + rctx->src_nents, new_data_len); + if (datalen < new_data_len) { + pr_err("%s(): failed to copy src sg to mbox msg", + __func__); + return -EFAULT; + } + } + + if (pad_len) + sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len); + + stat_len = spu->spu_tx_status_len(); + if (stat_len) { + memset(rctx->msg_buf.tx_stat, 0, stat_len); + sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len); + } + + return 0; +} + +/** + * handle_ahash_req() - Process an asynchronous hash request from the crypto + * API. + * @rctx: Crypto request context + * + * Builds a SPU request message embedded in a mailbox message and submits the + * mailbox message on a selected mailbox channel. The SPU request message is + * constructed as a scatterlist, including entries from the crypto API's + * src scatterlist to avoid copying the data to be hashed. This function is + * called either on the thread from the crypto API, or, in the case that the + * crypto API request is too large to fit in a single SPU request message, + * on the thread that invokes the receive callback with a response message. + * Because some operations require the response from one chunk before the next + * chunk can be submitted, we always wait for the response for the previous + * chunk before submitting the next chunk. Because requests are submitted in + * lock step like this, there is no need to synchronize access to request data + * structures. 
+ * + * Return: + * -EINPROGRESS: request has been submitted to SPU and response will be + * returned asynchronously + * -EAGAIN: non-final request included a small amount of data, which for + * efficiency we did not submit to the SPU, but instead stored + * to be submitted to the SPU with the next part of the request + * other: an error code + */ +static int handle_ahash_req(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct crypto_tfm *tfm = crypto_ahash_tfm(ahash); + unsigned int blocksize = crypto_tfm_alg_blocksize(tfm); + struct iproc_ctx_s *ctx = rctx->ctx; + + /* number of bytes still to be hashed in this req */ + unsigned int nbytes_to_hash = 0; + int err = 0; + unsigned int chunksize = 0; /* length of hash carry + new data */ + /* + * length of new data, not from hash carry, to be submitted in + * this hw request + */ + unsigned int new_data_len; + + unsigned int chunk_start = 0; + u32 db_size; /* Length of data field, incl gcm and hash padding */ + int pad_len = 0; /* total pad len, including gcm, hash, stat padding */ + u32 data_pad_len = 0; /* length of GCM/CCM padding */ + u32 stat_pad_len = 0; /* length of padding to align STATUS word */ + struct brcm_message *mssg; /* mailbox message */ + struct spu_request_opts req_opts; + struct spu_cipher_parms cipher_parms; + struct spu_hash_parms hash_parms; + struct spu_aead_parms aead_parms; + unsigned int local_nbuf; + u32 spu_hdr_len; + unsigned int digestsize; + u16 rem = 0; + int retry_cnt = 0; + + /* + * number of entries in src and dst sg. Always includes SPU msg header. + * rx always includes a buffer to catch digest and STATUS. + */ + u8 rx_frag_num = 3; + u8 tx_frag_num = 1; + + flow_log("total_todo %u, total_sent %u\n", + rctx->total_todo, rctx->total_sent); + + memset(&req_opts, 0, sizeof(req_opts)); + memset(&cipher_parms, 0, sizeof(cipher_parms)); + memset(&hash_parms, 0, sizeof(hash_parms)); + memset(&aead_parms, 0, sizeof(aead_parms)); + + req_opts.bd_suppress = true; + hash_parms.alg = ctx->auth.alg; + hash_parms.mode = ctx->auth.mode; + hash_parms.type = HASH_TYPE_NONE; + hash_parms.key_buf = (u8 *)ctx->authkey; + hash_parms.key_len = ctx->authkeylen; + + /* + * For hash algorithms below assignment looks bit odd but + * it's needed for AES-XCBC and AES-CMAC hash algorithms + * to differentiate between 128, 192, 256 bit key values. + * Based on the key values, hash algorithm is selected. + * For example for 128 bit key, hash algorithm is AES-128. + */ + cipher_parms.type = ctx->cipher_type; + + mssg = &rctx->mb_mssg; + chunk_start = rctx->src_sent; + + /* + * Compute the amount remaining to hash. This may include data + * carried over from previous requests. + */ + nbytes_to_hash = rctx->total_todo - rctx->total_sent; + chunksize = nbytes_to_hash; + if ((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + (chunksize > ctx->max_payload)) + chunksize = ctx->max_payload; + + /* + * If this is not a final request and the request data is not a multiple + * of a full block, then simply park the extra data and prefix it to the + * data for the next request. 
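+ * For example, with a 64-byte block size a non-final update() of
+ * 40 bytes has no full block to submit: the 40 bytes are copied into
+ * hash_carry and -EAGAIN is returned so they can be prefixed to the
+ * next request. A non-final update of 100 bytes is rounded down and
+ * submits 64 bytes in this SPU message.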
+ */ + if (!rctx->is_final) { + u8 *dest = rctx->hash_carry + rctx->hash_carry_len; + u16 new_len; /* len of data to add to hash carry */ + + rem = chunksize % blocksize; /* remainder */ + if (rem) { + /* chunksize not a multiple of blocksize */ + chunksize -= rem; + if (chunksize == 0) { + /* Don't have a full block to submit to hw */ + new_len = rem - rctx->hash_carry_len; + sg_copy_part_to_buf(req->src, dest, new_len, + rctx->src_sent); + rctx->hash_carry_len = rem; + flow_log("Exiting with hash carry len: %u\n", + rctx->hash_carry_len); + packet_dump(" buf: ", + rctx->hash_carry, + rctx->hash_carry_len); + return -EAGAIN; + } + } + } + + /* if we have hash carry, then prefix it to the data in this request */ + local_nbuf = rctx->hash_carry_len; + rctx->hash_carry_len = 0; + if (local_nbuf) + tx_frag_num++; + new_data_len = chunksize - local_nbuf; + + /* Count number of sg entries to be used in this request */ + rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, + new_data_len); + + /* AES hashing keeps key size in type field, so need to copy it here */ + if (hash_parms.alg == HASH_ALG_AES) + hash_parms.type = cipher_parms.type; + else + hash_parms.type = spu->spu_hash_type(rctx->total_sent); + + digestsize = spu->spu_digest_size(ctx->digestsize, ctx->auth.alg, + hash_parms.type); + hash_parms.digestsize = digestsize; + + /* update the indexes */ + rctx->total_sent += chunksize; + /* if you sent a prebuf then that wasn't from this req->src */ + rctx->src_sent += new_data_len; + + if ((rctx->total_sent == rctx->total_todo) && rctx->is_final) + hash_parms.pad_len = spu->spu_hash_pad_len(hash_parms.alg, + hash_parms.mode, + chunksize, + blocksize); + + /* + * If a non-first chunk, then include the digest returned from the + * previous chunk so that hw can add to it (except for AES types). + */ + if ((hash_parms.type == HASH_TYPE_UPDT) && + (hash_parms.alg != HASH_ALG_AES)) { + hash_parms.key_buf = rctx->incr_hash; + hash_parms.key_len = digestsize; + } + + atomic64_add(chunksize, &iproc_priv.bytes_out); + + flow_log("%s() final: %u nbuf: %u ", + __func__, rctx->is_final, local_nbuf); + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log("max_payload infinite\n"); + else + flow_log("max_payload %u\n", ctx->max_payload); + + flow_log("chunk_start: %u chunk_size: %u\n", chunk_start, chunksize); + + /* Prepend SPU header with type 3 BCM header */ + memcpy(rctx->msg_buf.bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN); + + hash_parms.prebuf_len = local_nbuf; + spu_hdr_len = spu->spu_create_request(rctx->msg_buf.bcm_spu_req_hdr + + BCM_HDR_LEN, + &req_opts, &cipher_parms, + &hash_parms, &aead_parms, + new_data_len); + + if (spu_hdr_len == 0) { + pr_err("Failed to create SPU request header\n"); + return -EFAULT; + } + + /* + * Determine total length of padding required. Put all padding in one + * buffer. 
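+ * pad_len combines the hash algorithm's own length padding (final
+ * chunk only), any GCM/CCM block padding, and the padding needed to
+ * word-align the STATUS field.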
+ */
+ data_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, chunksize);
+ db_size = spu_real_db_size(0, 0, local_nbuf, new_data_len,
+ 0, 0, hash_parms.pad_len);
+ if (spu->spu_tx_status_len())
+ stat_pad_len = spu->spu_wordalign_padlen(db_size);
+ if (stat_pad_len)
+ rx_frag_num++;
+ pad_len = hash_parms.pad_len + data_pad_len + stat_pad_len;
+ if (pad_len) {
+ tx_frag_num++;
+ spu->spu_request_pad(rctx->msg_buf.spu_req_pad, data_pad_len,
+ hash_parms.pad_len, ctx->auth.alg,
+ ctx->auth.mode, rctx->total_sent,
+ stat_pad_len);
+ }
+
+ spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN,
+ spu_hdr_len);
+ packet_dump(" prebuf: ", rctx->hash_carry, local_nbuf);
+ flow_log("Data:\n");
+ dump_sg(rctx->src_sg, rctx->src_skip, new_data_len);
+ packet_dump(" pad: ", rctx->msg_buf.spu_req_pad, pad_len);
+
+ /*
+ * Build mailbox message containing SPU request msg and rx buffers
+ * to catch response message
+ */
+ memset(mssg, 0, sizeof(*mssg));
+ mssg->type = BRCM_MESSAGE_SPU;
+ mssg->ctx = rctx; /* Will be returned in response */
+
+ /* Create rx scatterlist to catch result */
+ err = spu_ahash_rx_sg_create(mssg, rctx, rx_frag_num, digestsize,
+ stat_pad_len);
+ if (err)
+ return err;
+
+ /* Create tx scatterlist containing SPU request message */
+ tx_frag_num += rctx->src_nents;
+ if (spu->spu_tx_status_len())
+ tx_frag_num++;
+ err = spu_ahash_tx_sg_create(mssg, rctx, tx_frag_num, spu_hdr_len,
+ local_nbuf, new_data_len, pad_len);
+ if (err)
+ return err;
+
+ err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg);
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) {
+ while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) {
+ /*
+ * Mailbox queue is full. Since MAY_SLEEP is set, assume
+ * not in atomic context and we can wait and try again.
+ */
+ retry_cnt++;
+ usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX);
+ err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx],
+ mssg);
+ atomic_inc(&iproc_priv.mb_no_spc);
+ }
+ }
+ if (err < 0) {
+ atomic_inc(&iproc_priv.mb_send_fail);
+ return err;
+ }
+ return -EINPROGRESS;
+}
+
+/**
+ * spu_hmac_outer_hash() - Request synchronous software computation of the
+ * outer hash for an HMAC request.
+ * @req: The HMAC request from the crypto API + * @ctx: The session context + * + * Return: 0 if synchronous hash operation successful + * -EINVAL if the hash algo is unrecognized + * any other value indicates an error + */ +static int spu_hmac_outer_hash(struct ahash_request *req, + struct iproc_ctx_s *ctx) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); + int rc; + + switch (ctx->auth.alg) { + case HASH_ALG_MD5: + rc = do_shash("md5", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA1: + rc = do_shash("sha1", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA224: + rc = do_shash("sha224", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA256: + rc = do_shash("sha256", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA384: + rc = do_shash("sha384", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA512: + rc = do_shash("sha512", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + default: + pr_err("%s() Error : unknown hmac type\n", __func__); + rc = -EINVAL; + } + return rc; +} + +/** + * ahash_req_done() - Process a hash result from the SPU hardware. + * @rctx: Crypto request context + * + * Return: 0 if successful + * < 0 if an error + */ +static int ahash_req_done(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct iproc_ctx_s *ctx = rctx->ctx; + int err; + + memcpy(req->result, rctx->msg_buf.digest, ctx->digestsize); + + if (spu->spu_type == SPU_TYPE_SPUM) { + /* byte swap the output from the UPDT function to network byte + * order + */ + if (ctx->auth.alg == HASH_ALG_MD5) { + __swab32s((u32 *)req->result); + __swab32s(((u32 *)req->result) + 1); + __swab32s(((u32 *)req->result) + 2); + __swab32s(((u32 *)req->result) + 3); + __swab32s(((u32 *)req->result) + 4); + } + } + + flow_dump(" digest ", req->result, ctx->digestsize); + + /* if this an HMAC then do the outer hash */ + if (rctx->is_sw_hmac) { + err = spu_hmac_outer_hash(req, ctx); + if (err < 0) + return err; + flow_dump(" hmac: ", req->result, ctx->digestsize); + } + + if (rctx->is_sw_hmac || ctx->auth.mode == HASH_MODE_HMAC) { + atomic_inc(&iproc_priv.op_counts[SPU_OP_HMAC]); + atomic_inc(&iproc_priv.hmac_cnt[ctx->auth.alg]); + } else { + atomic_inc(&iproc_priv.op_counts[SPU_OP_HASH]); + atomic_inc(&iproc_priv.hash_cnt[ctx->auth.alg]); + } + + return 0; +} + +/** + * handle_ahash_resp() - Process a SPU response message for a hash request. + * Checks if the entire crypto API request has been processed, and if so, + * invokes post processing on the result. + * @rctx: Crypto request context + */ +static void handle_ahash_resp(struct iproc_reqctx_s *rctx) +{ + struct iproc_ctx_s *ctx = rctx->ctx; +#ifdef DEBUG + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); +#endif + /* + * Save hash to use as input to next op if incremental. 
Might be copying + * too much, but that's easier than figuring out actual digest size here + */ + memcpy(rctx->incr_hash, rctx->msg_buf.digest, MAX_DIGEST_SIZE); + + flow_log("%s() blocksize:%u digestsize:%u\n", + __func__, blocksize, ctx->digestsize); + + atomic64_add(ctx->digestsize, &iproc_priv.bytes_in); + + if (rctx->is_final && (rctx->total_sent == rctx->total_todo)) + ahash_req_done(rctx); +} + +/** + * spu_aead_rx_sg_create() - Build up the scatterlist of buffers used to receive + * a SPU response message for an AEAD request. Includes buffers to catch SPU + * message headers and the response data. + * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @assoc_len: Length of associated data included in the crypto request + * @ret_iv_len: Length of IV returned in response + * @resp_len: Number of bytes of response data expected to be written to + * dst buffer from crypto API + * @digestsize: Length of hash digest, in bytes + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Returns: + * 0 if successful + * < 0 if an error + */ +static int spu_aead_rx_sg_create(struct brcm_message *mssg, + struct aead_request *req, + struct iproc_reqctx_s *rctx, + u8 rx_frag_num, + unsigned int assoc_len, + u32 ret_iv_len, unsigned int resp_len, + unsigned int digestsize, u32 stat_pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of response data expected */ + u32 assoc_buf_len; + u8 data_padlen = 0; + + if (ctx->is_rfc4543) { + /* RFC4543: only pad after data, not after AAD */ + data_padlen = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, + assoc_len + resp_len); + assoc_buf_len = assoc_len; + } else { + data_padlen = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, + resp_len); + assoc_buf_len = spu->spu_assoc_resp_len(ctx->cipher.mode, + assoc_len, ret_iv_len, + rctx->is_encrypt); + } + + if (ctx->cipher.mode == CIPHER_MODE_CCM) + /* ICV (after data) must be in the next 32-bit word for CCM */ + data_padlen += spu->spu_wordalign_padlen(assoc_buf_len + + resp_len + + data_padlen); + + if (data_padlen) + /* have to catch gcm pad in separate buffer */ + rx_frag_num++; + + mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.dst) + return -ENOMEM; + + sg = mssg->spu.dst; + sg_init_table(sg, rx_frag_num); + + /* Space for SPU message header */ + sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len); + + if (assoc_buf_len) { + /* + * Don't write directly to req->dst, because SPU may pad the + * assoc data in the response + */ + memset(rctx->msg_buf.a.resp_aad, 0, assoc_buf_len); + sg_set_buf(sg++, rctx->msg_buf.a.resp_aad, assoc_buf_len); + } + + if (resp_len) { + /* + * Copy in each dst sg entry from request, up to chunksize. + * dst sg catches just the data. digest caught in separate buf. + */ + datalen = spu_msg_sg_add(&sg, &rctx->dst_sg, &rctx->dst_skip, + rctx->dst_nents, resp_len); + if (datalen < (resp_len)) { + pr_err("%s(): failed to copy dst sg to mbox msg. 
expected len %u, datalen %u", + __func__, resp_len, datalen); + return -EFAULT; + } + } + + /* If GCM/CCM data is padded, catch padding in separate buffer */ + if (data_padlen) { + memset(rctx->msg_buf.a.gcmpad, 0, data_padlen); + sg_set_buf(sg++, rctx->msg_buf.a.gcmpad, data_padlen); + } + + /* Always catch ICV in separate buffer */ + sg_set_buf(sg++, rctx->msg_buf.digest, digestsize); + + flow_log("stat_pad_len %u\n", stat_pad_len); + if (stat_pad_len) { + memset(rctx->msg_buf.rx_stat_pad, 0, stat_pad_len); + sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len); + } + + memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN); + sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len()); + + return 0; +} + +/** + * spu_aead_tx_sg_create() - Build up the scatterlist of buffers used to send a + * SPU request message for an AEAD request. Includes SPU message headers and the + * request data. + * @mssg: mailbox message containing the transmit sg + * @rctx: crypto request context + * @tx_frag_num: number of scatterlist elements required to construct the + * SPU request message + * @spu_hdr_len: length of SPU message header in bytes + * @assoc: crypto API associated data scatterlist + * @assoc_len: length of associated data + * @assoc_nents: number of scatterlist entries containing assoc data + * @aead_iv_len: length of AEAD IV, if included + * @chunksize: Number of bytes of request data + * @aad_pad_len: Number of bytes of padding at end of AAD. For GCM/CCM. + * @pad_len: Number of pad bytes + * @incl_icv: If true, write separate ICV buffer after data and + * any padding + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. 
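+ *
+ * Fragments are added in the order the SPU expects them, each only when
+ * present: BCM/SPU header, associated data, IV, AAD padding, data, padding,
+ * ICV, and finally the transmit STATUS field.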
+ * + * Return: + * 0 if successful + * < 0 if an error + */ +static int spu_aead_tx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 tx_frag_num, + u32 spu_hdr_len, + struct scatterlist *assoc, + unsigned int assoc_len, + int assoc_nents, + unsigned int aead_iv_len, + unsigned int chunksize, + u32 aad_pad_len, u32 pad_len, bool incl_icv) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct scatterlist *assoc_sg = assoc; + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of data to write */ + u32 written; /* Number of bytes of data written */ + u32 assoc_offset = 0; + u32 stat_len; + + mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.src) + return -ENOMEM; + + sg = mssg->spu.src; + sg_init_table(sg, tx_frag_num); + + sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr, + BCM_HDR_LEN + spu_hdr_len); + + if (assoc_len) { + /* Copy in each associated data sg entry from request */ + written = spu_msg_sg_add(&sg, &assoc_sg, &assoc_offset, + assoc_nents, assoc_len); + if (written < assoc_len) { + pr_err("%s(): failed to copy assoc sg to mbox msg", + __func__); + return -EFAULT; + } + } + + if (aead_iv_len) + sg_set_buf(sg++, rctx->msg_buf.iv_ctr, aead_iv_len); + + if (aad_pad_len) { + memset(rctx->msg_buf.a.req_aad_pad, 0, aad_pad_len); + sg_set_buf(sg++, rctx->msg_buf.a.req_aad_pad, aad_pad_len); + } + + datalen = chunksize; + if ((chunksize > ctx->digestsize) && incl_icv) + datalen -= ctx->digestsize; + if (datalen) { + /* For aead, a single msg should consume the entire src sg */ + written = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip, + rctx->src_nents, datalen); + if (written < datalen) { + pr_err("%s(): failed to copy src sg to mbox msg", + __func__); + return -EFAULT; + } + } + + if (pad_len) { + memset(rctx->msg_buf.spu_req_pad, 0, pad_len); + sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len); + } + + if (incl_icv) + sg_set_buf(sg++, rctx->msg_buf.digest, ctx->digestsize); + + stat_len = spu->spu_tx_status_len(); + if (stat_len) { + memset(rctx->msg_buf.tx_stat, 0, stat_len); + sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len); + } + return 0; +} + +/** + * handle_aead_req() - Submit a SPU request message for the next chunk of the + * current AEAD request. + * @rctx: Crypto request context + * + * Unlike other operation types, we assume the length of the request fits in + * a single SPU request message. aead_enqueue() makes sure this is true. + * Comments for other op types regarding threads applies here as well. + * + * Unlike incremental hash ops, where the spu returns the entire hash for + * truncated algs like sha-224, the SPU returns just the truncated hash in + * response to aead requests. So digestsize is always ctx->digestsize here. 
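+ *
+ * For ESP (non-RFC4543) requests, the assoc size passed to the SPU excludes
+ * the 8-byte IV that the crypto API counts in req->assoclen.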
+ * + * Return: -EINPROGRESS: crypto request has been accepted and result will be + * returned asynchronously + * Any other value indicates an error + */ +static int handle_aead_req(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct aead_request *req = container_of(areq, + struct aead_request, base); + struct iproc_ctx_s *ctx = rctx->ctx; + int err; + unsigned int chunksize; + unsigned int resp_len; + u32 spu_hdr_len; + u32 db_size; + u32 stat_pad_len; + u32 pad_len; + struct brcm_message *mssg; /* mailbox message */ + struct spu_request_opts req_opts; + struct spu_cipher_parms cipher_parms; + struct spu_hash_parms hash_parms; + struct spu_aead_parms aead_parms; + int assoc_nents = 0; + bool incl_icv = false; + unsigned int digestsize = ctx->digestsize; + int retry_cnt = 0; + + /* number of entries in src and dst sg. Always includes SPU msg header. + */ + u8 rx_frag_num = 2; /* and STATUS */ + u8 tx_frag_num = 1; + + /* doing the whole thing at once */ + chunksize = rctx->total_todo; + + flow_log("%s: chunksize %u\n", __func__, chunksize); + + memset(&req_opts, 0, sizeof(req_opts)); + memset(&hash_parms, 0, sizeof(hash_parms)); + memset(&aead_parms, 0, sizeof(aead_parms)); + + req_opts.is_inbound = !(rctx->is_encrypt); + req_opts.auth_first = ctx->auth_first; + req_opts.is_aead = true; + req_opts.is_esp = ctx->is_esp; + + cipher_parms.alg = ctx->cipher.alg; + cipher_parms.mode = ctx->cipher.mode; + cipher_parms.type = ctx->cipher_type; + cipher_parms.key_buf = ctx->enckey; + cipher_parms.key_len = ctx->enckeylen; + cipher_parms.iv_buf = rctx->msg_buf.iv_ctr; + cipher_parms.iv_len = rctx->iv_ctr_len; + + hash_parms.alg = ctx->auth.alg; + hash_parms.mode = ctx->auth.mode; + hash_parms.type = HASH_TYPE_NONE; + hash_parms.key_buf = (u8 *)ctx->authkey; + hash_parms.key_len = ctx->authkeylen; + hash_parms.digestsize = digestsize; + + if ((ctx->auth.alg == HASH_ALG_SHA224) && + (ctx->authkeylen < SHA224_DIGEST_SIZE)) + hash_parms.key_len = SHA224_DIGEST_SIZE; + + aead_parms.assoc_size = req->assoclen; + if (ctx->is_esp && !ctx->is_rfc4543) { + /* + * 8-byte IV is included assoc data in request. SPU2 + * expects AAD to include just SPI and seqno. So + * subtract off the IV len. + */ + aead_parms.assoc_size -= GCM_ESP_IV_SIZE; + + if (rctx->is_encrypt) { + aead_parms.return_iv = true; + aead_parms.ret_iv_len = GCM_ESP_IV_SIZE; + aead_parms.ret_iv_off = GCM_ESP_SALT_SIZE; + } + } else { + aead_parms.ret_iv_len = 0; + } + + /* + * Count number of sg entries from the crypto API request that are to + * be included in this mailbox message. For dst sg, don't count space + * for digest. Digest gets caught in a separate buffer and copied back + * to dst sg when processing response. 
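+ * (handle_aead_resp() copies the ICV from msg_buf.digest back to req->dst
+ * for encrypt requests.)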
+ */ + rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, chunksize); + rctx->dst_nents = spu_sg_count(rctx->dst_sg, rctx->dst_skip, chunksize); + if (aead_parms.assoc_size) + assoc_nents = spu_sg_count(rctx->assoc, 0, + aead_parms.assoc_size); + + mssg = &rctx->mb_mssg; + + rctx->total_sent = chunksize; + rctx->src_sent = chunksize; + if (spu->spu_assoc_resp_len(ctx->cipher.mode, + aead_parms.assoc_size, + aead_parms.ret_iv_len, + rctx->is_encrypt)) + rx_frag_num++; + + aead_parms.iv_len = spu->spu_aead_ivlen(ctx->cipher.mode, + rctx->iv_ctr_len); + + if (ctx->auth.alg == HASH_ALG_AES) + hash_parms.type = ctx->cipher_type; + + /* General case AAD padding (CCM and RFC4543 special cases below) */ + aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, + aead_parms.assoc_size); + + /* General case data padding (CCM decrypt special case below) */ + aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, + chunksize); + + if (ctx->cipher.mode == CIPHER_MODE_CCM) { + /* + * for CCM, AAD len + 2 (rather than AAD len) needs to be + * 128-bit aligned + */ + aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len( + ctx->cipher.mode, + aead_parms.assoc_size + 2); + + /* + * And when decrypting CCM, need to pad without including + * size of ICV which is tacked on to end of chunk + */ + if (!rctx->is_encrypt) + aead_parms.data_pad_len = + spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, + chunksize - digestsize); + + /* CCM also requires software to rewrite portions of IV: */ + spu->spu_ccm_update_iv(digestsize, &cipher_parms, req->assoclen, + chunksize, rctx->is_encrypt, + ctx->is_esp); + } + + if (ctx->is_rfc4543) { + /* + * RFC4543: data is included in AAD, so don't pad after AAD + * and pad data based on both AAD + data size + */ + aead_parms.aad_pad_len = 0; + if (!rctx->is_encrypt) + aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len( + ctx->cipher.mode, + aead_parms.assoc_size + chunksize - + digestsize); + else + aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len( + ctx->cipher.mode, + aead_parms.assoc_size + chunksize); + + req_opts.is_rfc4543 = true; + } + + if (spu_req_incl_icv(ctx->cipher.mode, rctx->is_encrypt)) { + incl_icv = true; + tx_frag_num++; + /* Copy ICV from end of src scatterlist to digest buf */ + sg_copy_part_to_buf(req->src, rctx->msg_buf.digest, digestsize, + req->assoclen + rctx->total_sent - + digestsize); + } + + atomic64_add(chunksize, &iproc_priv.bytes_out); + + flow_log("%s()-sent chunksize:%u\n", __func__, chunksize); + + /* Prepend SPU header with type 3 BCM header */ + memcpy(rctx->msg_buf.bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN); + + spu_hdr_len = spu->spu_create_request(rctx->msg_buf.bcm_spu_req_hdr + + BCM_HDR_LEN, &req_opts, + &cipher_parms, &hash_parms, + &aead_parms, chunksize); + + /* Determine total length of padding. Put all padding in one buffer. 
*/ + db_size = spu_real_db_size(aead_parms.assoc_size, aead_parms.iv_len, 0, + chunksize, aead_parms.aad_pad_len, + aead_parms.data_pad_len, 0); + + stat_pad_len = spu->spu_wordalign_padlen(db_size); + + if (stat_pad_len) + rx_frag_num++; + pad_len = aead_parms.data_pad_len + stat_pad_len; + if (pad_len) { + tx_frag_num++; + spu->spu_request_pad(rctx->msg_buf.spu_req_pad, + aead_parms.data_pad_len, 0, + ctx->auth.alg, ctx->auth.mode, + rctx->total_sent, stat_pad_len); + } + + spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + spu_hdr_len); + dump_sg(rctx->assoc, 0, aead_parms.assoc_size); + packet_dump(" aead iv: ", rctx->msg_buf.iv_ctr, aead_parms.iv_len); + packet_log("BD:\n"); + dump_sg(rctx->src_sg, rctx->src_skip, chunksize); + packet_dump(" pad: ", rctx->msg_buf.spu_req_pad, pad_len); + + /* + * Build mailbox message containing SPU request msg and rx buffers + * to catch response message + */ + memset(mssg, 0, sizeof(*mssg)); + mssg->type = BRCM_MESSAGE_SPU; + mssg->ctx = rctx; /* Will be returned in response */ + + /* Create rx scatterlist to catch result */ + rx_frag_num += rctx->dst_nents; + resp_len = chunksize; + + /* + * Always catch ICV in separate buffer. Have to for GCM/CCM because of + * padding. Have to for SHA-224 and other truncated SHAs because SPU + * sends entire digest back. + */ + rx_frag_num++; + + if (((ctx->cipher.mode == CIPHER_MODE_GCM) || + (ctx->cipher.mode == CIPHER_MODE_CCM)) && !rctx->is_encrypt) { + /* + * Input is ciphertxt plus ICV, but ICV not incl + * in output. + */ + resp_len -= ctx->digestsize; + if (resp_len == 0) + /* no rx frags to catch output data */ + rx_frag_num -= rctx->dst_nents; + } + + err = spu_aead_rx_sg_create(mssg, req, rctx, rx_frag_num, + aead_parms.assoc_size, + aead_parms.ret_iv_len, resp_len, digestsize, + stat_pad_len); + if (err) + return err; + + /* Create tx scatterlist containing SPU request message */ + tx_frag_num += rctx->src_nents; + tx_frag_num += assoc_nents; + if (aead_parms.aad_pad_len) + tx_frag_num++; + if (aead_parms.iv_len) + tx_frag_num++; + if (spu->spu_tx_status_len()) + tx_frag_num++; + err = spu_aead_tx_sg_create(mssg, rctx, tx_frag_num, spu_hdr_len, + rctx->assoc, aead_parms.assoc_size, + assoc_nents, aead_parms.iv_len, chunksize, + aead_parms.aad_pad_len, pad_len, incl_icv); + if (err) + return err; + + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg); + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) { + while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) { + /* + * Mailbox queue is full. Since MAY_SLEEP is set, assume + * not in atomic context and we can wait and try again. + */ + retry_cnt++; + usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX); + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], + mssg); + atomic_inc(&iproc_priv.mb_no_spc); + } + } + if (err < 0) { + atomic_inc(&iproc_priv.mb_send_fail); + return err; + } + + return -EINPROGRESS; +} + +/** + * handle_aead_resp() - Process a SPU response message for an AEAD request. 
+ * @rctx: Crypto request context + */ +static void handle_aead_resp(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct aead_request *req = container_of(areq, + struct aead_request, base); + struct iproc_ctx_s *ctx = rctx->ctx; + u32 payload_len; + unsigned int icv_offset; + u32 result_len; + + /* See how much data was returned */ + payload_len = spu->spu_payload_length(rctx->msg_buf.spu_resp_hdr); + flow_log("payload_len %u\n", payload_len); + + /* only count payload */ + atomic64_add(payload_len, &iproc_priv.bytes_in); + + if (req->assoclen) + packet_dump(" assoc_data ", rctx->msg_buf.a.resp_aad, + req->assoclen); + + /* + * Copy the ICV back to the destination + * buffer. In decrypt case, SPU gives us back the digest, but crypto + * API doesn't expect ICV in dst buffer. + */ + result_len = req->cryptlen; + if (rctx->is_encrypt) { + icv_offset = req->assoclen + rctx->total_sent; + packet_dump(" ICV: ", rctx->msg_buf.digest, ctx->digestsize); + flow_log("copying ICV to dst sg at offset %u\n", icv_offset); + sg_copy_part_from_buf(req->dst, rctx->msg_buf.digest, + ctx->digestsize, icv_offset); + result_len += ctx->digestsize; + } + + packet_log("response data: "); + dump_sg(req->dst, req->assoclen, result_len); + + atomic_inc(&iproc_priv.op_counts[SPU_OP_AEAD]); + if (ctx->cipher.alg == CIPHER_ALG_AES) { + if (ctx->cipher.mode == CIPHER_MODE_CCM) + atomic_inc(&iproc_priv.aead_cnt[AES_CCM]); + else if (ctx->cipher.mode == CIPHER_MODE_GCM) + atomic_inc(&iproc_priv.aead_cnt[AES_GCM]); + else + atomic_inc(&iproc_priv.aead_cnt[AUTHENC]); + } else { + atomic_inc(&iproc_priv.aead_cnt[AUTHENC]); + } +} + +/** + * spu_chunk_cleanup() - Do cleanup after processing one chunk of a request + * @rctx: request context + * + * Mailbox scatterlists are allocated for each chunk. So free them after + * processing each chunk. + */ +static void spu_chunk_cleanup(struct iproc_reqctx_s *rctx) +{ + /* mailbox message used to tx request */ + struct brcm_message *mssg = &rctx->mb_mssg; + + kfree(mssg->spu.src); + kfree(mssg->spu.dst); + memset(mssg, 0, sizeof(struct brcm_message)); +} + +/** + * finish_req() - Used to invoke the complete callback from the requester when + * a request has been handled asynchronously. + * @rctx: Request context + * @err: Indicates whether the request was successful or not + * + * Ensures that cleanup has been done for request + */ +static void finish_req(struct iproc_reqctx_s *rctx, int err) +{ + struct crypto_async_request *areq = rctx->parent; + + flow_log("%s() err:%d\n\n", __func__, err); + + /* No harm done if already called */ + spu_chunk_cleanup(rctx); + + if (areq) + areq->complete(areq, err); +} + +/** + * spu_rx_callback() - Callback from mailbox framework with a SPU response. 
+ * @cl: mailbox client structure for SPU driver + * @msg: mailbox message containing SPU response + */ +static void spu_rx_callback(struct mbox_client *cl, void *msg) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct brcm_message *mssg = msg; + struct iproc_reqctx_s *rctx; + struct iproc_ctx_s *ctx; + struct crypto_async_request *areq; + int err = 0; + + rctx = mssg->ctx; + if (unlikely(!rctx)) { + /* This is fatal */ + pr_err("%s(): no request context", __func__); + err = -EFAULT; + goto cb_finish; + } + areq = rctx->parent; + ctx = rctx->ctx; + + /* process the SPU status */ + err = spu->spu_status_process(rctx->msg_buf.rx_stat); + if (err != 0) { + if (err == SPU_INVALID_ICV) + atomic_inc(&iproc_priv.bad_icv); + err = -EBADMSG; + goto cb_finish; + } + + /* Process the SPU response message */ + switch (rctx->ctx->alg->type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + handle_ablkcipher_resp(rctx); + break; + case CRYPTO_ALG_TYPE_AHASH: + handle_ahash_resp(rctx); + break; + case CRYPTO_ALG_TYPE_AEAD: + handle_aead_resp(rctx); + break; + default: + err = -EINVAL; + goto cb_finish; + } + + /* + * If this response does not complete the request, then send the next + * request chunk. + */ + if (rctx->total_sent < rctx->total_todo) { + /* Deallocate anything specific to previous chunk */ + spu_chunk_cleanup(rctx); + + switch (rctx->ctx->alg->type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = handle_ablkcipher_req(rctx); + break; + case CRYPTO_ALG_TYPE_AHASH: + err = handle_ahash_req(rctx); + if (err == -EAGAIN) + /* + * we saved data in hash carry, but tell crypto + * API we successfully completed request. + */ + err = 0; + break; + case CRYPTO_ALG_TYPE_AEAD: + err = handle_aead_req(rctx); + break; + default: + err = -EINVAL; + } + + if (err == -EINPROGRESS) + /* Successfully submitted request for next chunk */ + return; + } + +cb_finish: + finish_req(rctx, err); +} + +/* ==================== Kernel Cryptographic API ==================== */ + +/** + * ablkcipher_enqueue() - Handle ablkcipher encrypt or decrypt request. + * @req: Crypto API request + * @encrypt: true if encrypting; false if decrypting + * + * Return: -EINPROGRESS if request accepted and result will be returned + * asynchronously + * < 0 if an error + */ +static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt) +{ + struct iproc_reqctx_s *rctx = ablkcipher_request_ctx(req); + struct iproc_ctx_s *ctx = + crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + int err; + + flow_log("%s() enc:%u\n", __func__, encrypt); + + rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + rctx->parent = &req->base; + rctx->is_encrypt = encrypt; + rctx->bd_suppress = false; + rctx->total_todo = req->nbytes; + rctx->src_sent = 0; + rctx->total_sent = 0; + rctx->total_received = 0; + rctx->ctx = ctx; + + /* Initialize current position in src and dst scatterlists */ + rctx->src_sg = req->src; + rctx->src_nents = 0; + rctx->src_skip = 0; + rctx->dst_sg = req->dst; + rctx->dst_nents = 0; + rctx->dst_skip = 0; + + if (ctx->cipher.mode == CIPHER_MODE_CBC || + ctx->cipher.mode == CIPHER_MODE_CTR || + ctx->cipher.mode == CIPHER_MODE_OFB || + ctx->cipher.mode == CIPHER_MODE_XTS || + ctx->cipher.mode == CIPHER_MODE_GCM || + ctx->cipher.mode == CIPHER_MODE_CCM) { + rctx->iv_ctr_len = + crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + memcpy(rctx->msg_buf.iv_ctr, req->info, rctx->iv_ctr_len); + } else { + rctx->iv_ctr_len = 0; + } + + /* Choose a SPU to process this request */ + rctx->chan_idx = select_channel(); + err = handle_ablkcipher_req(rctx); + if (err != -EINPROGRESS) + /* synchronous result */ + spu_chunk_cleanup(rctx); + + return err; +} + +static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + u32 tmp[DES_EXPKEY_WORDS]; + + if (keylen == DES_KEY_SIZE) { + if (des_ekey(tmp, key) == 0) { + if (crypto_ablkcipher_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) { + u32 flags = CRYPTO_TFM_RES_WEAK_KEY; + + crypto_ablkcipher_set_flags(cipher, flags); + return -EINVAL; + } + } + + ctx->cipher_type = CIPHER_TYPE_DES; + } else { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int threedes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + + if (keylen == (DES_KEY_SIZE * 3)) { + const u32 *K = (const u32 *)key; + u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; + + if (!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) { + crypto_ablkcipher_set_flags(cipher, flags); + return -EINVAL; + } + + ctx->cipher_type = CIPHER_TYPE_3DES; + } else { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + + if (ctx->cipher.mode == CIPHER_MODE_XTS) + /* XTS includes two keys of equal length */ + keylen = keylen / 2; + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + WARN_ON((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + ((ctx->max_payload % AES_BLOCK_SIZE) != 0)); + return 0; +} + +static int rc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + int i; + + ctx->enckeylen = ARC4_MAX_KEY_SIZE + ARC4_STATE_SIZE; + + ctx->enckey[0] = 0x00; /* 0x00 */ + ctx->enckey[1] = 0x00; /* i */ + ctx->enckey[2] = 0x00; /* 0x00 */ + ctx->enckey[3] = 0x00; /* j */ + for (i = 0; i < ARC4_MAX_KEY_SIZE; i++) + ctx->enckey[i + ARC4_STATE_SIZE] = key[i % keylen]; + + ctx->cipher_type = CIPHER_TYPE_INIT; + + return 0; +} + +static 
int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + struct spu_cipher_parms cipher_parms; + u32 alloc_len = 0; + int err; + + flow_log("ablkcipher_setkey() keylen: %d\n", keylen); + flow_dump(" key: ", key, keylen); + + switch (ctx->cipher.alg) { + case CIPHER_ALG_DES: + err = des_setkey(cipher, key, keylen); + break; + case CIPHER_ALG_3DES: + err = threedes_setkey(cipher, key, keylen); + break; + case CIPHER_ALG_AES: + err = aes_setkey(cipher, key, keylen); + break; + case CIPHER_ALG_RC4: + err = rc4_setkey(cipher, key, keylen); + break; + default: + pr_err("%s() Error: unknown cipher alg\n", __func__); + err = -EINVAL; + } + if (err) + return err; + + /* RC4 already populated ctx->enkey */ + if (ctx->cipher.alg != CIPHER_ALG_RC4) { + memcpy(ctx->enckey, key, keylen); + ctx->enckeylen = keylen; + } + /* SPU needs XTS keys in the reverse order the crypto API presents */ + if ((ctx->cipher.alg == CIPHER_ALG_AES) && + (ctx->cipher.mode == CIPHER_MODE_XTS)) { + unsigned int xts_keylen = keylen / 2; + + memcpy(ctx->enckey, key + xts_keylen, xts_keylen); + memcpy(ctx->enckey + xts_keylen, key, xts_keylen); + } + + if (spu->spu_type == SPU_TYPE_SPUM) + alloc_len = BCM_HDR_LEN + SPU_HEADER_ALLOC_LEN; + else if (spu->spu_type == SPU_TYPE_SPU2) + alloc_len = BCM_HDR_LEN + SPU2_HEADER_ALLOC_LEN; + memset(ctx->bcm_spu_req_hdr, 0, alloc_len); + cipher_parms.iv_buf = NULL; + cipher_parms.iv_len = crypto_ablkcipher_ivsize(cipher); + flow_log("%s: iv_len %u\n", __func__, cipher_parms.iv_len); + + cipher_parms.alg = ctx->cipher.alg; + cipher_parms.mode = ctx->cipher.mode; + cipher_parms.type = ctx->cipher_type; + cipher_parms.key_buf = ctx->enckey; + cipher_parms.key_len = ctx->enckeylen; + + /* Prepend SPU request message with BCM header */ + memcpy(ctx->bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN); + ctx->spu_req_hdr_len = + spu->spu_cipher_req_init(ctx->bcm_spu_req_hdr + BCM_HDR_LEN, + &cipher_parms); + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_CIPHER]); + + return 0; +} + +static int ablkcipher_encrypt(struct ablkcipher_request *req) +{ + flow_log("ablkcipher_encrypt() nbytes:%u\n", req->nbytes); + + return ablkcipher_enqueue(req, true); +} + +static int ablkcipher_decrypt(struct ablkcipher_request *req) +{ + flow_log("ablkcipher_decrypt() nbytes:%u\n", req->nbytes); + return ablkcipher_enqueue(req, false); +} + +static int ahash_enqueue(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + int err = 0; + const char *alg_name; + + flow_log("ahash_enqueue() nbytes:%u\n", req->nbytes); + + rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + rctx->parent = &req->base; + rctx->ctx = ctx; + rctx->bd_suppress = true; + memset(&rctx->mb_mssg, 0, sizeof(struct brcm_message)); + + /* Initialize position in src scatterlist */ + rctx->src_sg = req->src; + rctx->src_skip = 0; + rctx->src_nents = 0; + rctx->dst_sg = NULL; + rctx->dst_skip = 0; + rctx->dst_nents = 0; + + /* SPU2 hardware does not compute hash of zero length data */ + if ((rctx->is_final == 1) && (rctx->total_todo == 0) && + (iproc_priv.spu.spu_type == SPU_TYPE_SPU2)) { + alg_name = crypto_tfm_alg_name(crypto_ahash_tfm(tfm)); + flow_log("Doing %sfinal %s zero-len hash request in software\n", + rctx->is_final ? "" : "non-", alg_name); + err = do_shash((unsigned char *)alg_name, req->result, + NULL, 0, NULL, 0, ctx->authkey, + ctx->authkeylen); + if (err < 0) + flow_log("Hash request failed with error %d\n", err); + return err; + } + /* Choose a SPU to process this request */ + rctx->chan_idx = select_channel(); + + err = handle_ahash_req(rctx); + if (err != -EINPROGRESS) + /* synchronous result */ + spu_chunk_cleanup(rctx); + + if (err == -EAGAIN) + /* + * we saved data in hash carry, but tell crypto API + * we successfully completed request. + */ + err = 0; + + return err; +} + +static int __ahash_init(struct ahash_request *req) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + + flow_log("%s()\n", __func__); + + /* Initialize the context */ + rctx->hash_carry_len = 0; + rctx->is_final = 0; + + rctx->total_todo = 0; + rctx->src_sent = 0; + rctx->total_sent = 0; + rctx->total_received = 0; + + ctx->digestsize = crypto_ahash_digestsize(tfm); + /* If we add a hash whose digest is larger, catch it here. */ + WARN_ON(ctx->digestsize > MAX_DIGEST_SIZE); + + rctx->is_sw_hmac = false; + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, 0, + true); + + return 0; +} + +/** + * spu_no_incr_hash() - Determine whether incremental hashing is supported. + * @ctx: Crypto session context + * + * SPU-2 does not support incremental hashing (we'll have to revisit and + * condition based on chip revision or device tree entry if future versions do + * support incremental hash) + * + * SPU-M also doesn't support incremental hashing of AES-XCBC + * + * Return: true if incremental hashing is not supported + * false otherwise + */ +bool spu_no_incr_hash(struct iproc_ctx_s *ctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + + if (spu->spu_type == SPU_TYPE_SPU2) + return true; + + if ((ctx->auth.alg == HASH_ALG_AES) && + (ctx->auth.mode == HASH_MODE_XCBC)) + return true; + + /* Otherwise, incremental hashing is supported */ + return false; +} + +static int ahash_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + const char *alg_name; + struct crypto_shash *hash; + int ret; + gfp_t gfp; + + if (spu_no_incr_hash(ctx)) { + /* + * If we get an incremental hashing request and it's not + * supported by the hardware, we need to handle it in software + * by calling synchronous hash functions. + */ + alg_name = crypto_tfm_alg_name(crypto_ahash_tfm(tfm)); + hash = crypto_alloc_shash(alg_name, 0, 0); + if (IS_ERR(hash)) { + ret = PTR_ERR(hash); + goto err; + } + + gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + ctx->shash = kmalloc(sizeof(*ctx->shash) + + crypto_shash_descsize(hash), gfp); + if (!ctx->shash) { + ret = -ENOMEM; + goto err_hash; + } + ctx->shash->tfm = hash; + ctx->shash->flags = 0; + + /* Set the key using data we already have from setkey */ + if (ctx->authkeylen > 0) { + ret = crypto_shash_setkey(hash, ctx->authkey, + ctx->authkeylen); + if (ret) + goto err_shash; + } + + /* Initialize hash w/ this key and other params */ + ret = crypto_shash_init(ctx->shash); + if (ret) + goto err_shash; + } else { + /* Otherwise call the internal function which uses SPU hw */ + ret = __ahash_init(req); + } + + return ret; + +err_shash: + kfree(ctx->shash); +err_hash: + crypto_free_shash(hash); +err: + return ret; +} + +static int __ahash_update(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + + flow_log("ahash_update() nbytes:%u\n", req->nbytes); + + if (!req->nbytes) + return 0; + rctx->total_todo += req->nbytes; + rctx->src_sent = 0; + + return ahash_enqueue(req); +} + +static int ahash_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + u8 *tmpbuf; + int ret; + int nents; + gfp_t gfp; + + if (spu_no_incr_hash(ctx)) { + /* + * If we get an incremental hashing request and it's not + * supported by the hardware, we need to handle it in software + * by calling synchronous hash functions. + */ + if (req->src) + nents = sg_nents(req->src); + else + return -EINVAL; + + /* Copy data from req scatterlist to tmp buffer */ + gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + tmpbuf = kmalloc(req->nbytes, gfp); + if (!tmpbuf) + return -ENOMEM; + + if (sg_copy_to_buffer(req->src, nents, tmpbuf, req->nbytes) != + req->nbytes) { + kfree(tmpbuf); + return -EINVAL; + } + + /* Call synchronous update */ + ret = crypto_shash_update(ctx->shash, tmpbuf, req->nbytes); + kfree(tmpbuf); + } else { + /* Otherwise call the internal function which uses SPU hw */ + ret = __ahash_update(req); + } + + return ret; +} + +static int __ahash_final(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + + flow_log("ahash_final() nbytes:%u\n", req->nbytes); + + rctx->is_final = 1; + + return ahash_enqueue(req); +} + +static int ahash_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + int ret; + + if (spu_no_incr_hash(ctx)) { + /* + * If we get an incremental hashing request and it's not + * supported by the hardware, we need to handle it in software + * by calling synchronous hash functions. 
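+ *
+ * crypto_shash_final() writes the digest straight to req->result, and the
+ * shash tfm allocated in ahash_init() is freed once the digest is done.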
+ */ + ret = crypto_shash_final(ctx->shash, req->result); + + /* Done with hash, can deallocate it now */ + crypto_free_shash(ctx->shash->tfm); + kfree(ctx->shash); + + } else { + /* Otherwise call the internal function which uses SPU hw */ + ret = __ahash_final(req); + } + + return ret; +} + +static int __ahash_finup(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + + flow_log("ahash_finup() nbytes:%u\n", req->nbytes); + + rctx->total_todo += req->nbytes; + rctx->src_sent = 0; + rctx->is_final = 1; + + return ahash_enqueue(req); +} + +static int ahash_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + u8 *tmpbuf; + int ret; + int nents; + gfp_t gfp; + + if (spu_no_incr_hash(ctx)) { + /* + * If we get an incremental hashing request and it's not + * supported by the hardware, we need to handle it in software + * by calling synchronous hash functions. + */ + if (req->src) { + nents = sg_nents(req->src); + } else { + ret = -EINVAL; + goto ahash_finup_exit; + } + + /* Copy data from req scatterlist to tmp buffer */ + gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + tmpbuf = kmalloc(req->nbytes, gfp); + if (!tmpbuf) { + ret = -ENOMEM; + goto ahash_finup_exit; + } + + if (sg_copy_to_buffer(req->src, nents, tmpbuf, req->nbytes) != + req->nbytes) { + ret = -EINVAL; + goto ahash_finup_free; + } + + /* Call synchronous update */ + ret = crypto_shash_finup(ctx->shash, tmpbuf, req->nbytes, + req->result); + } else { + /* Otherwise call the internal function which uses SPU hw */ + return __ahash_finup(req); + } +ahash_finup_free: + kfree(tmpbuf); + +ahash_finup_exit: + /* Done with hash, can deallocate it now */ + crypto_free_shash(ctx->shash->tfm); + kfree(ctx->shash); + return ret; +} + +static int ahash_digest(struct ahash_request *req) +{ + int err = 0; + + flow_log("ahash_digest() nbytes:%u\n", req->nbytes); + + /* whole thing at once */ + err = __ahash_init(req); + if (!err) + err = __ahash_finup(req); + + return err; +} + +static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ahash_ctx(ahash); + + flow_log("%s() ahash:%p key:%p keylen:%u\n", + __func__, ahash, key, keylen); + flow_dump(" key: ", key, keylen); + + if (ctx->auth.alg == HASH_ALG_AES) { + switch (keylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + pr_err("%s() Error: Invalid key length\n", __func__); + return -EINVAL; + } + } else { + pr_err("%s() Error: unknown hash alg\n", __func__); + return -EINVAL; + } + memcpy(ctx->authkey, key, keylen); + ctx->authkeylen = keylen; + + return 0; +} + +static int ahash_export(struct ahash_request *req, void *out) +{ + const struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct spu_hash_export_s *spu_exp = (struct spu_hash_export_s *)out; + + spu_exp->total_todo = rctx->total_todo; + spu_exp->total_sent = rctx->total_sent; + spu_exp->is_sw_hmac = rctx->is_sw_hmac; + memcpy(spu_exp->hash_carry, rctx->hash_carry, sizeof(rctx->hash_carry)); + spu_exp->hash_carry_len = rctx->hash_carry_len; + memcpy(spu_exp->incr_hash, rctx->incr_hash, sizeof(rctx->incr_hash)); + + return 0; +} + +static int ahash_import(struct ahash_request *req, const 
void *in) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct spu_hash_export_s *spu_exp = (struct spu_hash_export_s *)in; + + rctx->total_todo = spu_exp->total_todo; + rctx->total_sent = spu_exp->total_sent; + rctx->is_sw_hmac = spu_exp->is_sw_hmac; + memcpy(rctx->hash_carry, spu_exp->hash_carry, sizeof(rctx->hash_carry)); + rctx->hash_carry_len = spu_exp->hash_carry_len; + memcpy(rctx->incr_hash, spu_exp->incr_hash, sizeof(rctx->incr_hash)); + + return 0; +} + +static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ahash_ctx(ahash); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); + unsigned int digestsize = crypto_ahash_digestsize(ahash); + unsigned int index; + int rc; + + flow_log("%s() ahash:%p key:%p keylen:%u blksz:%u digestsz:%u\n", + __func__, ahash, key, keylen, blocksize, digestsize); + flow_dump(" key: ", key, keylen); + + if (keylen > blocksize) { + switch (ctx->auth.alg) { + case HASH_ALG_MD5: + rc = do_shash("md5", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA1: + rc = do_shash("sha1", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA224: + rc = do_shash("sha224", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA256: + rc = do_shash("sha256", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA384: + rc = do_shash("sha384", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA512: + rc = do_shash("sha512", ctx->authkey, key, keylen, NULL, + 0, NULL, 0); + break; + case HASH_ALG_SHA3_224: + rc = do_shash("sha3-224", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_256: + rc = do_shash("sha3-256", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_384: + rc = do_shash("sha3-384", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_512: + rc = do_shash("sha3-512", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + default: + pr_err("%s() Error: unknown hash alg\n", __func__); + return -EINVAL; + } + if (rc < 0) { + pr_err("%s() Error %d computing shash for %s\n", + __func__, rc, hash_alg_name[ctx->auth.alg]); + return rc; + } + ctx->authkeylen = digestsize; + + flow_log(" keylen > digestsize... hashed\n"); + flow_dump(" newkey: ", ctx->authkey, ctx->authkeylen); + } else { + memcpy(ctx->authkey, key, keylen); + ctx->authkeylen = keylen; + } + + /* + * Full HMAC operation in SPUM is not verified, + * So keeping the generation of IPAD, OPAD and + * outer hashing in software. 
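+ *
+ * HMAC(K, m) = H((K0 ^ opad) || H((K0 ^ ipad) || m)), where K0 is the key
+ * zero-padded to the block size. The inner hash runs on the SPU with ipad
+ * prepended as hash carry; the outer hash is finished in software by
+ * spu_hmac_outer_hash().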
+ */ + if (iproc_priv.spu.spu_type == SPU_TYPE_SPUM) { + memcpy(ctx->ipad, ctx->authkey, ctx->authkeylen); + memset(ctx->ipad + ctx->authkeylen, 0, + blocksize - ctx->authkeylen); + ctx->authkeylen = 0; + memcpy(ctx->opad, ctx->ipad, blocksize); + + for (index = 0; index < blocksize; index++) { + ctx->ipad[index] ^= 0x36; + ctx->opad[index] ^= 0x5c; + } + + flow_dump(" ipad: ", ctx->ipad, blocksize); + flow_dump(" opad: ", ctx->opad, blocksize); + } + ctx->digestsize = digestsize; + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_HMAC]); + + return 0; +} + +static int ahash_hmac_init(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + flow_log("ahash_hmac_init()\n"); + + /* init the context as a hash */ + ahash_init(req); + + if (!spu_no_incr_hash(ctx)) { + /* SPU-M can do incr hashing but needs sw for outer HMAC */ + rctx->is_sw_hmac = true; + ctx->auth.mode = HASH_MODE_HASH; + /* start with a prepended ipad */ + memcpy(rctx->hash_carry, ctx->ipad, blocksize); + rctx->hash_carry_len = blocksize; + rctx->total_todo += blocksize; + } + + return 0; +} + +static int ahash_hmac_update(struct ahash_request *req) +{ + flow_log("ahash_hmac_update() nbytes:%u\n", req->nbytes); + + if (!req->nbytes) + return 0; + + return ahash_update(req); +} + +static int ahash_hmac_final(struct ahash_request *req) +{ + flow_log("ahash_hmac_final() nbytes:%u\n", req->nbytes); + + return ahash_final(req); +} + +static int ahash_hmac_finup(struct ahash_request *req) +{ + flow_log("ahash_hmac_finupl() nbytes:%u\n", req->nbytes); + + return ahash_finup(req); +} + +static int ahash_hmac_digest(struct ahash_request *req) +{ + struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + flow_log("ahash_hmac_digest() nbytes:%u\n", req->nbytes); + + /* Perform initialization and then call finup */ + __ahash_init(req); + + if (iproc_priv.spu.spu_type == SPU_TYPE_SPU2) { + /* + * SPU2 supports full HMAC implementation in the + * hardware, need not to generate IPAD, OPAD and + * outer hash in software. + * Only for hash key len > hash block size, SPU2 + * expects to perform hashing on the key, shorten + * it to digest size and feed it as hash key. + */ + rctx->is_sw_hmac = false; + ctx->auth.mode = HASH_MODE_HMAC; + } else { + rctx->is_sw_hmac = true; + ctx->auth.mode = HASH_MODE_HASH; + /* start with a prepended ipad */ + memcpy(rctx->hash_carry, ctx->ipad, blocksize); + rctx->hash_carry_len = blocksize; + rctx->total_todo += blocksize; + } + + return __ahash_finup(req); +} + +/* aead helpers */ + +static int aead_need_fallback(struct aead_request *req) +{ + struct iproc_reqctx_s *rctx = aead_request_ctx(req); + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_aead_ctx(aead); + u32 payload_len; + + /* + * SPU hardware cannot handle the AES-GCM/CCM case where plaintext + * and AAD are both 0 bytes long. So use fallback in this case. 
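+ * (For decrypt, a zero-length plaintext shows up as cryptlen == digestsize,
+ * since the ICV is counted in req->cryptlen.)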
+ */ + if (((ctx->cipher.mode == CIPHER_MODE_GCM) || + (ctx->cipher.mode == CIPHER_MODE_CCM)) && + (req->assoclen == 0)) { + if ((rctx->is_encrypt && (req->cryptlen == 0)) || + (!rctx->is_encrypt && (req->cryptlen == ctx->digestsize))) { + flow_log("AES GCM/CCM needs fallback for 0 len req\n"); + return 1; + } + } + + /* SPU-M hardware only supports CCM digest size of 8, 12, or 16 bytes */ + if ((ctx->cipher.mode == CIPHER_MODE_CCM) && + (spu->spu_type == SPU_TYPE_SPUM) && + (ctx->digestsize != 8) && (ctx->digestsize != 12) && + (ctx->digestsize != 16)) { + flow_log("%s() AES CCM needs fallbck for digest size %d\n", + __func__, ctx->digestsize); + return 1; + } + + /* + * SPU-M on NSP has an issue where AES-CCM hash is not correct + * when AAD size is 0 + */ + if ((ctx->cipher.mode == CIPHER_MODE_CCM) && + (spu->spu_subtype == SPU_SUBTYPE_SPUM_NSP) && + (req->assoclen == 0)) { + flow_log("%s() AES_CCM needs fallback for 0 len AAD on NSP\n", + __func__); + return 1; + } + + payload_len = req->cryptlen; + if (spu->spu_type == SPU_TYPE_SPUM) + payload_len += req->assoclen; + + flow_log("%s() payload len: %u\n", __func__, payload_len); + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + return 0; + else + return payload_len > ctx->max_payload; +} + +static void aead_complete(struct crypto_async_request *areq, int err) +{ + struct aead_request *req = + container_of(areq, struct aead_request, base); + struct iproc_reqctx_s *rctx = aead_request_ctx(req); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + + flow_log("%s() err:%d\n", __func__, err); + + areq->tfm = crypto_aead_tfm(aead); + + areq->complete = rctx->old_complete; + areq->data = rctx->old_data; + + areq->complete(areq, err); +} + +static int aead_do_fallback(struct aead_request *req, bool is_encrypt) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct iproc_reqctx_s *rctx = aead_request_ctx(req); + struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); + int err; + u32 req_flags; + + flow_log("%s() enc:%u\n", __func__, is_encrypt); + + if (ctx->fallback_cipher) { + /* Store the cipher tfm and then use the fallback tfm */ + rctx->old_tfm = tfm; + aead_request_set_tfm(req, ctx->fallback_cipher); + /* + * Save the callback and chain ourselves in, so we can restore + * the tfm + */ + rctx->old_complete = req->base.complete; + rctx->old_data = req->base.data; + req_flags = aead_request_flags(req); + aead_request_set_callback(req, req_flags, aead_complete, req); + err = is_encrypt ? crypto_aead_encrypt(req) : + crypto_aead_decrypt(req); + + if (err == 0) { + /* + * fallback was synchronous (did not return + * -EINPROGRESS). So restore request state here. + */ + aead_request_set_callback(req, req_flags, + rctx->old_complete, req); + req->base.data = rctx->old_data; + aead_request_set_tfm(req, aead); + flow_log("%s() fallback completed successfully\n\n", + __func__); + } + } else { + err = -EINVAL; + } + + return err; +} + +static int aead_enqueue(struct aead_request *req, bool is_encrypt) +{ + struct iproc_reqctx_s *rctx = aead_request_ctx(req); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct iproc_ctx_s *ctx = crypto_aead_ctx(aead); + int err; + + flow_log("%s() enc:%u\n", __func__, is_encrypt); + + if (req->assoclen > MAX_ASSOC_SIZE) { + pr_err + ("%s() Error: associated data too long. (%u > %u bytes)\n", + __func__, req->assoclen, MAX_ASSOC_SIZE); + return -EINVAL; + } + + rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + rctx->parent = &req->base; + rctx->is_encrypt = is_encrypt; + rctx->bd_suppress = false; + rctx->total_todo = req->cryptlen; + rctx->src_sent = 0; + rctx->total_sent = 0; + rctx->total_received = 0; + rctx->is_sw_hmac = false; + rctx->ctx = ctx; + memset(&rctx->mb_mssg, 0, sizeof(struct brcm_message)); + + /* assoc data is at start of src sg */ + rctx->assoc = req->src; + + /* + * Init current position in src scatterlist to be after assoc data. + * src_skip set to buffer offset where data begins. (Assoc data could + * end in the middle of a buffer.) + */ + if (spu_sg_at_offset(req->src, req->assoclen, &rctx->src_sg, + &rctx->src_skip) < 0) { + pr_err("%s() Error: Unable to find start of src data\n", + __func__); + return -EINVAL; + } + + rctx->src_nents = 0; + rctx->dst_nents = 0; + if (req->dst == req->src) { + rctx->dst_sg = rctx->src_sg; + rctx->dst_skip = rctx->src_skip; + } else { + /* + * Expect req->dst to have room for assoc data followed by + * output data and ICV, if encrypt. So initialize dst_sg + * to point beyond assoc len offset. + */ + if (spu_sg_at_offset(req->dst, req->assoclen, &rctx->dst_sg, + &rctx->dst_skip) < 0) { + pr_err("%s() Error: Unable to find start of dst data\n", + __func__); + return -EINVAL; + } + } + + if (ctx->cipher.mode == CIPHER_MODE_CBC || + ctx->cipher.mode == CIPHER_MODE_CTR || + ctx->cipher.mode == CIPHER_MODE_OFB || + ctx->cipher.mode == CIPHER_MODE_XTS || + ctx->cipher.mode == CIPHER_MODE_GCM) { + rctx->iv_ctr_len = + ctx->salt_len + + crypto_aead_ivsize(crypto_aead_reqtfm(req)); + } else if (ctx->cipher.mode == CIPHER_MODE_CCM) { + rctx->iv_ctr_len = CCM_AES_IV_SIZE; + } else { + rctx->iv_ctr_len = 0; + } + + rctx->hash_carry_len = 0; + + flow_log(" src sg: %p\n", req->src); + flow_log(" rctx->src_sg: %p, src_skip %u\n", + rctx->src_sg, rctx->src_skip); + flow_log(" assoc: %p, assoclen %u\n", rctx->assoc, req->assoclen); + flow_log(" dst sg: %p\n", req->dst); + flow_log(" rctx->dst_sg: %p, dst_skip %u\n", + rctx->dst_sg, rctx->dst_skip); + flow_log(" iv_ctr_len:%u\n", rctx->iv_ctr_len); + flow_dump(" iv: ", req->iv, rctx->iv_ctr_len); + flow_log(" authkeylen:%u\n", ctx->authkeylen); + flow_log(" is_esp: %s\n", ctx->is_esp ? "yes" : "no"); + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log(" max_payload infinite"); + else + flow_log(" max_payload: %u\n", ctx->max_payload); + + if (unlikely(aead_need_fallback(req))) + return aead_do_fallback(req, is_encrypt); + + /* + * Do memory allocations for request after fallback check, because if we + * do fallback, we won't call finish_req() to dealloc. 
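+ * The IV buffer is filled below with the salt (if any) at salt_offset,
+ * followed by the IV supplied by the crypto API.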
+ */ + if (rctx->iv_ctr_len) { + if (ctx->salt_len) + memcpy(rctx->msg_buf.iv_ctr + ctx->salt_offset, + ctx->salt, ctx->salt_len); + memcpy(rctx->msg_buf.iv_ctr + ctx->salt_offset + ctx->salt_len, + req->iv, + rctx->iv_ctr_len - ctx->salt_len - ctx->salt_offset); + } + + rctx->chan_idx = select_channel(); + err = handle_aead_req(rctx); + if (err != -EINPROGRESS) + /* synchronous result */ + spu_chunk_cleanup(rctx); + + return err; +} + +static int aead_authenc_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + struct crypto_tfm *tfm = crypto_aead_tfm(cipher); + struct rtattr *rta = (void *)key; + struct crypto_authenc_key_param *param; + const u8 *origkey = key; + const unsigned int origkeylen = keylen; + + int ret = 0; + + flow_log("%s() aead:%p key:%p keylen:%u\n", __func__, cipher, key, + keylen); + flow_dump(" key: ", key, keylen); + + if (!RTA_OK(rta, keylen)) + goto badkey; + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + ctx->enckeylen = be32_to_cpu(param->enckeylen); + + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + + if (keylen < ctx->enckeylen) + goto badkey; + if (ctx->enckeylen > MAX_KEY_SIZE) + goto badkey; + + ctx->authkeylen = keylen - ctx->enckeylen; + + if (ctx->authkeylen > MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->enckey, key + ctx->authkeylen, ctx->enckeylen); + /* May end up padding auth key. So make sure it's zeroed. */ + memset(ctx->authkey, 0, sizeof(ctx->authkey)); + memcpy(ctx->authkey, key, ctx->authkeylen); + + switch (ctx->alg->cipher_info.alg) { + case CIPHER_ALG_DES: + if (ctx->enckeylen == DES_KEY_SIZE) { + u32 tmp[DES_EXPKEY_WORDS]; + u32 flags = CRYPTO_TFM_RES_WEAK_KEY; + + if (des_ekey(tmp, key) == 0) { + if (crypto_aead_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) { + crypto_aead_set_flags(cipher, flags); + return -EINVAL; + } + } + + ctx->cipher_type = CIPHER_TYPE_DES; + } else { + goto badkey; + } + break; + case CIPHER_ALG_3DES: + if (ctx->enckeylen == (DES_KEY_SIZE * 3)) { + const u32 *K = (const u32 *)key; + u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; + + if (!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) { + crypto_aead_set_flags(cipher, flags); + return -EINVAL; + } + + ctx->cipher_type = CIPHER_TYPE_3DES; + } else { + crypto_aead_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + break; + case CIPHER_ALG_AES: + switch (ctx->enckeylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + goto badkey; + } + break; + case CIPHER_ALG_RC4: + ctx->cipher_type = CIPHER_TYPE_INIT; + break; + default: + pr_err("%s() Error: Unknown cipher alg\n", __func__); + return -EINVAL; + } + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + flow_dump(" enc: ", ctx->enckey, ctx->enckeylen); + flow_dump(" auth: ", ctx->authkey, ctx->authkeylen); + + /* setkey the fallback just in case we needto use it */ + if (ctx->fallback_cipher) { + flow_log(" running fallback setkey()\n"); + + ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->fallback_cipher->base.crt_flags |= + tfm->crt_flags & CRYPTO_TFM_REQ_MASK; + ret = + 
crypto_aead_setkey(ctx->fallback_cipher, origkey, + origkeylen); + if (ret) { + flow_log(" fallback setkey() returned:%d\n", ret); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + (ctx->fallback_cipher->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + } + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_AEAD]); + + return ret; + +badkey: + ctx->enckeylen = 0; + ctx->authkeylen = 0; + ctx->digestsize = 0; + + crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +static int aead_gcm_ccm_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + struct crypto_tfm *tfm = crypto_aead_tfm(cipher); + + int ret = 0; + + flow_log("%s() keylen:%u\n", __func__, keylen); + flow_dump(" key: ", key, keylen); + + if (!ctx->is_esp) + ctx->digestsize = keylen; + + ctx->enckeylen = keylen; + ctx->authkeylen = 0; + memcpy(ctx->enckey, key, ctx->enckeylen); + + switch (ctx->enckeylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + goto badkey; + } + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + flow_dump(" enc: ", ctx->enckey, ctx->enckeylen); + flow_dump(" auth: ", ctx->authkey, ctx->authkeylen); + + /* setkey the fallback just in case we need to use it */ + if (ctx->fallback_cipher) { + flow_log(" running fallback setkey()\n"); + + ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->fallback_cipher->base.crt_flags |= + tfm->crt_flags & CRYPTO_TFM_REQ_MASK; + ret = crypto_aead_setkey(ctx->fallback_cipher, key, + keylen + ctx->salt_len); + if (ret) { + flow_log(" fallback setkey() returned:%d\n", ret); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + (ctx->fallback_cipher->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + } + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_AEAD]); + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + + return ret; + +badkey: + ctx->enckeylen = 0; + ctx->authkeylen = 0; + ctx->digestsize = 0; + + crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +/** + * aead_gcm_esp_setkey() - setkey() operation for ESP variant of GCM AES. + * @cipher: AEAD structure + * @key: Key followed by 4 bytes of salt + * @keylen: Length of key plus salt, in bytes + * + * Extracts salt from key and stores it to be prepended to IV on each request. + * Digest is always 16 bytes + * + * Return: Value from generic gcm setkey. + */ +static int aead_gcm_esp_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + + flow_log("%s\n", __func__); + ctx->salt_len = GCM_ESP_SALT_SIZE; + ctx->salt_offset = GCM_ESP_SALT_OFFSET; + memcpy(ctx->salt, key + keylen - GCM_ESP_SALT_SIZE, GCM_ESP_SALT_SIZE); + keylen -= GCM_ESP_SALT_SIZE; + ctx->digestsize = GCM_ESP_DIGESTSIZE; + ctx->is_esp = true; + flow_dump("salt: ", ctx->salt, GCM_ESP_SALT_SIZE); + + return aead_gcm_ccm_setkey(cipher, key, keylen); +} + +/** + * rfc4543_gcm_esp_setkey() - setkey operation for RFC4543 variant of GCM/GMAC. 
+ * @cipher: AEAD structure + * @key: Key followed by 4 bytes of salt + * @keylen: Length of key plus salt, in bytes + * + * Extracts salt from key and stores it to be prepended to IV on each request. + * Digest is always 16 bytes + * + * Return: Value from generic gcm setkey. + */ +static int rfc4543_gcm_esp_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + + flow_log("%s\n", __func__); + ctx->salt_len = GCM_ESP_SALT_SIZE; + ctx->salt_offset = GCM_ESP_SALT_OFFSET; + memcpy(ctx->salt, key + keylen - GCM_ESP_SALT_SIZE, GCM_ESP_SALT_SIZE); + keylen -= GCM_ESP_SALT_SIZE; + ctx->digestsize = GCM_ESP_DIGESTSIZE; + ctx->is_esp = true; + ctx->is_rfc4543 = true; + flow_dump("salt: ", ctx->salt, GCM_ESP_SALT_SIZE); + + return aead_gcm_ccm_setkey(cipher, key, keylen); +} + +/** + * aead_ccm_esp_setkey() - setkey() operation for ESP variant of CCM AES. + * @cipher: AEAD structure + * @key: Key followed by 4 bytes of salt + * @keylen: Length of key plus salt, in bytes + * + * Extracts salt from key and stores it to be prepended to IV on each request. + * Digest is always 16 bytes + * + * Return: Value from generic ccm setkey. + */ +static int aead_ccm_esp_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + + flow_log("%s\n", __func__); + ctx->salt_len = CCM_ESP_SALT_SIZE; + ctx->salt_offset = CCM_ESP_SALT_OFFSET; + memcpy(ctx->salt, key + keylen - CCM_ESP_SALT_SIZE, CCM_ESP_SALT_SIZE); + keylen -= CCM_ESP_SALT_SIZE; + ctx->is_esp = true; + flow_dump("salt: ", ctx->salt, CCM_ESP_SALT_SIZE); + + return aead_gcm_ccm_setkey(cipher, key, keylen); +} + +static int aead_setauthsize(struct crypto_aead *cipher, unsigned int authsize) +{ + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + int ret = 0; + + flow_log("%s() authkeylen:%u authsize:%u\n", + __func__, ctx->authkeylen, authsize); + + ctx->digestsize = authsize; + + /* setkey the fallback just in case we need to use it */ + if (ctx->fallback_cipher) { + flow_log(" running fallback setauth()\n"); + + ret = crypto_aead_setauthsize(ctx->fallback_cipher, authsize); + if (ret) + flow_log(" fallback setauth() returned:%d\n", ret); + } + + return ret; +} + +static int aead_encrypt(struct aead_request *req) +{ + flow_log("%s() cryptlen:%u %08x\n", __func__, req->cryptlen, + req->cryptlen); + dump_sg(req->src, 0, req->cryptlen + req->assoclen); + flow_log(" assoc_len:%u\n", req->assoclen); + + return aead_enqueue(req, true); +} + +static int aead_decrypt(struct aead_request *req) +{ + flow_log("%s() cryptlen:%u\n", __func__, req->cryptlen); + dump_sg(req->src, 0, req->cryptlen + req->assoclen); + flow_log(" assoc_len:%u\n", req->assoclen); + + return aead_enqueue(req, false); +} + +/* ==================== Supported Cipher Algorithms ==================== */ + +static struct iproc_alg_s driver_algs[] = { + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = aead_gcm_ccm_setkey, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_GCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_GCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-iproc", +
.cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = aead_gcm_ccm_setkey, + .ivsize = CCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_CCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "gcm-aes-esp-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = aead_gcm_esp_setkey, + .ivsize = GCM_ESP_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_GCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_GCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "rfc4309(ccm(aes))", + .cra_driver_name = "ccm-aes-esp-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = aead_ccm_esp_setkey, + .ivsize = CCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_CCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "rfc4543(gcm(aes))", + .cra_driver_name = "gmac-aes-esp-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = rfc4543_gcm_esp_setkey, + .ivsize = GCM_ESP_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_GCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_GCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des))", + 
.cra_driver_name = "authenc-hmac-md5-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(des))", + .cra_driver_name = "authenc-hmac-sha1-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(des))", + .cra_driver_name = "authenc-hmac-sha224-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(des))", + .cra_driver_name = "authenc-hmac-sha256-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(des))", + .cra_driver_name = "authenc-hmac-sha384-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(des))", + .cra_driver_name = "authenc-hmac-sha512-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = 
CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha384-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha512-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + +/* ABLKCIPHER algorithms. 
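+ * These are cipher-only entries; each one sets auth_info to HASH_ALG_NONE / HASH_MODE_NONE.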
*/ + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(arc4)", + .cra_driver_name = "ecb-arc4-iproc", + .cra_blocksize = ARC4_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = ARC4_MIN_KEY_SIZE, + .max_keysize = ARC4_MAX_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_RC4, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(des)", + .cra_driver_name = "ofb-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(des3_ede)", + .cra_driver_name = "ofb-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(aes)", + .cra_driver_name = "ofb-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = 
CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + /* .geniv = "chainiv", */ + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CTR, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, +{ + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_XTS, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + +/* AHASH algorithms. 
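+ * Each digest below is registered in both plain and HMAC form; the list ends with AES XCBC and CMAC.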
*/ + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + .cra_driver_name = "md5-iproc", + .cra_blocksize = MD5_BLOCK_WORDS * 4, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "hmac-md5-iproc", + .cra_blocksize = MD5_BLOCK_WORDS * 4, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-iproc", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "hmac-sha1-iproc", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-iproc", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "hmac-sha224-iproc", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-iproc", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "hmac-sha256-iproc", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-iproc", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name 
= "hmac(sha384)", + .cra_driver_name = "hmac-sha384-iproc", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-iproc", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "hmac-sha512-iproc", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-224", + .cra_driver_name = "sha3-224-iproc", + .cra_blocksize = SHA3_224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_224, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-224)", + .cra_driver_name = "hmac-sha3-224-iproc", + .cra_blocksize = SHA3_224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_224, + .mode = HASH_MODE_HMAC + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-256", + .cra_driver_name = "sha3-256-iproc", + .cra_blocksize = SHA3_256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_256, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-256)", + .cra_driver_name = "hmac-sha3-256-iproc", + .cra_blocksize = SHA3_256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_256, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-384", + .cra_driver_name = "sha3-384-iproc", + .cra_blocksize = SHA3_224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_384, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_384_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-384)", + .cra_driver_name = "hmac-sha3-384-iproc", + .cra_blocksize = SHA3_384_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_384, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-512", + .cra_driver_name = "sha3-512-iproc", + 
.cra_blocksize = SHA3_512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_512, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_512_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-512)", + .cra_driver_name = "hmac-sha3-512-iproc", + .cra_blocksize = SHA3_512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_512, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = AES_BLOCK_SIZE, + .halg.base = { + .cra_name = "xcbc(aes)", + .cra_driver_name = "xcbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_XCBC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = AES_BLOCK_SIZE, + .halg.base = { + .cra_name = "cmac(aes)", + .cra_driver_name = "cmac-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_CMAC, + }, + }, +}; + +static int generic_cra_init(struct crypto_tfm *tfm, + struct iproc_alg_s *cipher_alg) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); + unsigned int blocksize = crypto_tfm_alg_blocksize(tfm); + + flow_log("%s()\n", __func__); + + ctx->alg = cipher_alg; + ctx->cipher = cipher_alg->cipher_info; + ctx->auth = cipher_alg->auth_info; + ctx->auth_first = cipher_alg->auth_first; + ctx->max_payload = spu->spu_ctx_max_payload(ctx->cipher.alg, + ctx->cipher.mode, + blocksize); + ctx->fallback_cipher = NULL; + + ctx->enckeylen = 0; + ctx->authkeylen = 0; + + atomic_inc(&iproc_priv.stream_count); + atomic_inc(&iproc_priv.session_count); + + return 0; +} + +static int ablkcipher_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct iproc_alg_s *cipher_alg; + + flow_log("%s()\n", __func__); + + tfm->crt_ablkcipher.reqsize = sizeof(struct iproc_reqctx_s); + + cipher_alg = container_of(alg, struct iproc_alg_s, alg.crypto); + return generic_cra_init(tfm, cipher_alg); +} + +static int ahash_cra_init(struct crypto_tfm *tfm) +{ + int err; + struct crypto_alg *alg = tfm->__crt_alg; + struct iproc_alg_s *cipher_alg; + + cipher_alg = container_of(__crypto_ahash_alg(alg), struct iproc_alg_s, + alg.hash); + + err = generic_cra_init(tfm, cipher_alg); + flow_log("%s()\n", __func__); + + /* + * export state size has to be < 512 bytes. So don't include msg bufs + * in state size. 
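+ * The exported state is therefore limited to the fields of struct spu_hash_export_s.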
+ */ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct iproc_reqctx_s)); + + return err; +} + +static int aead_cra_init(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); + struct crypto_alg *alg = tfm->__crt_alg; + struct aead_alg *aalg = container_of(alg, struct aead_alg, base); + struct iproc_alg_s *cipher_alg = container_of(aalg, struct iproc_alg_s, + alg.aead); + + int err = generic_cra_init(tfm, cipher_alg); + + flow_log("%s()\n", __func__); + + crypto_aead_set_reqsize(aead, sizeof(struct iproc_reqctx_s)); + ctx->is_esp = false; + ctx->salt_len = 0; + ctx->salt_offset = 0; + + /* random first IV */ + get_random_bytes(ctx->iv, MAX_IV_SIZE); + flow_dump(" iv: ", ctx->iv, MAX_IV_SIZE); + + if (!err) { + if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { + flow_log("%s() creating fallback cipher\n", __func__); + + ctx->fallback_cipher = + crypto_alloc_aead(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fallback_cipher); + } + } + } + + return err; +} + +static void generic_cra_exit(struct crypto_tfm *tfm) +{ + atomic_dec(&iproc_priv.session_count); +} + +static void aead_cra_exit(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); + + generic_cra_exit(tfm); + + if (ctx->fallback_cipher) { + crypto_free_aead(ctx->fallback_cipher); + ctx->fallback_cipher = NULL; + } +} + +/** + * spu_functions_register() - Specify hardware-specific SPU functions based on + * SPU type read from device tree. + * @dev: device structure + * @spu_type: SPU hardware generation + * @spu_subtype: SPU hardware version + */ +static void spu_functions_register(struct device *dev, + enum spu_spu_type spu_type, + enum spu_spu_subtype spu_subtype) +{ + struct spu_hw *spu = &iproc_priv.spu; + + if (spu_type == SPU_TYPE_SPUM) { + dev_dbg(dev, "Registering SPUM functions"); + spu->spu_dump_msg_hdr = spum_dump_msg_hdr; + spu->spu_payload_length = spum_payload_length; + spu->spu_response_hdr_len = spum_response_hdr_len; + spu->spu_hash_pad_len = spum_hash_pad_len; + spu->spu_gcm_ccm_pad_len = spum_gcm_ccm_pad_len; + spu->spu_assoc_resp_len = spum_assoc_resp_len; + spu->spu_aead_ivlen = spum_aead_ivlen; + spu->spu_hash_type = spum_hash_type; + spu->spu_digest_size = spum_digest_size; + spu->spu_create_request = spum_create_request; + spu->spu_cipher_req_init = spum_cipher_req_init; + spu->spu_cipher_req_finish = spum_cipher_req_finish; + spu->spu_request_pad = spum_request_pad; + spu->spu_tx_status_len = spum_tx_status_len; + spu->spu_rx_status_len = spum_rx_status_len; + spu->spu_status_process = spum_status_process; + spu->spu_xts_tweak_in_payload = spum_xts_tweak_in_payload; + spu->spu_ccm_update_iv = spum_ccm_update_iv; + spu->spu_wordalign_padlen = spum_wordalign_padlen; + if (spu_subtype == SPU_SUBTYPE_SPUM_NS2) + spu->spu_ctx_max_payload = spum_ns2_ctx_max_payload; + else + spu->spu_ctx_max_payload = spum_nsp_ctx_max_payload; + } else { + dev_dbg(dev, "Registering SPU2 functions"); + spu->spu_dump_msg_hdr = spu2_dump_msg_hdr; + spu->spu_ctx_max_payload = spu2_ctx_max_payload; + spu->spu_payload_length = spu2_payload_length; + spu->spu_response_hdr_len = spu2_response_hdr_len; + spu->spu_hash_pad_len = spu2_hash_pad_len; + spu->spu_gcm_ccm_pad_len = spu2_gcm_ccm_pad_len; + 
spu->spu_assoc_resp_len = spu2_assoc_resp_len; + spu->spu_aead_ivlen = spu2_aead_ivlen; + spu->spu_hash_type = spu2_hash_type; + spu->spu_digest_size = spu2_digest_size; + spu->spu_create_request = spu2_create_request; + spu->spu_cipher_req_init = spu2_cipher_req_init; + spu->spu_cipher_req_finish = spu2_cipher_req_finish; + spu->spu_request_pad = spu2_request_pad; + spu->spu_tx_status_len = spu2_tx_status_len; + spu->spu_rx_status_len = spu2_rx_status_len; + spu->spu_status_process = spu2_status_process; + spu->spu_xts_tweak_in_payload = spu2_xts_tweak_in_payload; + spu->spu_ccm_update_iv = spu2_ccm_update_iv; + spu->spu_wordalign_padlen = spu2_wordalign_padlen; + } +} + +/** + * spu_mb_init() - Initialize mailbox client. Request ownership of a mailbox + * channel for the SPU being probed. + * @dev: SPU driver device structure + * + * Return: 0 if successful + * < 0 otherwise + */ +static int spu_mb_init(struct device *dev) +{ + struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu]; + int err; + + mcl->dev = dev; + mcl->tx_block = false; + mcl->tx_tout = 0; + mcl->knows_txdone = false; + mcl->rx_callback = spu_rx_callback; + mcl->tx_done = NULL; + + iproc_priv.mbox[iproc_priv.spu.num_spu] = + mbox_request_channel(mcl, 0); + if (IS_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu])) { + err = (int)PTR_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu]); + dev_err(dev, + "Mbox channel %d request failed with err %d", + iproc_priv.spu.num_spu, err); + iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL; + return err; + } + + return 0; +} + +static void spu_mb_release(struct platform_device *pdev) +{ + int i; + + for (i = 0; i < iproc_priv.spu.num_spu; i++) + mbox_free_channel(iproc_priv.mbox[i]); +} + +static void spu_counters_init(void) +{ + int i; + int j; + + atomic_set(&iproc_priv.session_count, 0); + atomic_set(&iproc_priv.stream_count, 0); + atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu); + atomic64_set(&iproc_priv.bytes_in, 0); + atomic64_set(&iproc_priv.bytes_out, 0); + for (i = 0; i < SPU_OP_NUM; i++) { + atomic_set(&iproc_priv.op_counts[i], 0); + atomic_set(&iproc_priv.setkey_cnt[i], 0); + } + for (i = 0; i < CIPHER_ALG_LAST; i++) + for (j = 0; j < CIPHER_MODE_LAST; j++) + atomic_set(&iproc_priv.cipher_cnt[i][j], 0); + + for (i = 0; i < HASH_ALG_LAST; i++) { + atomic_set(&iproc_priv.hash_cnt[i], 0); + atomic_set(&iproc_priv.hmac_cnt[i], 0); + } + for (i = 0; i < AEAD_TYPE_LAST; i++) + atomic_set(&iproc_priv.aead_cnt[i], 0); + + atomic_set(&iproc_priv.mb_no_spc, 0); + atomic_set(&iproc_priv.mb_send_fail, 0); + atomic_set(&iproc_priv.bad_icv, 0); +} + +static int spu_register_ablkcipher(struct iproc_alg_s *driver_alg) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_alg *crypto = &driver_alg->alg.crypto; + int err; + + /* SPU2 does not support RC4 */ + if ((driver_alg->cipher_info.alg == CIPHER_ALG_RC4) && + (spu->spu_type == SPU_TYPE_SPU2)) + return 0; + + crypto->cra_module = THIS_MODULE; + crypto->cra_priority = cipher_pri; + crypto->cra_alignmask = 0; + crypto->cra_ctxsize = sizeof(struct iproc_ctx_s); + INIT_LIST_HEAD(&crypto->cra_list); + + crypto->cra_init = ablkcipher_cra_init; + crypto->cra_exit = generic_cra_exit; + crypto->cra_type = &crypto_ablkcipher_type; + crypto->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY; + + crypto->cra_ablkcipher.setkey = ablkcipher_setkey; + crypto->cra_ablkcipher.encrypt = ablkcipher_encrypt; + crypto->cra_ablkcipher.decrypt = ablkcipher_decrypt; + + err = 
crypto_register_alg(crypto); + /* Mark alg as having been registered, if successful */ + if (err == 0) + driver_alg->registered = true; + pr_debug(" registered ablkcipher %s\n", crypto->cra_driver_name); + return err; +} + +static int spu_register_ahash(struct iproc_alg_s *driver_alg) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct ahash_alg *hash = &driver_alg->alg.hash; + int err; + + /* AES-XCBC is the only AES hash type currently supported on SPU-M */ + if ((driver_alg->auth_info.alg == HASH_ALG_AES) && + (driver_alg->auth_info.mode != HASH_MODE_XCBC) && + (spu->spu_type == SPU_TYPE_SPUM)) + return 0; + + /* SHA3 algorithm variants are only registered on SPU2 version 2 hardware. */ + if ((driver_alg->auth_info.alg >= HASH_ALG_SHA3_224) && + (spu->spu_subtype != SPU_SUBTYPE_SPU2_V2)) + return 0; + + hash->halg.base.cra_module = THIS_MODULE; + hash->halg.base.cra_priority = hash_pri; + hash->halg.base.cra_alignmask = 0; + hash->halg.base.cra_ctxsize = sizeof(struct iproc_ctx_s); + hash->halg.base.cra_init = ahash_cra_init; + hash->halg.base.cra_exit = generic_cra_exit; + hash->halg.base.cra_type = &crypto_ahash_type; + hash->halg.base.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC; + hash->halg.statesize = sizeof(struct spu_hash_export_s); + + if (driver_alg->auth_info.mode != HASH_MODE_HMAC) { + hash->setkey = ahash_setkey; + hash->init = ahash_init; + hash->update = ahash_update; + hash->final = ahash_final; + hash->finup = ahash_finup; + hash->digest = ahash_digest; + } else { + hash->setkey = ahash_hmac_setkey; + hash->init = ahash_hmac_init; + hash->update = ahash_hmac_update; + hash->final = ahash_hmac_final; + hash->finup = ahash_hmac_finup; + hash->digest = ahash_hmac_digest; + } + hash->export = ahash_export; + hash->import = ahash_import; + + err = crypto_register_ahash(hash); + /* Mark alg as having been registered, if successful */ + if (err == 0) + driver_alg->registered = true; + pr_debug(" registered ahash %s\n", + hash->halg.base.cra_driver_name); + return err; +} + +static int spu_register_aead(struct iproc_alg_s *driver_alg) +{ + struct aead_alg *aead = &driver_alg->alg.aead; + int err; + + aead->base.cra_module = THIS_MODULE; + aead->base.cra_priority = aead_pri; + aead->base.cra_alignmask = 0; + aead->base.cra_ctxsize = sizeof(struct iproc_ctx_s); + INIT_LIST_HEAD(&aead->base.cra_list); + + aead->base.cra_flags |= CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; + /* setkey set in alg initialization */ + aead->setauthsize = aead_setauthsize; + aead->encrypt = aead_encrypt; + aead->decrypt = aead_decrypt; + aead->init = aead_cra_init; + aead->exit = aead_cra_exit; + + err = crypto_register_aead(aead); + /* Mark alg as having been registered, if successful */ + if (err == 0) + driver_alg->registered = true; + pr_debug(" registered aead %s\n", aead->base.cra_driver_name); + return err; +} + +/* register crypto algorithms the device supports */ +static int spu_algs_register(struct device *dev) +{ + int i, j; + int err; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + switch (driver_algs[i].type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = spu_register_ablkcipher(&driver_algs[i]); + break; + case CRYPTO_ALG_TYPE_AHASH: + err = spu_register_ahash(&driver_algs[i]); + break; + case CRYPTO_ALG_TYPE_AEAD: + err = spu_register_aead(&driver_algs[i]); + break; + default: + dev_err(dev, + "iproc-crypto: unknown alg type: %d", + driver_algs[i].type); + err = -EINVAL; + } + + if (err) { + dev_err(dev, "alg registration failed with error %d\n", + err); + goto err_algs; + } + } + + 
return 0; + +err_algs: + for (j = 0; j < i; j++) { + /* Skip any algorithm not registered */ + if (!driver_algs[j].registered) + continue; + switch (driver_algs[j].type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + crypto_unregister_alg(&driver_algs[j].alg.crypto); + driver_algs[j].registered = false; + break; + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&driver_algs[j].alg.hash); + driver_algs[j].registered = false; + break; + case CRYPTO_ALG_TYPE_AEAD: + crypto_unregister_aead(&driver_algs[j].alg.aead); + driver_algs[j].registered = false; + break; + } + } + return err; +} + +/* ==================== Kernel Platform API ==================== */ + +static struct spu_type_subtype spum_ns2_types = { + SPU_TYPE_SPUM, SPU_SUBTYPE_SPUM_NS2 +}; + +static struct spu_type_subtype spum_nsp_types = { + SPU_TYPE_SPUM, SPU_SUBTYPE_SPUM_NSP +}; + +static struct spu_type_subtype spu2_types = { + SPU_TYPE_SPU2, SPU_SUBTYPE_SPU2_V1 +}; + +static struct spu_type_subtype spu2_v2_types = { + SPU_TYPE_SPU2, SPU_SUBTYPE_SPU2_V2 +}; + +static const struct of_device_id bcm_spu_dt_ids[] = { + { + .compatible = "brcm,spum-crypto", + .data = &spum_ns2_types, + }, + { + .compatible = "brcm,spum-nsp-crypto", + .data = &spum_nsp_types, + }, + { + .compatible = "brcm,spu2-crypto", + .data = &spu2_types, + }, + { + .compatible = "brcm,spu2-v2-crypto", + .data = &spu2_v2_types, + }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, bcm_spu_dt_ids); + +static int spu_dt_read(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct spu_hw *spu = &iproc_priv.spu; + struct resource *spu_ctrl_regs; + const struct of_device_id *match; + const struct spu_type_subtype *matched_spu_type; + void __iomem *spu_reg_vbase[MAX_SPUS]; + int err; + + match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev); + matched_spu_type = match->data; + + if (iproc_priv.spu.num_spu > 0) { + /* If this is 2nd or later SPU, make sure it's same type */ + if ((spu->spu_type != matched_spu_type->type) || + (spu->spu_subtype != matched_spu_type->subtype)) { + err = -EINVAL; + dev_err(&pdev->dev, "Multiple SPU types not allowed"); + return err; + } + } else { + /* Record type of first SPU */ + spu->spu_type = matched_spu_type->type; + spu->spu_subtype = matched_spu_type->subtype; + } + + /* Get and map SPU registers */ + spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!spu_ctrl_regs) { + err = -EINVAL; + dev_err(&pdev->dev, "Invalid/missing registers for SPU\n"); + return err; + } + + spu_reg_vbase[iproc_priv.spu.num_spu] = + devm_ioremap_resource(dev, spu_ctrl_regs); + if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) { + err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]); + dev_err(&pdev->dev, "Failed to map registers: %d\n", + err); + spu_reg_vbase[iproc_priv.spu.num_spu] = NULL; + return err; + } + + dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu); + + spu->reg_vbase[iproc_priv.spu.num_spu] = + spu_reg_vbase[iproc_priv.spu.num_spu]; + + return 0; +} + +int bcm_spu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct spu_hw *spu = &iproc_priv.spu; + int err = 0; + + iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev; + platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu], + &iproc_priv); + + err = spu_dt_read(pdev); + if (err < 0) + goto failure; + + err = spu_mb_init(&pdev->dev); + if (err < 0) + goto failure; + + iproc_priv.spu.num_spu++; + + /* If already initialized, we've just added another SPU and are done */ + if (iproc_priv.inited) + return 0; + + if (spu->spu_type ==
SPU_TYPE_SPUM) + iproc_priv.bcm_hdr_len = 8; + else if (spu->spu_type == SPU_TYPE_SPU2) + iproc_priv.bcm_hdr_len = 0; + + spu_functions_register(&pdev->dev, spu->spu_type, spu->spu_subtype); + + spu_counters_init(); + + spu_setup_debugfs(); + + err = spu_algs_register(dev); + if (err < 0) + goto fail_reg; + + iproc_priv.inited = true; + + return 0; + +fail_reg: + spu_free_debugfs(); +failure: + spu_mb_release(pdev); + dev_err(dev, "%s failed with error %d.\n", __func__, err); + + return err; +} + +int bcm_spu_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + char *cdn; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + /* + * Not all algorithms were registered, depending on whether + * hardware is SPU or SPU2. So here we make sure to skip + * those algorithms that were not previously registered. + */ + if (!driver_algs[i].registered) + continue; + + switch (driver_algs[i].type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + crypto_unregister_alg(&driver_algs[i].alg.crypto); + dev_dbg(dev, " unregistered cipher %s\n", + driver_algs[i].alg.crypto.cra_driver_name); + driver_algs[i].registered = false; + break; + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&driver_algs[i].alg.hash); + cdn = driver_algs[i].alg.hash.halg.base.cra_driver_name; + dev_dbg(dev, " unregistered hash %s\n", cdn); + driver_algs[i].registered = false; + break; + case CRYPTO_ALG_TYPE_AEAD: + crypto_unregister_aead(&driver_algs[i].alg.aead); + dev_dbg(dev, " unregistered aead %s\n", + driver_algs[i].alg.aead.base.cra_driver_name); + driver_algs[i].registered = false; + break; + } + } + spu_free_debugfs(); + spu_mb_release(pdev); + return 0; +} + +/* ===== Kernel Module API ===== */ + +static struct platform_driver bcm_spu_pdriver = { + .driver = { + .name = "brcm-spu-crypto", + .of_match_table = of_match_ptr(bcm_spu_dt_ids), + }, + .probe = bcm_spu_probe, + .remove = bcm_spu_remove, +}; +module_platform_driver(bcm_spu_pdriver); + +MODULE_AUTHOR("Rob Rice <rob.rice@broadcom.com>"); +MODULE_DESCRIPTION("Broadcom symmetric crypto offload driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h new file mode 100644 index 000000000000..51dca529ce8f --- /dev/null +++ b/drivers/crypto/bcm/cipher.h @@ -0,0 +1,483 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. 
+ */ + +#ifndef _CIPHER_H +#define _CIPHER_H + +#include <linux/atomic.h> +#include <linux/mailbox/brcm-message.h> +#include <linux/mailbox_client.h> +#include <crypto/aes.h> +#include <crypto/internal/hash.h> +#include <crypto/aead.h> +#include <crypto/sha.h> +#include <crypto/sha3.h> + +#include "spu.h" +#include "spum.h" +#include "spu2.h" + +/* Driver supports up to MAX_SPUS SPU blocks */ +#define MAX_SPUS 16 + +#define ARC4_MIN_KEY_SIZE 1 +#define ARC4_MAX_KEY_SIZE 256 +#define ARC4_BLOCK_SIZE 1 +#define ARC4_STATE_SIZE 4 + +#define CCM_AES_IV_SIZE 16 +#define GCM_AES_IV_SIZE 12 +#define GCM_ESP_IV_SIZE 8 +#define CCM_ESP_IV_SIZE 8 +#define RFC4543_ICV_SIZE 16 + +#define MAX_KEY_SIZE ARC4_MAX_KEY_SIZE +#define MAX_IV_SIZE AES_BLOCK_SIZE +#define MAX_DIGEST_SIZE SHA3_512_DIGEST_SIZE +#define MAX_ASSOC_SIZE 512 + +/* size of salt value for AES-GCM-ESP and AES-CCM-ESP */ +#define GCM_ESP_SALT_SIZE 4 +#define CCM_ESP_SALT_SIZE 3 +#define MAX_SALT_SIZE GCM_ESP_SALT_SIZE +#define GCM_ESP_SALT_OFFSET 0 +#define CCM_ESP_SALT_OFFSET 1 + +#define GCM_ESP_DIGESTSIZE 16 + +#define MAX_HASH_BLOCK_SIZE SHA512_BLOCK_SIZE + +/* + * Maximum number of bytes from a non-final hash request that can be deferred + * until more data is available. With the new crypto API framework, this + * can be no more than one block of data. + */ +#define HASH_CARRY_MAX MAX_HASH_BLOCK_SIZE + +/* Force at least 4-byte alignment of all SPU message fields */ +#define SPU_MSG_ALIGN 4 + +/* Number of times to resend mailbox message if mb queue is full */ +#define SPU_MB_RETRY_MAX 1000 + +/* op_counts[] indexes */ +enum op_type { + SPU_OP_CIPHER, + SPU_OP_HASH, + SPU_OP_HMAC, + SPU_OP_AEAD, + SPU_OP_NUM +}; + +enum spu_spu_type { + SPU_TYPE_SPUM, + SPU_TYPE_SPU2, +}; + +/* + * SPUM_NS2 and SPUM_NSP are the SPU-M block on Northstar 2 and Northstar Plus, + * respectively. + */ +enum spu_spu_subtype { + SPU_SUBTYPE_SPUM_NS2, + SPU_SUBTYPE_SPUM_NSP, + SPU_SUBTYPE_SPU2_V1, + SPU_SUBTYPE_SPU2_V2 +}; + +struct spu_type_subtype { + enum spu_spu_type type; + enum spu_spu_subtype subtype; +}; + +struct cipher_op { + enum spu_cipher_alg alg; + enum spu_cipher_mode mode; +}; + +struct auth_op { + enum hash_alg alg; + enum hash_mode mode; +}; + +struct iproc_alg_s { + u32 type; + union { + struct crypto_alg crypto; + struct ahash_alg hash; + struct aead_alg aead; + } alg; + struct cipher_op cipher_info; + struct auth_op auth_info; + bool auth_first; + bool registered; +}; + +/* + * Buffers for a SPU request/reply message pair. All part of one structure to + * allow a single alloc per request. + */ +struct spu_msg_buf { + /* Request message fragments */ + + /* + * SPU request message header. For SPU-M, holds MH, EMH, SCTX, BDESC, + * and BD header. For SPU2, holds FMD, OMD. + */ + u8 bcm_spu_req_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)]; + + /* IV or counter. Sized to include salt. Also used for the XTS tweak. */ + u8 iv_ctr[ALIGN(2 * AES_BLOCK_SIZE, SPU_MSG_ALIGN)]; + + /* Hash digest, for both request and response.
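+ * Sized via MAX_DIGEST_SIZE to hold the largest supported digest (SHA3-512).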
*/ + u8 digest[ALIGN(MAX_DIGEST_SIZE, SPU_MSG_ALIGN)]; + + /* SPU request message padding */ + u8 spu_req_pad[ALIGN(SPU_PAD_LEN_MAX, SPU_MSG_ALIGN)]; + + /* SPU-M request message STATUS field */ + u8 tx_stat[ALIGN(SPU_TX_STATUS_LEN, SPU_MSG_ALIGN)]; + + /* Response message fragments */ + + /* SPU response message header */ + u8 spu_resp_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)]; + + /* SPU response message STATUS field padding */ + u8 rx_stat_pad[ALIGN(SPU_STAT_PAD_MAX, SPU_MSG_ALIGN)]; + + /* SPU response message STATUS field */ + u8 rx_stat[ALIGN(SPU_RX_STATUS_LEN, SPU_MSG_ALIGN)]; + + union { + /* Buffers only used for ablkcipher */ + struct { + /* + * Field used for either SUPDT when RC4 is used + * -OR- tweak value when XTS/AES is used + */ + u8 supdt_tweak[ALIGN(SPU_SUPDT_LEN, SPU_MSG_ALIGN)]; + } c; + + /* Buffers only used for aead */ + struct { + /* SPU response pad for GCM data */ + u8 gcmpad[ALIGN(AES_BLOCK_SIZE, SPU_MSG_ALIGN)]; + + /* SPU request msg padding for GCM AAD */ + u8 req_aad_pad[ALIGN(SPU_PAD_LEN_MAX, SPU_MSG_ALIGN)]; + + /* SPU response data to be discarded */ + u8 resp_aad[ALIGN(MAX_ASSOC_SIZE + MAX_IV_SIZE, + SPU_MSG_ALIGN)]; + } a; + }; +}; + +struct iproc_ctx_s { + u8 enckey[MAX_KEY_SIZE + ARC4_STATE_SIZE]; + unsigned int enckeylen; + + u8 authkey[MAX_KEY_SIZE + ARC4_STATE_SIZE]; + unsigned int authkeylen; + + u8 salt[MAX_SALT_SIZE]; + unsigned int salt_len; + unsigned int salt_offset; + u8 iv[MAX_IV_SIZE]; + + unsigned int digestsize; + + struct iproc_alg_s *alg; + bool is_esp; + + struct cipher_op cipher; + enum spu_cipher_type cipher_type; + + struct auth_op auth; + bool auth_first; + + /* + * The maximum length in bytes of the payload in a SPU message for this + * context. For SPU-M, the payload is the combination of AAD and data. + * For SPU2, the payload is just data. A value of SPU_MAX_PAYLOAD_INF + * indicates that there is no limit to the length of the SPU message + * payload. + */ + unsigned int max_payload; + + struct crypto_aead *fallback_cipher; + + /* auth_type is determined during processing of request */ + + u8 ipad[MAX_HASH_BLOCK_SIZE]; + u8 opad[MAX_HASH_BLOCK_SIZE]; + + /* + * Buffer to hold SPU message header template. Template is created at + * setkey time for ablkcipher requests, since most of the fields in the + * header are known at that time. At request time, just fill in a few + * missing pieces related to length of data in the request and IVs, etc. + */ + u8 bcm_spu_req_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)]; + + /* Length of SPU request header */ + u16 spu_req_hdr_len; + + /* Expected length of SPU response header */ + u16 spu_resp_hdr_len; + + /* + * shash descriptor - needed to perform incremental hashing in + * software, when hw doesn't support it.
+ */ + struct shash_desc *shash; + + bool is_rfc4543; /* RFC 4543 style of GMAC */ +}; + +/* state from iproc_reqctx_s necessary for hash state export/import */ +struct spu_hash_export_s { + unsigned int total_todo; + unsigned int total_sent; + u8 hash_carry[HASH_CARRY_MAX]; + unsigned int hash_carry_len; + u8 incr_hash[MAX_DIGEST_SIZE]; + bool is_sw_hmac; +}; + +struct iproc_reqctx_s { + /* general context */ + struct crypto_async_request *parent; + + /* only valid after enqueue() */ + struct iproc_ctx_s *ctx; + + u8 chan_idx; /* Mailbox channel to be used to submit this request */ + + /* total todo, rx'd, and sent for this request */ + unsigned int total_todo; + unsigned int total_received; /* only valid for ablkcipher */ + unsigned int total_sent; + + /* + * num bytes sent to hw from the src sg in this request. This can differ + * from total_sent for incremental hashing. total_sent includes previous + * init() and update() data. src_sent does not. + */ + unsigned int src_sent; + + /* + * For AEAD requests, start of associated data. This will typically + * point to the beginning of the src scatterlist from the request, + * since assoc data is at the beginning of the src scatterlist rather + * than in its own sg. + */ + struct scatterlist *assoc; + + /* + * scatterlist entry and offset to start of data for next chunk. Crypto + * API src scatterlist for AEAD starts with AAD, if present. For first + * chunk, src_sg is sg entry at beginning of input data (after AAD). + * src_skip begins at the offset in that sg entry where data begins. + */ + struct scatterlist *src_sg; + int src_nents; /* Number of src entries with data */ + u32 src_skip; /* bytes of current sg entry already used */ + + /* + * Same for destination. For AEAD, if there is AAD, output data must + * be written at offset following AAD. + */ + struct scatterlist *dst_sg; + int dst_nents; /* Number of dst entries with data */ + u32 dst_skip; /* bytes of current sg entry already written */ + + /* Mailbox message used to send this request to PDC driver */ + struct brcm_message mb_mssg; + + bool bd_suppress; /* suppress BD field in SPU response? */ + + /* cipher context */ + bool is_encrypt; + + /* + * CBC mode: IV. CTR mode: counter. Else empty. Used as a DMA + * buffer for AEAD requests. So allocate as DMAable memory. If IV + * concatenated with salt, includes the salt. + */ + u8 *iv_ctr; + /* Length of IV or counter, in bytes */ + unsigned int iv_ctr_len; + + /* + * Hash requests can be of any size, whether initial, update, or final. + * A non-final request must be submitted to the SPU as an integral + * number of blocks. This may leave data at the end of the request + * that is not a full block. Since the request is non-final, it cannot + * be padded. So, we write the remainder to this hash_carry buffer and + * hold it until the next request arrives. The carry data is then + * submitted at the beginning of the data in the next SPU msg. + * hash_carry_len is the number of bytes currently in hash_carry. These + * fields are only used for ahash requests. + */ + u8 hash_carry[HASH_CARRY_MAX]; + unsigned int hash_carry_len; + unsigned int is_final; /* is this the final for the hash op? */ + + /* + * Digest from incremental hash is saved here to include in next hash + * operation. Cannot be stored in req->result for truncated hashes, + * since result may be sized for final digest. Cannot be saved in + * msg_buf because that gets deleted between incremental hash ops + * and is not saved as part of export(). 
+ */ + u8 incr_hash[MAX_DIGEST_SIZE]; + + /* hmac context */ + bool is_sw_hmac; + + /* aead context */ + struct crypto_tfm *old_tfm; + crypto_completion_t old_complete; + void *old_data; + + gfp_t gfp; + + /* Buffers used to build SPU request and response messages */ + struct spu_msg_buf msg_buf; +}; + +/* + * Structure encapsulates a set of function pointers specific to the type of + * SPU hardware running. These functions handling creation and parsing of + * SPU request messages and SPU response messages. Includes hardware-specific + * values read from device tree. + */ +struct spu_hw { + void (*spu_dump_msg_hdr)(u8 *buf, unsigned int buf_len); + u32 (*spu_ctx_max_payload)(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize); + u32 (*spu_payload_length)(u8 *spu_hdr); + u16 (*spu_response_hdr_len)(u16 auth_key_len, u16 enc_key_len, + bool is_hash); + u16 (*spu_hash_pad_len)(enum hash_alg hash_alg, + enum hash_mode hash_mode, u32 chunksize, + u16 hash_block_size); + u32 (*spu_gcm_ccm_pad_len)(enum spu_cipher_mode cipher_mode, + unsigned int data_size); + u32 (*spu_assoc_resp_len)(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, + unsigned int iv_len, bool is_encrypt); + u8 (*spu_aead_ivlen)(enum spu_cipher_mode cipher_mode, + u16 iv_len); + enum hash_type (*spu_hash_type)(u32 src_sent); + u32 (*spu_digest_size)(u32 digest_size, enum hash_alg alg, + enum hash_type); + u32 (*spu_create_request)(u8 *spu_hdr, + struct spu_request_opts *req_opts, + struct spu_cipher_parms *cipher_parms, + struct spu_hash_parms *hash_parms, + struct spu_aead_parms *aead_parms, + unsigned int data_size); + u16 (*spu_cipher_req_init)(u8 *spu_hdr, + struct spu_cipher_parms *cipher_parms); + void (*spu_cipher_req_finish)(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size); + void (*spu_request_pad)(u8 *pad_start, u32 gcm_padding, + u32 hash_pad_len, enum hash_alg auth_alg, + enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding); + u8 (*spu_xts_tweak_in_payload)(void); + u8 (*spu_tx_status_len)(void); + u8 (*spu_rx_status_len)(void); + int (*spu_status_process)(u8 *statp); + void (*spu_ccm_update_iv)(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, unsigned int chunksize, + bool is_encrypt, bool is_esp); + u32 (*spu_wordalign_padlen)(u32 data_size); + + /* The base virtual address of the SPU hw registers */ + void __iomem *reg_vbase[MAX_SPUS]; + + /* Version of the SPU hardware */ + enum spu_spu_type spu_type; + + /* Sub-version of the SPU hardware */ + enum spu_spu_subtype spu_subtype; + + /* The number of SPUs on this platform */ + u32 num_spu; +}; + +struct device_private { + struct platform_device *pdev[MAX_SPUS]; + + struct spu_hw spu; + + atomic_t session_count; /* number of streams active */ + atomic_t stream_count; /* monotonic counter for streamID's */ + + /* Length of BCM header. Set to 0 when hw does not expect BCM HEADER. 
*/ + u8 bcm_hdr_len; + + /* The index of the channel to use for the next crypto request */ + atomic_t next_chan; + + struct dentry *debugfs_dir; + struct dentry *debugfs_stats; + + /* Number of request bytes processed and result bytes returned */ + atomic64_t bytes_in; + atomic64_t bytes_out; + + /* Number of operations of each type */ + atomic_t op_counts[SPU_OP_NUM]; + + atomic_t cipher_cnt[CIPHER_ALG_LAST][CIPHER_MODE_LAST]; + atomic_t hash_cnt[HASH_ALG_LAST]; + atomic_t hmac_cnt[HASH_ALG_LAST]; + atomic_t aead_cnt[AEAD_TYPE_LAST]; + + /* Number of calls to setkey() for each operation type */ + atomic_t setkey_cnt[SPU_OP_NUM]; + + /* Number of times request was resubmitted because mb was full */ + atomic_t mb_no_spc; + + /* Number of mailbox send failures */ + atomic_t mb_send_fail; + + /* Number of ICV check failures for AEAD messages */ + atomic_t bad_icv; + + struct mbox_client mcl[MAX_SPUS]; + /* Array of mailbox channel pointers, one for each channel */ + struct mbox_chan *mbox[MAX_SPUS]; + + /* Driver initialized */ + bool inited; +}; + +extern struct device_private iproc_priv; + +#endif diff --git a/drivers/crypto/bcm/spu.c b/drivers/crypto/bcm/spu.c new file mode 100644 index 000000000000..dbb5c03dde49 --- /dev/null +++ b/drivers/crypto/bcm/spu.c @@ -0,0 +1,1251 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. 
+ */ + +#include <linux/kernel.h> +#include <linux/string.h> + +#include "util.h" +#include "spu.h" +#include "spum.h" +#include "cipher.h" + +/* This array is based on the hash algo type supported in spu.h */ +char *tag_to_hash_idx[] = { "none", "md5", "sha1", "sha224", "sha256" }; + +char *hash_alg_name[] = { "None", "md5", "sha1", "sha224", "sha256", "aes", + "sha384", "sha512", "sha3_224", "sha3_256", "sha3_384", "sha3_512" }; + +char *aead_alg_name[] = { "ccm(aes)", "gcm(aes)", "authenc" }; + +/* Assumes SPU-M messages are in big endian */ +void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len) +{ + u8 *ptr = buf; + struct SPUHEADER *spuh = (struct SPUHEADER *)buf; + unsigned int hash_key_len = 0; + unsigned int hash_state_len = 0; + unsigned int cipher_key_len = 0; + unsigned int iv_len; + u32 pflags; + u32 cflags; + u32 ecf; + u32 cipher_alg; + u32 cipher_mode; + u32 cipher_type; + u32 hash_alg; + u32 hash_mode; + u32 hash_type; + u32 sctx_size; /* SCTX length in words */ + u32 sctx_pl_len; /* SCTX payload length in bytes */ + + packet_log("\n"); + packet_log("SPU Message header %p len: %u\n", buf, buf_len); + + /* ========== Decode MH ========== */ + packet_log(" MH 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + if (spuh->mh.flags & MH_SCTX_PRES) + packet_log(" SCTX present\n"); + if (spuh->mh.flags & MH_BDESC_PRES) + packet_log(" BDESC present\n"); + if (spuh->mh.flags & MH_MFM_PRES) + packet_log(" MFM present\n"); + if (spuh->mh.flags & MH_BD_PRES) + packet_log(" BD present\n"); + if (spuh->mh.flags & MH_HASH_PRES) + packet_log(" HASH present\n"); + if (spuh->mh.flags & MH_SUPDT_PRES) + packet_log(" SUPDT present\n"); + packet_log(" Opcode 0x%02x\n", spuh->mh.op_code); + + ptr += sizeof(spuh->mh) + sizeof(spuh->emh); /* skip emh. unused */ + + /* ========== Decode SCTX ========== */ + if (spuh->mh.flags & MH_SCTX_PRES) { + pflags = be32_to_cpu(spuh->sa.proto_flags); + packet_log(" SCTX[0] 0x%08x\n", pflags); + sctx_size = pflags & SCTX_SIZE; + packet_log(" Size %u words\n", sctx_size); + + cflags = be32_to_cpu(spuh->sa.cipher_flags); + packet_log(" SCTX[1] 0x%08x\n", cflags); + packet_log(" Inbound:%lu (1:decrypt/vrfy 0:encrypt/auth)\n", + (cflags & CIPHER_INBOUND) >> CIPHER_INBOUND_SHIFT); + packet_log(" Order:%lu (1:AuthFirst 0:EncFirst)\n", + (cflags & CIPHER_ORDER) >> CIPHER_ORDER_SHIFT); + packet_log(" ICV_IS_512:%lx\n", + (cflags & ICV_IS_512) >> ICV_IS_512_SHIFT); + cipher_alg = (cflags & CIPHER_ALG) >> CIPHER_ALG_SHIFT; + cipher_mode = (cflags & CIPHER_MODE) >> CIPHER_MODE_SHIFT; + cipher_type = (cflags & CIPHER_TYPE) >> CIPHER_TYPE_SHIFT; + packet_log(" Crypto Alg:%u Mode:%u Type:%u\n", + cipher_alg, cipher_mode, cipher_type); + hash_alg = (cflags & HASH_ALG) >> HASH_ALG_SHIFT; + hash_mode = (cflags & HASH_MODE) >> HASH_MODE_SHIFT; + hash_type = (cflags & HASH_TYPE) >> HASH_TYPE_SHIFT; + packet_log(" Hash Alg:%x Mode:%x Type:%x\n", + hash_alg, hash_mode, hash_type); + packet_log(" UPDT_Offset:%u\n", cflags & UPDT_OFST); + + ecf = be32_to_cpu(spuh->sa.ecf); + packet_log(" SCTX[2] 0x%08x\n", ecf); + packet_log(" WriteICV:%lu CheckICV:%lu ICV_SIZE:%u ", + (ecf & INSERT_ICV) >> INSERT_ICV_SHIFT, + (ecf & CHECK_ICV) >> CHECK_ICV_SHIFT, + (ecf & ICV_SIZE) >> ICV_SIZE_SHIFT); + packet_log("BD_SUPPRESS:%lu\n", + (ecf & BD_SUPPRESS) >> BD_SUPPRESS_SHIFT); + packet_log(" SCTX_IV:%lu ExplicitIV:%lu GenIV:%lu ", + (ecf & SCTX_IV) >> SCTX_IV_SHIFT, + (ecf & EXPLICIT_IV) >> EXPLICIT_IV_SHIFT, + (ecf & GEN_IV) >> GEN_IV_SHIFT); + packet_log("IV_OV_OFST:%lu EXP_IV_SIZE:%u\n", + (ecf & 
IV_OFFSET) >> IV_OFFSET_SHIFT, + ecf & EXP_IV_SIZE); + + ptr += sizeof(struct SCTX); + + if (hash_alg && hash_mode) { + char *name = "NONE"; + + switch (hash_alg) { + case HASH_ALG_MD5: + hash_key_len = 16; + name = "MD5"; + break; + case HASH_ALG_SHA1: + hash_key_len = 20; + name = "SHA1"; + break; + case HASH_ALG_SHA224: + hash_key_len = 28; + name = "SHA224"; + break; + case HASH_ALG_SHA256: + hash_key_len = 32; + name = "SHA256"; + break; + case HASH_ALG_SHA384: + hash_key_len = 48; + name = "SHA384"; + break; + case HASH_ALG_SHA512: + hash_key_len = 64; + name = "SHA512"; + break; + case HASH_ALG_AES: + hash_key_len = 0; + name = "AES"; + break; + case HASH_ALG_NONE: + break; + } + + packet_log(" Auth Key Type:%s Length:%u Bytes\n", + name, hash_key_len); + packet_dump(" KEY: ", ptr, hash_key_len); + ptr += hash_key_len; + } else if ((hash_alg == HASH_ALG_AES) && + (hash_mode == HASH_MODE_XCBC)) { + char *name = "NONE"; + + switch (cipher_type) { + case CIPHER_TYPE_AES128: + hash_key_len = 16; + name = "AES128-XCBC"; + break; + case CIPHER_TYPE_AES192: + hash_key_len = 24; + name = "AES192-XCBC"; + break; + case CIPHER_TYPE_AES256: + hash_key_len = 32; + name = "AES256-XCBC"; + break; + } + packet_log(" Auth Key Type:%s Length:%u Bytes\n", + name, hash_key_len); + packet_dump(" KEY: ", ptr, hash_key_len); + ptr += hash_key_len; + } + + if (hash_alg && (hash_mode == HASH_MODE_NONE) && + (hash_type == HASH_TYPE_UPDT)) { + char *name = "NONE"; + + switch (hash_alg) { + case HASH_ALG_MD5: + hash_state_len = 16; + name = "MD5"; + break; + case HASH_ALG_SHA1: + hash_state_len = 20; + name = "SHA1"; + break; + case HASH_ALG_SHA224: + hash_state_len = 32; + name = "SHA224"; + break; + case HASH_ALG_SHA256: + hash_state_len = 32; + name = "SHA256"; + break; + case HASH_ALG_SHA384: + hash_state_len = 48; + name = "SHA384"; + break; + case HASH_ALG_SHA512: + hash_state_len = 64; + name = "SHA512"; + break; + case HASH_ALG_AES: + hash_state_len = 0; + name = "AES"; + break; + case HASH_ALG_NONE: + break; + } + + packet_log(" Auth State Type:%s Length:%u Bytes\n", + name, hash_state_len); + packet_dump(" State: ", ptr, hash_state_len); + ptr += hash_state_len; + } + + if (cipher_alg) { + char *name = "NONE"; + + switch (cipher_alg) { + case CIPHER_ALG_DES: + cipher_key_len = 8; + name = "DES"; + break; + case CIPHER_ALG_3DES: + cipher_key_len = 24; + name = "3DES"; + break; + case CIPHER_ALG_RC4: + cipher_key_len = 260; + name = "ARC4"; + break; + case CIPHER_ALG_AES: + switch (cipher_type) { + case CIPHER_TYPE_AES128: + cipher_key_len = 16; + name = "AES128"; + break; + case CIPHER_TYPE_AES192: + cipher_key_len = 24; + name = "AES192"; + break; + case CIPHER_TYPE_AES256: + cipher_key_len = 32; + name = "AES256"; + break; + } + break; + case CIPHER_ALG_NONE: + break; + } + + packet_log(" Cipher Key Type:%s Length:%u Bytes\n", + name, cipher_key_len); + + /* XTS has two keys */ + if (cipher_mode == CIPHER_MODE_XTS) { + packet_dump(" KEY2: ", ptr, cipher_key_len); + ptr += cipher_key_len; + packet_dump(" KEY1: ", ptr, cipher_key_len); + ptr += cipher_key_len; + + cipher_key_len *= 2; + } else { + packet_dump(" KEY: ", ptr, cipher_key_len); + ptr += cipher_key_len; + } + + if (ecf & SCTX_IV) { + sctx_pl_len = sctx_size * sizeof(u32) - + sizeof(struct SCTX); + iv_len = sctx_pl_len - + (hash_key_len + hash_state_len + + cipher_key_len); + packet_log(" IV Length:%u Bytes\n", iv_len); + packet_dump(" IV: ", ptr, iv_len); + ptr += iv_len; + } + } + } + + /* ========== Decode BDESC ========== */ + if 
(spuh->mh.flags & MH_BDESC_PRES) { +#ifdef DEBUG + struct BDESC_HEADER *bdesc = (struct BDESC_HEADER *)ptr; +#endif + packet_log(" BDESC[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" OffsetMAC:%u LengthMAC:%u\n", + be16_to_cpu(bdesc->offset_mac), + be16_to_cpu(bdesc->length_mac)); + ptr += sizeof(u32); + + packet_log(" BDESC[1] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" OffsetCrypto:%u LengthCrypto:%u\n", + be16_to_cpu(bdesc->offset_crypto), + be16_to_cpu(bdesc->length_crypto)); + ptr += sizeof(u32); + + packet_log(" BDESC[2] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" OffsetICV:%u OffsetIV:%u\n", + be16_to_cpu(bdesc->offset_icv), + be16_to_cpu(bdesc->offset_iv)); + ptr += sizeof(u32); + } + + /* ========== Decode BD ========== */ + if (spuh->mh.flags & MH_BD_PRES) { +#ifdef DEBUG + struct BD_HEADER *bd = (struct BD_HEADER *)ptr; +#endif + packet_log(" BD[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" Size:%ubytes PrevLength:%u\n", + be16_to_cpu(bd->size), be16_to_cpu(bd->prev_length)); + ptr += 4; + } + + /* Double check sanity */ + if (buf + buf_len != ptr) { + packet_log(" Packet parsed incorrectly. "); + packet_log("buf:%p buf_len:%u buf+buf_len:%p ptr:%p\n", + buf, buf_len, buf + buf_len, ptr); + } + + packet_log("\n"); +} + +/** + * spum_ns2_ctx_max_payload() - Determine the max length of the payload for a + * SPU message for a given cipher and hash alg context. + * @cipher_alg: The cipher algorithm + * @cipher_mode: The cipher mode + * @blocksize: The size of a block of data for this algo + * + * The max payload must be a multiple of the blocksize so that if a request is + * too large to fit in a single SPU message, the request can be broken into + * max_payload sized chunks. Each chunk must be a multiple of blocksize. + * + * Return: Max payload length in bytes + */ +u32 spum_ns2_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize) +{ + u32 max_payload = SPUM_NS2_MAX_PAYLOAD; + u32 excess; + + /* In XTS on SPU-M, we'll need to insert tweak before input data */ + if (cipher_mode == CIPHER_MODE_XTS) + max_payload -= SPU_XTS_TWEAK_SIZE; + + excess = max_payload % blocksize; + + return max_payload - excess; +} + +/** + * spum_nsp_ctx_max_payload() - Determine the max length of the payload for a + * SPU message for a given cipher and hash alg context. + * @cipher_alg: The cipher algorithm + * @cipher_mode: The cipher mode + * @blocksize: The size of a block of data for this algo + * + * The max payload must be a multiple of the blocksize so that if a request is + * too large to fit in a single SPU message, the request can be broken into + * max_payload sized chunks. Each chunk must be a multiple of blocksize. + * + * Return: Max payload length in bytes + */ +u32 spum_nsp_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize) +{ + u32 max_payload = SPUM_NSP_MAX_PAYLOAD; + u32 excess; + + /* In XTS on SPU-M, we'll need to insert tweak before input data */ + if (cipher_mode == CIPHER_MODE_XTS) + max_payload -= SPU_XTS_TWEAK_SIZE; + + excess = max_payload % blocksize; + + return max_payload - excess; +} + +/** spum_payload_length() - Given a SPU-M message header, extract the payload + * length. + * @spu_hdr: Start of SPU header + * + * Assumes just MH, EMH, BD (no SCTX, BDESC. Works for response frames. 
+ * + * Return: payload length in bytes + */ +u32 spum_payload_length(u8 *spu_hdr) +{ + struct BD_HEADER *bd; + u32 pl_len; + + /* Find BD header. skip MH, EMH */ + bd = (struct BD_HEADER *)(spu_hdr + 8); + pl_len = be16_to_cpu(bd->size); + + return pl_len; +} + +/** + * spum_response_hdr_len() - Given the length of the hash key and encryption + * key, determine the expected length of a SPU response header. + * @auth_key_len: authentication key length (bytes) + * @enc_key_len: encryption key length (bytes) + * @is_hash: true if response message is for a hash operation + * + * Return: length of SPU response header (bytes) + */ +u16 spum_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash) +{ + if (is_hash) + return SPU_HASH_RESP_HDR_LEN; + else + return SPU_RESP_HDR_LEN; +} + +/** + * spum_hash_pad_len() - Calculate the length of hash padding required to extend + * data to a full block size. + * @hash_alg: hash algorithm + * @hash_mode: hash mode + * @chunksize: length of data, in bytes + * @hash_block_size: size of a block of data for hash algorithm + * + * Reserve space for 1 byte (0x80) start of pad and the total length as u64 + * + * Return: length of hash pad in bytes + */ +u16 spum_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size) +{ + unsigned int length_len; + unsigned int used_space_last_block; + int hash_pad_len; + + /* AES-XCBC hash requires just padding to next block boundary */ + if ((hash_alg == HASH_ALG_AES) && (hash_mode == HASH_MODE_XCBC)) { + used_space_last_block = chunksize % hash_block_size; + hash_pad_len = hash_block_size - used_space_last_block; + if (hash_pad_len >= hash_block_size) + hash_pad_len -= hash_block_size; + return hash_pad_len; + } + + used_space_last_block = chunksize % hash_block_size + 1; + if ((hash_alg == HASH_ALG_SHA384) || (hash_alg == HASH_ALG_SHA512)) + length_len = 2 * sizeof(u64); + else + length_len = sizeof(u64); + + used_space_last_block += length_len; + hash_pad_len = hash_block_size - used_space_last_block; + if (hash_pad_len < 0) + hash_pad_len += hash_block_size; + + hash_pad_len += 1 + length_len; + return hash_pad_len; +} + +/** + * spum_gcm_ccm_pad_len() - Determine the required length of GCM or CCM padding. + * @cipher_mode: Algo type + * @data_size: Length of plaintext (bytes) + * + * @Return: Length of padding, in bytes + */ +u32 spum_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size) +{ + u32 pad_len = 0; + u32 m1 = SPU_GCM_CCM_ALIGN - 1; + + if ((cipher_mode == CIPHER_MODE_GCM) || + (cipher_mode == CIPHER_MODE_CCM)) + pad_len = ((data_size + m1) & ~m1) - data_size; + + return pad_len; +} + +/** + * spum_assoc_resp_len() - Determine the size of the receive buffer required to + * catch associated data. + * @cipher_mode: cipher mode + * @assoc_len: length of associated data (bytes) + * @iv_len: length of IV (bytes) + * @is_encrypt: true if encrypting. false if decrypting. + * + * Return: length of associated data in response message (bytes) + */ +u32 spum_assoc_resp_len(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, unsigned int iv_len, + bool is_encrypt) +{ + u32 buflen = 0; + u32 pad; + + if (assoc_len) + buflen = assoc_len; + + if (cipher_mode == CIPHER_MODE_GCM) { + /* AAD needs to be padded in responses too */ + pad = spum_gcm_ccm_pad_len(cipher_mode, buflen); + buflen += pad; + } + if (cipher_mode == CIPHER_MODE_CCM) { + /* + * AAD needs to be padded in responses too + * for CCM, len + 2 needs to be 128-bit aligned. 
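+	 * For example (illustrative values only): assoc_len = 20 gives len + 2 = 22, which is rounded up to the next 16-byte boundary (32), so 10 pad bytes are added.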
+ */ + pad = spum_gcm_ccm_pad_len(cipher_mode, buflen + 2); + buflen += pad; + } + + return buflen; +} + +/** + * spum_aead_ivlen() - Calculate the length of the AEAD IV to be included + * in a SPU request after the AAD and before the payload. + * @cipher_mode: cipher mode + * @iv_len: initialization vector length in bytes + * + * In Linux ~4.2 and later, the assoc_data sg includes the IV. So no need + * to include the IV as a separate field in the SPU request msg. + * + * Return: Length of AEAD IV in bytes + */ +u8 spum_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len) +{ + return 0; +} + +/** + * spum_hash_type() - Determine the type of hash operation. + * @src_sent: The number of bytes in the current request that have already + * been sent to the SPU to be hashed. + * + * We do not use HASH_TYPE_FULL for requests that fit in a single SPU message. + * Using FULL causes failures (such as when the string to be hashed is empty). + * For similar reasons, we never use HASH_TYPE_FIN. Instead, submit messages + * as INIT or UPDT and do the hash padding in sw. + */ +enum hash_type spum_hash_type(u32 src_sent) +{ + return src_sent ? HASH_TYPE_UPDT : HASH_TYPE_INIT; +} + +/** + * spum_digest_size() - Determine the size of a hash digest to expect the SPU to + * return. + * @alg_digest_size: Number of bytes in the final digest for the given algo + * @alg: The hash algorithm + * @htype: Type of hash operation (init, update, full, etc) + * + * When doing incremental hashing for an algorithm with a truncated hash + * (e.g., SHA224), the SPU returns the full digest so that it can be fed back as + * a partial result for the next chunk. + */ +u32 spum_digest_size(u32 alg_digest_size, enum hash_alg alg, + enum hash_type htype) +{ + u32 digestsize = alg_digest_size; + + /* SPU returns complete digest when doing incremental hash and truncated + * hash algo. + */ + if ((htype == HASH_TYPE_INIT) || (htype == HASH_TYPE_UPDT)) { + if (alg == HASH_ALG_SHA224) + digestsize = SHA256_DIGEST_SIZE; + else if (alg == HASH_ALG_SHA384) + digestsize = SHA512_DIGEST_SIZE; + } + return digestsize; +} + +/** + * spum_create_request() - Build a SPU request message header, up to and + * including the BD header. Construct the message starting at spu_hdr. Caller + * should allocate this buffer in DMA-able memory at least SPU_HEADER_ALLOC_LEN + * bytes long. + * @spu_hdr: Start of buffer where SPU request header is to be written + * @req_opts: SPU request message options + * @cipher_parms: Parameters related to cipher algorithm + * @hash_parms: Parameters related to hash algorithm + * @aead_parms: Parameters related to AEAD operation + * @data_size: Length of data to be encrypted or authenticated. If AEAD, does + * not include length of AAD. + * + * Return: the length of the SPU header in bytes. 0 if an error occurs.
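+ * + * Note: as built below, the header comprises the MH and EMH words, a three-word SCTX extended with any authentication key, cipher key, and IV, followed by the BDESC and BD headers.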
+ */ +u32 spum_create_request(u8 *spu_hdr, + struct spu_request_opts *req_opts, + struct spu_cipher_parms *cipher_parms, + struct spu_hash_parms *hash_parms, + struct spu_aead_parms *aead_parms, + unsigned int data_size) +{ + struct SPUHEADER *spuh; + struct BDESC_HEADER *bdesc; + struct BD_HEADER *bd; + + u8 *ptr; + u32 protocol_bits = 0; + u32 cipher_bits = 0; + u32 ecf_bits = 0; + u8 sctx_words = 0; + unsigned int buf_len = 0; + + /* size of the cipher payload */ + unsigned int cipher_len = hash_parms->prebuf_len + data_size + + hash_parms->pad_len; + + /* offset of prebuf or data from end of BD header */ + unsigned int cipher_offset = aead_parms->assoc_size + + aead_parms->iv_len + aead_parms->aad_pad_len; + + /* total size of the DB data (without STAT word padding) */ + unsigned int real_db_size = spu_real_db_size(aead_parms->assoc_size, + aead_parms->iv_len, + hash_parms->prebuf_len, + data_size, + aead_parms->aad_pad_len, + aead_parms->data_pad_len, + hash_parms->pad_len); + + unsigned int auth_offset = 0; + unsigned int offset_iv = 0; + + /* size/offset of the auth payload */ + unsigned int auth_len; + + auth_len = real_db_size; + + if (req_opts->is_aead && req_opts->is_inbound) + cipher_len -= hash_parms->digestsize; + + if (req_opts->is_aead && req_opts->is_inbound) + auth_len -= hash_parms->digestsize; + + if ((hash_parms->alg == HASH_ALG_AES) && + (hash_parms->mode == HASH_MODE_XCBC)) { + auth_len -= hash_parms->pad_len; + cipher_len -= hash_parms->pad_len; + } + + flow_log("%s()\n", __func__); + flow_log(" in:%u authFirst:%u\n", + req_opts->is_inbound, req_opts->auth_first); + flow_log(" %s. cipher alg:%u mode:%u type %u\n", + spu_alg_name(cipher_parms->alg, cipher_parms->mode), + cipher_parms->alg, cipher_parms->mode, cipher_parms->type); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + flow_log(" iv: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" auth alg:%u mode:%u type %u\n", + hash_parms->alg, hash_parms->mode, hash_parms->type); + flow_log(" digestsize: %u\n", hash_parms->digestsize); + flow_log(" authkey: %d\n", hash_parms->key_len); + flow_dump(" authkey: ", hash_parms->key_buf, hash_parms->key_len); + flow_log(" assoc_size:%u\n", aead_parms->assoc_size); + flow_log(" prebuf_len:%u\n", hash_parms->prebuf_len); + flow_log(" data_size:%u\n", data_size); + flow_log(" hash_pad_len:%u\n", hash_parms->pad_len); + flow_log(" real_db_size:%u\n", real_db_size); + flow_log(" auth_offset:%u auth_len:%u cipher_offset:%u cipher_len:%u\n", + auth_offset, auth_len, cipher_offset, cipher_len); + flow_log(" aead_iv: %u\n", aead_parms->iv_len); + + /* starting out: zero the header (plus some) */ + ptr = spu_hdr; + memset(ptr, 0, sizeof(struct SPUHEADER)); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)ptr; + ptr += sizeof(struct SPUHEADER); + buf_len += sizeof(struct SPUHEADER); + + spuh->mh.op_code = SPU_CRYPTO_OPERATION_GENERIC; + spuh->mh.flags |= (MH_SCTX_PRES | MH_BDESC_PRES | MH_BD_PRES); + + /* Format sctx word 0 (protocol_bits) */ + sctx_words = 3; /* size in words */ + + /* Format sctx word 1 (cipher_bits) */ + if (req_opts->is_inbound) + cipher_bits |= CIPHER_INBOUND; + if (req_opts->auth_first) + cipher_bits |= CIPHER_ORDER; + + /* Set the crypto parameters in the cipher.flags */ + cipher_bits |= cipher_parms->alg << CIPHER_ALG_SHIFT; + cipher_bits |= 
cipher_parms->mode << CIPHER_MODE_SHIFT; + cipher_bits |= cipher_parms->type << CIPHER_TYPE_SHIFT; + + /* Set the auth parameters in the cipher.flags */ + cipher_bits |= hash_parms->alg << HASH_ALG_SHIFT; + cipher_bits |= hash_parms->mode << HASH_MODE_SHIFT; + cipher_bits |= hash_parms->type << HASH_TYPE_SHIFT; + + /* + * Format sctx extensions if required, and update main fields if + * required) + */ + if (hash_parms->alg) { + /* Write the authentication key material if present */ + if (hash_parms->key_len) { + memcpy(ptr, hash_parms->key_buf, hash_parms->key_len); + ptr += hash_parms->key_len; + buf_len += hash_parms->key_len; + sctx_words += hash_parms->key_len / 4; + } + + if ((cipher_parms->mode == CIPHER_MODE_GCM) || + (cipher_parms->mode == CIPHER_MODE_CCM)) + /* unpadded length */ + offset_iv = aead_parms->assoc_size; + + /* if GCM/CCM we need to write ICV into the payload */ + if (!req_opts->is_inbound) { + if ((cipher_parms->mode == CIPHER_MODE_GCM) || + (cipher_parms->mode == CIPHER_MODE_CCM)) + ecf_bits |= 1 << INSERT_ICV_SHIFT; + } else { + ecf_bits |= CHECK_ICV; + } + + /* Inform the SPU of the ICV size (in words) */ + if (hash_parms->digestsize == 64) + cipher_bits |= ICV_IS_512; + else + ecf_bits |= + (hash_parms->digestsize / 4) << ICV_SIZE_SHIFT; + } + + if (req_opts->bd_suppress) + ecf_bits |= BD_SUPPRESS; + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg) { + if (cipher_parms->key_len) { + memcpy(ptr, cipher_parms->key_buf, + cipher_parms->key_len); + ptr += cipher_parms->key_len; + buf_len += cipher_parms->key_len; + sctx_words += cipher_parms->key_len / 4; + } + + /* + * if encrypting then set IV size, use SCTX IV unless no IV + * given here + */ + if (cipher_parms->iv_buf && cipher_parms->iv_len) { + /* Use SCTX IV */ + ecf_bits |= SCTX_IV; + + /* cipher iv provided so put it in here */ + memcpy(ptr, cipher_parms->iv_buf, cipher_parms->iv_len); + + ptr += cipher_parms->iv_len; + buf_len += cipher_parms->iv_len; + sctx_words += cipher_parms->iv_len / 4; + } + } + + /* + * RFC4543 (GMAC/ESP) requires data to be sent as part of AAD + * so we need to override the BDESC parameters. + */ + if (req_opts->is_rfc4543) { + if (req_opts->is_inbound) + data_size -= hash_parms->digestsize; + offset_iv = aead_parms->assoc_size + data_size; + cipher_len = 0; + cipher_offset = offset_iv; + auth_len = cipher_offset + aead_parms->data_pad_len; + } + + /* write in the total sctx length now that we know it */ + protocol_bits |= sctx_words; + + /* Endian adjust the SCTX */ + spuh->sa.proto_flags = cpu_to_be32(protocol_bits); + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + spuh->sa.ecf = cpu_to_be32(ecf_bits); + + /* === create the BDESC section === */ + bdesc = (struct BDESC_HEADER *)ptr; + + bdesc->offset_mac = cpu_to_be16(auth_offset); + bdesc->length_mac = cpu_to_be16(auth_len); + bdesc->offset_crypto = cpu_to_be16(cipher_offset); + bdesc->length_crypto = cpu_to_be16(cipher_len); + + /* + * CCM in SPU-M requires that ICV not be in same 32-bit word as data or + * padding. So account for padding as necessary. 
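+	 * For example (illustrative values only): an auth_len of 22 bytes gets 2 bytes of padding from spum_wordalign_padlen() so the ICV lands on a 4-byte boundary.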
+ */ + if (cipher_parms->mode == CIPHER_MODE_CCM) + auth_len += spum_wordalign_padlen(auth_len); + + bdesc->offset_icv = cpu_to_be16(auth_len); + bdesc->offset_iv = cpu_to_be16(offset_iv); + + ptr += sizeof(struct BDESC_HEADER); + buf_len += sizeof(struct BDESC_HEADER); + + /* === no MFM section === */ + + /* === create the BD section === */ + + /* add the BD header */ + bd = (struct BD_HEADER *)ptr; + bd->size = cpu_to_be16(real_db_size); + bd->prev_length = 0; + + ptr += sizeof(struct BD_HEADER); + buf_len += sizeof(struct BD_HEADER); + + packet_dump(" SPU request header: ", spu_hdr, buf_len); + + return buf_len; +} + +/** + * spum_cipher_req_init() - Build a SPU request message header, up to and + * including the BD header. + * @spu_hdr: Start of SPU request header (MH) + * @cipher_parms: Parameters that describe the cipher request + * + * Construct the message starting at spu_hdr. Caller should allocate this buffer + * in DMA-able memory at least SPU_HEADER_ALLOC_LEN bytes long. + * + * Return: the length of the SPU header in bytes. 0 if an error occurs. + */ +u16 spum_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms) +{ + struct SPUHEADER *spuh; + u32 protocol_bits = 0; + u32 cipher_bits = 0; + u32 ecf_bits = 0; + u8 sctx_words = 0; + u8 *ptr = spu_hdr; + + flow_log("%s()\n", __func__); + flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg, + cipher_parms->mode, cipher_parms->type); + flow_log(" cipher_iv_len: %u\n", cipher_parms->iv_len); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + + /* starting out: zero the header (plus some) */ + memset(spu_hdr, 0, sizeof(struct SPUHEADER)); + ptr += sizeof(struct SPUHEADER); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)spu_hdr; + + spuh->mh.op_code = SPU_CRYPTO_OPERATION_GENERIC; + spuh->mh.flags |= (MH_SCTX_PRES | MH_BDESC_PRES | MH_BD_PRES); + + /* Format sctx word 0 (protocol_bits) */ + sctx_words = 3; /* size in words */ + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg) { + if (cipher_parms->key_len) { + ptr += cipher_parms->key_len; + sctx_words += cipher_parms->key_len / 4; + } + + /* + * if encrypting then set IV size, use SCTX IV unless no IV + * given here + */ + if (cipher_parms->iv_len) { + /* Use SCTX IV */ + ecf_bits |= SCTX_IV; + ptr += cipher_parms->iv_len; + sctx_words += cipher_parms->iv_len / 4; + } + } + + /* Set the crypto parameters in the cipher.flags */ + cipher_bits |= cipher_parms->alg << CIPHER_ALG_SHIFT; + cipher_bits |= cipher_parms->mode << CIPHER_MODE_SHIFT; + cipher_bits |= cipher_parms->type << CIPHER_TYPE_SHIFT; + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg && cipher_parms->key_len) + memcpy(spuh + 1, cipher_parms->key_buf, cipher_parms->key_len); + + /* write in the total sctx length now that we know it */ + protocol_bits |= sctx_words; + + /* Endian adjust the SCTX */ + spuh->sa.proto_flags = cpu_to_be32(protocol_bits); + + /* Endian adjust the SCTX */ + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + spuh->sa.ecf = cpu_to_be32(ecf_bits); + + packet_dump(" SPU request header: ", spu_hdr, + sizeof(struct SPUHEADER)); + + return sizeof(struct SPUHEADER) + cipher_parms->key_len + + cipher_parms->iv_len + sizeof(struct BDESC_HEADER) + + sizeof(struct BD_HEADER); +} + +/** + * spum_cipher_req_finish() - Finish building a SPU request message header for a + * block cipher 
request. + * @spu_hdr: Start of the request message header (MH field) + * @spu_req_hdr_len: Length in bytes of the SPU request header + * @is_inbound: 0 encrypt, 1 decrypt + * @cipher_parms: Parameters describing cipher operation to be performed + * @update_key: If true, rewrite the cipher key in SCTX + * @data_size: Length of the data in the BD field + * + * Assumes much of the header was already filled in at setkey() time in + * spum_cipher_req_init(). + * spum_cipher_req_init() fills in the encryption key. For RC4, when submitting + * a request for a non-first chunk, we use the 260-byte SUPDT field from the + * previous response as the key. update_key is true for this case. Unused in all + * other cases. + */ +void spum_cipher_req_finish(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size) +{ + struct SPUHEADER *spuh; + struct BDESC_HEADER *bdesc; + struct BD_HEADER *bd; + u8 *bdesc_ptr = spu_hdr + spu_req_hdr_len - + (sizeof(struct BD_HEADER) + sizeof(struct BDESC_HEADER)); + + u32 cipher_bits; + + flow_log("%s()\n", __func__); + flow_log(" in: %u\n", is_inbound); + flow_log(" cipher alg: %u, cipher_type: %u\n", cipher_parms->alg, + cipher_parms->type); + if (update_key) { + flow_log(" cipher key len: %u\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, + cipher_parms->key_len); + } + + /* + * In XTS mode, API puts "i" parameter (block tweak) in IV. For + * SPU-M, should be in start of the BD; tx_sg_create() copies it there. + * IV in SPU msg for SPU-M should be 0, since that's the "j" parameter + * (block ctr within larger data unit) - given we can send entire disk + * block (<= 4KB) in 1 SPU msg, don't need to use this parameter.
+ */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + memset(cipher_parms->iv_buf, 0, cipher_parms->iv_len); + + flow_log(" iv len: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" data_size: %u\n", data_size); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)spu_hdr; + + /* cipher_bits was initialized at setkey time */ + cipher_bits = be32_to_cpu(spuh->sa.cipher_flags); + + /* Format sctx word 1 (cipher_bits) */ + if (is_inbound) + cipher_bits |= CIPHER_INBOUND; + else + cipher_bits &= ~CIPHER_INBOUND; + + /* update encryption key for RC4 on non-first chunk */ + if (update_key) { + spuh->sa.cipher_flags |= + cipher_parms->type << CIPHER_TYPE_SHIFT; + memcpy(spuh + 1, cipher_parms->key_buf, cipher_parms->key_len); + } + + if (cipher_parms->alg && cipher_parms->iv_buf && cipher_parms->iv_len) + /* cipher iv provided so put it in here */ + memcpy(bdesc_ptr - cipher_parms->iv_len, cipher_parms->iv_buf, + cipher_parms->iv_len); + + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + + /* === create the BDESC section === */ + bdesc = (struct BDESC_HEADER *)bdesc_ptr; + bdesc->offset_mac = 0; + bdesc->length_mac = 0; + bdesc->offset_crypto = 0; + + /* XTS mode, data_size needs to include tweak parameter */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + bdesc->length_crypto = cpu_to_be16(data_size + + SPU_XTS_TWEAK_SIZE); + else + bdesc->length_crypto = cpu_to_be16(data_size); + + bdesc->offset_icv = 0; + bdesc->offset_iv = 0; + + /* === no MFM section === */ + + /* === create the BD section === */ + /* add the BD header */ + bd = (struct BD_HEADER *)(bdesc_ptr + sizeof(struct BDESC_HEADER)); + bd->size = cpu_to_be16(data_size); + + /* XTS mode, data_size needs to include tweak parameter */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + bd->size = cpu_to_be16(data_size + SPU_XTS_TWEAK_SIZE); + else + bd->size = cpu_to_be16(data_size); + + bd->prev_length = 0; + + packet_dump(" SPU request header: ", spu_hdr, spu_req_hdr_len); +} + +/** + * spum_request_pad() - Create pad bytes at the end of the data. + * @pad_start: Start of buffer where pad bytes are to be written + * @gcm_ccm_padding: length of GCM/CCM padding, in bytes + * @hash_pad_len: Number of bytes of padding extend data to full block + * @auth_alg: authentication algorithm + * @auth_mode: authentication mode + * @total_sent: length inserted at end of hash pad + * @status_padding: Number of bytes of padding to align STATUS word + * + * There may be three forms of pad: + * 1. GCM/CCM pad - for GCM/CCM mode ciphers, pad to 16-byte alignment + * 2. hash pad - pad to a block length, with 0x80 data terminator and + * size at the end + * 3. 
STAT pad - to ensure the STAT field is 4-byte aligned + */ +void spum_request_pad(u8 *pad_start, + u32 gcm_ccm_padding, + u32 hash_pad_len, + enum hash_alg auth_alg, + enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding) +{ + u8 *ptr = pad_start; + + /* fix data alignment for GCM/CCM */ + if (gcm_ccm_padding > 0) { + flow_log(" GCM: padding to 16 byte alignment: %u bytes\n", + gcm_ccm_padding); + memset(ptr, 0, gcm_ccm_padding); + ptr += gcm_ccm_padding; + } + + if (hash_pad_len > 0) { + /* clear the padding section */ + memset(ptr, 0, hash_pad_len); + + if ((auth_alg == HASH_ALG_AES) && + (auth_mode == HASH_MODE_XCBC)) { + /* AES/XCBC just requires padding to be 0s */ + ptr += hash_pad_len; + } else { + /* terminate the data */ + *ptr = 0x80; + ptr += (hash_pad_len - sizeof(u64)); + + /* add the size at the end as required per alg */ + if (auth_alg == HASH_ALG_MD5) + *(u64 *)ptr = cpu_to_le64((u64)total_sent * 8); + else /* SHA1, SHA2-224, SHA2-256 */ + *(u64 *)ptr = cpu_to_be64((u64)total_sent * 8); + ptr += sizeof(u64); + } + } + + /* pad to a 4-byte alignment for STAT */ + if (status_padding > 0) { + flow_log(" STAT: padding to 4 byte alignment: %u bytes\n", + status_padding); + + memset(ptr, 0, status_padding); + ptr += status_padding; + } +} + +/** + * spum_xts_tweak_in_payload() - Indicate that SPUM DOES place the XTS tweak + * field in the packet payload (rather than using IV) + * + * Return: 1 + */ +u8 spum_xts_tweak_in_payload(void) +{ + return 1; +} + +/** + * spum_tx_status_len() - Return the length of the STATUS field in a SPU + * request message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spum_tx_status_len(void) +{ + return SPU_TX_STATUS_LEN; +} + +/** + * spum_rx_status_len() - Return the length of the STATUS field in a SPU + * response message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spum_rx_status_len(void) +{ + return SPU_RX_STATUS_LEN; +} + +/** + * spum_status_process() - Process the status from a SPU response message. + * @statp: start of STATUS word + * + * Return: + * 0 - if status is good and response should be processed + * !0 - status indicates an error and response is invalid + */ +int spum_status_process(u8 *statp) +{ + u32 status; + + status = __be32_to_cpu(*(__be32 *)statp); + flow_log("SPU response STATUS %#08x\n", status); + if (status & SPU_STATUS_ERROR_FLAG) { + pr_err("%s() Warning: Error result from SPU: %#08x\n", + __func__, status); + if (status & SPU_STATUS_INVALID_ICV) + return SPU_INVALID_ICV; + return -EBADMSG; + } + return 0; +} + +/** + * spum_ccm_update_iv() - Update the IV as per the requirements for CCM mode.
+ * + * @digestsize: Digest size of this request + * @cipher_parms: (pointer to) cipher parmaeters, includes IV buf & IV len + * @assoclen: Length of AAD data + * @chunksize: length of input data to be sent in this req + * @is_encrypt: true if this is an output/encrypt operation + * @is_esp: true if this is an ESP / RFC4309 operation + * + */ +void spum_ccm_update_iv(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, + unsigned int chunksize, + bool is_encrypt, + bool is_esp) +{ + u8 L; /* L from CCM algorithm, length of plaintext data */ + u8 mprime; /* M' from CCM algo, (M - 2) / 2, where M=authsize */ + u8 adata; + + if (cipher_parms->iv_len != CCM_AES_IV_SIZE) { + pr_err("%s(): Invalid IV len %d for CCM mode, should be %d\n", + __func__, cipher_parms->iv_len, CCM_AES_IV_SIZE); + return; + } + + /* + * IV needs to be formatted as follows: + * + * | Byte 0 | Bytes 1 - N | Bytes (N+1) - 15 | + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | Bits 7 - 0 | Bits 7 - 0 | + * | 0 |Ad?|(M - 2) / 2| L - 1 | Nonce | Plaintext Length | + * + * Ad? = 1 if AAD present, 0 if not present + * M = size of auth field, 8, 12, or 16 bytes (SPU-M) -or- + * 4, 6, 8, 10, 12, 14, 16 bytes (SPU2) + * L = Size of Plaintext Length field; Nonce size = 15 - L + * + * It appears that the crypto API already expects the L-1 portion + * to be set in the first byte of the IV, which implicitly determines + * the nonce size, and also fills in the nonce. But the other bits + * in byte 0 as well as the plaintext length need to be filled in. + * + * In rfc4309/esp mode, L is not already in the supplied IV and + * we need to fill it in, as well as move the IV data to be after + * the salt + */ + if (is_esp) { + L = CCM_ESP_L_VALUE; /* RFC4309 has fixed L */ + } else { + /* L' = plaintext length - 1 so Plaintext length is L' + 1 */ + L = ((cipher_parms->iv_buf[0] & CCM_B0_L_PRIME) >> + CCM_B0_L_PRIME_SHIFT) + 1; + } + + mprime = (digestsize - 2) >> 1; /* M' = (M - 2) / 2 */ + adata = (assoclen > 0); /* adata = 1 if any associated data */ + + cipher_parms->iv_buf[0] = (adata << CCM_B0_ADATA_SHIFT) | + (mprime << CCM_B0_M_PRIME_SHIFT) | + ((L - 1) << CCM_B0_L_PRIME_SHIFT); + + /* Nonce is already filled in by crypto API, and is 15 - L bytes */ + + /* Don't include digest in plaintext size when decrypting */ + if (!is_encrypt) + chunksize -= digestsize; + + /* Fill in length of plaintext, formatted to be L bytes long */ + format_value_ccm(chunksize, &cipher_parms->iv_buf[15 - L + 1], L); +} + +/** + * spum_wordalign_padlen() - Given the length of a data field, determine the + * padding required to align the data following this field on a 4-byte boundary. + * @data_size: length of data field in bytes + * + * Return: length of status field padding, in bytes + */ +u32 spum_wordalign_padlen(u32 data_size) +{ + return ((data_size + 3) & ~3) - data_size; +} diff --git a/drivers/crypto/bcm/spu.h b/drivers/crypto/bcm/spu.h new file mode 100644 index 000000000000..aa6fc38db263 --- /dev/null +++ b/drivers/crypto/bcm/spu.h @@ -0,0 +1,287 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +/* + * This file contains the definition of SPU messages. There are currently two + * SPU message formats: SPU-M and SPU2. The hardware uses different values to + * identify the same things in SPU-M vs SPU2. So this file defines values that + * are hardware independent. Software can use these values for any version of + * SPU hardware. These values are used in APIs in spu.c. Functions internal to + * spu.c and spu2.c convert these to hardware-specific values. + */ + +#ifndef _SPU_H +#define _SPU_H + +#include <linux/types.h> +#include <linux/scatterlist.h> +#include <crypto/sha.h> + +enum spu_cipher_alg { + CIPHER_ALG_NONE = 0x0, + CIPHER_ALG_RC4 = 0x1, + CIPHER_ALG_DES = 0x2, + CIPHER_ALG_3DES = 0x3, + CIPHER_ALG_AES = 0x4, + CIPHER_ALG_LAST = 0x5 +}; + +enum spu_cipher_mode { + CIPHER_MODE_NONE = 0x0, + CIPHER_MODE_ECB = 0x0, + CIPHER_MODE_CBC = 0x1, + CIPHER_MODE_OFB = 0x2, + CIPHER_MODE_CFB = 0x3, + CIPHER_MODE_CTR = 0x4, + CIPHER_MODE_CCM = 0x5, + CIPHER_MODE_GCM = 0x6, + CIPHER_MODE_XTS = 0x7, + CIPHER_MODE_LAST = 0x8 +}; + +enum spu_cipher_type { + CIPHER_TYPE_NONE = 0x0, + CIPHER_TYPE_DES = 0x0, + CIPHER_TYPE_3DES = 0x0, + CIPHER_TYPE_INIT = 0x0, /* used for ARC4 */ + CIPHER_TYPE_AES128 = 0x0, + CIPHER_TYPE_AES192 = 0x1, + CIPHER_TYPE_UPDT = 0x1, /* used for ARC4 */ + CIPHER_TYPE_AES256 = 0x2, +}; + +enum hash_alg { + HASH_ALG_NONE = 0x0, + HASH_ALG_MD5 = 0x1, + HASH_ALG_SHA1 = 0x2, + HASH_ALG_SHA224 = 0x3, + HASH_ALG_SHA256 = 0x4, + HASH_ALG_AES = 0x5, + HASH_ALG_SHA384 = 0x6, + HASH_ALG_SHA512 = 0x7, + /* Keep SHA3 algorithms at the end always */ + HASH_ALG_SHA3_224 = 0x8, + HASH_ALG_SHA3_256 = 0x9, + HASH_ALG_SHA3_384 = 0xa, + HASH_ALG_SHA3_512 = 0xb, + HASH_ALG_LAST +}; + +enum hash_mode { + HASH_MODE_NONE = 0x0, + HASH_MODE_HASH = 0x0, + HASH_MODE_XCBC = 0x0, + HASH_MODE_CMAC = 0x1, + HASH_MODE_CTXT = 0x1, + HASH_MODE_HMAC = 0x2, + HASH_MODE_RABIN = 0x4, + HASH_MODE_FHMAC = 0x6, + HASH_MODE_CCM = 0x5, + HASH_MODE_GCM = 0x6, +}; + +enum hash_type { + HASH_TYPE_NONE = 0x0, + HASH_TYPE_FULL = 0x0, + HASH_TYPE_INIT = 0x1, + HASH_TYPE_UPDT = 0x2, + HASH_TYPE_FIN = 0x3, + HASH_TYPE_AES128 = 0x0, + HASH_TYPE_AES192 = 0x1, + HASH_TYPE_AES256 = 0x2 +}; + +enum aead_type { + AES_CCM, + AES_GCM, + AUTHENC, + AEAD_TYPE_LAST +}; + +extern char *hash_alg_name[HASH_ALG_LAST]; +extern char *aead_alg_name[AEAD_TYPE_LAST]; + +struct spu_request_opts { + bool is_inbound; + bool auth_first; + bool is_aead; + bool is_esp; + bool bd_suppress; + bool is_rfc4543; +}; + +struct spu_cipher_parms { + enum spu_cipher_alg alg; + enum spu_cipher_mode mode; + enum spu_cipher_type type; + u8 *key_buf; + u16 key_len; + /* iv_buf and iv_len include salt, if applicable */ + u8 *iv_buf; + u16 iv_len; +}; + +struct spu_hash_parms { + enum hash_alg alg; + enum hash_mode mode; + enum hash_type type; + u8 digestsize; + u8 *key_buf; + u16 key_len; + u16 prebuf_len; + /* length of hash pad. 
signed, needs to handle roll-overs */ + int pad_len; +}; + +struct spu_aead_parms { + u32 assoc_size; + u16 iv_len; /* length of IV field between assoc data and data */ + u8 aad_pad_len; /* For AES GCM/CCM, length of padding after AAD */ + u8 data_pad_len; /* For AES GCM/CCM, length of padding after data */ + bool return_iv; /* True if SPU should return an IV */ + u32 ret_iv_len; /* Length in bytes of returned IV */ + u32 ret_iv_off; /* Offset into full IV if partial IV returned */ +}; + +/************** SPU sizes ***************/ + +#define SPU_RX_STATUS_LEN 4 + +/* Max length of padding for 4-byte alignment of STATUS field */ +#define SPU_STAT_PAD_MAX 4 + +/* Max length of pad fragment. 4 is for 4-byte alignment of STATUS field */ +#define SPU_PAD_LEN_MAX (SPU_GCM_CCM_ALIGN + MAX_HASH_BLOCK_SIZE + \ + SPU_STAT_PAD_MAX) + +/* GCM and CCM require 16-byte alignment */ +#define SPU_GCM_CCM_ALIGN 16 + +/* Length of SUPDT field in SPU response message for RC4 */ +#define SPU_SUPDT_LEN 260 + +/* SPU status error codes. These are used as common error codes across all + * SPU variants. + */ +#define SPU_INVALID_ICV 1 + +/* Indicates no limit to the length of the payload in a SPU message */ +#define SPU_MAX_PAYLOAD_INF 0xFFFFFFFF + +/* Size of XTS tweak ("i" parameter), in bytes */ +#define SPU_XTS_TWEAK_SIZE 16 + +/* CCM B_0 field definitions, common for SPU-M and SPU2 */ +#define CCM_B0_ADATA 0x40 +#define CCM_B0_ADATA_SHIFT 6 +#define CCM_B0_M_PRIME 0x38 +#define CCM_B0_M_PRIME_SHIFT 3 +#define CCM_B0_L_PRIME 0x07 +#define CCM_B0_L_PRIME_SHIFT 0 +#define CCM_ESP_L_VALUE 4 + +/** + * spu_req_incl_icv() - Return true if SPU request message should include the + * ICV as a separate buffer. + * @cipher_mode: the cipher mode being requested + * @is_encrypt: true if encrypting. false if decrypting.
+ * + * Return: true if ICV to be included as separate buffer + */ +static __always_inline bool spu_req_incl_icv(enum spu_cipher_mode cipher_mode, + bool is_encrypt) +{ + if ((cipher_mode == CIPHER_MODE_GCM) && !is_encrypt) + return true; + if ((cipher_mode == CIPHER_MODE_CCM) && !is_encrypt) + return true; + + return false; +} + +static __always_inline u32 spu_real_db_size(u32 assoc_size, + u32 aead_iv_buf_len, + u32 prebuf_len, + u32 data_size, + u32 aad_pad_len, + u32 gcm_pad_len, + u32 hash_pad_len) +{ + return assoc_size + aead_iv_buf_len + prebuf_len + data_size + + aad_pad_len + gcm_pad_len + hash_pad_len; +} + +/************** SPU Functions Prototypes **************/ + +void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len); + +u32 spum_ns2_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize); +u32 spum_nsp_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize); +u32 spum_payload_length(u8 *spu_hdr); +u16 spum_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash); +u16 spum_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size); +u32 spum_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size); +u32 spum_assoc_resp_len(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, unsigned int iv_len, + bool is_encrypt); +u8 spum_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len); +bool spu_req_incl_icv(enum spu_cipher_mode cipher_mode, bool is_encrypt); +enum hash_type spum_hash_type(u32 src_sent); +u32 spum_digest_size(u32 alg_digest_size, enum hash_alg alg, + enum hash_type htype); + +u32 spum_create_request(u8 *spu_hdr, + struct spu_request_opts *req_opts, + struct spu_cipher_parms *cipher_parms, + struct spu_hash_parms *hash_parms, + struct spu_aead_parms *aead_parms, + unsigned int data_size); + +u16 spum_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms); + +void spum_cipher_req_finish(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size); + +void spum_request_pad(u8 *pad_start, + u32 gcm_padding, + u32 hash_pad_len, + enum hash_alg auth_alg, + enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding); + +u8 spum_xts_tweak_in_payload(void); +u8 spum_tx_status_len(void); +u8 spum_rx_status_len(void); +int spum_status_process(u8 *statp); + +void spum_ccm_update_iv(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, + unsigned int chunksize, + bool is_encrypt, + bool is_esp); +u32 spum_wordalign_padlen(u32 data_size); +#endif diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c new file mode 100644 index 000000000000..ef04c9748317 --- /dev/null +++ b/drivers/crypto/bcm/spu2.c @@ -0,0 +1,1401 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +/* + * This file works with the SPU2 version of the SPU. SPU2 has different message + * formats than the previous version of the SPU. All SPU message format + * differences should be hidden in the spux.c,h files. + */ + +#include <linux/kernel.h> +#include <linux/string.h> + +#include "util.h" +#include "spu.h" +#include "spu2.h" + +#define SPU2_TX_STATUS_LEN 0 /* SPU2 has no STATUS in input packet */ + +/* + * Controlled by pkt_stat_cnt field in CRYPTO_SS_SPU0_CORE_SPU2_CONTROL0 + * register. Defaults to 2. + */ +#define SPU2_RX_STATUS_LEN 2 + +enum spu2_proto_sel { + SPU2_PROTO_RESV = 0, + SPU2_MACSEC_SECTAG8_ECB = 1, + SPU2_MACSEC_SECTAG8_SCB = 2, + SPU2_MACSEC_SECTAG16 = 3, + SPU2_MACSEC_SECTAG16_8_XPN = 4, + SPU2_IPSEC = 5, + SPU2_IPSEC_ESN = 6, + SPU2_TLS_CIPHER = 7, + SPU2_TLS_AEAD = 8, + SPU2_DTLS_CIPHER = 9, + SPU2_DTLS_AEAD = 10 +}; + +char *spu2_cipher_type_names[] = { "None", "AES128", "AES192", "AES256", + "DES", "3DES" +}; + +char *spu2_cipher_mode_names[] = { "ECB", "CBC", "CTR", "CFB", "OFB", "XTS", + "CCM", "GCM" +}; + +char *spu2_hash_type_names[] = { "None", "AES128", "AES192", "AES256", + "Reserved", "Reserved", "MD5", "SHA1", "SHA224", "SHA256", "SHA384", + "SHA512", "SHA512/224", "SHA512/256", "SHA3-224", "SHA3-256", + "SHA3-384", "SHA3-512" +}; + +char *spu2_hash_mode_names[] = { "CMAC", "CBC-MAC", "XCBC-MAC", "HMAC", + "Rabin", "CCM", "GCM", "Reserved" +}; + +static char *spu2_ciph_type_name(enum spu2_cipher_type cipher_type) +{ + if (cipher_type >= SPU2_CIPHER_TYPE_LAST) + return "Reserved"; + return spu2_cipher_type_names[cipher_type]; +} + +static char *spu2_ciph_mode_name(enum spu2_cipher_mode cipher_mode) +{ + if (cipher_mode >= SPU2_CIPHER_MODE_LAST) + return "Reserved"; + return spu2_cipher_mode_names[cipher_mode]; +} + +static char *spu2_hash_type_name(enum spu2_hash_type hash_type) +{ + if (hash_type >= SPU2_HASH_TYPE_LAST) + return "Reserved"; + return spu2_hash_type_names[hash_type]; +} + +static char *spu2_hash_mode_name(enum spu2_hash_mode hash_mode) +{ + if (hash_mode >= SPU2_HASH_MODE_LAST) + return "Reserved"; + return spu2_hash_mode_names[hash_mode]; +} + +/* + * Convert from a software cipher mode value to the corresponding value + * for SPU2. + */ +static int spu2_cipher_mode_xlate(enum spu_cipher_mode cipher_mode, + enum spu2_cipher_mode *spu2_mode) +{ + switch (cipher_mode) { + case CIPHER_MODE_ECB: + *spu2_mode = SPU2_CIPHER_MODE_ECB; + break; + case CIPHER_MODE_CBC: + *spu2_mode = SPU2_CIPHER_MODE_CBC; + break; + case CIPHER_MODE_OFB: + *spu2_mode = SPU2_CIPHER_MODE_OFB; + break; + case CIPHER_MODE_CFB: + *spu2_mode = SPU2_CIPHER_MODE_CFB; + break; + case CIPHER_MODE_CTR: + *spu2_mode = SPU2_CIPHER_MODE_CTR; + break; + case CIPHER_MODE_CCM: + *spu2_mode = SPU2_CIPHER_MODE_CCM; + break; + case CIPHER_MODE_GCM: + *spu2_mode = SPU2_CIPHER_MODE_GCM; + break; + case CIPHER_MODE_XTS: + *spu2_mode = SPU2_CIPHER_MODE_XTS; + break; + default: + return -EINVAL; + } + return 0; +} + +/** + * spu2_cipher_xlate() - Convert a cipher {alg/mode/type} triple to a SPU2 + * cipher type and mode. 
+ * @cipher_alg: [in] cipher algorithm value from software enumeration + * @cipher_mode: [in] cipher mode value from software enumeration + * @cipher_type: [in] cipher type value from software enumeration + * @spu2_type: [out] cipher type value used by spu2 hardware + * @spu2_mode: [out] cipher mode value used by spu2 hardware + * + * Return: 0 if successful + */ +static int spu2_cipher_xlate(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + enum spu_cipher_type cipher_type, + enum spu2_cipher_type *spu2_type, + enum spu2_cipher_mode *spu2_mode) +{ + int err; + + err = spu2_cipher_mode_xlate(cipher_mode, spu2_mode); + if (err) { + flow_log("Invalid cipher mode %d\n", cipher_mode); + return err; + } + + switch (cipher_alg) { + case CIPHER_ALG_NONE: + *spu2_type = SPU2_CIPHER_TYPE_NONE; + break; + case CIPHER_ALG_RC4: + /* SPU2 does not support RC4 */ + err = -EINVAL; + *spu2_type = SPU2_CIPHER_TYPE_NONE; + break; + case CIPHER_ALG_DES: + *spu2_type = SPU2_CIPHER_TYPE_DES; + break; + case CIPHER_ALG_3DES: + *spu2_type = SPU2_CIPHER_TYPE_3DES; + break; + case CIPHER_ALG_AES: + switch (cipher_type) { + case CIPHER_TYPE_AES128: + *spu2_type = SPU2_CIPHER_TYPE_AES128; + break; + case CIPHER_TYPE_AES192: + *spu2_type = SPU2_CIPHER_TYPE_AES192; + break; + case CIPHER_TYPE_AES256: + *spu2_type = SPU2_CIPHER_TYPE_AES256; + break; + default: + err = -EINVAL; + } + break; + case CIPHER_ALG_LAST: + default: + err = -EINVAL; + break; + } + + if (err) + flow_log("Invalid cipher alg %d or type %d\n", + cipher_alg, cipher_type); + return err; +} + +/* + * Convert from a software hash mode value to the corresponding value + * for SPU2. Note that HASH_MODE_NONE and HASH_MODE_XCBC have the same value. + */ +static int spu2_hash_mode_xlate(enum hash_mode hash_mode, + enum spu2_hash_mode *spu2_mode) +{ + switch (hash_mode) { + case HASH_MODE_XCBC: + *spu2_mode = SPU2_HASH_MODE_XCBC_MAC; + break; + case HASH_MODE_CMAC: + *spu2_mode = SPU2_HASH_MODE_CMAC; + break; + case HASH_MODE_HMAC: + *spu2_mode = SPU2_HASH_MODE_HMAC; + break; + case HASH_MODE_CCM: + *spu2_mode = SPU2_HASH_MODE_CCM; + break; + case HASH_MODE_GCM: + *spu2_mode = SPU2_HASH_MODE_GCM; + break; + default: + return -EINVAL; + } + return 0; +} + +/** + * spu2_hash_xlate() - Convert a hash {alg/mode/type} triple to a SPU2 hash type + * and mode. 
+ * @hash_alg: [in] hash algorithm value from software enumeration + * @hash_mode: [in] hash mode value from software enumeration + * @hash_type: [in] hash type value from software enumeration + * @ciph_type: [in] cipher type value from software enumeration + * @spu2_type: [out] hash type value used by SPU2 hardware + * @spu2_mode: [out] hash mode value used by SPU2 hardware + * + * Return: 0 if successful + */ +static int +spu2_hash_xlate(enum hash_alg hash_alg, enum hash_mode hash_mode, + enum hash_type hash_type, enum spu_cipher_type ciph_type, + enum spu2_hash_type *spu2_type, enum spu2_hash_mode *spu2_mode) +{ + int err; + + err = spu2_hash_mode_xlate(hash_mode, spu2_mode); + if (err) { + flow_log("Invalid hash mode %d\n", hash_mode); + return err; + } + + switch (hash_alg) { + case HASH_ALG_NONE: + *spu2_type = SPU2_HASH_TYPE_NONE; + break; + case HASH_ALG_MD5: + *spu2_type = SPU2_HASH_TYPE_MD5; + break; + case HASH_ALG_SHA1: + *spu2_type = SPU2_HASH_TYPE_SHA1; + break; + case HASH_ALG_SHA224: + *spu2_type = SPU2_HASH_TYPE_SHA224; + break; + case HASH_ALG_SHA256: + *spu2_type = SPU2_HASH_TYPE_SHA256; + break; + case HASH_ALG_SHA384: + *spu2_type = SPU2_HASH_TYPE_SHA384; + break; + case HASH_ALG_SHA512: + *spu2_type = SPU2_HASH_TYPE_SHA512; + break; + case HASH_ALG_AES: + switch (ciph_type) { + case CIPHER_TYPE_AES128: + *spu2_type = SPU2_HASH_TYPE_AES128; + break; + case CIPHER_TYPE_AES192: + *spu2_type = SPU2_HASH_TYPE_AES192; + break; + case CIPHER_TYPE_AES256: + *spu2_type = SPU2_HASH_TYPE_AES256; + break; + default: + err = -EINVAL; + } + break; + case HASH_ALG_SHA3_224: + *spu2_type = SPU2_HASH_TYPE_SHA3_224; + break; + case HASH_ALG_SHA3_256: + *spu2_type = SPU2_HASH_TYPE_SHA3_256; + break; + case HASH_ALG_SHA3_384: + *spu2_type = SPU2_HASH_TYPE_SHA3_384; + break; + case HASH_ALG_SHA3_512: + *spu2_type = SPU2_HASH_TYPE_SHA3_512; + break; + case HASH_ALG_LAST: + default: + err = -EINVAL; + break; + } + + if (err) + flow_log("Invalid hash alg %d or type %d\n", + hash_alg, hash_type); + return err; +} + +/* Dump FMD ctrl0.
The ctrl0 input is in host byte order */ +static void spu2_dump_fmd_ctrl0(u64 ctrl0) +{ + enum spu2_cipher_type ciph_type; + enum spu2_cipher_mode ciph_mode; + enum spu2_hash_type hash_type; + enum spu2_hash_mode hash_mode; + char *ciph_name; + char *ciph_mode_name; + char *hash_name; + char *hash_mode_name; + u8 cfb; + u8 proto; + + packet_log(" FMD CTRL0 %#16llx\n", ctrl0); + if (ctrl0 & SPU2_CIPH_ENCRYPT_EN) + packet_log(" encrypt\n"); + else + packet_log(" decrypt\n"); + + ciph_type = (ctrl0 & SPU2_CIPH_TYPE) >> SPU2_CIPH_TYPE_SHIFT; + ciph_name = spu2_ciph_type_name(ciph_type); + packet_log(" Cipher type: %s\n", ciph_name); + + if (ciph_type != SPU2_CIPHER_TYPE_NONE) { + ciph_mode = (ctrl0 & SPU2_CIPH_MODE) >> SPU2_CIPH_MODE_SHIFT; + ciph_mode_name = spu2_ciph_mode_name(ciph_mode); + packet_log(" Cipher mode: %s\n", ciph_mode_name); + } + + cfb = (ctrl0 & SPU2_CFB_MASK) >> SPU2_CFB_MASK_SHIFT; + packet_log(" CFB %#x\n", cfb); + + proto = (ctrl0 & SPU2_PROTO_SEL) >> SPU2_PROTO_SEL_SHIFT; + packet_log(" protocol %#x\n", proto); + + if (ctrl0 & SPU2_HASH_FIRST) + packet_log(" hash first\n"); + else + packet_log(" cipher first\n"); + + if (ctrl0 & SPU2_CHK_TAG) + packet_log(" check tag\n"); + + hash_type = (ctrl0 & SPU2_HASH_TYPE) >> SPU2_HASH_TYPE_SHIFT; + hash_name = spu2_hash_type_name(hash_type); + packet_log(" Hash type: %s\n", hash_name); + + if (hash_type != SPU2_HASH_TYPE_NONE) { + hash_mode = (ctrl0 & SPU2_HASH_MODE) >> SPU2_HASH_MODE_SHIFT; + hash_mode_name = spu2_hash_mode_name(hash_mode); + packet_log(" Hash mode: %s\n", hash_mode_name); + } + + if (ctrl0 & SPU2_CIPH_PAD_EN) { + packet_log(" Cipher pad: %#2llx\n", + (ctrl0 & SPU2_CIPH_PAD) >> SPU2_CIPH_PAD_SHIFT); + } +} + +/* Dump FMD ctrl1. The ctrl1 input is in host byte order */ +static void spu2_dump_fmd_ctrl1(u64 ctrl1) +{ + u8 hash_key_len; + u8 ciph_key_len; + u8 ret_iv_len; + u8 iv_offset; + u8 iv_len; + u8 hash_tag_len; + u8 ret_md; + + packet_log(" FMD CTRL1 %#16llx\n", ctrl1); + if (ctrl1 & SPU2_TAG_LOC) + packet_log(" Tag after payload\n"); + + packet_log(" Msg includes "); + if (ctrl1 & SPU2_HAS_FR_DATA) + packet_log("FD "); + if (ctrl1 & SPU2_HAS_AAD1) + packet_log("AAD1 "); + if (ctrl1 & SPU2_HAS_NAAD) + packet_log("NAAD "); + if (ctrl1 & SPU2_HAS_AAD2) + packet_log("AAD2 "); + if (ctrl1 & SPU2_HAS_ESN) + packet_log("ESN "); + packet_log("\n"); + + hash_key_len = (ctrl1 & SPU2_HASH_KEY_LEN) >> SPU2_HASH_KEY_LEN_SHIFT; + packet_log(" Hash key len %u\n", hash_key_len); + + ciph_key_len = (ctrl1 & SPU2_CIPH_KEY_LEN) >> SPU2_CIPH_KEY_LEN_SHIFT; + packet_log(" Cipher key len %u\n", ciph_key_len); + + if (ctrl1 & SPU2_GENIV) + packet_log(" Generate IV\n"); + + if (ctrl1 & SPU2_HASH_IV) + packet_log(" IV included in hash\n"); + + if (ctrl1 & SPU2_RET_IV) + packet_log(" Return IV in output before payload\n"); + + ret_iv_len = (ctrl1 & SPU2_RET_IV_LEN) >> SPU2_RET_IV_LEN_SHIFT; + packet_log(" Length of returned IV %u bytes\n", + ret_iv_len ? 
ret_iv_len : 16); + + iv_offset = (ctrl1 & SPU2_IV_OFFSET) >> SPU2_IV_OFFSET_SHIFT; + packet_log(" IV offset %u\n", iv_offset); + + iv_len = (ctrl1 & SPU2_IV_LEN) >> SPU2_IV_LEN_SHIFT; + packet_log(" Input IV len %u bytes\n", iv_len); + + hash_tag_len = (ctrl1 & SPU2_HASH_TAG_LEN) >> SPU2_HASH_TAG_LEN_SHIFT; + packet_log(" Hash tag length %u bytes\n", hash_tag_len); + + packet_log(" Return "); + ret_md = (ctrl1 & SPU2_RETURN_MD) >> SPU2_RETURN_MD_SHIFT; + if (ret_md) + packet_log("FMD "); + if (ret_md == SPU2_RET_FMD_OMD) + packet_log("OMD "); + else if (ret_md == SPU2_RET_FMD_OMD_IV) + packet_log("OMD IV "); + if (ctrl1 & SPU2_RETURN_FD) + packet_log("FD "); + if (ctrl1 & SPU2_RETURN_AAD1) + packet_log("AAD1 "); + if (ctrl1 & SPU2_RETURN_NAAD) + packet_log("NAAD "); + if (ctrl1 & SPU2_RETURN_AAD2) + packet_log("AAD2 "); + if (ctrl1 & SPU2_RETURN_PAY) + packet_log("Payload"); + packet_log("\n"); +} + +/* Dump FMD ctrl2. The ctrl2 input is in host byte order */ +static void spu2_dump_fmd_ctrl2(u64 ctrl2) +{ + packet_log(" FMD CTRL2 %#16llx\n", ctrl2); + + packet_log(" AAD1 offset %llu length %llu bytes\n", + ctrl2 & SPU2_AAD1_OFFSET, + (ctrl2 & SPU2_AAD1_LEN) >> SPU2_AAD1_LEN_SHIFT); + packet_log(" AAD2 offset %llu\n", + (ctrl2 & SPU2_AAD2_OFFSET) >> SPU2_AAD2_OFFSET_SHIFT); + packet_log(" Payload offset %llu\n", + (ctrl2 & SPU2_PL_OFFSET) >> SPU2_PL_OFFSET_SHIFT); +} + +/* Dump FMD ctrl3. The ctrl3 input is in host byte order */ +static void spu2_dump_fmd_ctrl3(u64 ctrl3) +{ + packet_log(" FMD CTRL3 %#16llx\n", ctrl3); + + packet_log(" Payload length %llu bytes\n", ctrl3 & SPU2_PL_LEN); + packet_log(" TLS length %llu bytes\n", + (ctrl3 & SPU2_TLS_LEN) >> SPU2_TLS_LEN_SHIFT); +} + +static void spu2_dump_fmd(struct SPU2_FMD *fmd) +{ + spu2_dump_fmd_ctrl0(le64_to_cpu(fmd->ctrl0)); + spu2_dump_fmd_ctrl1(le64_to_cpu(fmd->ctrl1)); + spu2_dump_fmd_ctrl2(le64_to_cpu(fmd->ctrl2)); + spu2_dump_fmd_ctrl3(le64_to_cpu(fmd->ctrl3)); +} + +static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len, + u16 hash_iv_len, u16 ciph_iv_len) +{ + u8 *ptr = omd; + + packet_log(" OMD:\n"); + + if (hash_key_len) { + packet_log(" Hash Key Length %u bytes\n", hash_key_len); + packet_dump(" KEY: ", ptr, hash_key_len); + ptr += hash_key_len; + } + + if (ciph_key_len) { + packet_log(" Cipher Key Length %u bytes\n", ciph_key_len); + packet_dump(" KEY: ", ptr, ciph_key_len); + ptr += ciph_key_len; + } + + if (hash_iv_len) { + packet_log(" Hash IV Length %u bytes\n", hash_iv_len); + packet_dump(" hash IV: ", ptr, hash_iv_len); + ptr += ciph_key_len; + } + + if (ciph_iv_len) { + packet_log(" Cipher IV Length %u bytes\n", ciph_iv_len); + packet_dump(" cipher IV: ", ptr, ciph_iv_len); + } +} + +/* Dump a SPU2 header for debug */ +void spu2_dump_msg_hdr(u8 *buf, unsigned int buf_len) +{ + struct SPU2_FMD *fmd = (struct SPU2_FMD *)buf; + u8 *omd; + u64 ctrl1; + u16 hash_key_len; + u16 ciph_key_len; + u16 hash_iv_len; + u16 ciph_iv_len; + u16 omd_len; + + packet_log("\n"); + packet_log("SPU2 message header %p len: %u\n", buf, buf_len); + + spu2_dump_fmd(fmd); + omd = (u8 *)(fmd + 1); + + ctrl1 = le64_to_cpu(fmd->ctrl1); + hash_key_len = (ctrl1 & SPU2_HASH_KEY_LEN) >> SPU2_HASH_KEY_LEN_SHIFT; + ciph_key_len = (ctrl1 & SPU2_CIPH_KEY_LEN) >> SPU2_CIPH_KEY_LEN_SHIFT; + hash_iv_len = 0; + ciph_iv_len = (ctrl1 & SPU2_IV_LEN) >> SPU2_IV_LEN_SHIFT; + spu2_dump_omd(omd, hash_key_len, ciph_key_len, hash_iv_len, + ciph_iv_len); + + /* Double check sanity */ + omd_len = hash_key_len + ciph_key_len + hash_iv_len + ciph_iv_len; + 
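+        /*
+         * For this driver the OMD carries, in order, the hash key, the cipher
+         * key and the cipher IV (hash_iv_len is always 0 here), so a
+         * well-formed header is exactly FMD_SIZE + omd_len bytes.
+         */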
if (FMD_SIZE + omd_len != buf_len) { + packet_log + (" Packet parsed incorrectly. buf_len %u, sum of MD %zu\n", + buf_len, FMD_SIZE + omd_len); + } + packet_log("\n"); +} + +/** + * spu2_fmd_init() - At setkey time, initialize the fixed meta data for + * subsequent ablkcipher requests for this context. + * @spu2_cipher_type: Cipher algorithm + * @spu2_mode: Cipher mode + * @cipher_key_len: Length of cipher key, in bytes + * @cipher_iv_len: Length of cipher initialization vector, in bytes + * + * Return: 0 (success) + */ +static int spu2_fmd_init(struct SPU2_FMD *fmd, + enum spu2_cipher_type spu2_type, + enum spu2_cipher_mode spu2_mode, + u32 cipher_key_len, u32 cipher_iv_len) +{ + u64 ctrl0; + u64 ctrl1; + u64 ctrl2; + u64 ctrl3; + u32 aad1_offset; + u32 aad2_offset; + u16 aad1_len = 0; + u64 payload_offset; + + ctrl0 = (spu2_type << SPU2_CIPH_TYPE_SHIFT) | + (spu2_mode << SPU2_CIPH_MODE_SHIFT); + + ctrl1 = (cipher_key_len << SPU2_CIPH_KEY_LEN_SHIFT) | + ((u64)cipher_iv_len << SPU2_IV_LEN_SHIFT) | + ((u64)SPU2_RET_FMD_ONLY << SPU2_RETURN_MD_SHIFT) | SPU2_RETURN_PAY; + + /* + * AAD1 offset is from start of FD. FD length is always 0 for this + * driver. So AAD1_offset is always 0. + */ + aad1_offset = 0; + aad2_offset = aad1_offset; + payload_offset = 0; + ctrl2 = aad1_offset | + (aad1_len << SPU2_AAD1_LEN_SHIFT) | + (aad2_offset << SPU2_AAD2_OFFSET_SHIFT) | + (payload_offset << SPU2_PL_OFFSET_SHIFT); + + ctrl3 = 0; + + fmd->ctrl0 = cpu_to_le64(ctrl0); + fmd->ctrl1 = cpu_to_le64(ctrl1); + fmd->ctrl2 = cpu_to_le64(ctrl2); + fmd->ctrl3 = cpu_to_le64(ctrl3); + + return 0; +} + +/** + * spu2_fmd_ctrl0_write() - Write ctrl0 field in fixed metadata (FMD) field of + * SPU request packet. + * @fmd: Start of FMD field to be written + * @is_inbound: true if decrypting. false if encrypting. + * @authFirst: true if alg authenticates before encrypting + * @protocol: protocol selector + * @cipher_type: cipher algorithm + * @cipher_mode: cipher mode + * @auth_type: authentication type + * @auth_mode: authentication mode + */ +static void spu2_fmd_ctrl0_write(struct SPU2_FMD *fmd, + bool is_inbound, bool auth_first, + enum spu2_proto_sel protocol, + enum spu2_cipher_type cipher_type, + enum spu2_cipher_mode cipher_mode, + enum spu2_hash_type auth_type, + enum spu2_hash_mode auth_mode) +{ + u64 ctrl0 = 0; + + if ((cipher_type != SPU2_CIPHER_TYPE_NONE) && !is_inbound) + ctrl0 |= SPU2_CIPH_ENCRYPT_EN; + + ctrl0 |= ((u64)cipher_type << SPU2_CIPH_TYPE_SHIFT) | + ((u64)cipher_mode << SPU2_CIPH_MODE_SHIFT); + + if (protocol) + ctrl0 |= (u64)protocol << SPU2_PROTO_SEL_SHIFT; + + if (auth_first) + ctrl0 |= SPU2_HASH_FIRST; + + if (is_inbound && (auth_type != SPU2_HASH_TYPE_NONE)) + ctrl0 |= SPU2_CHK_TAG; + + ctrl0 |= (((u64)auth_type << SPU2_HASH_TYPE_SHIFT) | + ((u64)auth_mode << SPU2_HASH_MODE_SHIFT)); + + fmd->ctrl0 = cpu_to_le64(ctrl0); +} + +/** + * spu2_fmd_ctrl1_write() - Write ctrl1 field in fixed metadata (FMD) field of + * SPU request packet. + * @fmd: Start of FMD field to be written + * @assoc_size: Length of additional associated data, in bytes + * @auth_key_len: Length of authentication key, in bytes + * @cipher_key_len: Length of cipher key, in bytes + * @gen_iv: If true, hw generates IV and returns in response + * @hash_iv: IV participates in hash. Used for IPSEC and TLS. 
+ * @return_iv: Return IV in output packet before payload + * @ret_iv_len: Length of IV returned from SPU, in bytes + * @ret_iv_offset: Offset into full IV of start of returned IV + * @cipher_iv_len: Length of input cipher IV, in bytes + * @digest_size: Length of digest (aka, hash tag or ICV), in bytes + * @return_payload: Return payload in SPU response + * @return_md : return metadata in SPU response + * + * Packet can have AAD2 w/o AAD1. For algorithms currently supported, + * associated data goes in AAD2. + */ +static void spu2_fmd_ctrl1_write(struct SPU2_FMD *fmd, bool is_inbound, + u64 assoc_size, + u64 auth_key_len, u64 cipher_key_len, + bool gen_iv, bool hash_iv, bool return_iv, + u64 ret_iv_len, u64 ret_iv_offset, + u64 cipher_iv_len, u64 digest_size, + bool return_payload, bool return_md) +{ + u64 ctrl1 = 0; + + if (is_inbound && digest_size) + ctrl1 |= SPU2_TAG_LOC; + + if (assoc_size) { + ctrl1 |= SPU2_HAS_AAD2; + ctrl1 |= SPU2_RETURN_AAD2; /* need aad2 for gcm aes esp */ + } + + if (auth_key_len) + ctrl1 |= ((auth_key_len << SPU2_HASH_KEY_LEN_SHIFT) & + SPU2_HASH_KEY_LEN); + + if (cipher_key_len) + ctrl1 |= ((cipher_key_len << SPU2_CIPH_KEY_LEN_SHIFT) & + SPU2_CIPH_KEY_LEN); + + if (gen_iv) + ctrl1 |= SPU2_GENIV; + + if (hash_iv) + ctrl1 |= SPU2_HASH_IV; + + if (return_iv) { + ctrl1 |= SPU2_RET_IV; + ctrl1 |= ret_iv_len << SPU2_RET_IV_LEN_SHIFT; + ctrl1 |= ret_iv_offset << SPU2_IV_OFFSET_SHIFT; + } + + ctrl1 |= ((cipher_iv_len << SPU2_IV_LEN_SHIFT) & SPU2_IV_LEN); + + if (digest_size) + ctrl1 |= ((digest_size << SPU2_HASH_TAG_LEN_SHIFT) & + SPU2_HASH_TAG_LEN); + + /* Let's ask for the output pkt to include FMD, but don't need to + * get keys and IVs back in OMD. + */ + if (return_md) + ctrl1 |= ((u64)SPU2_RET_FMD_ONLY << SPU2_RETURN_MD_SHIFT); + else + ctrl1 |= ((u64)SPU2_RET_NO_MD << SPU2_RETURN_MD_SHIFT); + + /* Crypto API does not get assoc data back. So no need for AAD2. */ + + if (return_payload) + ctrl1 |= SPU2_RETURN_PAY; + + fmd->ctrl1 = cpu_to_le64(ctrl1); +} + +/** + * spu2_fmd_ctrl2_write() - Set the ctrl2 field in the fixed metadata field of + * SPU2 header. + * @fmd: Start of FMD field to be written + * @cipher_offset: Number of bytes from Start of Packet (end of FD field) where + * data to be encrypted or decrypted begins + * @auth_key_len: Length of authentication key, in bytes + * @auth_iv_len: Length of authentication initialization vector, in bytes + * @cipher_key_len: Length of cipher key, in bytes + * @cipher_iv_len: Length of cipher IV, in bytes + */ +static void spu2_fmd_ctrl2_write(struct SPU2_FMD *fmd, u64 cipher_offset, + u64 auth_key_len, u64 auth_iv_len, + u64 cipher_key_len, u64 cipher_iv_len) +{ + u64 ctrl2; + u64 aad1_offset; + u64 aad2_offset; + u16 aad1_len = 0; + u64 payload_offset; + + /* AAD1 offset is from start of FD. FD length always 0. */ + aad1_offset = 0; + + aad2_offset = aad1_offset; + payload_offset = cipher_offset; + ctrl2 = aad1_offset | + (aad1_len << SPU2_AAD1_LEN_SHIFT) | + (aad2_offset << SPU2_AAD2_OFFSET_SHIFT) | + (payload_offset << SPU2_PL_OFFSET_SHIFT); + + fmd->ctrl2 = cpu_to_le64(ctrl2); +} + +/** + * spu2_fmd_ctrl3_write() - Set the ctrl3 field in FMD + * @fmd: Fixed meta data. First field in SPU2 msg header. 
+ * @payload_len: Length of payload, in bytes + */ +static void spu2_fmd_ctrl3_write(struct SPU2_FMD *fmd, u64 payload_len) +{ + u64 ctrl3; + + ctrl3 = payload_len & SPU2_PL_LEN; + + fmd->ctrl3 = cpu_to_le64(ctrl3); +} + +/** + * spu2_ctx_max_payload() - Determine the maximum length of the payload for a + * SPU message for a given cipher and hash alg context. + * @cipher_alg: The cipher algorithm + * @cipher_mode: The cipher mode + * @blocksize: The size of a block of data for this algo + * + * For SPU2, the hardware generally ignores the PayloadLen field in ctrl3 of + * FMD and just keeps computing until it receives a DMA descriptor with the EOF + * flag set. So we consider the max payload to be infinite. AES CCM is an + * exception. + * + * Return: Max payload length in bytes + */ +u32 spu2_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize) +{ + if ((cipher_alg == CIPHER_ALG_AES) && + (cipher_mode == CIPHER_MODE_CCM)) { + u32 excess = SPU2_MAX_PAYLOAD % blocksize; + + return SPU2_MAX_PAYLOAD - excess; + } else { + return SPU_MAX_PAYLOAD_INF; + } +} + +/** + * spu_payload_length() - Given a SPU2 message header, extract the payload + * length. + * @spu_hdr: Start of SPU message header (FMD) + * + * Return: payload length, in bytes + */ +u32 spu2_payload_length(u8 *spu_hdr) +{ + struct SPU2_FMD *fmd = (struct SPU2_FMD *)spu_hdr; + u32 pl_len; + u64 ctrl3; + + ctrl3 = le64_to_cpu(fmd->ctrl3); + pl_len = ctrl3 & SPU2_PL_LEN; + + return pl_len; +} + +/** + * spu_response_hdr_len() - Determine the expected length of a SPU response + * header. + * @auth_key_len: Length of authentication key, in bytes + * @enc_key_len: Length of encryption key, in bytes + * + * For SPU2, includes just FMD. OMD is never requested. + * + * Return: Length of FMD, in bytes + */ +u16 spu2_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash) +{ + return FMD_SIZE; +} + +/** + * spu_hash_pad_len() - Calculate the length of hash padding required to extend + * data to a full block size. + * @hash_alg: hash algorithm + * @hash_mode: hash mode + * @chunksize: length of data, in bytes + * @hash_block_size: size of a hash block, in bytes + * + * SPU2 hardware does all hash padding + * + * Return: length of hash pad in bytes + */ +u16 spu2_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size) +{ + return 0; +} + +/** + * spu2_gcm_ccm_padlen() - Determine the length of GCM/CCM padding for either + * the AAD field or the data. + * + * Return: 0. Unlike SPU-M, SPU2 hardware does any GCM/CCM padding required. + */ +u32 spu2_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size) +{ + return 0; +} + +/** + * spu_assoc_resp_len() - Determine the size of the AAD2 buffer needed to catch + * associated data in a SPU2 output packet. + * @cipher_mode: cipher mode + * @assoc_len: length of additional associated data, in bytes + * @iv_len: length of initialization vector, in bytes + * @is_encrypt: true if encrypting. false if decrypt. 
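+ *
+ * For example, a gcm(aes) ESP encrypt request with assoc_len 16 and iv_len 8
+ * needs a 24-byte buffer, since the 8-byte IV is returned along with the
+ * associated data (the numbers here are illustrative).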
+ *
+ * Return: Length of buffer to catch associated data in response
+ */
+u32 spu2_assoc_resp_len(enum spu_cipher_mode cipher_mode,
+                        unsigned int assoc_len, unsigned int iv_len,
+                        bool is_encrypt)
+{
+        u32 resp_len = assoc_len;
+
+        if (is_encrypt)
+                /* gcm aes esp has to write 8-byte IV in response */
+                resp_len += iv_len;
+        return resp_len;
+}
+
+/*
+ * spu2_aead_ivlen() - Calculate the length of the AEAD IV to be included
+ * in a SPU request after the AAD and before the payload.
+ * @cipher_mode: cipher mode
+ * @iv_len: initialization vector length in bytes
+ *
+ * For SPU2, AEAD IV is included in OMD and does not need to be repeated
+ * prior to the payload.
+ *
+ * Return: Length of AEAD IV in bytes
+ */
+u8 spu2_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len)
+{
+        return 0;
+}
+
+/**
+ * spu2_hash_type() - Determine the type of hash operation.
+ * @src_sent: The number of bytes in the current request that have already
+ * been sent to the SPU to be hashed.
+ *
+ * SPU2 always does a FULL hash operation.
+ */
+enum hash_type spu2_hash_type(u32 src_sent)
+{
+        return HASH_TYPE_FULL;
+}
+
+/**
+ * spu2_digest_size() - Determine the size of a hash digest to expect the SPU to
+ * return.
+ * @alg_digest_size: Number of bytes in the final digest for the given algo
+ * @alg: The hash algorithm
+ * @htype: Type of hash operation (init, update, full, etc)
+ *
+ * Return: Size of the digest, in bytes
+ */
+u32 spu2_digest_size(u32 alg_digest_size, enum hash_alg alg,
+                     enum hash_type htype)
+{
+        return alg_digest_size;
+}
+
+/**
+ * spu2_create_request() - Build a SPU2 request message header, including FMD
+ * and OMD.
+ * @spu_hdr: Start of buffer where SPU request header is to be written
+ * @req_opts: SPU request message options
+ * @cipher_parms: Parameters related to cipher algorithm
+ * @hash_parms: Parameters related to hash algorithm
+ * @aead_parms: Parameters related to AEAD operation
+ * @data_size: Length of data to be encrypted or authenticated. If AEAD, does
+ * not include length of AAD.
+ *
+ * Construct the message starting at spu_hdr. Caller should allocate this buffer
+ * in DMA-able memory at least SPU_HEADER_ALLOC_LEN bytes long.
+ *
+ * Return: the length of the SPU header in bytes. 0 if an error occurs.
+ */
+u32 spu2_create_request(u8 *spu_hdr,
+                        struct spu_request_opts *req_opts,
+                        struct spu_cipher_parms *cipher_parms,
+                        struct spu_hash_parms *hash_parms,
+                        struct spu_aead_parms *aead_parms,
+                        unsigned int data_size)
+{
+        struct SPU2_FMD *fmd;
+        u8 *ptr;
+        unsigned int buf_len;
+        int err;
+        enum spu2_cipher_type spu2_ciph_type = SPU2_CIPHER_TYPE_NONE;
+        enum spu2_cipher_mode spu2_ciph_mode;
+        enum spu2_hash_type spu2_auth_type = SPU2_HASH_TYPE_NONE;
+        enum spu2_hash_mode spu2_auth_mode;
+        bool return_md = true;
+        enum spu2_proto_sel proto = SPU2_PROTO_RESV;
+
+        /* size of the payload */
+        unsigned int payload_len =
+            hash_parms->prebuf_len + data_size + hash_parms->pad_len -
+            ((req_opts->is_aead && req_opts->is_inbound) ?
+ hash_parms->digestsize : 0); + + /* offset of prebuf or data from start of AAD2 */ + unsigned int cipher_offset = aead_parms->assoc_size + + aead_parms->aad_pad_len + aead_parms->iv_len; + +#ifdef DEBUG + /* total size of the data following OMD (without STAT word padding) */ + unsigned int real_db_size = spu_real_db_size(aead_parms->assoc_size, + aead_parms->iv_len, + hash_parms->prebuf_len, + data_size, + aead_parms->aad_pad_len, + aead_parms->data_pad_len, + hash_parms->pad_len); +#endif + unsigned int assoc_size = aead_parms->assoc_size; + + if (req_opts->is_aead && + (cipher_parms->alg == CIPHER_ALG_AES) && + (cipher_parms->mode == CIPHER_MODE_GCM)) + /* + * On SPU 2, aes gcm cipher first on encrypt, auth first on + * decrypt + */ + req_opts->auth_first = req_opts->is_inbound; + + /* and do opposite for ccm (auth 1st on encrypt) */ + if (req_opts->is_aead && + (cipher_parms->alg == CIPHER_ALG_AES) && + (cipher_parms->mode == CIPHER_MODE_CCM)) + req_opts->auth_first = !req_opts->is_inbound; + + flow_log("%s()\n", __func__); + flow_log(" in:%u authFirst:%u\n", + req_opts->is_inbound, req_opts->auth_first); + flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg, + cipher_parms->mode, cipher_parms->type); + flow_log(" is_esp: %s\n", req_opts->is_esp ? "yes" : "no"); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + flow_log(" iv: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" auth alg:%u mode:%u type %u\n", + hash_parms->alg, hash_parms->mode, hash_parms->type); + flow_log(" digestsize: %u\n", hash_parms->digestsize); + flow_log(" authkey: %d\n", hash_parms->key_len); + flow_dump(" authkey: ", hash_parms->key_buf, hash_parms->key_len); + flow_log(" assoc_size:%u\n", assoc_size); + flow_log(" prebuf_len:%u\n", hash_parms->prebuf_len); + flow_log(" data_size:%u\n", data_size); + flow_log(" hash_pad_len:%u\n", hash_parms->pad_len); + flow_log(" real_db_size:%u\n", real_db_size); + flow_log(" cipher_offset:%u payload_len:%u\n", + cipher_offset, payload_len); + flow_log(" aead_iv: %u\n", aead_parms->iv_len); + + /* Convert to spu2 values for cipher alg, hash alg */ + err = spu2_cipher_xlate(cipher_parms->alg, cipher_parms->mode, + cipher_parms->type, + &spu2_ciph_type, &spu2_ciph_mode); + + /* If we are doing GCM hashing only - either via rfc4543 transform + * or because we happen to do GCM with AAD only and no payload - we + * need to configure hardware to use hash key rather than cipher key + * and put data into payload. This is because unlike SPU-M, running + * GCM cipher with 0 size payload is not permitted. 
+ */ + if ((req_opts->is_rfc4543) || + ((spu2_ciph_mode == SPU2_CIPHER_MODE_GCM) && + (payload_len == 0))) { + /* Use hashing (only) and set up hash key */ + spu2_ciph_type = SPU2_CIPHER_TYPE_NONE; + hash_parms->key_len = cipher_parms->key_len; + memcpy(hash_parms->key_buf, cipher_parms->key_buf, + cipher_parms->key_len); + cipher_parms->key_len = 0; + + if (req_opts->is_rfc4543) + payload_len += assoc_size; + else + payload_len = assoc_size; + cipher_offset = 0; + assoc_size = 0; + } + + if (err) + return 0; + + flow_log("spu2 cipher type %s, cipher mode %s\n", + spu2_ciph_type_name(spu2_ciph_type), + spu2_ciph_mode_name(spu2_ciph_mode)); + + err = spu2_hash_xlate(hash_parms->alg, hash_parms->mode, + hash_parms->type, + cipher_parms->type, + &spu2_auth_type, &spu2_auth_mode); + if (err) + return 0; + + flow_log("spu2 hash type %s, hash mode %s\n", + spu2_hash_type_name(spu2_auth_type), + spu2_hash_mode_name(spu2_auth_mode)); + + fmd = (struct SPU2_FMD *)spu_hdr; + + spu2_fmd_ctrl0_write(fmd, req_opts->is_inbound, req_opts->auth_first, + proto, spu2_ciph_type, spu2_ciph_mode, + spu2_auth_type, spu2_auth_mode); + + spu2_fmd_ctrl1_write(fmd, req_opts->is_inbound, assoc_size, + hash_parms->key_len, cipher_parms->key_len, + false, false, + aead_parms->return_iv, aead_parms->ret_iv_len, + aead_parms->ret_iv_off, + cipher_parms->iv_len, hash_parms->digestsize, + !req_opts->bd_suppress, return_md); + + spu2_fmd_ctrl2_write(fmd, cipher_offset, hash_parms->key_len, 0, + cipher_parms->key_len, cipher_parms->iv_len); + + spu2_fmd_ctrl3_write(fmd, payload_len); + + ptr = (u8 *)(fmd + 1); + buf_len = sizeof(struct SPU2_FMD); + + /* Write OMD */ + if (hash_parms->key_len) { + memcpy(ptr, hash_parms->key_buf, hash_parms->key_len); + ptr += hash_parms->key_len; + buf_len += hash_parms->key_len; + } + if (cipher_parms->key_len) { + memcpy(ptr, cipher_parms->key_buf, cipher_parms->key_len); + ptr += cipher_parms->key_len; + buf_len += cipher_parms->key_len; + } + if (cipher_parms->iv_len) { + memcpy(ptr, cipher_parms->iv_buf, cipher_parms->iv_len); + ptr += cipher_parms->iv_len; + buf_len += cipher_parms->iv_len; + } + + packet_dump(" SPU request header: ", spu_hdr, buf_len); + + return buf_len; +} + +/** + * spu_cipher_req_init() - Build an ablkcipher SPU2 request message header, + * including FMD and OMD. + * @spu_hdr: Location of start of SPU request (FMD field) + * @cipher_parms: Parameters describing cipher request + * + * Called at setkey time to initialize a msg header that can be reused for all + * subsequent ablkcipher requests. Construct the message starting at spu_hdr. + * Caller should allocate this buffer in DMA-able memory at least + * SPU_HEADER_ALLOC_LEN bytes long. + * + * Return: the total length of the SPU header (FMD and OMD) in bytes. 0 if an + * error occurs. 
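+ *
+ * A minimal usage sketch; ctx, rctx, req_hdr, cipher_parms and chunk_size are
+ * hypothetical caller state, not defined in this file:
+ *
+ *      // at setkey time, build and cache the header
+ *      hdr_len = spu2_cipher_req_init(ctx->spu_req_hdr, &cipher_parms);
+ *
+ *      // per request, copy the cached header, then finalize it for this chunk
+ *      memcpy(req_hdr, ctx->spu_req_hdr, hdr_len);
+ *      spu2_cipher_req_finish(req_hdr, hdr_len, !rctx->is_encrypt,
+ *                             &cipher_parms, false, chunk_size);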
+ */ +u16 spu2_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms) +{ + struct SPU2_FMD *fmd; + u8 *omd; + enum spu2_cipher_type spu2_type = SPU2_CIPHER_TYPE_NONE; + enum spu2_cipher_mode spu2_mode; + int err; + + flow_log("%s()\n", __func__); + flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg, + cipher_parms->mode, cipher_parms->type); + flow_log(" cipher_iv_len: %u\n", cipher_parms->iv_len); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + + /* Convert to spu2 values */ + err = spu2_cipher_xlate(cipher_parms->alg, cipher_parms->mode, + cipher_parms->type, &spu2_type, &spu2_mode); + if (err) + return 0; + + flow_log("spu2 cipher type %s, cipher mode %s\n", + spu2_ciph_type_name(spu2_type), + spu2_ciph_mode_name(spu2_mode)); + + /* Construct the FMD header */ + fmd = (struct SPU2_FMD *)spu_hdr; + err = spu2_fmd_init(fmd, spu2_type, spu2_mode, cipher_parms->key_len, + cipher_parms->iv_len); + if (err) + return 0; + + /* Write cipher key to OMD */ + omd = (u8 *)(fmd + 1); + if (cipher_parms->key_buf && cipher_parms->key_len) + memcpy(omd, cipher_parms->key_buf, cipher_parms->key_len); + + packet_dump(" SPU request header: ", spu_hdr, + FMD_SIZE + cipher_parms->key_len + cipher_parms->iv_len); + + return FMD_SIZE + cipher_parms->key_len + cipher_parms->iv_len; +} + +/** + * spu_cipher_req_finish() - Finish building a SPU request message header for a + * block cipher request. + * @spu_hdr: Start of the request message header (MH field) + * @spu_req_hdr_len: Length in bytes of the SPU request header + * @isInbound: 0 encrypt, 1 decrypt + * @cipher_parms: Parameters describing cipher operation to be performed + * @update_key: If true, rewrite the cipher key in SCTX + * @data_size: Length of the data in the BD field + * + * Assumes much of the header was already filled in at setkey() time in + * spu_cipher_req_init(). + * spu_cipher_req_init() fills in the encryption key. For RC4, when submitting a + * request for a non-first chunk, we use the 260-byte SUPDT field from the + * previous response as the key. update_key is true for this case. Unused in all + * other cases. + */ +void spu2_cipher_req_finish(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size) +{ + struct SPU2_FMD *fmd; + u8 *omd; /* start of optional metadata */ + u64 ctrl0; + u64 ctrl3; + + flow_log("%s()\n", __func__); + flow_log(" in: %u\n", is_inbound); + flow_log(" cipher alg: %u, cipher_type: %u\n", cipher_parms->alg, + cipher_parms->type); + if (update_key) { + flow_log(" cipher key len: %u\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, + cipher_parms->key_len); + } + flow_log(" iv len: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" data_size: %u\n", data_size); + + fmd = (struct SPU2_FMD *)spu_hdr; + omd = (u8 *)(fmd + 1); + + /* + * FMD ctrl0 was initialized at setkey time. update it to indicate + * whether we are encrypting or decrypting. 
+ */ + ctrl0 = le64_to_cpu(fmd->ctrl0); + if (is_inbound) + ctrl0 &= ~SPU2_CIPH_ENCRYPT_EN; /* decrypt */ + else + ctrl0 |= SPU2_CIPH_ENCRYPT_EN; /* encrypt */ + fmd->ctrl0 = cpu_to_le64(ctrl0); + + if (cipher_parms->alg && cipher_parms->iv_buf && cipher_parms->iv_len) { + /* cipher iv provided so put it in here */ + memcpy(omd + cipher_parms->key_len, cipher_parms->iv_buf, + cipher_parms->iv_len); + } + + ctrl3 = le64_to_cpu(fmd->ctrl3); + data_size &= SPU2_PL_LEN; + ctrl3 |= data_size; + fmd->ctrl3 = cpu_to_le64(ctrl3); + + packet_dump(" SPU request header: ", spu_hdr, spu_req_hdr_len); +} + +/** + * spu_request_pad() - Create pad bytes at the end of the data. + * @pad_start: Start of buffer where pad bytes are to be written + * @gcm_padding: Length of GCM padding, in bytes + * @hash_pad_len: Number of bytes of padding extend data to full block + * @auth_alg: Authentication algorithm + * @auth_mode: Authentication mode + * @total_sent: Length inserted at end of hash pad + * @status_padding: Number of bytes of padding to align STATUS word + * + * There may be three forms of pad: + * 1. GCM pad - for GCM mode ciphers, pad to 16-byte alignment + * 2. hash pad - pad to a block length, with 0x80 data terminator and + * size at the end + * 3. STAT pad - to ensure the STAT field is 4-byte aligned + */ +void spu2_request_pad(u8 *pad_start, u32 gcm_padding, u32 hash_pad_len, + enum hash_alg auth_alg, enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding) +{ + u8 *ptr = pad_start; + + /* fix data alignent for GCM */ + if (gcm_padding > 0) { + flow_log(" GCM: padding to 16 byte alignment: %u bytes\n", + gcm_padding); + memset(ptr, 0, gcm_padding); + ptr += gcm_padding; + } + + if (hash_pad_len > 0) { + /* clear the padding section */ + memset(ptr, 0, hash_pad_len); + + /* terminate the data */ + *ptr = 0x80; + ptr += (hash_pad_len - sizeof(u64)); + + /* add the size at the end as required per alg */ + if (auth_alg == HASH_ALG_MD5) + *(u64 *)ptr = cpu_to_le64((u64)total_sent * 8); + else /* SHA1, SHA2-224, SHA2-256 */ + *(u64 *)ptr = cpu_to_be64((u64)total_sent * 8); + ptr += sizeof(u64); + } + + /* pad to a 4byte alignment for STAT */ + if (status_padding > 0) { + flow_log(" STAT: padding to 4 byte alignment: %u bytes\n", + status_padding); + + memset(ptr, 0, status_padding); + ptr += status_padding; + } +} + +/** + * spu2_xts_tweak_in_payload() - Indicate that SPU2 does NOT place the XTS + * tweak field in the packet payload (it uses IV instead) + * + * Return: 0 + */ +u8 spu2_xts_tweak_in_payload(void) +{ + return 0; +} + +/** + * spu2_tx_status_len() - Return the length of the STATUS field in a SPU + * response message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spu2_tx_status_len(void) +{ + return SPU2_TX_STATUS_LEN; +} + +/** + * spu2_rx_status_len() - Return the length of the STATUS field in a SPU + * response message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spu2_rx_status_len(void) +{ + return SPU2_RX_STATUS_LEN; +} + +/** + * spu_status_process() - Process the status from a SPU response message. 
+ * @statp: start of STATUS word + * + * Return: 0 - if status is good and response should be processed + * !0 - status indicates an error and response is invalid + */ +int spu2_status_process(u8 *statp) +{ + /* SPU2 status is 2 bytes by default - SPU_RX_STATUS_LEN */ + u16 status = le16_to_cpu(*(__le16 *)statp); + + if (status == 0) + return 0; + + flow_log("rx status is %#x\n", status); + if (status == SPU2_INVALID_ICV) + return SPU_INVALID_ICV; + + return -EBADMSG; +} + +/** + * spu2_ccm_update_iv() - Update the IV as per the requirements for CCM mode. + * + * @digestsize: Digest size of this request + * @cipher_parms: (pointer to) cipher parmaeters, includes IV buf & IV len + * @assoclen: Length of AAD data + * @chunksize: length of input data to be sent in this req + * @is_encrypt: true if this is an output/encrypt operation + * @is_esp: true if this is an ESP / RFC4309 operation + * + */ +void spu2_ccm_update_iv(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, unsigned int chunksize, + bool is_encrypt, bool is_esp) +{ + int L; /* size of length field, in bytes */ + + /* + * In RFC4309 mode, L is fixed at 4 bytes; otherwise, IV from + * testmgr contains (L-1) in bottom 3 bits of first byte, + * per RFC 3610. + */ + if (is_esp) + L = CCM_ESP_L_VALUE; + else + L = ((cipher_parms->iv_buf[0] & CCM_B0_L_PRIME) >> + CCM_B0_L_PRIME_SHIFT) + 1; + + /* SPU2 doesn't want these length bytes nor the first byte... */ + cipher_parms->iv_len -= (1 + L); + memmove(cipher_parms->iv_buf, &cipher_parms->iv_buf[1], + cipher_parms->iv_len); +} + +/** + * spu2_wordalign_padlen() - SPU2 does not require padding. + * @data_size: length of data field in bytes + * + * Return: length of status field padding, in bytes (always 0 on SPU2) + */ +u32 spu2_wordalign_padlen(u32 data_size) +{ + return 0; +} diff --git a/drivers/crypto/bcm/spu2.h b/drivers/crypto/bcm/spu2.h new file mode 100644 index 000000000000..ab1f59934828 --- /dev/null +++ b/drivers/crypto/bcm/spu2.h @@ -0,0 +1,228 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +/* + * This file contains SPU message definitions specific to SPU2. 
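+ *
+ * A SPU2 request message header, as built by spu2_create_request() in spu2.c,
+ * is fixed metadata followed by optional metadata:
+ *
+ *      struct SPU2_FMD fmd;    - 32 bytes (ctrl0..ctrl3), little endian
+ *      u8 omd[];               - hash key, then cipher key, then cipher IV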
+ */ + +#ifndef _SPU2_H +#define _SPU2_H + +enum spu2_cipher_type { + SPU2_CIPHER_TYPE_NONE = 0x0, + SPU2_CIPHER_TYPE_AES128 = 0x1, + SPU2_CIPHER_TYPE_AES192 = 0x2, + SPU2_CIPHER_TYPE_AES256 = 0x3, + SPU2_CIPHER_TYPE_DES = 0x4, + SPU2_CIPHER_TYPE_3DES = 0x5, + SPU2_CIPHER_TYPE_LAST +}; + +enum spu2_cipher_mode { + SPU2_CIPHER_MODE_ECB = 0x0, + SPU2_CIPHER_MODE_CBC = 0x1, + SPU2_CIPHER_MODE_CTR = 0x2, + SPU2_CIPHER_MODE_CFB = 0x3, + SPU2_CIPHER_MODE_OFB = 0x4, + SPU2_CIPHER_MODE_XTS = 0x5, + SPU2_CIPHER_MODE_CCM = 0x6, + SPU2_CIPHER_MODE_GCM = 0x7, + SPU2_CIPHER_MODE_LAST +}; + +enum spu2_hash_type { + SPU2_HASH_TYPE_NONE = 0x0, + SPU2_HASH_TYPE_AES128 = 0x1, + SPU2_HASH_TYPE_AES192 = 0x2, + SPU2_HASH_TYPE_AES256 = 0x3, + SPU2_HASH_TYPE_MD5 = 0x6, + SPU2_HASH_TYPE_SHA1 = 0x7, + SPU2_HASH_TYPE_SHA224 = 0x8, + SPU2_HASH_TYPE_SHA256 = 0x9, + SPU2_HASH_TYPE_SHA384 = 0xa, + SPU2_HASH_TYPE_SHA512 = 0xb, + SPU2_HASH_TYPE_SHA512_224 = 0xc, + SPU2_HASH_TYPE_SHA512_256 = 0xd, + SPU2_HASH_TYPE_SHA3_224 = 0xe, + SPU2_HASH_TYPE_SHA3_256 = 0xf, + SPU2_HASH_TYPE_SHA3_384 = 0x10, + SPU2_HASH_TYPE_SHA3_512 = 0x11, + SPU2_HASH_TYPE_LAST +}; + +enum spu2_hash_mode { + SPU2_HASH_MODE_CMAC = 0x0, + SPU2_HASH_MODE_CBC_MAC = 0x1, + SPU2_HASH_MODE_XCBC_MAC = 0x2, + SPU2_HASH_MODE_HMAC = 0x3, + SPU2_HASH_MODE_RABIN = 0x4, + SPU2_HASH_MODE_CCM = 0x5, + SPU2_HASH_MODE_GCM = 0x6, + SPU2_HASH_MODE_RESERVED = 0x7, + SPU2_HASH_MODE_LAST +}; + +enum spu2_ret_md_opts { + SPU2_RET_NO_MD = 0, /* return no metadata */ + SPU2_RET_FMD_OMD = 1, /* return both FMD and OMD */ + SPU2_RET_FMD_ONLY = 2, /* return only FMD */ + SPU2_RET_FMD_OMD_IV = 3, /* return FMD and OMD with just IVs */ +}; + +/* Fixed Metadata format */ +struct SPU2_FMD { + u64 ctrl0; + u64 ctrl1; + u64 ctrl2; + u64 ctrl3; +}; + +#define FMD_SIZE sizeof(struct SPU2_FMD) + +/* Fixed part of request message header length in bytes. Just FMD. */ +#define SPU2_REQ_FIXED_LEN FMD_SIZE +#define SPU2_HEADER_ALLOC_LEN (SPU_REQ_FIXED_LEN + \ + 2 * MAX_KEY_SIZE + 2 * MAX_IV_SIZE) + +/* FMD ctrl0 field masks */ +#define SPU2_CIPH_ENCRYPT_EN 0x1 /* 0: decrypt, 1: encrypt */ +#define SPU2_CIPH_TYPE 0xF0 /* one of spu2_cipher_type */ +#define SPU2_CIPH_TYPE_SHIFT 4 +#define SPU2_CIPH_MODE 0xF00 /* one of spu2_cipher_mode */ +#define SPU2_CIPH_MODE_SHIFT 8 +#define SPU2_CFB_MASK 0x7000 /* cipher feedback mask */ +#define SPU2_CFB_MASK_SHIFT 12 +#define SPU2_PROTO_SEL 0xF00000 /* MACsec, IPsec, TLS... */ +#define SPU2_PROTO_SEL_SHIFT 20 +#define SPU2_HASH_FIRST 0x1000000 /* 1: hash input is input pkt + * data + */ +#define SPU2_CHK_TAG 0x2000000 /* 1: check digest provided */ +#define SPU2_HASH_TYPE 0x1F0000000 /* one of spu2_hash_type */ +#define SPU2_HASH_TYPE_SHIFT 28 +#define SPU2_HASH_MODE 0xF000000000 /* one of spu2_hash_mode */ +#define SPU2_HASH_MODE_SHIFT 36 +#define SPU2_CIPH_PAD_EN 0x100000000000 /* 1: Add pad to end of payload for + * enc + */ +#define SPU2_CIPH_PAD 0xFF000000000000 /* cipher pad value */ +#define SPU2_CIPH_PAD_SHIFT 48 + +/* FMD ctrl1 field masks */ +#define SPU2_TAG_LOC 0x1 /* 1: end of payload, 0: undef */ +#define SPU2_HAS_FR_DATA 0x2 /* 1: msg has frame data */ +#define SPU2_HAS_AAD1 0x4 /* 1: msg has AAD1 field */ +#define SPU2_HAS_NAAD 0x8 /* 1: msg has NAAD field */ +#define SPU2_HAS_AAD2 0x10 /* 1: msg has AAD2 field */ +#define SPU2_HAS_ESN 0x20 /* 1: msg has ESN field */ +#define SPU2_HASH_KEY_LEN 0xFF00 /* len of hash key in bytes. + * HMAC only. 
+ */ +#define SPU2_HASH_KEY_LEN_SHIFT 8 +#define SPU2_CIPH_KEY_LEN 0xFF00000 /* len of cipher key in bytes */ +#define SPU2_CIPH_KEY_LEN_SHIFT 20 +#define SPU2_GENIV 0x10000000 /* 1: hw generates IV */ +#define SPU2_HASH_IV 0x20000000 /* 1: IV incl in hash */ +#define SPU2_RET_IV 0x40000000 /* 1: return IV in output msg + * b4 payload + */ +#define SPU2_RET_IV_LEN 0xF00000000 /* length in bytes of IV returned. + * 0 = 16 bytes + */ +#define SPU2_RET_IV_LEN_SHIFT 32 +#define SPU2_IV_OFFSET 0xF000000000 /* gen IV offset */ +#define SPU2_IV_OFFSET_SHIFT 36 +#define SPU2_IV_LEN 0x1F0000000000 /* length of input IV in bytes */ +#define SPU2_IV_LEN_SHIFT 40 +#define SPU2_HASH_TAG_LEN 0x7F000000000000 /* hash tag length in bytes */ +#define SPU2_HASH_TAG_LEN_SHIFT 48 +#define SPU2_RETURN_MD 0x300000000000000 /* return metadata */ +#define SPU2_RETURN_MD_SHIFT 56 +#define SPU2_RETURN_FD 0x400000000000000 +#define SPU2_RETURN_AAD1 0x800000000000000 +#define SPU2_RETURN_NAAD 0x1000000000000000 +#define SPU2_RETURN_AAD2 0x2000000000000000 +#define SPU2_RETURN_PAY 0x4000000000000000 /* return payload */ + +/* FMD ctrl2 field masks */ +#define SPU2_AAD1_OFFSET 0xFFF /* byte offset of AAD1 field */ +#define SPU2_AAD1_LEN 0xFF000 /* length of AAD1 in bytes */ +#define SPU2_AAD1_LEN_SHIFT 12 +#define SPU2_AAD2_OFFSET 0xFFF00000 /* byte offset of AAD2 field */ +#define SPU2_AAD2_OFFSET_SHIFT 20 +#define SPU2_PL_OFFSET 0xFFFFFFFF00000000 /* payload offset from AAD2 */ +#define SPU2_PL_OFFSET_SHIFT 32 + +/* FMD ctrl3 field masks */ +#define SPU2_PL_LEN 0xFFFFFFFF /* payload length in bytes */ +#define SPU2_TLS_LEN 0xFFFF00000000 /* TLS encrypt: cipher len + * TLS decrypt: compressed len + */ +#define SPU2_TLS_LEN_SHIFT 32 + +/* + * Max value that can be represented in the Payload Length field of the + * ctrl3 word of FMD. 
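+ *
+ * A minimal sketch of how this field is packed and extracted, mirroring
+ * spu2_fmd_ctrl3_write() and spu2_payload_length() in spu2.c:
+ *
+ *      fmd->ctrl3 = cpu_to_le64(payload_len & SPU2_PL_LEN);
+ *      pl_len = le64_to_cpu(fmd->ctrl3) & SPU2_PL_LEN;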
+ */ +#define SPU2_MAX_PAYLOAD SPU2_PL_LEN + +/* Error values returned in STATUS field of response messages */ +#define SPU2_INVALID_ICV 1 + +void spu2_dump_msg_hdr(u8 *buf, unsigned int buf_len); +u32 spu2_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize); +u32 spu2_payload_length(u8 *spu_hdr); +u16 spu2_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash); +u16 spu2_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size); +u32 spu2_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size); +u32 spu2_assoc_resp_len(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, unsigned int iv_len, + bool is_encrypt); +u8 spu2_aead_ivlen(enum spu_cipher_mode cipher_mode, + u16 iv_len); +enum hash_type spu2_hash_type(u32 src_sent); +u32 spu2_digest_size(u32 alg_digest_size, enum hash_alg alg, + enum hash_type htype); +u32 spu2_create_request(u8 *spu_hdr, + struct spu_request_opts *req_opts, + struct spu_cipher_parms *cipher_parms, + struct spu_hash_parms *hash_parms, + struct spu_aead_parms *aead_parms, + unsigned int data_size); +u16 spu2_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms); +void spu2_cipher_req_finish(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size); +void spu2_request_pad(u8 *pad_start, u32 gcm_padding, u32 hash_pad_len, + enum hash_alg auth_alg, enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding); +u8 spu2_xts_tweak_in_payload(void); +u8 spu2_tx_status_len(void); +u8 spu2_rx_status_len(void); +int spu2_status_process(u8 *statp); +void spu2_ccm_update_iv(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, unsigned int chunksize, + bool is_encrypt, bool is_esp); +u32 spu2_wordalign_padlen(u32 data_size); +#endif diff --git a/drivers/crypto/bcm/spum.h b/drivers/crypto/bcm/spum.h new file mode 100644 index 000000000000..d0a5b5828638 --- /dev/null +++ b/drivers/crypto/bcm/spum.h @@ -0,0 +1,174 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +/* + * This file contains SPU message definitions specific to SPU-M. + */ + +#ifndef _SPUM_H_ +#define _SPUM_H_ + +#define SPU_CRYPTO_OPERATION_GENERIC 0x1 + +/* Length of STATUS field in tx and rx packets */ +#define SPU_TX_STATUS_LEN 4 + +/* SPU-M error codes */ +#define SPU_STATUS_MASK 0x0000FF00 +#define SPU_STATUS_SUCCESS 0x00000000 +#define SPU_STATUS_INVALID_ICV 0x00000100 + +#define SPU_STATUS_ERROR_FLAG 0x00020000 + +/* Request message. MH + EMH + BDESC + BD header */ +#define SPU_REQ_FIXED_LEN 24 + +/* + * Max length of a SPU message header. Used to allocate a buffer where + * the SPU message header is constructed. Can be used for either a SPU-M + * header or a SPU2 header. 
+ * For SPU-M, sum of the following: + * MH - 4 bytes + * EMH - 4 + * SCTX - 3 + + * max auth key len - 64 + * max cipher key len - 264 (RC4) + * max IV len - 16 + * BDESC - 12 + * BD header - 4 + * Total: 371 + * + * For SPU2, FMD_SIZE (32) plus lengths of hash and cipher keys, + * hash and cipher IVs. If SPU2 does not support RC4, then + */ +#define SPU_HEADER_ALLOC_LEN (SPU_REQ_FIXED_LEN + MAX_KEY_SIZE + \ + MAX_KEY_SIZE + MAX_IV_SIZE) + +/* + * Response message header length. Normally MH, EMH, BD header, but when + * BD_SUPPRESS is used for hash requests, there is no BD header. + */ +#define SPU_RESP_HDR_LEN 12 +#define SPU_HASH_RESP_HDR_LEN 8 + +/* + * Max value that can be represented in the Payload Length field of the BD + * header. This is a 16-bit field. + */ +#define SPUM_NS2_MAX_PAYLOAD (BIT(16) - 1) + +/* + * NSP SPU is limited to ~9KB because of FA2 FIFO size limitations; + * Set MAX_PAYLOAD to 8k to allow for addition of header, digest, etc. + * and stay within limitation. + */ + +#define SPUM_NSP_MAX_PAYLOAD 8192 + +/* Buffer Descriptor Header [BDESC]. SPU in big-endian mode. */ +struct BDESC_HEADER { + u16 offset_mac; /* word 0 [31-16] */ + u16 length_mac; /* word 0 [15-0] */ + u16 offset_crypto; /* word 1 [31-16] */ + u16 length_crypto; /* word 1 [15-0] */ + u16 offset_icv; /* word 2 [31-16] */ + u16 offset_iv; /* word 2 [15-0] */ +}; + +/* Buffer Data Header [BD]. SPU in big-endian mode. */ +struct BD_HEADER { + u16 size; + u16 prev_length; +}; + +/* Command Context Header. SPU-M in big endian mode. */ +struct MHEADER { + u8 flags; /* [31:24] */ + u8 op_code; /* [23:16] */ + u16 reserved; /* [15:0] */ +}; + +/* MH header flags bits */ +#define MH_SUPDT_PRES BIT(0) +#define MH_HASH_PRES BIT(2) +#define MH_BD_PRES BIT(3) +#define MH_MFM_PRES BIT(4) +#define MH_BDESC_PRES BIT(5) +#define MH_SCTX_PRES BIT(7) + +/* SCTX word 0 bit offsets and fields masks */ +#define SCTX_SIZE 0x000000FF + +/* SCTX word 1 bit shifts and field masks */ +#define UPDT_OFST 0x000000FF /* offset of SCTX updateable fld */ +#define HASH_TYPE 0x00000300 /* hash alg operation type */ +#define HASH_TYPE_SHIFT 8 +#define HASH_MODE 0x00001C00 /* one of spu2_hash_mode */ +#define HASH_MODE_SHIFT 10 +#define HASH_ALG 0x0000E000 /* hash algorithm */ +#define HASH_ALG_SHIFT 13 +#define CIPHER_TYPE 0x00030000 /* encryption operation type */ +#define CIPHER_TYPE_SHIFT 16 +#define CIPHER_MODE 0x001C0000 /* encryption mode */ +#define CIPHER_MODE_SHIFT 18 +#define CIPHER_ALG 0x00E00000 /* encryption algo */ +#define CIPHER_ALG_SHIFT 21 +#define ICV_IS_512 BIT(27) +#define ICV_IS_512_SHIFT 27 +#define CIPHER_ORDER BIT(30) +#define CIPHER_ORDER_SHIFT 30 +#define CIPHER_INBOUND BIT(31) +#define CIPHER_INBOUND_SHIFT 31 + +/* SCTX word 2 bit shifts and field masks */ +#define EXP_IV_SIZE 0x7 +#define IV_OFFSET BIT(3) +#define IV_OFFSET_SHIFT 3 +#define GEN_IV BIT(5) +#define GEN_IV_SHIFT 5 +#define EXPLICIT_IV BIT(6) +#define EXPLICIT_IV_SHIFT 6 +#define SCTX_IV BIT(7) +#define SCTX_IV_SHIFT 7 +#define ICV_SIZE 0x0F00 +#define ICV_SIZE_SHIFT 8 +#define CHECK_ICV BIT(12) +#define CHECK_ICV_SHIFT 12 +#define INSERT_ICV BIT(13) +#define INSERT_ICV_SHIFT 13 +#define BD_SUPPRESS BIT(19) +#define BD_SUPPRESS_SHIFT 19 + +/* Generic Mode Security Context Structure [SCTX] */ +struct SCTX { +/* word 0: protocol flags */ + u32 proto_flags; + +/* word 1: cipher flags */ + u32 cipher_flags; + +/* word 2: Extended cipher flags */ + u32 ecf; + +}; + +struct SPUHEADER { + struct MHEADER mh; + u32 emh; + struct SCTX sa; +}; + +#endif 
/* _SPUM_H_ */ diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c new file mode 100644 index 000000000000..0502f460dacd --- /dev/null +++ b/drivers/crypto/bcm/util.c @@ -0,0 +1,581 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#include <linux/debugfs.h> + +#include "cipher.h" +#include "util.h" + +/* offset of SPU_OFIFO_CTRL register */ +#define SPU_OFIFO_CTRL 0x40 +#define SPU_FIFO_WATERMARK 0x1FF + +/** + * spu_sg_at_offset() - Find the scatterlist entry at a given distance from the + * start of a scatterlist. + * @sg: [in] Start of a scatterlist + * @skip: [in] Distance from the start of the scatterlist, in bytes + * @sge: [out] Scatterlist entry at skip bytes from start + * @sge_offset: [out] Number of bytes from start of sge buffer to get to + * requested distance. + * + * Return: 0 if entry found at requested distance + * < 0 otherwise + */ +int spu_sg_at_offset(struct scatterlist *sg, unsigned int skip, + struct scatterlist **sge, unsigned int *sge_offset) +{ + /* byte index from start of sg to the end of the previous entry */ + unsigned int index = 0; + /* byte index from start of sg to the end of the current entry */ + unsigned int next_index; + + next_index = sg->length; + while (next_index <= skip) { + sg = sg_next(sg); + index = next_index; + if (!sg) + return -EINVAL; + next_index += sg->length; + } + + *sge_offset = skip - index; + *sge = sg; + return 0; +} + +/* Copy len bytes of sg data, starting at offset skip, to a dest buffer */ +void sg_copy_part_to_buf(struct scatterlist *src, u8 *dest, + unsigned int len, unsigned int skip) +{ + size_t copied; + unsigned int nents = sg_nents(src); + + copied = sg_pcopy_to_buffer(src, nents, dest, len, skip); + if (copied != len) { + flow_log("%s copied %u bytes of %u requested. ", + __func__, (u32)copied, len); + flow_log("sg with %u entries and skip %u\n", nents, skip); + } +} + +/* + * Copy data into a scatterlist starting at a specified offset in the + * scatterlist. Specifically, copy len bytes of data in the buffer src + * into the scatterlist dest, starting skip bytes into the scatterlist. + */ +void sg_copy_part_from_buf(struct scatterlist *dest, u8 *src, + unsigned int len, unsigned int skip) +{ + size_t copied; + unsigned int nents = sg_nents(dest); + + copied = sg_pcopy_from_buffer(dest, nents, src, len, skip); + if (copied != len) { + flow_log("%s copied %u bytes of %u requested. ", + __func__, (u32)copied, len); + flow_log("sg with %u entries and skip %u\n", nents, skip); + } +} + +/** + * spu_sg_count() - Determine number of elements in scatterlist to provide a + * specified number of bytes. 
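+ *
+ * A minimal usage sketch, pairing this with spu_sg_at_offset() above; src,
+ * hdr_len and data_len are hypothetical caller values:
+ *
+ *      struct scatterlist *sg;
+ *      unsigned int sge_off;
+ *      int nents, err;
+ *
+ *      nents = spu_sg_count(src, hdr_len, data_len);
+ *      err = spu_sg_at_offset(src, hdr_len, &sg, &sge_off);
+ *      // on success (err == 0), sg and sge_off locate the first byte of the
+ *      // data region, and nents entries of src cover data_len bytes from there
+ *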
+ * @sg_list: scatterlist to examine + * @skip: index of starting point + * @nbytes: consider elements of scatterlist until reaching this number of + * bytes + * + * Return: the number of sg entries contributing to nbytes of data + */ +int spu_sg_count(struct scatterlist *sg_list, unsigned int skip, int nbytes) +{ + struct scatterlist *sg; + int sg_nents = 0; + unsigned int offset; + + if (!sg_list) + return 0; + + if (spu_sg_at_offset(sg_list, skip, &sg, &offset) < 0) + return 0; + + while (sg && (nbytes > 0)) { + sg_nents++; + nbytes -= (sg->length - offset); + offset = 0; + sg = sg_next(sg); + } + return sg_nents; +} + +/** + * spu_msg_sg_add() - Copy scatterlist entries from one sg to another, up to a + * given length. + * @to_sg: scatterlist to copy to + * @from_sg: scatterlist to copy from + * @from_skip: number of bytes to skip in from_sg. Non-zero when previous + * request included part of the buffer in entry in from_sg. + * Assumes from_skip < from_sg->length. + * @from_nents number of entries in from_sg + * @length number of bytes to copy. may reach this limit before exhausting + * from_sg. + * + * Copies the entries themselves, not the data in the entries. Assumes to_sg has + * enough entries. Does not limit the size of an individual buffer in to_sg. + * + * to_sg, from_sg, skip are all updated to end of copy + * + * Return: Number of bytes copied + */ +u32 spu_msg_sg_add(struct scatterlist **to_sg, + struct scatterlist **from_sg, u32 *from_skip, + u8 from_nents, u32 length) +{ + struct scatterlist *sg; /* an entry in from_sg */ + struct scatterlist *to = *to_sg; + struct scatterlist *from = *from_sg; + u32 skip = *from_skip; + u32 offset; + int i; + u32 entry_len = 0; + u32 frag_len = 0; /* length of entry added to to_sg */ + u32 copied = 0; /* number of bytes copied so far */ + + if (length == 0) + return 0; + + for_each_sg(from, sg, from_nents, i) { + /* number of bytes in this from entry not yet used */ + entry_len = sg->length - skip; + frag_len = min(entry_len, length - copied); + offset = sg->offset + skip; + if (frag_len) + sg_set_page(to++, sg_page(sg), frag_len, offset); + copied += frag_len; + if (copied == entry_len) { + /* used up all of from entry */ + skip = 0; /* start at beginning of next entry */ + } + if (copied == length) + break; + } + *to_sg = to; + *from_sg = sg; + if (frag_len < entry_len) + *from_skip = skip + frag_len; + else + *from_skip = 0; + + return copied; +} + +void add_to_ctr(u8 *ctr_pos, unsigned int increment) +{ + __be64 *high_be = (__be64 *)ctr_pos; + __be64 *low_be = high_be + 1; + u64 orig_low = __be64_to_cpu(*low_be); + u64 new_low = orig_low + (u64)increment; + + *low_be = __cpu_to_be64(new_low); + if (new_low < orig_low) + /* there was a carry from the low 8 bytes */ + *high_be = __cpu_to_be64(__be64_to_cpu(*high_be) + 1); +} + +struct sdesc { + struct shash_desc shash; + char ctx[]; +}; + +/* do a synchronous decrypt operation */ +int do_decrypt(char *alg_name, + void *key_ptr, unsigned int key_len, + void *iv_ptr, void *src_ptr, void *dst_ptr, + unsigned int block_len) +{ + struct scatterlist sg_in[1], sg_out[1]; + struct crypto_blkcipher *tfm = + crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + struct blkcipher_desc desc = {.tfm = tfm, .flags = 0 }; + int ret = 0; + void *iv; + int ivsize; + + flow_log("%s() name:%s block_len:%u\n", __func__, alg_name, block_len); + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + crypto_blkcipher_setkey((void *)tfm, key_ptr, key_len); + + sg_init_table(sg_in, 1); + sg_set_buf(sg_in, src_ptr, 
block_len); + + sg_init_table(sg_out, 1); + sg_set_buf(sg_out, dst_ptr, block_len); + + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + memcpy(iv, iv_ptr, ivsize); + + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, block_len); + crypto_free_blkcipher(tfm); + + if (ret < 0) + pr_err("aes_decrypt failed %d\n", ret); + + return ret; +} + +/** + * do_shash() - Do a synchronous hash operation in software + * @name: The name of the hash algorithm + * @result: Buffer where digest is to be written + * @data1: First part of data to hash. May be NULL. + * @data1_len: Length of data1, in bytes + * @data2: Second part of data to hash. May be NULL. + * @data2_len: Length of data2, in bytes + * @key: Key (if keyed hash) + * @key_len: Length of key, in bytes (or 0 if non-keyed hash) + * + * Note that the crypto API will not select this driver's own transform because + * this driver only registers asynchronous algos. + * + * Return: 0 if hash successfully stored in result + * < 0 otherwise + */ +int do_shash(unsigned char *name, unsigned char *result, + const u8 *data1, unsigned int data1_len, + const u8 *data2, unsigned int data2_len, + const u8 *key, unsigned int key_len) +{ + int rc; + unsigned int size; + struct crypto_shash *hash; + struct sdesc *sdesc; + + hash = crypto_alloc_shash(name, 0, 0); + if (IS_ERR(hash)) { + rc = PTR_ERR(hash); + pr_err("%s: Crypto %s allocation error %d", __func__, name, rc); + return rc; + } + + size = sizeof(struct shash_desc) + crypto_shash_descsize(hash); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) { + rc = -ENOMEM; + pr_err("%s: Memory allocation failure", __func__); + goto do_shash_err; + } + sdesc->shash.tfm = hash; + sdesc->shash.flags = 0x0; + + if (key_len > 0) { + rc = crypto_shash_setkey(hash, key, key_len); + if (rc) { + pr_err("%s: Could not setkey %s shash", __func__, name); + goto do_shash_err; + } + } + + rc = crypto_shash_init(&sdesc->shash); + if (rc) { + pr_err("%s: Could not init %s shash", __func__, name); + goto do_shash_err; + } + rc = crypto_shash_update(&sdesc->shash, data1, data1_len); + if (rc) { + pr_err("%s: Could not update1", __func__); + goto do_shash_err; + } + if (data2 && data2_len) { + rc = crypto_shash_update(&sdesc->shash, data2, data2_len); + if (rc) { + pr_err("%s: Could not update2", __func__); + goto do_shash_err; + } + } + rc = crypto_shash_final(&sdesc->shash, result); + if (rc) + pr_err("%s: Could not genereate %s hash", __func__, name); + +do_shash_err: + crypto_free_shash(hash); + kfree(sdesc); + + return rc; +} + +/* Dump len bytes of a scatterlist starting at skip bytes into the sg */ +void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len) +{ + u8 dbuf[16]; + unsigned int idx = skip; + unsigned int num_out = 0; /* number of bytes dumped so far */ + unsigned int count; + + if (packet_debug_logging) { + while (num_out < len) { + count = (len - num_out > 16) ? 
16 : len - num_out; + sg_copy_part_to_buf(sg, dbuf, count, idx); + num_out += count; + print_hex_dump(KERN_ALERT, " sg: ", DUMP_PREFIX_NONE, + 4, 1, dbuf, count, false); + idx += 16; + } + } + if (debug_logging_sleep) + msleep(debug_logging_sleep); +} + +/* Returns the name for a given cipher alg/mode */ +char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode) +{ + switch (alg) { + case CIPHER_ALG_RC4: + return "rc4"; + case CIPHER_ALG_AES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(aes)"; + case CIPHER_MODE_ECB: + return "ecb(aes)"; + case CIPHER_MODE_OFB: + return "ofb(aes)"; + case CIPHER_MODE_CFB: + return "cfb(aes)"; + case CIPHER_MODE_CTR: + return "ctr(aes)"; + case CIPHER_MODE_XTS: + return "xts(aes)"; + case CIPHER_MODE_GCM: + return "gcm(aes)"; + default: + return "aes"; + } + break; + case CIPHER_ALG_DES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(des)"; + case CIPHER_MODE_ECB: + return "ecb(des)"; + case CIPHER_MODE_CTR: + return "ctr(des)"; + default: + return "des"; + } + break; + case CIPHER_ALG_3DES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(des3_ede)"; + case CIPHER_MODE_ECB: + return "ecb(des3_ede)"; + case CIPHER_MODE_CTR: + return "ctr(des3_ede)"; + default: + return "3des"; + } + break; + default: + return "other"; + } +} + +static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct device_private *ipriv; + char *buf; + ssize_t ret, out_offset, out_count; + int i; + u32 fifo_len; + u32 spu_ofifo_ctrl; + u32 alg; + u32 mode; + u32 op_cnt; + + out_count = 2048; + + buf = kmalloc(out_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ipriv = filp->private_data; + out_offset = 0; + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Number of SPUs.........%u\n", + ipriv->spu.num_spu); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Current sessions.......%u\n", + atomic_read(&ipriv->session_count)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Session count..........%u\n", + atomic_read(&ipriv->stream_count)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Cipher setkey..........%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Cipher Ops.............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); + for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { + for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { + op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + spu_alg_name(alg, mode), op_cnt); + } + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Hash Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_HASH])); + for (alg = 0; alg < HASH_ALG_LAST; alg++) { + op_cnt = atomic_read(&ipriv->hash_cnt[alg]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + hash_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "HMAC setkey............%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "HMAC Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); + for (alg = 0; alg < HASH_ALG_LAST; alg++) { + op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); + if (op_cnt) { + out_offset += 
snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + hash_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "AEAD setkey............%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); + + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "AEAD Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); + for (alg = 0; alg < AEAD_TYPE_LAST; alg++) { + op_cnt = atomic_read(&ipriv->aead_cnt[alg]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + aead_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Bytes of req data......%llu\n", + (u64)atomic64_read(&ipriv->bytes_out)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Bytes of resp data.....%llu\n", + (u64)atomic64_read(&ipriv->bytes_in)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Mailbox full...........%u\n", + atomic_read(&ipriv->mb_no_spc)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Mailbox send failures..%u\n", + atomic_read(&ipriv->mb_send_fail)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Check ICV errors.......%u\n", + atomic_read(&ipriv->bad_icv)); + if (ipriv->spu.spu_type == SPU_TYPE_SPUM) + for (i = 0; i < ipriv->spu.num_spu; i++) { + spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + + SPU_OFIFO_CTRL); + fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + "SPU %d output FIFO high water.....%u\n", + i, fifo_len); + } + + if (out_offset > out_count) + out_offset = out_count; + + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + kfree(buf); + return ret; +} + +static const struct file_operations spu_debugfs_stats = { + .owner = THIS_MODULE, + .open = simple_open, + .read = spu_debugfs_read, +}; + +/* + * Create the debug FS directories. If the top-level directory has not yet + * been created, create it now. Create a stats file in this directory for + * a SPU. + */ +void spu_setup_debugfs(void) +{ + if (!debugfs_initialized()) + return; + + if (!iproc_priv.debugfs_dir) + iproc_priv.debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, + NULL); + + if (!iproc_priv.debugfs_stats) + /* Create file with permissions S_IRUSR */ + debugfs_create_file("stats", 0400, iproc_priv.debugfs_dir, + &iproc_priv, &spu_debugfs_stats); +} + +void spu_free_debugfs(void) +{ + debugfs_remove_recursive(iproc_priv.debugfs_dir); + iproc_priv.debugfs_dir = NULL; +} + +/** + * format_value_ccm() - Format a value into a buffer, using a specified number + * of bytes (i.e. maybe writing value X into a 4 byte + * buffer, or maybe into a 12 byte buffer), as per the + * SPU CCM spec. 
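 + * + * Example (illustrative note, not part of the original patch): with the + * implementation below, format_value_ccm(0x0203, buf, 4) leaves buf as + * { 0x00, 0x00, 0x02, 0x03 }; the buffer is zeroed first and the value is + * then written big-endian, right-aligned, using at most the low 32 bits + * of val.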
+ * + * @val: value to write (up to max of unsigned int) + * @buf: (pointer to) buffer to write the value + * @len: number of bytes to use (0 to 255) + * + */ +void format_value_ccm(unsigned int val, u8 *buf, u8 len) +{ + int i; + + /* First clear full output buffer */ + memset(buf, 0, len); + + /* Then, starting from right side, fill in with data */ + for (i = 0; i < len; i++) { + buf[len - i - 1] = (val >> (8 * i)) & 0xff; + if (i >= 3) + break; /* Only handle up to 32 bits of 'val' */ + } +} diff --git a/drivers/crypto/bcm/util.h b/drivers/crypto/bcm/util.h new file mode 100644 index 000000000000..712e029795f8 --- /dev/null +++ b/drivers/crypto/bcm/util.h @@ -0,0 +1,116 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#ifndef _UTIL_H +#define _UTIL_H + +#include <linux/kernel.h> +#include <linux/delay.h> + +#include "spu.h" + +extern int flow_debug_logging; +extern int packet_debug_logging; +extern int debug_logging_sleep; + +#ifdef DEBUG +#define flow_log(...) \ + do { \ + if (flow_debug_logging) { \ + printk(__VA_ARGS__); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) +#define flow_dump(msg, var, var_len) \ + do { \ + if (flow_debug_logging) { \ + print_hex_dump(KERN_ALERT, msg, DUMP_PREFIX_NONE, \ + 16, 1, var, var_len, false); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) + +#define packet_log(...) \ + do { \ + if (packet_debug_logging) { \ + printk(__VA_ARGS__); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) +#define packet_dump(msg, var, var_len) \ + do { \ + if (packet_debug_logging) { \ + print_hex_dump(KERN_ALERT, msg, DUMP_PREFIX_NONE, \ + 16, 1, var, var_len, false); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) + +void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len); + +#define dump_sg(sg, skip, len) __dump_sg(sg, skip, len) + +#else /* !DEBUG_ON */ + +#define flow_log(...) do {} while (0) +#define flow_dump(msg, var, var_len) do {} while (0) +#define packet_log(...) 
do {} while (0) +#define packet_dump(msg, var, var_len) do {} while (0) + +#define dump_sg(sg, skip, len) do {} while (0) + +#endif /* DEBUG_ON */ + +int spu_sg_at_offset(struct scatterlist *sg, unsigned int skip, + struct scatterlist **sge, unsigned int *sge_offset); + +/* Copy sg data, from skip, length len, to dest */ +void sg_copy_part_to_buf(struct scatterlist *src, u8 *dest, + unsigned int len, unsigned int skip); +/* Copy src into scatterlist from offset, length len */ +void sg_copy_part_from_buf(struct scatterlist *dest, u8 *src, + unsigned int len, unsigned int skip); + +int spu_sg_count(struct scatterlist *sg_list, unsigned int skip, int nbytes); +u32 spu_msg_sg_add(struct scatterlist **to_sg, + struct scatterlist **from_sg, u32 *skip, + u8 from_nents, u32 tot_len); + +void add_to_ctr(u8 *ctr_pos, unsigned int increment); + +/* do a synchronous decrypt operation */ +int do_decrypt(char *alg_name, + void *key_ptr, unsigned int key_len, + void *iv_ptr, void *src_ptr, void *dst_ptr, + unsigned int block_len); + +/* produce a message digest from data of length n bytes */ +int do_shash(unsigned char *name, unsigned char *result, + const u8 *data1, unsigned int data1_len, + const u8 *data2, unsigned int data2_len, + const u8 *key, unsigned int key_len); + +char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode); + +void spu_setup_debugfs(void); +void spu_free_debugfs(void); +void format_value_ccm(unsigned int val, u8 *buf, u8 len); + +#endif diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 10db7df366c8..a118b9bed669 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -203,7 +203,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -233,7 +233,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = dma_count; crc->sg_cpu[i].x_modify = dma_mod; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -257,7 +257,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); diff --git a/drivers/crypto/bfin_crc.h b/drivers/crypto/bfin_crc.h index 75cef4dc85a1..786ef746d109 100644 --- a/drivers/crypto/bfin_crc.h +++ b/drivers/crypto/bfin_crc.h @@ -55,7 +55,6 @@ struct crc_info { #include <linux/types.h> #include <linux/spinlock.h> -#include <linux/miscdevice.h> struct crc_register { u32 control; diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 64bf3024b680..bc0d3569f8d9 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -74,7 +74,7 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD config CRYPTO_DEV_FSL_CAAM_CRYPTO_API tristate "Register algorithm implementations with the Crypto API" - depends 
on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_AEAD select CRYPTO_AUTHENC @@ -89,7 +89,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API config CRYPTO_DEV_FSL_CAAM_AHASH_API tristate "Register hash algorithm implementations with Crypto API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_HASH help @@ -101,7 +101,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API config CRYPTO_DEV_FSL_CAAM_PKC_API tristate "Register public key cryptography implementations with Crypto API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_RSA help @@ -113,7 +113,7 @@ config CRYPTO_DEV_FSL_CAAM_PKC_API config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_RNG select HW_RANDOM @@ -134,3 +134,6 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG help Selecting this will enable printing of various debug information in the CAAM driver. + +config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC + def_tristate CRYPTO_DEV_FSL_CAAM_CRYPTO_API diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 08bf5515ae8a..6554742f357e 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -8,6 +8,7 @@ endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 954a64c7757b..9bc80eb06934 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -2,6 +2,7 @@ * caam - Freescale FSL CAAM support for crypto API * * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2016 NXP * * Based on talitos crypto API driver. 
* @@ -53,6 +54,7 @@ #include "error.h" #include "sg_sw_sec4.h" #include "key_gen.h" +#include "caamalg_desc.h" /* * crypto alg @@ -62,8 +64,6 @@ #define CAAM_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + \ CTR_RFC3686_NONCE_SIZE + \ SHA512_DIGEST_SIZE * 2) -/* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ -#define CAAM_MAX_IV_LENGTH 16 #define AEAD_DESC_JOB_IO_LEN (DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2) #define GCM_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ @@ -71,37 +71,6 @@ #define AUTHENC_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ CAAM_CMD_SZ * 5) -/* length of descriptors text */ -#define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) -#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ) -#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) -#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 9 * CAAM_CMD_SZ) - -/* Note: Nonce is counted in enckeylen */ -#define DESC_AEAD_CTR_RFC3686_LEN (4 * CAAM_CMD_SZ) - -#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) -#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ) -#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ) - -#define DESC_GCM_BASE (3 * CAAM_CMD_SZ) -#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 16 * CAAM_CMD_SZ) -#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 12 * CAAM_CMD_SZ) - -#define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) -#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) - -#define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ) -#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ) - -#define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) -#define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ - 20 * CAAM_CMD_SZ) -#define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ - 15 * CAAM_CMD_SZ) - #define DESC_MAX_USED_BYTES (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) #define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ) @@ -117,8 +86,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - struct scatterlist *sg, size_t tlen, bool ascii, - bool may_sleep) + struct scatterlist *sg, size_t tlen, bool ascii) { struct scatterlist *it; void *it_page; @@ -152,7 +120,6 @@ static struct list_head alg_list; struct caam_alg_entry { int class1_alg_type; int class2_alg_type; - int alg_op; bool rfc3686; bool geniv; }; @@ -163,302 +130,67 @@ struct caam_aead_alg { bool registered; }; -/* Set DK bit in class 1 operation if shared */ -static inline void append_dec_op1(u32 *desc, u32 type) -{ - u32 *jump_cmd, *uncond_jump_cmd; - - /* DK bit is valid only for AES */ - if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) { - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT); - return; - } - - jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD); - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT); - uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL); - set_jump_tgt_here(desc, jump_cmd); - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT | OP_ALG_AAI_DK); - set_jump_tgt_here(desc, uncond_jump_cmd); -} - -/* - * For aead functions, read payload and write payload, - * both of which are specified in req->src and req->dst - */ -static inline void aead_append_src_dst(u32 *desc, u32 msg_type) -{ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | - KEY_VLF | msg_type | 
FIFOLD_TYPE_LASTBOTH); -} - -/* - * For ablkcipher encrypt and decrypt, read from req->src and - * write to req->dst - */ -static inline void ablkcipher_append_src_dst(u32 *desc) -{ - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | - KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); -} - /* * per-session context */ struct caam_ctx { - struct device *jrdev; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u32 sh_desc_givenc[DESC_MAX_USED_LEN]; + u8 key[CAAM_MAX_KEY_SIZE]; dma_addr_t sh_desc_enc_dma; dma_addr_t sh_desc_dec_dma; dma_addr_t sh_desc_givenc_dma; - u32 class1_alg_type; - u32 class2_alg_type; - u32 alg_op; - u8 key[CAAM_MAX_KEY_SIZE]; dma_addr_t key_dma; - unsigned int enckeylen; - unsigned int split_key_len; - unsigned int split_key_pad_len; + struct device *jrdev; + struct alginfo adata; + struct alginfo cdata; unsigned int authsize; }; -static void append_key_aead(u32 *desc, struct caam_ctx *ctx, - int keys_fit_inline, bool is_rfc3686) -{ - u32 *nonce; - unsigned int enckeylen = ctx->enckeylen; - - /* - * RFC3686 specific: - * | ctx->key = {AUTH_KEY, ENC_KEY, NONCE} - * | enckeylen = encryption key size + nonce size - */ - if (is_rfc3686) - enckeylen -= CTR_RFC3686_NONCE_SIZE; - - if (keys_fit_inline) { - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - append_key_as_imm(desc, (void *)ctx->key + - ctx->split_key_pad_len, enckeylen, - enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - } else { - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - append_key(desc, ctx->key_dma + ctx->split_key_pad_len, - enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - } - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len + - enckeylen); - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } -} - -static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, - int keys_fit_inline, bool is_rfc3686) -{ - u32 *key_jump_cmd; - - /* Note: Context registers are saved. 
*/ - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - set_jump_tgt_here(desc, key_jump_cmd); -} - static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN - + ctx->adata.keylen_pad; /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - if (DESC_AEAD_NULL_ENC_LEN + AEAD_DESC_JOB_IO_LEN + - ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_AEAD_NULL_ENC_LEN) { + ctx->adata.key_inline = true; + ctx->adata.key_virt = ctx->key; + } else { + ctx->adata.key_inline = false; + ctx->adata.key_dma = ctx->key_dma; + } /* aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - set_jump_tgt_here(desc, key_jump_cmd); - - /* assoclen + cryptlen = seqinlen */ - append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Prepare to read and write cryptlen + assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. 
- */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | - MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | - MOVE_DEST_DESCBUF | - MOVE_WAITCOMP | - (0x8 << MOVE_LEN_SHIFT)); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | - MOVE_AUX_LS); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "aead null enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN + - ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - desc = ctx->sh_desc_dec; + if (rem_bytes >= DESC_AEAD_NULL_DEC_LEN) { + ctx->adata.key_inline = true; + ctx->adata.key_virt = ctx->key; + } else { + ctx->adata.key_inline = false; + ctx->adata.key_dma = ctx->key_dma; + } /* aead_decrypt shared descriptor */ - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* assoclen + cryptlen = seqoutlen */ - append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Prepare to read and write cryptlen + assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | - MOVE_DEST_MATH2 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | - MOVE_DEST_DESCBUF | - MOVE_WAITCOMP | - (0x8 << MOVE_LEN_SHIFT)); - - /* Read and write cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* - * Insert a NOP here, since we need at least 4 instructions between - * code patching the descriptor buffer and the location being patched. 
- */ - jump_cmd = append_jump(desc, JUMP_TEST_ALL); - set_jump_tgt_here(desc, jump_cmd); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | - MOVE_AUX_LS); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Load ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | - FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "aead null dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + desc = ctx->sh_desc_dec; + cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -470,11 +202,11 @@ static int aead_set_sh_desc(struct crypto_aead *aead) unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline; - u32 geniv, moveiv; u32 ctx1_iv_off = 0; - u32 *desc; - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + u32 *desc, *nonce = NULL; + u32 inl_mask; + unsigned int data_len[2]; + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; @@ -482,7 +214,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) return 0; /* NULL encryption / decryption */ - if (!ctx->enckeylen) + if (!ctx->cdata.keylen) return aead_null_set_sh_desc(aead); /* @@ -497,8 +229,14 @@ static int aead_set_sh_desc(struct crypto_aead *aead) * RFC3686 specific: * CONTEXT1[255:128] = {NONCE, IV, COUNTER} */ - if (is_rfc3686) + if (is_rfc3686) { ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE; + nonce = (u32 *)((void *)ctx->key + ctx->adata.keylen_pad + + ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE); + } + + data_len[0] = ctx->adata.keylen_pad; + data_len[1] = ctx->cdata.keylen; if (alg->caam.geniv) goto skip_enc; @@ -507,146 +245,63 @@ static int aead_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_ENC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_encrypt shared descriptor */ - desc = ctx->sh_desc_enc; - - /* Note: Context registers are saved. */ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); + if (desc_inline_query(DESC_AEAD_ENC_LEN + + (is_rfc3686 ? 
DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; + else + ctx->adata.key_dma = ctx->key_dma; - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; + else + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - FIFOLDST_VLF); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + /* aead_encrypt shared descriptor */ + desc = ctx->sh_desc_enc; + cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ctx->authsize, + is_rfc3686, nonce, ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); skip_enc: /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_DEC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_decrypt shared descriptor */ - desc = ctx->sh_desc_dec; - - /* Note: Context registers are saved. */ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); + if (desc_inline_query(DESC_AEAD_DEC_LEN + + (is_rfc3686 ? 
DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - if (alg->caam.geniv) - append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; else - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - KEY_VLF); - - if (alg->caam.geniv) { - append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - (ctx1_iv_off << LDST_OFFSET_SHIFT)); - append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); - } - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); + ctx->adata.key_dma = ctx->key_dma; - /* Choose operation */ - if (ctr_mode) - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; else - append_dec_op1(desc, ctx->class1_alg_type); + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* Read and write cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); - /* Load ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | - FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + /* aead_decrypt shared descriptor */ + desc = ctx->sh_desc_dec; + cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, alg->caam.geniv, is_rfc3686, + nonce, ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); if (!alg->caam.geniv) goto skip_givenc; @@ -655,105 +310,32 @@ skip_enc: * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_GIVENC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_enc; - - /* Note: Context registers are saved. 
*/ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - if (is_rfc3686) - goto copy_iv; - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (ivsize << MOVE_LEN_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - -copy_iv: - /* Copy IV to class 1 context */ - append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (ivsize << MOVE_LEN_SHIFT)); - - /* Return to encryption */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - KEY_VLF); - - /* Copy iv from outfifo to class 2 fifo */ - moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | - NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | - LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Will write ivsize + cryptlen */ - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + if (desc_inline_query(DESC_AEAD_GIVENC_LEN + + (is_rfc3686 ? 
DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Not need to reload iv */ - append_seq_fifo_load(desc, ivsize, - FIFOLD_CLASS_SKIP); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; + else + ctx->adata.key_dma = ctx->key_dma; - /* Will read cryptlen */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; + else + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead givenc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + /* aead_givencrypt shared descriptor */ + desc = ctx->sh_desc_enc; + cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, is_rfc3686, nonce, + ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); skip_givenc: return 0; @@ -774,12 +356,11 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd, *zero_payload_jump_cmd, - *zero_assoc_jump_cmd1, *zero_assoc_jump_cmd2; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -787,175 +368,35 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_GCM_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* skip key loading if they are loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD | JUMP_COND_SELF); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* if assoclen + cryptlen is ZERO, skip to ICV write */ - append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - /* if assoclen is ZERO, skip reading the assoc data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* skip assoc data */ - 
append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* cryptlen = seqinlen - assoclen */ - append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ); - - /* if cryptlen is ZERO jump to zero-payload commands */ - zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* write encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - - /* jump the zero-payload commands */ - append_jump(desc, JUMP_TEST_ALL | 2); - - /* zero-payload commands */ - set_jump_tgt_here(desc, zero_payload_jump_cmd); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); - - /* There is no input data */ - set_jump_tgt_here(desc, zero_assoc_jump_cmd2); - - /* write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "gcm enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_GCM_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* skip key loading if they are loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD | - JUMP_COND_SELF); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* if assoclen is ZERO, skip reading the assoc data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - - /* cryptlen = seqoutlen - assoclen */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* 
jump to zero-payload command if cryptlen is zero */ - zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* store encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* zero-payload command */ - set_jump_tgt_here(desc, zero_payload_jump_cmd); - - /* read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "gcm dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -974,11 +415,11 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -986,148 +427,35 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4106_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - /* Skip IV */ - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Will read cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* cryptlen = seqoutlen - assoclen */ - append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ); - - /* Write encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | 
FIFOLDST_VLF); - - /* Read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4106 enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_RFC4106_DEC_LEN + DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4106_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - /* Skip IV */ - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Will read cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ); - - /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* Will write cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Store payload data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* Read encrypted data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* Read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4106 dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -1147,12 +475,11 
@@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd; - u32 *read_move_cmd, *write_move_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -1160,151 +487,35 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4543_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* assoclen + cryptlen = seqinlen */ - append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | - (0x8 << MOVE_LEN_SHIFT)); - - /* Will read assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Will write assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Read and write assoclen + cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_AAD); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move payload data to OFIFO */ - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4543 enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4543_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = 
false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* assoclen + cryptlen = seqoutlen */ - append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | - (0x8 << MOVE_LEN_SHIFT)); - - /* Will read assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Will write assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Store payload data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* In-snoop assoclen + cryptlen data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move payload data to OFIFO */ - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4543 dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -1320,19 +531,9 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, return 0; } -static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, - u32 authkeylen) -{ - return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, - ctx->split_key_pad_len, key_in, authkeylen, - ctx->alg_op); -} - static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ - static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; struct crypto_authenc_keys keys; @@ -1341,53 +542,32 @@ static int aead_setkey(struct crypto_aead *aead, if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto badkey; - /* Pick class 2 key length from algorithm submask */ - ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> - OP_ALG_ALGSEL_SHIFT] * 2; - ctx->split_key_pad_len = 
ALIGN(ctx->split_key_len, 16); - - if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE) - goto badkey; - #ifdef DEBUG printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", keys.authkeylen + keys.enckeylen, keys.enckeylen, keys.authkeylen); - printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", - ctx->split_key_len, ctx->split_key_pad_len); print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif - ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey, + keys.authkeylen, CAAM_MAX_KEY_SIZE - + keys.enckeylen); if (ret) { goto badkey; } /* postpend encryption key to auth split key */ - memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen); - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + - keys.enckeylen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen); + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad + + keys.enckeylen, DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->split_key_pad_len + keys.enckeylen, 1); + ctx->adata.keylen_pad + keys.enckeylen, 1); #endif - - ctx->enckeylen = keys.enckeylen; - - ret = aead_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + - keys.enckeylen, DMA_TO_DEVICE); - } - - return ret; + ctx->cdata.keylen = keys.enckeylen; + return aead_set_sh_desc(aead); badkey: crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; @@ -1398,7 +578,6 @@ static int gcm_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", @@ -1406,21 +585,10 @@ static int gcm_setkey(struct crypto_aead *aead, #endif memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - ctx->enckeylen = keylen; - - ret = gcm_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, - DMA_TO_DEVICE); - } + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); + ctx->cdata.keylen = keylen; - return ret; + return gcm_set_sh_desc(aead); } static int rfc4106_setkey(struct crypto_aead *aead, @@ -1428,7 +596,6 @@ static int rfc4106_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; if (keylen < 4) return -EINVAL; @@ -1444,22 +611,10 @@ static int rfc4106_setkey(struct crypto_aead *aead, * The last four bytes of the key material are used as the salt value * in the nonce. Update the AES key length. 
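 * For example (illustrative note, not part of this patch): a 20-byte * rfc4106(gcm(aes)) key is a 16-byte AES-128 key followed by a 4-byte * salt, so the AES key length recorded below is keylen - 4 = 16.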
*/ - ctx->enckeylen = keylen - 4; - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - - ret = rfc4106_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, - DMA_TO_DEVICE); - } - - return ret; + ctx->cdata.keylen = keylen - 4; + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen, + DMA_TO_DEVICE); + return rfc4106_set_sh_desc(aead); } static int rfc4543_setkey(struct crypto_aead *aead, @@ -1467,7 +622,6 @@ static int rfc4543_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; if (keylen < 4) return -EINVAL; @@ -1483,43 +637,28 @@ static int rfc4543_setkey(struct crypto_aead *aead, * The last four bytes of the key material are used as the salt value * in the nonce. Update the AES key length. */ - ctx->enckeylen = keylen - 4; - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - - ret = rfc4543_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, - DMA_TO_DEVICE); - } - - return ret; + ctx->cdata.keylen = keylen - 4; + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen, + DMA_TO_DEVICE); + return rfc4543_set_sh_desc(aead); } static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, const u8 *key, unsigned int keylen) { struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); - struct ablkcipher_tfm *crt = &ablkcipher->base.crt_ablkcipher; struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher); const char *alg_name = crypto_tfm_alg_name(tfm); struct device *jrdev = ctx->jrdev; - int ret = 0; - u32 *key_jump_cmd; + unsigned int ivsize = crypto_ablkcipher_ivsize(ablkcipher); u32 *desc; - u8 *nonce; - u32 geniv; u32 ctx1_iv_off = 0; - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = (ctr_mode && (strstr(alg_name, "rfc3686") != NULL)); + memcpy(ctx->key, key, keylen); #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); @@ -1542,215 +681,33 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, keylen -= CTR_RFC3686_NONCE_SIZE; } - memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - ctx->enckeylen = keylen; + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); + ctx->cdata.keylen = keylen; + ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_inline = true; /* ablkcipher_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); + cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), 
DMA_TO_DEVICE); - /* Load nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - - set_jump_tgt_here(desc, key_jump_cmd); - - /* Load iv */ - append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; + cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); - - /* Load nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - - set_jump_tgt_here(desc, key_jump_cmd); - - /* load IV */ - append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Choose operation */ - if (ctr_mode) - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); - else - append_dec_op1(desc, ctx->class1_alg_type); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } - -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* ablkcipher_givencrypt shared descriptor */ desc = ctx->sh_desc_givenc; + cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); + dma_sync_single_for_device(jrdev, ctx->sh_desc_givenc_dma, + 
desc_bytes(desc), DMA_TO_DEVICE); - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); - - /* Load Nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - set_jump_tgt_here(desc, key_jump_cmd); - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (crt->ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_INFIFO | - MOVE_DEST_CLASS1CTX | - (crt->ivsize << MOVE_LEN_SHIFT) | - (ctx1_iv_off << MOVE_OFFSET_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Copy generated IV to memory */ - append_seq_store(desc, crt->ivsize, - LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | - (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - if (ctx1_iv_off) - append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP | - (1 << JUMP_OFFSET_SHIFT)); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - - ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - - return ret; + return 0; } static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, @@ -1758,8 +715,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, { struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); struct device *jrdev = ctx->jrdev; - u32 *key_jump_cmd, *desc; - __be64 sector_size = cpu_to_be64(512); + u32 *desc; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { crypto_ablkcipher_set_flags(ablkcipher, @@ -1769,126 +725,38 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, } memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - ctx->enckeylen = keylen; + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); + ctx->cdata.keylen = keylen; + ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_inline = true; /* xts_ablkcipher_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = 
append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 keys only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - - /* Load sector size with index 40 bytes (0x28) */ - append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8); - append_data(desc, (void *)§or_size, 8); - - set_jump_tgt_here(desc, key_jump_cmd); - - /* - * create sequence for loading the sector index - * Upper 8B of IV - will be used as sector index - * Lower 8B of IV - will be discarded - */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8); - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | - OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif + cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* xts_ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - - /* Load sector size with index 40 bytes (0x28) */ - append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8); - append_data(desc, (void *)§or_size, 8); - - set_jump_tgt_here(desc, key_jump_cmd); - - /* - * create sequence for loading the sector index - * Upper 8B of IV - will be used as sector index - * Lower 8B of IV - will be discarded - */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8); - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Load operation */ - append_dec_op1(desc, ctx->class1_alg_type); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dma_unmap_single(jrdev, ctx->sh_desc_enc_dma, - desc_bytes(ctx->sh_desc_enc), DMA_TO_DEVICE); - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif + cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } /* * aead_edesc - s/w-extended aead descriptor - * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist - * @src_nents: number of segments in input scatterlist - * @dst_nents: number of segments in output scatterlist - * @iv_dma: dma address of iv for checking continuity and link table - 
* @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) + * @src_nents: number of segments in input s/w scatterlist + * @dst_nents: number of segments in output s/w scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table + * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct aead_edesc { - int assoc_nents; int src_nents; int dst_nents; - dma_addr_t iv_dma; int sec4_sg_bytes; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; @@ -1897,12 +765,12 @@ struct aead_edesc { /* * ablkcipher_edesc - s/w-extended ablkcipher descriptor - * @src_nents: number of segments in input scatterlist - * @dst_nents: number of segments in output scatterlist + * @src_nents: number of segments in input s/w scatterlist + * @dst_nents: number of segments in output s/w scatterlist * @iv_dma: dma address of iv for checking continuity and link table - * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table + * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct ablkcipher_edesc { @@ -1922,10 +790,11 @@ static void caam_unmap(struct device *dev, struct scatterlist *src, int sec4_sg_bytes) { if (dst != src) { - dma_unmap_sg(dev, src, src_nents ? : 1, DMA_TO_DEVICE); - dma_unmap_sg(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE); + if (src_nents) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); } else { - dma_unmap_sg(dev, src, src_nents ? : 1, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); } if (iv_dma) @@ -2019,8 +888,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ablkcipher_edesc *)((char *)desc - - offsetof(struct ablkcipher_edesc, hw_desc)); + edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -2031,7 +899,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, edesc->src_nents > 1 ? 100 : ivsize, 1); dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true); + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); #endif ablkcipher_unmap(jrdev, edesc, req); @@ -2052,8 +920,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ablkcipher_edesc *)((char *)desc - - offsetof(struct ablkcipher_edesc, hw_desc)); + edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -2063,7 +930,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, ivsize, 1); dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true); + edesc->dst_nents > 1 ? 
100 : req->nbytes, 1); #endif ablkcipher_unmap(jrdev, edesc, req); @@ -2096,7 +963,7 @@ static void init_aead_job(struct aead_request *req, init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); if (all_contig) { - src_dma = sg_dma_address(req->src); + src_dma = edesc->src_nents ? sg_dma_address(req->src) : 0; in_options = 0; } else { src_dma = edesc->sec4_sg_dma; @@ -2111,7 +978,7 @@ static void init_aead_job(struct aead_request *req, out_options = in_options; if (unlikely(req->src != req->dst)) { - if (!edesc->dst_nents) { + if (edesc->dst_nents == 1) { dst_dma = sg_dma_address(req->dst); } else { dst_dma = edesc->sec4_sg_dma + @@ -2157,7 +1024,7 @@ static void init_gcm_job(struct aead_request *req, FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | 12 | last); /* Append Salt */ if (!generic_gcm) - append_data(desc, ctx->key + ctx->enckeylen, 4); + append_data(desc, ctx->key + ctx->cdata.keylen, 4); /* Append IV */ append_data(desc, req->iv, ivsize); /* End of blank commands */ @@ -2172,7 +1039,7 @@ static void init_authenc_job(struct aead_request *req, struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; u32 *desc = edesc->hw_desc; @@ -2218,15 +1085,14 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, int len, sec4_sg_index = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); - printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes); + pr_err("asked=%d, nbytes%d\n", + (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes); dbg_dump_sg(KERN_ERR, "src @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1, may_sleep); + edesc->src_nents > 1 ? 100 : req->nbytes, 1); #endif len = desc_len(sh_desc); @@ -2243,7 +1109,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options); if (likely(req->src == req->dst)) { - if (!edesc->src_nents && iv_contig) { + if (edesc->src_nents == 1 && iv_contig) { dst_dma = sg_dma_address(req->src); } else { dst_dma = edesc->sec4_sg_dma + @@ -2251,7 +1117,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, out_options = LDST_SGF; } } else { - if (!edesc->dst_nents) { + if (edesc->dst_nents == 1) { dst_dma = sg_dma_address(req->dst); } else { dst_dma = edesc->sec4_sg_dma + @@ -2278,20 +1144,18 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, int len, sec4_sg_index = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); dbg_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1, may_sleep); + edesc->src_nents > 1 ? 
100 : req->nbytes, 1); #endif len = desc_len(sh_desc); init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); - if (!edesc->src_nents) { + if (edesc->src_nents == 1) { src_dma = sg_dma_address(req->src); in_options = 0; } else { @@ -2324,85 +1188,98 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0; + int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; struct aead_edesc *edesc; - int sgc; - bool all_contig = true; - int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int sec4_sg_index, sec4_sg_len, sec4_sg_bytes; unsigned int authsize = ctx->authsize; if (unlikely(req->dst != req->src)) { - src_nents = sg_count(req->src, req->assoclen + req->cryptlen); - dst_nents = sg_count(req->dst, - req->assoclen + req->cryptlen + - (encrypt ? authsize : (-authsize))); - } else { - src_nents = sg_count(req->src, - req->assoclen + req->cryptlen + - (encrypt ? authsize : 0)); - } - - /* Check if data are contiguous. */ - all_contig = !src_nents; - if (!all_contig) { - src_nents = src_nents ? : 1; - sec4_sg_len = src_nents; - } - - sec4_sg_len += dst_nents; - - sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + src_nents = sg_nents_for_len(req->src, req->assoclen + + req->cryptlen); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->assoclen + req->cryptlen); + return ERR_PTR(src_nents); + } - /* allocate space for base edesc and hw desc commands, link tables */ - edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, - GFP_DMA | flags); - if (!edesc) { - dev_err(jrdev, "could not allocate extended descriptor\n"); - return ERR_PTR(-ENOMEM); + dst_nents = sg_nents_for_len(req->dst, req->assoclen + + req->cryptlen + + (encrypt ? authsize : + (-authsize))); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->assoclen + req->cryptlen + + (encrypt ? authsize : (-authsize))); + return ERR_PTR(dst_nents); + } + } else { + src_nents = sg_nents_for_len(req->src, req->assoclen + + req->cryptlen + + (encrypt ? authsize : 0)); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->assoclen + req->cryptlen + + (encrypt ? authsize : 0)); + return ERR_PTR(src_nents); + } } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); - if (unlikely(!sgc)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { dev_err(jrdev, "unable to map source\n"); - kfree(edesc); return ERR_PTR(-ENOMEM); } } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - if (unlikely(!sgc)) { - dev_err(jrdev, "unable to map source\n"); - kfree(edesc); - return ERR_PTR(-ENOMEM); + /* Cover also the case of null (zero length) input data */ + if (src_nents) { + mapped_src_nents = dma_map_sg(jrdev, req->src, + src_nents, DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + } else { + mapped_src_nents = 0; } - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? 
: 1, - DMA_FROM_DEVICE); - if (unlikely(!sgc)) { + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { dev_err(jrdev, "unable to map destination\n"); - dma_unmap_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - kfree(edesc); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return ERR_PTR(-ENOMEM); } } + sec4_sg_len = mapped_src_nents > 1 ? mapped_src_nents : 0; + sec4_sg_len += mapped_dst_nents > 1 ? mapped_dst_nents : 0; + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ + edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) { + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); + return ERR_PTR(-ENOMEM); + } + edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; - *all_contig_ptr = all_contig; + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; - if (!all_contig) { - sg_to_sec4_sg_last(req->src, src_nents, - edesc->sec4_sg + sec4_sg_index, 0); - sec4_sg_index += src_nents; + if (mapped_src_nents > 1) { + sg_to_sec4_sg_last(req->src, mapped_src_nents, + edesc->sec4_sg + sec4_sg_index, 0); + sec4_sg_index += mapped_src_nents; } - if (dst_nents) { - sg_to_sec4_sg_last(req->dst, dst_nents, + if (mapped_dst_nents > 1) { + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg + sec4_sg_index, 0); } @@ -2556,11 +1433,9 @@ static int aead_decrypt(struct aead_request *req) int ret = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1, may_sleep); + req->assoclen + req->cryptlen, 1); #endif /* allocate extended descriptor */ @@ -2602,51 +1477,80 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0, sec4_sg_bytes; + int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; - bool iv_contig = false; - int sgc; + bool in_contig; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int sec4_sg_index; + int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; - src_nents = sg_count(req->src, req->nbytes); + src_nents = sg_nents_for_len(req->src, req->nbytes); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->nbytes); + return ERR_PTR(src_nents); + } - if (req->dst != req->src) - dst_nents = sg_count(req->dst, req->nbytes); + if (req->dst != req->src) { + dst_nents = sg_nents_for_len(req->dst, req->nbytes); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->nbytes); + return ERR_PTR(dst_nents); + } + } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? 
: 1, - DMA_FROM_DEVICE); + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); + return ERR_PTR(-ENOMEM); + } } iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, iv_dma)) { dev_err(jrdev, "unable to map IV\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); return ERR_PTR(-ENOMEM); } - /* - * Check if iv can be contiguous with source and destination. - * If so, include it. If not, create scatterlist. - */ - if (!src_nents && iv_dma + ivsize == sg_dma_address(req->src)) - iv_contig = true; - else - src_nents = src_nents ? : 1; - sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * - sizeof(struct sec4_sg_entry); + if (mapped_src_nents == 1 && + iv_dma + ivsize == sg_dma_address(req->src)) { + in_contig = true; + sec4_sg_ents = 0; + } else { + in_contig = false; + sec4_sg_ents = 1 + mapped_src_nents; + } + dst_sg_idx = sec4_sg_ents; + sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0; + sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2656,23 +1560,24 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; - sec4_sg_index = 0; - if (!iv_contig) { + if (!in_contig) { dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0); - sg_to_sec4_sg_last(req->src, src_nents, + sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg + 1, 0); - sec4_sg_index += 1 + src_nents; } - if (dst_nents) { - sg_to_sec4_sg_last(req->dst, dst_nents, - edesc->sec4_sg + sec4_sg_index, 0); + if (mapped_dst_nents > 1) { + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, + edesc->sec4_sg + dst_sg_idx, 0); } edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { dev_err(jrdev, "unable to map S/G table\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); + kfree(edesc); return ERR_PTR(-ENOMEM); } @@ -2684,7 +1589,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request sec4_sg_bytes, 1); #endif - *iv_contig_out = iv_contig; + *iv_contig_out = in_contig; return edesc; } @@ -2778,27 +1683,52 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0, sec4_sg_bytes; + int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; - bool iv_contig = false; - int sgc; + bool out_contig; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int sec4_sg_index; + int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; - src_nents = sg_count(req->src, req->nbytes); - - if (unlikely(req->dst != req->src)) - dst_nents = sg_count(req->dst, req->nbytes); + src_nents = sg_nents_for_len(req->src, req->nbytes); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->nbytes); + return ERR_PTR(src_nents); + } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + + dst_nents = src_nents; + mapped_dst_nents = src_nents; } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + + dst_nents = sg_nents_for_len(req->dst, req->nbytes); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->nbytes); + return ERR_PTR(dst_nents); + } + + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); + return ERR_PTR(-ENOMEM); + } } /* @@ -2808,21 +1738,29 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, iv_dma)) { dev_err(jrdev, "unable to map IV\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); return ERR_PTR(-ENOMEM); } - if (!dst_nents && iv_dma + ivsize == sg_dma_address(req->dst)) - iv_contig = true; - else - dst_nents = dst_nents ? : 1; - sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * - sizeof(struct sec4_sg_entry); + sec4_sg_ents = mapped_src_nents > 1 ? 
mapped_src_nents : 0; + dst_sg_idx = sec4_sg_ents; + if (mapped_dst_nents == 1 && + iv_dma + ivsize == sg_dma_address(req->dst)) { + out_contig = true; + } else { + out_contig = false; + sec4_sg_ents += 1 + mapped_dst_nents; + } /* allocate space for base edesc and hw desc commands, link tables */ + sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry); edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2832,24 +1770,24 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; - sec4_sg_index = 0; - if (src_nents) { - sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); - sec4_sg_index += src_nents; - } + if (mapped_src_nents > 1) + sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg, + 0); - if (!iv_contig) { - dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, + if (!out_contig) { + dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0); - sec4_sg_index += 1; - sg_to_sec4_sg_last(req->dst, dst_nents, - edesc->sec4_sg + sec4_sg_index, 0); + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, + edesc->sec4_sg + dst_sg_idx + 1, 0); } edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { dev_err(jrdev, "unable to map S/G table\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); + kfree(edesc); return ERR_PTR(-ENOMEM); } edesc->iv_dma = iv_dma; @@ -2861,7 +1799,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( sec4_sg_bytes, 1); #endif - *iv_contig_out = iv_contig; + *iv_contig_out = out_contig; return edesc; } @@ -2872,7 +1810,7 @@ static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq) struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); struct device *jrdev = ctx->jrdev; - bool iv_contig; + bool iv_contig = false; u32 *desc; int ret = 0; @@ -2916,7 +1854,6 @@ struct caam_alg_template { } template_u; u32 class1_alg_type; u32 class2_alg_type; - u32 alg_op; }; static struct caam_alg_template driver_algs[] = { @@ -3101,7 +2038,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3123,7 +2059,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3145,7 +2080,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3167,7 +2101,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3189,7 +2122,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3211,7 +2143,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA512 | 
OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3233,7 +2164,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3256,7 +2186,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3279,7 +2208,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3302,7 +2230,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3325,7 +2252,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3348,7 +2274,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3371,7 +2296,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3394,7 +2318,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3417,7 +2340,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3440,7 +2362,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3463,7 +2384,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3486,7 +2406,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3509,7 +2428,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, } }, { @@ -3532,7 +2450,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = 
OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, } }, @@ -3556,7 +2473,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3580,7 +2496,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3604,7 +2519,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3628,7 +2542,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3652,7 +2565,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3676,7 +2588,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3700,7 +2611,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3724,7 +2634,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3748,7 +2657,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3772,7 +2680,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3795,7 +2702,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3818,7 +2724,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3841,7 +2746,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3864,7 +2768,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | 
OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3887,7 +2790,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3910,7 +2812,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3933,7 +2834,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3956,7 +2856,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3979,7 +2878,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -4002,7 +2900,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -4025,7 +2922,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -4048,7 +2944,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -4073,7 +2968,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4098,7 +2992,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4124,7 +3017,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4149,7 +3041,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4175,7 +3066,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4200,7 +3090,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op 
= OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4226,7 +3115,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4251,7 +3139,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4277,7 +3164,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4302,7 +3188,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4328,7 +3213,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4353,7 +3237,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4368,16 +3251,34 @@ struct caam_crypto_alg { static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam) { + dma_addr_t dma_addr; + ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->jrdev); } + dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, + offsetof(struct caam_ctx, + sh_desc_enc_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->jrdev, dma_addr)) { + dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); + caam_jr_free(ctx->jrdev); + return -ENOMEM; + } + + ctx->sh_desc_enc_dma = dma_addr; + ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, + sh_desc_dec); + ctx->sh_desc_givenc_dma = dma_addr + offsetof(struct caam_ctx, + sh_desc_givenc); + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + /* copy descriptor header template value */ - ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; - ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; - ctx->alg_op = OP_TYPE_CLASS2_ALG | caam->alg_op; + ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; + ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; return 0; } @@ -4404,25 +3305,9 @@ static int caam_aead_init(struct crypto_aead *tfm) static void caam_exit_common(struct caam_ctx *ctx) { - if (ctx->sh_desc_enc_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_enc_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_enc_dma, - desc_bytes(ctx->sh_desc_enc), DMA_TO_DEVICE); - if (ctx->sh_desc_dec_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_dec_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_dec_dma, - desc_bytes(ctx->sh_desc_dec), DMA_TO_DEVICE); - if (ctx->sh_desc_givenc_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_givenc_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_givenc_dma, - desc_bytes(ctx->sh_desc_givenc), - DMA_TO_DEVICE); - if (ctx->key_dma && - !dma_mapping_error(ctx->jrdev, ctx->key_dma)) - 
dma_unmap_single(ctx->jrdev, ctx->key_dma, - ctx->enckeylen + ctx->split_key_pad_len, - DMA_TO_DEVICE); - + dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, + offsetof(struct caam_ctx, sh_desc_enc_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } @@ -4498,7 +3383,6 @@ static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template t_alg->caam.class1_alg_type = template->class1_alg_type; t_alg->caam.class2_alg_type = template->class2_alg_type; - t_alg->caam.alg_op = template->alg_op; return t_alg; } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c new file mode 100644 index 000000000000..f3f48c10b9d6 --- /dev/null +++ b/drivers/crypto/caam/caamalg_desc.c @@ -0,0 +1,1306 @@ +/* + * Shared descriptors for aead, ablkcipher algorithms + * + * Copyright 2016 NXP + */ + +#include "compat.h" +#include "desc_constr.h" +#include "caamalg_desc.h" + +/* + * For aead functions, read payload and write payload, + * both of which are specified in req->src and req->dst + */ +static inline void aead_append_src_dst(u32 *desc, u32 msg_type) +{ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | + KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH); +} + +/* Set DK bit in class 1 operation if shared */ +static inline void append_dec_op1(u32 *desc, u32 type) +{ + u32 *jump_cmd, *uncond_jump_cmd; + + /* DK bit is valid only for AES */ + if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) { + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + return; + } + + jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD); + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, jump_cmd); + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_AAI_DK); + set_jump_tgt_here(desc, uncond_jump_cmd); +} + +/** + * cnstr_shdsc_aead_null_encap - IPSec ESP encapsulation shared descriptor + * (non-protocol) with no (null) encryption. + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * + * Note: Requires an MDHA split key. 
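 *
 * For illustration only, a minimal caller sketch, assuming the split key has
 * already been generated into ctx->key, the alginfo fields were filled by the
 * setkey path, and the shared-descriptor buffer is DMA-mapped as in
 * caamalg.c (the local names below are the ones that driver uses):
 *
 *	ctx->adata.key_virt = ctx->key;
 *	ctx->adata.key_inline = true;	(assumes the split key fits inline)
 *	cnstr_shdsc_aead_null_encap(ctx->sh_desc_enc, &ctx->adata,
 *				    ctx->authsize);
 *	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 *				   desc_bytes(ctx->sh_desc_enc),
 *				   DMA_TO_DEVICE);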
+ */ +void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* assoclen + cryptlen = seqinlen */ + append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Prepare to read and write cryptlen + assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap); + +/** + * cnstr_shdsc_aead_null_decap - IPSec ESP decapsulation shared descriptor + * (non-protocol) with no (null) decryption. + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * + * Note: Requires an MDHA split key. 
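 *
 * A rough sketch of how the required MDHA split key might be produced before
 * this constructor runs, mirroring the aead_setkey path in caamalg.c (the
 * gen_split_key() call and variable names are taken from that path; error
 * handling is omitted):
 *
 *	ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey,
 *			    keys.authkeylen,
 *			    CAAM_MAX_KEY_SIZE - keys.enckeylen);
 *	if (!ret) {
 *		ctx->adata.key_virt = ctx->key;
 *		cnstr_shdsc_aead_null_decap(ctx->sh_desc_dec, &ctx->adata,
 *					    ctx->authsize);
 *	}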
+ */ +void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd, *jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* assoclen + cryptlen = seqoutlen */ + append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Prepare to read and write cryptlen + assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH2 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* + * Insert a NOP here, since we need at least 4 instructions between + * code patching the descriptor buffer and the location being patched. + */ + jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, jump_cmd); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Load ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap); + +static void init_sh_desc_key_aead(u32 * const desc, + struct alginfo * const cdata, + struct alginfo * const adata, + const bool is_rfc3686, u32 *nonce) +{ + u32 *key_jump_cmd; + unsigned int enckeylen = cdata->keylen; + + /* Note: Context registers are saved. 
*/ + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* + * RFC3686 specific: + * | key = {AUTH_KEY, ENC_KEY, NONCE} + * | enckeylen = encryption key size + nonce size + */ + if (is_rfc3686) + enckeylen -= CTR_RFC3686_NONCE_SIZE; + + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, enckeylen, + enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, enckeylen, CLASS_1 | + KEY_DEST_CLASS_REG); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) { + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, + MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | + (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); +} + +/** + * cnstr_shdsc_aead_encap - IPSec ESP encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int icvsize, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off) +{ + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + FIFOLDST_VLF); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_encap); + +/** + * cnstr_shdsc_aead_decap - IPSec ESP decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @ivsize: initialization vector size + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool geniv, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off) +{ + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + if (geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + if (geniv) { + append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (ctx1_iv_off << LDST_OFFSET_SHIFT)); + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); + } + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Choose operation */ + if (ctx1_iv_off) + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + else + append_dec_op1(desc, cdata->algtype); + + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG); + + /* Load ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_decap); + +/** + * cnstr_shdsc_aead_givencap - IPSec ESP encapsulation shared descriptor + * (non-protocol) with HW-generated initialization + * vector. + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @ivsize: initialization vector size + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool is_rfc3686, + u32 *nonce, const u32 ctx1_iv_off) +{ + u32 geniv, moveiv; + + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + if (is_rfc3686) + goto copy_iv; + + /* Generate IV */ + geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | + NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_WAITCOMP | + MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | + (ivsize << MOVE_LEN_SHIFT)); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + +copy_iv: + /* Copy IV to class 1 context */ + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | + (ivsize << MOVE_LEN_SHIFT)); + + /* Return to encryption */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + /* Copy iv from outfifo to class 2 fifo */ + moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | + NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | + LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Will write ivsize + cryptlen */ + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Not need to reload iv */ + append_seq_fifo_load(desc, ivsize, + FIFOLD_CLASS_SKIP); + + /* Will read cryptlen */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead givenc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_givencap); + +/** + * cnstr_shdsc_gcm_encap - gcm encapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
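+ *         A typical caller sets cdata->algtype to OP_ALG_ALGSEL_AES |
+ *         OP_ALG_AAI_GCM and, depending on cdata->key_inline, points either
+ *         cdata->key_virt (key inlined into the descriptor) or
+ *         cdata->key_dma (key referenced by DMA address) at the GCM key.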
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1, + *zero_assoc_jump_cmd2; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* skip key loading if they are loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD | JUMP_COND_SELF); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* if assoclen + cryptlen is ZERO, skip to ICV write */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* cryptlen = seqinlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ); + + /* if cryptlen is ZERO jump to zero-payload commands */ + zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + set_jump_tgt_here(desc, zero_assoc_jump_cmd1); + + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* write encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + + /* jump the zero-payload commands */ + append_jump(desc, JUMP_TEST_ALL | 2); + + /* zero-payload commands */ + set_jump_tgt_here(desc, zero_payload_jump_cmd); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); + + /* There is no input data */ + set_jump_tgt_here(desc, zero_assoc_jump_cmd2); + + /* write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "gcm enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_gcm_encap); + +/** + * cnstr_shdsc_gcm_decap - gcm decapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
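+ *         The cdata layout is the same as for cnstr_shdsc_gcm_encap(); the
+ *         same transform definition can be reused for both directions.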
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* skip key loading if they are loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | + JUMP_TEST_ALL | JUMP_COND_SHRD | + JUMP_COND_SELF); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + set_jump_tgt_here(desc, zero_assoc_jump_cmd1); + + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* jump to zero-payload command if cryptlen is zero */ + zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* store encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* zero-payload command */ + set_jump_tgt_here(desc, zero_payload_jump_cmd); + + /* read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "gcm dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_gcm_decap); + +/** + * cnstr_shdsc_rfc4106_encap - IPSec ESP gcm encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
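+ *         For rfc4106, cdata->keylen is assumed to cover the AES key only,
+ *         i.e. the caller strips the trailing 4-byte salt of the setkey
+ *         material before filling in cdata.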
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + /* Skip IV */ + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Will read cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ); + + /* Write encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* Read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4106 enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap); + +/** + * cnstr_shdsc_rfc4106_decap - IPSec ESP gcm decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
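+ *         Key material layout matches cnstr_shdsc_rfc4106_encap().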
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + /* Skip IV */ + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Will read cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ); + + /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* Will write cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Store payload data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* Read encrypted data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* Read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4106 dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap); + +/** + * cnstr_shdsc_rfc4543_encap - IPSec ESP gmac encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. + * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* assoclen + cryptlen = seqinlen */ + append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. 
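+ * (MATH3 already holds the run-time length; the 6-byte read saves the
+ * descriptor words in front of the length field, and the 8-byte write-back
+ * re-writes them together with the length, effectively patching the final
+ * MOVE command below.)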
+ */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | + (0x8 << MOVE_LEN_SHIFT)); + + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Read and write assoclen + cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_AAD); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + /* Move payload data to OFIFO */ + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4543 enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap); + +/** + * cnstr_shdsc_rfc4543_decap - IPSec ESP gmac decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. + * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* assoclen + cryptlen = seqoutlen */ + append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. 
+ */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | + (0x8 << MOVE_LEN_SHIFT)); + + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Store payload data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* In-snoop assoclen + cryptlen data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + /* Move payload data to OFIFO */ + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4543 dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap); + +/* + * For ablkcipher encrypt and decrypt, read from req->src and + * write to req->dst + */ +static inline void ablkcipher_append_src_dst(u32 *desc) +{ + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | + KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); +} + +/** + * cnstr_shdsc_ablkcipher_encap - ablkcipher encapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. 
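+ *         For is_rfc3686, the 4-byte nonce is expected to sit in memory
+ *         directly after the key, i.e. at cdata->key_virt + cdata->keylen
+ *         (keylen itself does not count the nonce).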
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); + + /* Load iv */ + append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Load operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_encap); + +/** + * cnstr_shdsc_ablkcipher_decap - ablkcipher decapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. 
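+ *         Key and rfc3686 nonce layout are the same as for
+ *         cnstr_shdsc_ablkcipher_encap().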
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); + + /* load IV */ + append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Choose operation */ + if (ctx1_iv_off) + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + else + append_dec_op1(desc, cdata->algtype); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_decap); + +/** + * cnstr_shdsc_ablkcipher_givencap - ablkcipher encapsulation shared descriptor + * with HW-generated initialization vector. + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC. 
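+ *         The generated IV is also stored to the output sequence (see the
+ *         append_seq_store of ivsize below), so the caller gets the fresh IV
+ *         back together with the ciphertext.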
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd, geniv; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load Nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + set_jump_tgt_here(desc, key_jump_cmd); + + /* Generate IV */ + geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | + NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | NFIFOENTRY_PTYPE_RND | + (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO | + MOVE_DEST_CLASS1CTX | (ivsize << MOVE_LEN_SHIFT) | + (ctx1_iv_off << MOVE_OFFSET_SHIFT)); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Copy generated IV to memory */ + append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + if (ctx1_iv_off) + append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP | + (1 << JUMP_OFFSET_SHIFT)); + + /* Load operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap); + +/** + * cnstr_shdsc_xts_ablkcipher_encap - xts ablkcipher encapsulation shared + * descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS. 
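+ *
+ * A minimal caller-side sketch (illustrative; the XTS key layout - the two
+ * half-keys concatenated - and the buffer sizing are assumptions made here):
+ *
+ *	u32 desc[DESC_ABLKCIPHER_ENC_LEN / CAAM_CMD_SZ];
+ *	struct alginfo cdata = {
+ *		.algtype = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
+ *		.keylen = 2 * AES_KEYSIZE_128,
+ *		.key_virt = xts_key,
+ *	};
+ *
+ *	cnstr_shdsc_xts_ablkcipher_encap(desc, &cdata);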
+ */ +void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata) +{ + __be64 sector_size = cpu_to_be64(512); + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 keys only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load sector size with index 40 bytes (0x28) */ + append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (0x28 << LDST_OFFSET_SHIFT)); + + set_jump_tgt_here(desc, key_jump_cmd); + + /* + * create sequence for loading the sector index + * Upper 8B of IV - will be used as sector index + * Lower 8B of IV - will be discarded + */ + append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | + (0x20 << LDST_OFFSET_SHIFT)); + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Load operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap); + +/** + * cnstr_shdsc_xts_ablkcipher_decap - xts ablkcipher decapsulation shared + * descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS. + */ +void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata) +{ + __be64 sector_size = cpu_to_be64(512); + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load sector size with index 40 bytes (0x28) */ + append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (0x28 << LDST_OFFSET_SHIFT)); + + set_jump_tgt_here(desc, key_jump_cmd); + + /* + * create sequence for loading the sector index + * Upper 8B of IV - will be used as sector index + * Lower 8B of IV - will be discarded + */ + append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | + (0x20 << LDST_OFFSET_SHIFT)); + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Load operation */ + append_dec_op1(desc, cdata->algtype); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_decap); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FSL CAAM descriptor support"); +MODULE_AUTHOR("Freescale Semiconductor - NMG/STC"); diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h new file mode 100644 index 000000000000..95551737333a --- /dev/null +++ b/drivers/crypto/caam/caamalg_desc.h @@ -0,0 +1,97 @@ +/* + * Shared descriptors for aead, ablkcipher algorithms + * + * Copyright 2016 NXP + */ + +#ifndef _CAAMALG_DESC_H_ +#define _CAAMALG_DESC_H_ + +/* length of descriptors text */ +#define DESC_AEAD_BASE
(4 * CAAM_CMD_SZ) +#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ) +#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) +#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) + +/* Note: Nonce is counted in cdata.keylen */ +#define DESC_AEAD_CTR_RFC3686_LEN (4 * CAAM_CMD_SZ) + +#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ) + +#define DESC_GCM_BASE (3 * CAAM_CMD_SZ) +#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 16 * CAAM_CMD_SZ) +#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 12 * CAAM_CMD_SZ) + +#define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) +#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) +#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) + +#define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) +#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ) +#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ) + +#define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) +#define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ + 20 * CAAM_CMD_SZ) +#define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ + 15 * CAAM_CMD_SZ) + +void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize); + +void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize); + +void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int icvsize, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off); + +void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool geniv, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off); + +void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool is_rfc3686, + u32 *nonce, const u32 ctx1_iv_off); + +void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata); + +void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata); + +#endif /* _CAAMALG_DESC_H_ */ diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 660dc206969f..da4f94eab3da 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -72,7 +72,7 @@ #define CAAM_MAX_HASH_DIGEST_SIZE 
SHA512_DIGEST_SIZE /* length of descriptors text */ -#define DESC_AHASH_BASE (4 * CAAM_CMD_SZ) +#define DESC_AHASH_BASE (3 * CAAM_CMD_SZ) #define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) #define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) #define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) @@ -103,20 +103,14 @@ struct caam_hash_ctx { u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; - u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; dma_addr_t sh_desc_update_dma ____cacheline_aligned; dma_addr_t sh_desc_update_first_dma; dma_addr_t sh_desc_fin_dma; dma_addr_t sh_desc_digest_dma; - dma_addr_t sh_desc_finup_dma; struct device *jrdev; - u32 alg_type; - u32 alg_op; u8 key[CAAM_MAX_HASH_KEY_SIZE]; - dma_addr_t key_dma; int ctx_len; - unsigned int split_key_len; - unsigned int split_key_pad_len; + struct alginfo adata; }; /* ahash state */ @@ -143,6 +137,31 @@ struct caam_export_state { int (*finup)(struct ahash_request *req); }; +static inline void switch_buf(struct caam_hash_state *state) +{ + state->current_buf ^= 1; +} + +static inline u8 *current_buf(struct caam_hash_state *state) +{ + return state->current_buf ? state->buf_1 : state->buf_0; +} + +static inline u8 *alt_buf(struct caam_hash_state *state) +{ + return state->current_buf ? state->buf_0 : state->buf_1; +} + +static inline int *current_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? &state->buflen_1 : &state->buflen_0; +} + +static inline int *alt_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? &state->buflen_0 : &state->buflen_1; +} + /* Common job descriptor seq in/out ptr routines */ /* Map state->caam_ctx, and append seq_out_ptr command that points to it */ @@ -154,6 +173,7 @@ static inline int map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev, ctx_len, DMA_FROM_DEVICE); if (dma_mapping_error(jrdev, state->ctx_dma)) { dev_err(jrdev, "unable to map ctx\n"); + state->ctx_dma = 0; return -ENOMEM; } @@ -174,36 +194,27 @@ static inline dma_addr_t map_seq_out_ptr_result(u32 *desc, struct device *jrdev, return dst_dma; } -/* Map current buffer in state and put it in link table */ -static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev, - struct sec4_sg_entry *sec4_sg, - u8 *buf, int buflen) +/* Map current buffer in state (if length > 0) and put it in link table */ +static inline int buf_map_to_sec4_sg(struct device *jrdev, + struct sec4_sg_entry *sec4_sg, + struct caam_hash_state *state) { - dma_addr_t buf_dma; + int buflen = *current_buflen(state); - buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); - dma_to_sec4_sg_one(sec4_sg, buf_dma, buflen, 0); + if (!buflen) + return 0; - return buf_dma; -} + state->buf_dma = dma_map_single(jrdev, current_buf(state), buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, state->buf_dma)) { + dev_err(jrdev, "unable to map buf\n"); + state->buf_dma = 0; + return -ENOMEM; + } -/* - * Only put buffer in link table if it contains data, which is possible, - * since a buffer has previously been used, and needs to be unmapped, - */ -static inline dma_addr_t -try_buf_map_to_sec4_sg(struct device *jrdev, struct sec4_sg_entry *sec4_sg, - u8 *buf, dma_addr_t buf_dma, int buflen, - int last_buflen) -{ - if (buf_dma && !dma_mapping_error(jrdev, buf_dma)) - dma_unmap_single(jrdev, buf_dma, last_buflen, DMA_TO_DEVICE); - if (buflen) - buf_dma 
= buf_map_to_sec4_sg(jrdev, sec4_sg, buf, buflen); - else - buf_dma = 0; - - return buf_dma; + dma_to_sec4_sg_one(sec4_sg, state->buf_dma, buflen, 0); + + return 0; } /* Map state->caam_ctx, and add it to link table */ @@ -214,6 +225,7 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, ctx_len, flag); if (dma_mapping_error(jrdev, state->ctx_dma)) { dev_err(jrdev, "unable to map ctx\n"); + state->ctx_dma = 0; return -ENOMEM; } @@ -222,89 +234,54 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, return 0; } -/* Common shared descriptor commands */ -static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) -{ - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); -} - -/* Append key if it has been set */ -static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) -{ - u32 *key_jump_cmd; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - if (ctx->split_key_len) { - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_ahash(desc, ctx); - - set_jump_tgt_here(desc, key_jump_cmd); - } - - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); -} - /* - * For ahash read data from seqin following state->caam_ctx, - * and write resulting class2 context to seqout, which may be state->caam_ctx - * or req->result + * For ahash update, final and finup (import_ctx = true) + * import context, read and write to seqout + * For ahash firsts and digest (import_ctx = false) + * read and write to seqout */ -static inline void ahash_append_load_str(u32 *desc, int digestsize) +static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize, + struct caam_hash_ctx *ctx, bool import_ctx) { - /* Calculate remaining bytes to read */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Read remaining bytes */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | - FIFOLD_TYPE_MSG | KEY_VLF); + u32 op = ctx->adata.algtype; + u32 *skip_key_load; - /* Store class2 context bytes */ - append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); -} + init_sh_desc(desc, HDR_SHARE_SERIAL); -/* - * For ahash update, final and finup, import context, read and write to seqout - */ -static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state, - int digestsize, - struct caam_hash_ctx *ctx) -{ - init_sh_desc_key_ahash(desc, ctx); + /* Append key if it has been set; ahash update excluded */ + if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) { + /* Skip key loading if already shared */ + skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); - /* Import context from software */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_2_CCB | ctx->ctx_len); + append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad, + ctx->adata.keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); - /* Class 2 operation */ - append_operation(desc, op | state | OP_ALG_ENCRYPT); + set_jump_tgt_here(desc, skip_key_load); - /* - * Load from buf and/or src and write to req->result or state->context - */ - ahash_append_load_str(desc, digestsize); -} + op |= OP_ALG_AAI_HMAC_PRECOMP; + } -/* For ahash firsts and digest, read and write to seqout */ -static inline void ahash_data_to_out(u32 *desc, u32 op, u32 
state, - int digestsize, struct caam_hash_ctx *ctx) -{ - init_sh_desc_key_ahash(desc, ctx); + /* If needed, import context from software */ + if (import_ctx) + append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); /* Class 2 operation */ append_operation(desc, op | state | OP_ALG_ENCRYPT); /* * Load from buf and/or src and write to req->result or state->context + * Calculate remaining bytes to read */ - ahash_append_load_str(desc, digestsize); + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + /* Read remaining bytes */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | + FIFOLD_TYPE_MSG | KEY_VLF); + /* Store class2 context bytes */ + append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); } static int ahash_set_sh_desc(struct crypto_ahash *ahash) @@ -312,34 +289,13 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; - u32 have_key = 0; u32 *desc; - if (ctx->split_key_len) - have_key = OP_ALG_AAI_HMAC_PRECOMP; - /* ahash_update shared descriptor */ desc = ctx->sh_desc_update; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Import context from software */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_2_CCB | ctx->ctx_len); - - /* Class 2 operation */ - append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE | - OP_ALG_ENCRYPT); - - /* Load data and write to result or context */ - ahash_append_load_str(desc, ctx->ctx_len); - - ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true); + dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash update shdesc@"__stringify(__LINE__)": ", @@ -348,17 +304,9 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_update_first shared descriptor */ desc = ctx->sh_desc_update_first; - - ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT, - ctx->ctx_len, ctx); - - ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_update_first_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false); + dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash update first shdesc@"__stringify(__LINE__)": ", @@ -367,53 +315,20 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_final shared descriptor */ desc = ctx->sh_desc_fin; - - ahash_ctx_data_to_out(desc, have_key | ctx->alg_type, - OP_ALG_AS_FINALIZE, digestsize, ctx); - - ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true); + dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash final 
shdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); #endif - /* ahash_finup shared descriptor */ - desc = ctx->sh_desc_finup; - - ahash_ctx_data_to_out(desc, have_key | ctx->alg_type, - OP_ALG_AS_FINALIZE, digestsize, ctx); - - ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ahash finup shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - /* ahash_digest shared descriptor */ desc = ctx->sh_desc_digest; - - ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL, - digestsize, ctx); - - ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_digest_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false); + dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash digest shdesc@"__stringify(__LINE__)": ", @@ -424,14 +339,6 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) return 0; } -static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, - u32 keylen) -{ - return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, - ctx->split_key_pad_len, key_in, keylen, - ctx->alg_op); -} - /* Digest hash size if it is too large */ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, u32 *keylen, u8 *key_out, u32 digestsize) @@ -467,7 +374,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, } /* Job descriptor to perform unkeyed hash on key_in */ - append_operation(desc, ctx->alg_type | OP_ALG_ENCRYPT | + append_operation(desc, ctx->adata.algtype | OP_ALG_ENCRYPT | OP_ALG_AS_INITFINAL); append_seq_in_ptr(desc, src_dma, *keylen, 0); append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 | @@ -511,10 +418,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ - static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct device *jrdev = ctx->jrdev; int blocksize = crypto_tfm_alg_blocksize(&ahash->base); int digestsize = crypto_ahash_digestsize(ahash); int ret; @@ -537,43 +441,19 @@ static int ahash_setkey(struct crypto_ahash *ahash, key = hashed_key; } - /* Pick class 2 key length from algorithm submask */ - ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> - OP_ALG_ALGSEL_SHIFT] * 2; - ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); - -#ifdef DEBUG - printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", - ctx->split_key_len, ctx->split_key_pad_len); - print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); -#endif - - ret = gen_split_hash_key(ctx, key, keylen); + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, keylen, + CAAM_MAX_HASH_KEY_SIZE); if (ret) goto bad_free_key; - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - 
dev_err(jrdev, "unable to map key i/o memory\n"); - ret = -ENOMEM; - goto error_free_key; - } #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->split_key_pad_len, 1); + ctx->adata.keylen_pad, 1); #endif - ret = ahash_set_sh_desc(ahash); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len, - DMA_TO_DEVICE); - } - error_free_key: kfree(hashed_key); - return ret; + return ahash_set_sh_desc(ahash); bad_free_key: kfree(hashed_key); crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN); @@ -602,6 +482,8 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len) { + struct caam_hash_state *state = ahash_request_ctx(req); + if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); if (edesc->dst_dma) @@ -610,6 +492,12 @@ static inline void ahash_unmap(struct device *dev, if (edesc->sec4_sg_bytes) dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, DMA_TO_DEVICE); + + if (state->buf_dma) { + dma_unmap_single(dev, state->buf_dma, *current_buflen(state), + DMA_TO_DEVICE); + state->buf_dma = 0; + } } static inline void ahash_unmap_ctx(struct device *dev, @@ -620,8 +508,10 @@ static inline void ahash_unmap_ctx(struct device *dev, struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); - if (state->ctx_dma) + if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag); + state->ctx_dma = 0; + } ahash_unmap(dev, edesc, req, dst_len); } @@ -639,8 +529,7 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -667,19 +556,19 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); -#ifdef DEBUG struct caam_hash_state *state = ahash_request_ctx(req); +#ifdef DEBUG int digestsize = crypto_ahash_digestsize(ahash); dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + switch_buf(state); kfree(edesc); #ifdef DEBUG @@ -709,8 +598,7 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -737,19 +625,19 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); -#ifdef DEBUG struct caam_hash_state *state = ahash_request_ctx(req); +#ifdef DEBUG int digestsize = crypto_ahash_digestsize(ahash); dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct 
ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + switch_buf(state); kfree(edesc); #ifdef DEBUG @@ -833,11 +721,10 @@ static int ahash_update_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0; - u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1; - int *next_buflen = state->current_buf ? &state->buflen_0 : - &state->buflen_1, last_buflen; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state), last_buflen; int in_len = *buflen + req->nbytes, to_hash; u32 *desc; int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index; @@ -891,10 +778,9 @@ static int ahash_update_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, - edesc->sec4_sg + 1, - buf, state->buf_dma, - *buflen, last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; if (mapped_nents) { sg_to_sec4_sg_last(req->src, mapped_nents, @@ -909,8 +795,6 @@ static int ahash_update_ctx(struct ahash_request *req) cpu_to_caam32(SEC4_SG_LEN_FIN); } - state->current_buf = !state->current_buf; - desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -967,10 +851,7 @@ static int ahash_final_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? state->buflen_0 : - state->buflen_1; + int buflen = *current_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); @@ -997,9 +878,10 @@ static int ahash_final_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, - buf, state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); @@ -1046,10 +928,7 @@ static int ahash_finup_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? 
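The update/final/finup paths here and below drop the repeated "state->current_buf ?" ternaries in favour of small accessors plus an explicit switch_buf() once a buffer has been handed to the hardware. The helpers themselves are added to caamhash.c outside these hunks; a plausible minimal form, assuming only the buf_0/buf_1, buflen_0/buflen_1 and current_buf fields of caam_hash_state that the old code already used, would be:

/* Hypothetical sketch - the real helpers live elsewhere in caamhash.c. */
static inline u8 *current_buf(struct caam_hash_state *state)
{
        return state->current_buf ? state->buf_1 : state->buf_0;
}

static inline u8 *alt_buf(struct caam_hash_state *state)
{
        return state->current_buf ? state->buf_0 : state->buf_1;
}

static inline int *current_buflen(struct caam_hash_state *state)
{
        return state->current_buf ? &state->buflen_1 : &state->buflen_0;
}

static inline int *alt_buflen(struct caam_hash_state *state)
{
        return state->current_buf ? &state->buflen_0 : &state->buflen_1;
}

static inline void switch_buf(struct caam_hash_state *state)
{
        state->current_buf = !state->current_buf;
}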
state->buflen_0 : - state->buflen_1; + int buflen = *current_buflen(state); u32 *desc; int sec4_sg_src_index; int src_nents, mapped_nents; @@ -1078,7 +957,7 @@ static int ahash_finup_ctx(struct ahash_request *req) /* allocate space for base edesc and hw desc commands, link tables */ edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_finup, ctx->sh_desc_finup_dma, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma, flags); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); @@ -1094,9 +973,9 @@ static int ahash_finup_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, - buf, state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, sec4_sg_src_index, ctx->ctx_len + buflen, @@ -1132,6 +1011,7 @@ static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; @@ -1141,6 +1021,8 @@ static int ahash_digest(struct ahash_request *req) struct ahash_edesc *edesc; int ret; + state->buf_dma = 0; + src_nents = sg_nents_for_len(req->src, req->nbytes); if (src_nents < 0) { dev_err(jrdev, "Invalid number of src SG.\n"); @@ -1213,8 +1095,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; @@ -1274,11 +1156,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0; - u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1; - int *next_buflen = state->current_buf ? 
&state->buflen_0 : - &state->buflen_1; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); int in_len = *buflen + req->nbytes, to_hash; int sec4_sg_bytes, src_nents, mapped_nents; struct ahash_edesc *edesc; @@ -1327,8 +1208,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->dst_dma = 0; - state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, - buf, *buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, state); + if (ret) + goto unmap_ctx; + sg_to_sec4_sg_last(req->src, mapped_nents, edesc->sec4_sg + 1, 0); @@ -1338,8 +1221,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) *next_buflen, 0); } - state->current_buf = !state->current_buf; - desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1401,10 +1282,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? state->buflen_0 : - state->buflen_1; + int buflen = *current_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; int digestsize = crypto_ahash_digestsize(ahash); @@ -1446,9 +1324,9 @@ static int ahash_finup_no_ctx(struct ahash_request *req) edesc->src_nents = src_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf, - state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, state); + if (ret) + goto unmap; ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 1, buflen, req->nbytes); @@ -1494,9 +1372,8 @@ static int ahash_update_first(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *next_buf = state->current_buf ? state->buf_1 : state->buf_0; - int *next_buflen = state->current_buf ? 
- &state->buflen_1 : &state->buflen_0; + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); int to_hash; u32 *desc; int src_nents, mapped_nents; @@ -1578,6 +1455,7 @@ static int ahash_update_first(struct ahash_request *req) state->final = ahash_final_no_ctx; scatterwalk_map_and_copy(next_buf, req->src, 0, req->nbytes, 0); + switch_buf(state); } #ifdef DEBUG print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", @@ -1605,6 +1483,7 @@ static int ahash_init(struct ahash_request *req) state->finup = ahash_finup_first; state->final = ahash_final_no_ctx; + state->ctx_dma = 0; state->current_buf = 0; state->buf_dma = 0; state->buflen_0 = 0; @@ -1683,7 +1562,6 @@ struct caam_hash_template { unsigned int blocksize; struct ahash_alg template_ahash; u32 alg_type; - u32 alg_op; }; /* ahash descriptors */ @@ -1709,7 +1587,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA1, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, { .name = "sha224", .driver_name = "sha224-caam", @@ -1731,7 +1608,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA224, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, { .name = "sha256", .driver_name = "sha256-caam", @@ -1753,7 +1629,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA256, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, { .name = "sha384", .driver_name = "sha384-caam", @@ -1775,7 +1650,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA384, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, { .name = "sha512", .driver_name = "sha512-caam", @@ -1797,7 +1671,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA512, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, { .name = "md5", .driver_name = "md5-caam", @@ -1819,14 +1692,12 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_MD5, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }; struct caam_hash_alg { struct list_head entry; int alg_type; - int alg_op; struct ahash_alg ahash_alg; }; @@ -1848,6 +1719,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + dma_addr_t dma_addr; /* * Get a Job ring from Job Ring driver to ensure in-order @@ -1858,11 +1730,31 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->jrdev); } + + dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, + offsetof(struct caam_hash_ctx, + sh_desc_update_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->jrdev, dma_addr)) { + dev_err(ctx->jrdev, "unable to map shared descriptors\n"); + caam_jr_free(ctx->jrdev); + return -ENOMEM; + } + + ctx->sh_desc_update_dma = dma_addr; + ctx->sh_desc_update_first_dma = dma_addr + + offsetof(struct caam_hash_ctx, + sh_desc_update_first); + ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, + sh_desc_fin); + ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, + sh_desc_digest); + /* copy descriptor header template value */ - ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; - ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op; + ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; - ctx->ctx_len = runninglen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> + 
ctx->ctx_len = runninglen[(ctx->adata.algtype & + OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT]; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), @@ -1874,30 +1766,10 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) { struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sh_desc_update_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_dma, - desc_bytes(ctx->sh_desc_update), - DMA_TO_DEVICE); - if (ctx->sh_desc_update_first_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_first_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_first_dma, - desc_bytes(ctx->sh_desc_update_first), - DMA_TO_DEVICE); - if (ctx->sh_desc_fin_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_fin_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_fin_dma, - desc_bytes(ctx->sh_desc_fin), DMA_TO_DEVICE); - if (ctx->sh_desc_digest_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_digest_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma, - desc_bytes(ctx->sh_desc_digest), - DMA_TO_DEVICE); - if (ctx->sh_desc_finup_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma, - desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE); - + dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, + offsetof(struct caam_hash_ctx, + sh_desc_update_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } @@ -1956,7 +1828,6 @@ caam_hash_alloc(struct caam_hash_template *template, alg->cra_type = &crypto_ahash_type; t_alg->alg_type = template->alg_type; - t_alg->alg_op = template->alg_op; return t_alg; } diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 851015e652b8..49cbdcba7883 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -395,7 +395,7 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct rsa_key raw_key = {0}; + struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -441,7 +441,7 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct rsa_key raw_key = {0}; + struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -506,7 +506,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) ctx->dev = caam_jr_alloc(); if (IS_ERR(ctx->dev)) { - dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->dev); } diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 9b92af2c7241..41398da3edf4 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -52,7 +52,7 @@ /* length of descriptors */ #define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) -#define DESC_RNG_LEN (4 * CAAM_CMD_SZ) +#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) /* Buffer, its dma address and lock */ struct buf_data { @@ -100,8 +100,7 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) { struct buf_data *bd; - bd = (struct buf_data *)((char *)desc - - offsetof(struct buf_data, hw_desc)); + bd = container_of(desc, struct buf_data, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -196,9 +195,6 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) 
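caam_hash_cra_init()/caam_hash_cra_exit() above now cover all four shared descriptors with one dma_map_single_attrs() over the leading region of caam_hash_ctx and derive the per-descriptor bus addresses with offsetof(); DMA_ATTR_SKIP_CPU_SYNC is safe because ahash_set_sh_desc() syncs each descriptor explicitly after rewriting it. Condensed into one helper as it would sit in caamhash.c (illustrative only; it assumes the field order implied by the offsetof() arithmetic in the hunk):

static int map_all_sh_desc(struct device *jrdev, struct caam_hash_ctx *ctx)
{
        dma_addr_t dma_addr;

        /* one mapping spans sh_desc_update .. sh_desc_digest */
        dma_addr = dma_map_single_attrs(jrdev, ctx->sh_desc_update,
                                        offsetof(struct caam_hash_ctx,
                                                 sh_desc_update_dma),
                                        DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
        if (dma_mapping_error(jrdev, dma_addr))
                return -ENOMEM;

        /* every other descriptor's bus address is just an offset away */
        ctx->sh_desc_update_dma = dma_addr;
        ctx->sh_desc_update_first_dma = dma_addr +
                offsetof(struct caam_hash_ctx, sh_desc_update_first);
        ctx->sh_desc_fin_dma = dma_addr +
                offsetof(struct caam_hash_ctx, sh_desc_fin);
        ctx->sh_desc_digest_dma = dma_addr +
                offsetof(struct caam_hash_ctx, sh_desc_digest);

        return 0;
}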
init_sh_desc(desc, HDR_SHARE_SERIAL); - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); - /* Generate random bytes */ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); @@ -351,7 +347,7 @@ static int __init caam_rng_init(void) pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(dev); } - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA); + rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); if (!rng_ctx) { err = -ENOMEM; goto free_caam_alloc; diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index e483b78c6343..5d7f73d60515 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -13,7 +13,6 @@ #include "intern.h" #include "jr.h" #include "desc_constr.h" -#include "error.h" #include "ctrl.h" bool caam_little_end; @@ -270,7 +269,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) /* * If the corresponding bit is set, then it means the state * handle was initialized by us, and thus it needs to be - * deintialized as well + * deinitialized as well */ if ((1 << sh_idx) & state_handle_mask) { /* @@ -282,7 +281,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) /* Try to run it through DECO0 */ ret = run_descriptor_deco0(ctrldev, desc, &status); - if (ret || status) { + if (ret || + (status && status != JRSTA_SSRC_JUMP_HALT_CC)) { dev_err(ctrldev, "Failed to deinstantiate RNG4 SH%d\n", sh_idx); @@ -302,17 +302,13 @@ static int caam_remove(struct platform_device *pdev) struct device *ctrldev; struct caam_drv_private *ctrlpriv; struct caam_ctrl __iomem *ctrl; - int ring; ctrldev = &pdev->dev; ctrlpriv = dev_get_drvdata(ctrldev); ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - /* Remove platform devices for JobRs */ - for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { - if (ctrlpriv->jrpdev[ring]) - of_device_unregister(ctrlpriv->jrpdev[ring]); - } + /* Remove platform devices under the crypto node */ + of_platform_depopulate(ctrldev); /* De-initialize RNG state handles initialized by this driver. 
*/ if (ctrlpriv->rng4_sh_init) @@ -330,8 +326,8 @@ static int caam_remove(struct platform_device *pdev) clk_disable_unprepare(ctrlpriv->caam_ipg); clk_disable_unprepare(ctrlpriv->caam_mem); clk_disable_unprepare(ctrlpriv->caam_aclk); - clk_disable_unprepare(ctrlpriv->caam_emi_slow); - + if (ctrlpriv->caam_emi_slow) + clk_disable_unprepare(ctrlpriv->caam_emi_slow); return 0; } @@ -365,11 +361,8 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) */ val = (rd_reg32(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK) >> RTSDCTL_ENT_DLY_SHIFT; - if (ent_delay <= val) { - /* put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); - return; - } + if (ent_delay <= val) + goto start_rng; val = rd_reg32(&r4tst->rtsdctl); val = (val & ~RTSDCTL_ENT_DLY_MASK) | @@ -381,15 +374,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE); /* read the control register */ val = rd_reg32(&r4tst->rtmctl); +start_rng: /* * select raw sampling in both entropy shifter - * and statistical checker + * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); - /* put RNG4 into run mode */ - clrsetbits_32(&val, RTMCTL_PRGM, 0); - /* write back the control register */ - wr_reg32(&r4tst->rtmctl, val); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); } /** @@ -427,10 +417,21 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); #endif +static const struct of_device_id caam_match[] = { + { + .compatible = "fsl,sec-v4.0", + }, + { + .compatible = "fsl,sec4.0", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, caam_match); + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { - int ret, ring, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; + int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; u64 caam_id; struct device *dev; struct device_node *nprop, *np; @@ -482,14 +483,16 @@ static int caam_probe(struct platform_device *pdev) } ctrlpriv->caam_aclk = clk; - clk = caam_drv_identify_clk(&pdev->dev, "emi_slow"); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - dev_err(&pdev->dev, - "can't identify CAAM emi_slow clk: %d\n", ret); - return ret; + if (!of_machine_is_compatible("fsl,imx6ul")) { + clk = caam_drv_identify_clk(&pdev->dev, "emi_slow"); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); + dev_err(&pdev->dev, + "can't identify CAAM emi_slow clk: %d\n", ret); + return ret; + } + ctrlpriv->caam_emi_slow = clk; } - ctrlpriv->caam_emi_slow = clk; ret = clk_prepare_enable(ctrlpriv->caam_ipg); if (ret < 0) { @@ -510,11 +513,13 @@ static int caam_probe(struct platform_device *pdev) goto disable_caam_mem; } - ret = clk_prepare_enable(ctrlpriv->caam_emi_slow); - if (ret < 0) { - dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n", - ret); - goto disable_caam_aclk; + if (ctrlpriv->caam_emi_slow) { + ret = clk_prepare_enable(ctrlpriv->caam_emi_slow); + if (ret < 0) { + dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n", + ret); + goto disable_caam_aclk; + } } /* Get configuration properties from device tree */ @@ -541,13 +546,13 @@ static int caam_probe(struct platform_device *pdev) else BLOCK_OFFSET = PG_SIZE_64K; - ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl; - ctrlpriv->assure = (struct caam_assurance __force *) - ((uint8_t *)ctrl + + ctrlpriv->ctrl = (struct caam_ctrl __iomem __force *)ctrl; + 
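kick_trng() above folds the "raw sampling" selection and the switch out of program mode into a single clrsetbits_32() call on rtmctl. clrsetbits_32() is the driver's read-modify-write accessor from regs.h (not shown in this patch); assuming the usual clear-then-set semantics, the call expands to roughly:

/* Open-coded equivalent of the single clrsetbits_32() call, assuming the
 * helper reads the register, clears the first mask, sets the second mask
 * and writes the result back:
 */
u32 val = rd_reg32(&r4tst->rtmctl);

val &= ~RTMCTL_PRGM;                    /* leave program mode, i.e. run mode */
val |= RTMCTL_SAMP_MODE_RAW_ES_SC;      /* raw sampling in ES and SC */
wr_reg32(&r4tst->rtmctl, val);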
ctrlpriv->assure = (struct caam_assurance __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * ASSURE_BLOCK_NUMBER ); - ctrlpriv->deco = (struct caam_deco __force *) - ((uint8_t *)ctrl + + ctrlpriv->deco = (struct caam_deco __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * DECO_BLOCK_NUMBER ); @@ -589,60 +594,45 @@ static int caam_probe(struct platform_device *pdev) JRSTART_JR1_START | JRSTART_JR2_START | JRSTART_JR3_START); - if (sizeof(dma_addr_t) == sizeof(u64)) + if (sizeof(dma_addr_t) == sizeof(u64)) { if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); else - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); - else - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - - /* - * Detect and enable JobRs - * First, find out how many ring spec'ed, allocate references - * for all, then go probe each one. - */ - rspec = 0; - for_each_available_child_of_node(nprop, np) - if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || - of_device_is_compatible(np, "fsl,sec4.0-job-ring")) - rspec++; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); + } else { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + } + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto iounmap_ctrl; + } - ctrlpriv->jrpdev = devm_kcalloc(&pdev->dev, rspec, - sizeof(*ctrlpriv->jrpdev), GFP_KERNEL); - if (ctrlpriv->jrpdev == NULL) { - ret = -ENOMEM; + ret = of_platform_populate(nprop, caam_match, NULL, dev); + if (ret) { + dev_err(dev, "JR platform devices creation error\n"); goto iounmap_ctrl; } ring = 0; - ctrlpriv->total_jobrs = 0; for_each_available_child_of_node(nprop, np) if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { - ctrlpriv->jrpdev[ring] = - of_platform_device_create(np, NULL, dev); - if (!ctrlpriv->jrpdev[ring]) { - pr_warn("JR%d Platform device creation error\n", - ring); - continue; - } - ctrlpriv->jr[ring] = (struct caam_job_ring __force *) - ((uint8_t *)ctrl + + ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *) + ((__force uint8_t *)ctrl + (ring + JR_BLOCK_NUMBER) * BLOCK_OFFSET ); ctrlpriv->total_jobrs++; ring++; - } + } /* Check to see if QI present. 
If so, enable */ ctrlpriv->qi_present = !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) & CTPR_MS_QI_MASK); if (ctrlpriv->qi_present) { - ctrlpriv->qi = (struct caam_queue_if __force *) - ((uint8_t *)ctrl + + ctrlpriv->qi = (struct caam_queue_if __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * QI_BLOCK_NUMBER ); /* This is all that's required to physically enable QI */ @@ -800,7 +790,7 @@ static int caam_probe(struct platform_device *pdev) &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ - ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; + ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0]; ctrlpriv->ctl_kek_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_kek = debugfs_create_blob("kek", S_IRUSR | @@ -808,7 +798,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl, &ctrlpriv->ctl_kek_wrap); - ctrlpriv->ctl_tkek_wrap.data = &ctrlpriv->ctrl->tkek[0]; + ctrlpriv->ctl_tkek_wrap.data = (__force void *)&ctrlpriv->ctrl->tkek[0]; ctrlpriv->ctl_tkek_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_tkek = debugfs_create_blob("tkek", S_IRUSR | @@ -816,7 +806,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl, &ctrlpriv->ctl_tkek_wrap); - ctrlpriv->ctl_tdsk_wrap.data = &ctrlpriv->ctrl->tdsk[0]; + ctrlpriv->ctl_tdsk_wrap.data = (__force void *)&ctrlpriv->ctrl->tdsk[0]; ctrlpriv->ctl_tdsk_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_tdsk = debugfs_create_blob("tdsk", S_IRUSR | @@ -833,7 +823,8 @@ caam_remove: iounmap_ctrl: iounmap(ctrl); disable_caam_emi_slow: - clk_disable_unprepare(ctrlpriv->caam_emi_slow); + if (ctrlpriv->caam_emi_slow) + clk_disable_unprepare(ctrlpriv->caam_emi_slow); disable_caam_aclk: clk_disable_unprepare(ctrlpriv->caam_aclk); disable_caam_mem: @@ -843,17 +834,6 @@ disable_caam_ipg: return ret; } -static struct of_device_id caam_match[] = { - { - .compatible = "fsl,sec-v4.0", - }, - { - .compatible = "fsl,sec4.0", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, caam_match); - static struct platform_driver caam_driver = { .driver = { .name = "caam", diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 513b6646bb36..2e6766a1573f 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -22,12 +22,6 @@ #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ #define SEC4_SG_OFFSET_MASK 0x00001fff -struct sec4_sg_entry { - u64 ptr; - u32 len; - u32 bpid_offset; -}; - /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ #define MAX_CAAM_DESCSIZE 64 @@ -90,8 +84,8 @@ struct sec4_sg_entry { #define HDR_ZRO 0x00008000 /* Start Index or SharedDesc Length */ -#define HDR_START_IDX_MASK 0x3f #define HDR_START_IDX_SHIFT 16 +#define HDR_START_IDX_MASK (0x3f << HDR_START_IDX_SHIFT) /* If shared descriptor header, 6-bit length */ #define HDR_DESCLEN_SHR_MASK 0x3f @@ -121,10 +115,10 @@ struct sec4_sg_entry { #define HDR_PROP_DNR 0x00000800 /* JobDesc/SharedDesc share property */ -#define HDR_SD_SHARE_MASK 0x03 #define HDR_SD_SHARE_SHIFT 8 -#define HDR_JD_SHARE_MASK 0x07 +#define HDR_SD_SHARE_MASK (0x03 << HDR_SD_SHARE_SHIFT) #define HDR_JD_SHARE_SHIFT 8 +#define HDR_JD_SHARE_MASK (0x07 << HDR_JD_SHARE_SHIFT) #define HDR_SHARE_NEVER (0x00 << HDR_SD_SHARE_SHIFT) #define HDR_SHARE_WAIT (0x01 << HDR_SD_SHARE_SHIFT) @@ -235,7 +229,7 @@ struct sec4_sg_entry { #define LDST_SRCDST_WORD_DECO_MATH2 (0x0a << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DECO_AAD_SZ (0x0b << LDST_SRCDST_SHIFT) #define 
LDST_SRCDST_WORD_DECO_MATH3 (0x0b << LDST_SRCDST_SHIFT) -#define LDST_SRCDST_WORD_CLASS1_ICV_SZ (0x0c << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_CLASS1_IV_SZ (0x0c << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_ALTDS_CLASS1 (0x0f << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_A_SZ (0x10 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) @@ -400,7 +394,7 @@ struct sec4_sg_entry { #define FIFOST_TYPE_PKHA_N (0x08 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_A (0x0c << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_B (0x0d << FIFOST_TYPE_SHIFT) -#define FIFOST_TYPE_AF_SBOX_JKEK (0x10 << FIFOST_TYPE_SHIFT) +#define FIFOST_TYPE_AF_SBOX_JKEK (0x20 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_AF_SBOX_TKEK (0x21 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_E_JKEK (0x22 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_E_TKEK (0x23 << FIFOST_TYPE_SHIFT) @@ -1107,8 +1101,8 @@ struct sec4_sg_entry { /* For non-protocol/alg-only op commands */ #define OP_ALG_TYPE_SHIFT 24 #define OP_ALG_TYPE_MASK (0x7 << OP_ALG_TYPE_SHIFT) -#define OP_ALG_TYPE_CLASS1 2 -#define OP_ALG_TYPE_CLASS2 4 +#define OP_ALG_TYPE_CLASS1 (2 << OP_ALG_TYPE_SHIFT) +#define OP_ALG_TYPE_CLASS2 (4 << OP_ALG_TYPE_SHIFT) #define OP_ALG_ALGSEL_SHIFT 16 #define OP_ALG_ALGSEL_MASK (0xff << OP_ALG_ALGSEL_SHIFT) @@ -1249,7 +1243,7 @@ struct sec4_sg_entry { #define OP_ALG_PKMODE_MOD_PRIMALITY 0x00f /* PKHA mode copy-memory functions */ -#define OP_ALG_PKMODE_SRC_REG_SHIFT 13 +#define OP_ALG_PKMODE_SRC_REG_SHIFT 17 #define OP_ALG_PKMODE_SRC_REG_MASK (7 << OP_ALG_PKMODE_SRC_REG_SHIFT) #define OP_ALG_PKMODE_DST_REG_SHIFT 10 #define OP_ALG_PKMODE_DST_REG_MASK (7 << OP_ALG_PKMODE_DST_REG_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index a8cd8a78ec1f..b9c8d98ef826 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -33,38 +33,39 @@ extern bool caam_little_end; -static inline int desc_len(u32 *desc) +static inline int desc_len(u32 * const desc) { return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } -static inline int desc_bytes(void *desc) +static inline int desc_bytes(void * const desc) { return desc_len(desc) * CAAM_CMD_SZ; } -static inline u32 *desc_end(u32 *desc) +static inline u32 *desc_end(u32 * const desc) { return desc + desc_len(desc); } -static inline void *sh_desc_pdb(u32 *desc) +static inline void *sh_desc_pdb(u32 * const desc) { return desc + 1; } -static inline void init_desc(u32 *desc, u32 options) +static inline void init_desc(u32 * const desc, u32 options) { *desc = cpu_to_caam32((options | HDR_ONE) + 1); } -static inline void init_sh_desc(u32 *desc, u32 options) +static inline void init_sh_desc(u32 * const desc, u32 options) { PRINT_POS; init_desc(desc, CMD_SHARED_DESC_HDR | options); } -static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +static inline void init_sh_desc_pdb(u32 * const desc, u32 options, + size_t pdb_bytes) { u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; @@ -72,19 +73,20 @@ static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) options); } -static inline void init_job_desc(u32 *desc, u32 options) +static inline void init_job_desc(u32 * const desc, u32 options) { init_desc(desc, CMD_DESC_HDR | options); } -static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +static inline void init_job_desc_pdb(u32 * const desc, u32 options, + size_t pdb_bytes) { u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; 
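The desc.h hunks here and in the previous chunk fold the shift into the *_MASK constants (HDR_START_IDX_MASK, HDR_SD/JD_SHARE_MASK, OP_ALG_TYPE_CLASS1/2), so fields are manipulated in place: mask first and shift second when reading, shift and then mask when writing. Two illustrative helpers (the function names are mine, not the driver's):

static inline u32 hdr_set_start_idx(u32 hdr, u32 idx)
{
        hdr &= ~HDR_START_IDX_MASK;
        hdr |= (idx << HDR_START_IDX_SHIFT) & HDR_START_IDX_MASK;
        return hdr;
}

static inline u32 hdr_get_start_idx(u32 hdr)
{
        return (hdr & HDR_START_IDX_MASK) >> HDR_START_IDX_SHIFT;
}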
init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); } -static inline void append_ptr(u32 *desc, dma_addr_t ptr) +static inline void append_ptr(u32 * const desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); @@ -94,8 +96,8 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr) CAAM_PTR_SZ / CAAM_CMD_SZ); } -static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, - u32 options) +static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr, + int len, u32 options) { PRINT_POS; init_job_desc(desc, HDR_SHARED | options | @@ -103,7 +105,7 @@ static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, append_ptr(desc, ptr); } -static inline void append_data(u32 *desc, void *data, int len) +static inline void append_data(u32 * const desc, void *data, int len) { u32 *offset = desc_end(desc); @@ -114,7 +116,7 @@ static inline void append_data(u32 *desc, void *data, int len) (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } -static inline void append_cmd(u32 *desc, u32 command) +static inline void append_cmd(u32 * const desc, u32 command) { u32 *cmd = desc_end(desc); @@ -125,7 +127,7 @@ static inline void append_cmd(u32 *desc, u32 command) #define append_u32 append_cmd -static inline void append_u64(u32 *desc, u64 data) +static inline void append_u64(u32 * const desc, u64 data) { u32 *offset = desc_end(desc); @@ -142,14 +144,14 @@ static inline void append_u64(u32 *desc, u64 data) } /* Write command without affecting header, and return pointer to next word */ -static inline u32 *write_cmd(u32 *desc, u32 command) +static inline u32 *write_cmd(u32 * const desc, u32 command) { *desc = cpu_to_caam32(command); return desc + 1; } -static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, +static inline void append_cmd_ptr(u32 * const desc, dma_addr_t ptr, int len, u32 command) { append_cmd(desc, command | len); @@ -157,7 +159,7 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, } /* Write length after pointer, rather than inside command */ -static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, +static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr, unsigned int len, u32 command) { append_cmd(desc, command); @@ -166,7 +168,7 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, append_cmd(desc, len); } -static inline void append_cmd_data(u32 *desc, void *data, int len, +static inline void append_cmd_data(u32 * const desc, void *data, int len, u32 command) { append_cmd(desc, command | IMMEDIATE | len); @@ -174,7 +176,7 @@ static inline void append_cmd_data(u32 *desc, void *data, int len, } #define APPEND_CMD_RET(cmd, op) \ -static inline u32 *append_##cmd(u32 *desc, u32 options) \ +static inline u32 *append_##cmd(u32 * const desc, u32 options) \ { \ u32 *cmd = desc_end(desc); \ PRINT_POS; \ @@ -184,13 +186,13 @@ static inline u32 *append_##cmd(u32 *desc, u32 options) \ APPEND_CMD_RET(jump, JUMP) APPEND_CMD_RET(move, MOVE) -static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) +static inline void set_jump_tgt_here(u32 * const desc, u32 *jump_cmd) { *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | (desc_len(desc) - (jump_cmd - desc))); } -static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) +static inline void set_move_tgt_here(u32 * const desc, u32 *move_cmd) { u32 val = caam32_to_cpu(*move_cmd); @@ -200,7 +202,7 @@ static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) } #define APPEND_CMD(cmd, op) \ 
-static inline void append_##cmd(u32 *desc, u32 options) \ +static inline void append_##cmd(u32 * const desc, u32 options) \ { \ PRINT_POS; \ append_cmd(desc, CMD_##op | options); \ @@ -208,7 +210,8 @@ static inline void append_##cmd(u32 *desc, u32 options) \ APPEND_CMD(operation, OPERATION) #define APPEND_CMD_LEN(cmd, op) \ -static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \ +static inline void append_##cmd(u32 * const desc, unsigned int len, \ + u32 options) \ { \ PRINT_POS; \ append_cmd(desc, CMD_##op | len | options); \ @@ -220,8 +223,8 @@ APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD) APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE) #define APPEND_CMD_PTR(cmd, op) \ -static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \ - u32 options) \ +static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \ + unsigned int len, u32 options) \ { \ PRINT_POS; \ append_cmd_ptr(desc, ptr, len, CMD_##op | options); \ @@ -231,8 +234,8 @@ APPEND_CMD_PTR(load, LOAD) APPEND_CMD_PTR(fifo_load, FIFO_LOAD) APPEND_CMD_PTR(fifo_store, FIFO_STORE) -static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, - u32 options) +static inline void append_store(u32 * const desc, dma_addr_t ptr, + unsigned int len, u32 options) { u32 cmd_src; @@ -249,7 +252,8 @@ static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, } #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ -static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ +static inline void append_seq_##cmd##_ptr_intlen(u32 * const desc, \ + dma_addr_t ptr, \ unsigned int len, \ u32 options) \ { \ @@ -263,7 +267,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN) APPEND_SEQ_PTR_INTLEN(out, OUT) #define APPEND_CMD_PTR_TO_IMM(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ unsigned int len, u32 options) \ { \ PRINT_POS; \ @@ -273,7 +277,7 @@ APPEND_CMD_PTR_TO_IMM(load, LOAD); APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD); #define APPEND_CMD_PTR_EXTLEN(cmd, op) \ -static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \ +static inline void append_##cmd##_extlen(u32 * const desc, dma_addr_t ptr, \ unsigned int len, u32 options) \ { \ PRINT_POS; \ @@ -287,7 +291,7 @@ APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR) * the size of its type */ #define APPEND_CMD_PTR_LEN(cmd, op, type) \ -static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \ +static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \ type len, u32 options) \ { \ PRINT_POS; \ @@ -304,7 +308,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32) * from length of immediate data provided, e.g., split keys */ #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ unsigned int data_len, \ unsigned int len, u32 options) \ { \ @@ -315,7 +319,7 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ APPEND_CMD_PTR_TO_IMM2(key, KEY); #define APPEND_CMD_RAW_IMM(cmd, op, type) \ -static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \ +static inline void append_##cmd##_imm_##type(u32 * const desc, type immediate, \ u32 options) \ { \ PRINT_POS; \ @@ -426,3 +430,64 @@ do { \ APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data) #define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \ APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, 
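The desc_constr.h changes in this chunk only add "u32 * const desc" to the inline helpers: the pointer argument can no longer be retargeted inside the helper, while the descriptor words it points to remain writable. The construction idiom is therefore unchanged — initialize a header, then append commands, each append_* call bumping the length stored in the header word. A minimal sketch of that idiom, mirroring the RNG shared descriptor shown earlier in this patch:

u32 desc[MAX_CAAM_DESCSIZE];            /* CAAM descriptors are at most 64 words */

init_sh_desc(desc, HDR_SHARE_SERIAL);
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);

/* the length lives in the header word, so it can be read back at any point */
pr_debug("shared descriptor: %d words, %d bytes\n",
         desc_len(desc), desc_bytes(desc));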
data) + +/** + * struct alginfo - Container for algorithm details + * @algtype: algorithm selector; for valid values, see documentation of the + * functions where it is used. + * @keylen: length of the provided algorithm key, in bytes + * @keylen_pad: padded length of the provided algorithm key, in bytes + * @key: address where algorithm key resides; virtual address if key_inline + * is true, dma (bus) address if key_inline is false. + * @key_inline: true - key can be inlined in the descriptor; false - key is + * referenced by the descriptor + */ +struct alginfo { + u32 algtype; + unsigned int keylen; + unsigned int keylen_pad; + union { + dma_addr_t key_dma; + void *key_virt; + }; + bool key_inline; +}; + +/** + * desc_inline_query() - Provide indications on which data items can be inlined + * and which shall be referenced in a shared descriptor. + * @sd_base_len: Shared descriptor base length - bytes consumed by the commands, + * excluding the data items to be inlined (or corresponding + * pointer if an item is not inlined). Each cnstr_* function that + * generates descriptors should have a define mentioning + * corresponding length. + * @jd_len: Maximum length of the job descriptor(s) that will be used + * together with the shared descriptor. + * @data_len: Array of lengths of the data items trying to be inlined + * @inl_mask: 32bit mask with bit x = 1 if data item x can be inlined, 0 + * otherwise. + * @count: Number of data items (size of @data_len array); must be <= 32 + * + * Return: 0 if data can be inlined / referenced, negative value if not. If 0, + * check @inl_mask for details. + */ +static inline int desc_inline_query(unsigned int sd_base_len, + unsigned int jd_len, unsigned int *data_len, + u32 *inl_mask, unsigned int count) +{ + int rem_bytes = (int)(CAAM_DESC_BYTES_MAX - sd_base_len - jd_len); + unsigned int i; + + *inl_mask = 0; + for (i = 0; (i < count) && (rem_bytes > 0); i++) { + if (rem_bytes - (int)(data_len[i] + + (count - i - 1) * CAAM_PTR_SZ) >= 0) { + rem_bytes -= data_len[i]; + *inl_mask |= (1 << i); + } else { + rem_bytes -= CAAM_PTR_SZ; + } + } + + return (rem_bytes >= 0) ? 
0 : -1; +} diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 33e41ea83fcc..6f44ccb55c63 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -6,9 +6,7 @@ #include "compat.h" #include "regs.h" -#include "intern.h" #include "desc.h" -#include "jr.h" #include "error.h" static const struct { @@ -146,10 +144,9 @@ static void report_ccb_status(struct device *jrdev, const u32 status, strlen(rng_err_id_list[err_id])) { /* RNG-only error */ err_str = rng_err_id_list[err_id]; - } else if (err_id < ARRAY_SIZE(err_id_list)) + } else { err_str = err_id_list[err_id]; - else - snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id); + } /* * CCB ICV check failures are part of normal operation life; diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 5d4c05074a5c..dbed8baeebe5 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -41,6 +41,7 @@ struct caam_drv_private_jr { struct device *dev; int ridx; struct caam_job_ring __iomem *rregs; /* JobR's register space */ + struct tasklet_struct irqtask; int irq; /* One per queue */ /* Number of scatterlist crypt transforms active on the JobR */ @@ -65,7 +66,6 @@ struct caam_drv_private_jr { struct caam_drv_private { struct device *dev; - struct platform_device **jrpdev; /* Alloc'ed array per sub-device */ struct platform_device *pdev; /* Physical-presence section */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 757c27f9953d..27631000b9f8 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev) ret = caam_reset_hw_jr(dev); + tasklet_kill(&jrp->irqtask); + /* Release interrupt */ free_irq(jrp->irq, dev); @@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* * Check the output ring for ready responses, kick - * the threaded irq if jobs done. + * tasklet if jobs done. */ irqstate = rd_reg32(&jrp->rregs->jrintstatus); if (!irqstate) @@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); - return IRQ_WAKE_THREAD; + preempt_disable(); + tasklet_schedule(&jrp->irqtask); + preempt_enable(); + + return IRQ_HANDLED; } -static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) +/* Deferred service handler, run as interrupt-fired tasklet */ +static void caam_jr_dequeue(unsigned long devarg) { int hw_idx, sw_idx, i, head, tail; - struct device *dev = st_dev; + struct device *dev = (struct device *)devarg; struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); u32 *userdesc, userstatus; @@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) /* reenable / unmask IRQs */ clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); - - return IRQ_HANDLED; } /** @@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev) jrp = dev_get_drvdata(dev); + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev); + /* Connect job ring interrupt handler. 
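The jr.c hunks above convert the job ring's threaded interrupt into a hard handler plus tasklet: caam_jr_interrupt() now only acknowledges/masks the IRQ and schedules the tasklet, the former thread function becomes caam_jr_dequeue(), and tasklet_kill() is added on teardown. Stripped of the ring bookkeeping, the wiring follows the usual pattern (my_jr and the helper names are illustrative, not the driver's):

#include <linux/device.h>
#include <linux/interrupt.h>

struct my_jr {
        struct tasklet_struct irqtask;
        /* ring state ... */
};

static void my_dequeue(unsigned long devarg)
{
        struct device *dev = (struct device *)devarg;

        /* drain completed jobs, run user callbacks, unmask the IRQ ... */
}

static irqreturn_t my_irq_handler(int irq, void *st_dev)
{
        struct my_jr *jrp = dev_get_drvdata(st_dev);

        /* ACK/mask the interrupt in hardware here, then defer the heavy work */
        tasklet_schedule(&jrp->irqtask);
        return IRQ_HANDLED;
}

static int my_jr_init(struct device *dev, struct my_jr *jrp, int irq)
{
        tasklet_init(&jrp->irqtask, my_dequeue, (unsigned long)dev);
        return request_irq(irq, my_irq_handler, IRQF_SHARED,
                           dev_name(dev), dev);
}

static void my_jr_exit(struct device *dev, struct my_jr *jrp, int irq)
{
        free_irq(irq, dev);             /* no more hard interrupts ... */
        tasklet_kill(&jrp->irqtask);    /* ... then no more tasklet runs */
}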
*/ - error = request_threaded_irq(jrp->irq, caam_jr_interrupt, - caam_jr_threadirq, IRQF_SHARED, - dev_name(dev), dev); + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, + dev_name(dev), dev); if (error) { dev_err(dev, "can't connect JobR %d interrupt (%d)\n", jrp->ridx, jrp->irq); @@ -454,6 +460,7 @@ out_free_inpring: out_free_irq: free_irq(jrp->irq, dev); out_kill_deq: + tasklet_kill(&jrp->irqtask); return error; } @@ -489,15 +496,24 @@ static int caam_jr_probe(struct platform_device *pdev) return -ENOMEM; } - jrpriv->rregs = (struct caam_job_ring __force *)ctrl; + jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl; - if (sizeof(dma_addr_t) == sizeof(u64)) + if (sizeof(dma_addr_t) == sizeof(u64)) { if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring")) - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(40)); + error = dma_set_mask_and_coherent(jrdev, + DMA_BIT_MASK(40)); else - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(36)); - else - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32)); + error = dma_set_mask_and_coherent(jrdev, + DMA_BIT_MASK(36)); + } else { + error = dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32)); + } + if (error) { + dev_err(jrdev, "dma_set_mask_and_coherent failed (%d)\n", + error); + iounmap(ctrl); + return error; + } /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index e1eaf4ff9762..1bb2816a9b4d 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -10,6 +10,36 @@ #include "desc_constr.h" #include "key_gen.h" +/** + * split_key_len - Compute MDHA split key length for a given algorithm + * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1, + * SHA224, SHA384, SHA512. + * + * Return: MDHA split key length + */ +static inline u32 split_key_len(u32 hash) +{ + /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ + static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; + u32 idx; + + idx = (hash & OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT; + + return (u32)(mdpadlen[idx] * 2); +} + +/** + * split_key_pad_len - Compute MDHA split key pad length for a given algorithm + * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1, + * SHA224, SHA384, SHA512. 
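split_key_len() above doubles the MDHA pad size of the selected algorithm and split_key_pad_len() rounds the result up to a 16-byte boundary, so the keylen/keylen_pad values that gen_split_key() stores in the alginfo work out as follows:

/*
 * Worked examples (keylen = mdpadlen[idx] * 2, keylen_pad = ALIGN(keylen, 16)):
 *
 *   MD5:     keylen =  32, keylen_pad =  32
 *   SHA-1:   keylen =  40, keylen_pad =  48
 *   SHA-224: keylen =  64, keylen_pad =  64
 *   SHA-256: keylen =  64, keylen_pad =  64
 *   SHA-384: keylen = 128, keylen_pad = 128
 *   SHA-512: keylen = 128, keylen_pad = 128
 */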
+ * + * Return: MDHA split key pad length + */ +static inline u32 split_key_pad_len(u32 hash) +{ + return ALIGN(split_key_len(hash), 16); +} + void split_key_done(struct device *dev, u32 *desc, u32 err, void *context) { @@ -41,15 +71,29 @@ Split key generation----------------------------------------------- [06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 @0xffe04000 */ -int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, - int split_key_pad_len, const u8 *key_in, u32 keylen, - u32 alg_op) +int gen_split_key(struct device *jrdev, u8 *key_out, + struct alginfo * const adata, const u8 *key_in, u32 keylen, + int max_keylen) { u32 *desc; struct split_key_result result; dma_addr_t dma_addr_in, dma_addr_out; int ret = -ENOMEM; + adata->keylen = split_key_len(adata->algtype & OP_ALG_ALGSEL_MASK); + adata->keylen_pad = split_key_pad_len(adata->algtype & + OP_ALG_ALGSEL_MASK); + +#ifdef DEBUG + dev_err(jrdev, "split keylen %d split keylen padded %d\n", + adata->keylen, adata->keylen_pad); + print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1); +#endif + + if (adata->keylen_pad > max_keylen) + return -EINVAL; + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); @@ -63,7 +107,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, goto out_free; } - dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len, + dma_addr_out = dma_map_single(jrdev, key_out, adata->keylen_pad, DMA_FROM_DEVICE); if (dma_mapping_error(jrdev, dma_addr_out)) { dev_err(jrdev, "unable to map key output memory\n"); @@ -74,7 +118,9 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG); /* Sets MDHA up into an HMAC-INIT */ - append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT); + append_operation(desc, (adata->algtype & OP_ALG_ALGSEL_MASK) | + OP_ALG_AAI_HMAC | OP_TYPE_CLASS2_ALG | OP_ALG_DECRYPT | + OP_ALG_AS_INIT); /* * do a FIFO_LOAD of zero, this will trigger the internal key expansion @@ -87,7 +133,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, * FIFO_STORE with the explicit split-key content store * (0x26 output type) */ - append_fifo_store(desc, dma_addr_out, split_key_len, + append_fifo_store(desc, dma_addr_out, adata->keylen, LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); #ifdef DEBUG @@ -108,11 +154,11 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key_out, - split_key_pad_len, 1); + adata->keylen_pad, 1); #endif } - dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len, + dma_unmap_single(jrdev, dma_addr_out, adata->keylen_pad, DMA_FROM_DEVICE); out_unmap_in: dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h index c5588f6d8109..4628f389eb64 100644 --- a/drivers/crypto/caam/key_gen.h +++ b/drivers/crypto/caam/key_gen.h @@ -12,6 +12,6 @@ struct split_key_result { void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); -int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, - int split_key_pad_len, const u8 *key_in, u32 keylen, - u32 alg_op); +int gen_split_key(struct device *jrdev, u8 *key_out, + struct alginfo * const adata, const u8 *key_in, u32 
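Descriptor constructors combine the keylen_pad that gen_split_key() now records with desc_inline_query() from desc_constr.h (added earlier in this patch) to decide whether the split key can be embedded in the shared descriptor or must be referenced by bus address. A hedged single-item sketch (sd_base_len/jd_len stand in for the per-descriptor DESC_* length constants the real constructors pass; the function itself is mine):

static int setup_key_reference(struct alginfo *adata, void *key_virt,
                               dma_addr_t key_dma, unsigned int sd_base_len,
                               unsigned int jd_len)
{
        unsigned int data_len = adata->keylen_pad;
        u32 inl_mask;

        /* negative return: not even a pointer to the key fits in the descriptor */
        if (desc_inline_query(sd_base_len, jd_len, &data_len, &inl_mask, 1) < 0)
                return -EINVAL;

        adata->key_inline = !!(inl_mask & 1);   /* bit 0 -> item 0 may be inlined */
        if (adata->key_inline)
                adata->key_virt = key_virt;     /* embed the key bytes */
        else
                adata->key_dma = key_dma;       /* reference them by bus address */

        return 0;
}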
keylen, + int max_keylen); diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 41cd5a356d05..c6adad09c972 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -7,7 +7,11 @@ #include "regs.h" -struct sec4_sg_entry; +struct sec4_sg_entry { + u64 ptr; + u32 len; + u32 bpid_offset; +}; /* * convert single dma address to h/w link table format @@ -69,14 +73,3 @@ static inline struct sec4_sg_entry *sg_to_sec4_sg_len( } while (total); return sec4_sg_ptr - 1; } - -/* derive number of elements in scatterlist, but return 0 for 1 */ -static inline int sg_count(struct scatterlist *sg_list, int nbytes) -{ - int sg_nents = sg_nents_for_len(sg_list, nbytes); - - if (likely(sg_nents == 1)) - return 0; - - return sg_nents; -} diff --git a/drivers/crypto/cavium/cpt/Kconfig b/drivers/crypto/cavium/cpt/Kconfig new file mode 100644 index 000000000000..cbd51b1aa046 --- /dev/null +++ b/drivers/crypto/cavium/cpt/Kconfig @@ -0,0 +1,17 @@ +# +# Cavium crypto device configuration +# + +config CRYPTO_DEV_CPT + tristate + +config CAVIUM_CPT + tristate "Cavium Cryptographic Accelerator driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + select CRYPTO_DEV_CPT + help + Support for Cavium CPT block found in octeon-tx series of + processors. + + To compile this as a module, choose M here. diff --git a/drivers/crypto/cavium/cpt/Makefile b/drivers/crypto/cavium/cpt/Makefile new file mode 100644 index 000000000000..dbf055e14622 --- /dev/null +++ b/drivers/crypto/cavium/cpt/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_CAVIUM_CPT) += cptpf.o cptvf.o +cptpf-objs := cptpf_main.o cptpf_mbox.o +cptvf-objs := cptvf_main.o cptvf_reqmanager.o cptvf_mbox.o cptvf_algs.o diff --git a/drivers/crypto/cavium/cpt/cpt_common.h b/drivers/crypto/cavium/cpt/cpt_common.h new file mode 100644 index 000000000000..225078d03773 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_common.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#ifndef __CPT_COMMON_H +#define __CPT_COMMON_H + +#include <asm/byteorder.h> +#include <linux/delay.h> +#include <linux/pci.h> + +#include "cpt_hw_types.h" + +/* Device ID */ +#define CPT_81XX_PCI_PF_DEVICE_ID 0xa040 +#define CPT_81XX_PCI_VF_DEVICE_ID 0xa041 + +/* flags to indicate the features supported */ +#define CPT_FLAG_SRIOV_ENABLED BIT(1) +#define CPT_FLAG_VF_DRIVER BIT(2) +#define CPT_FLAG_DEVICE_READY BIT(3) + +#define cpt_sriov_enabled(cpt) ((cpt)->flags & CPT_FLAG_SRIOV_ENABLED) +#define cpt_vf_driver(cpt) ((cpt)->flags & CPT_FLAG_VF_DRIVER) +#define cpt_device_ready(cpt) ((cpt)->flags & CPT_FLAG_DEVICE_READY) + +#define CPT_MBOX_MSG_TYPE_ACK 1 +#define CPT_MBOX_MSG_TYPE_NACK 2 +#define CPT_MBOX_MSG_TIMEOUT 2000 +#define VF_STATE_DOWN 0 +#define VF_STATE_UP 1 + +/* + * CPT Registers map for 81xx + */ + +/* PF registers */ +#define CPTX_PF_CONSTANTS(a) (0x0ll + ((u64)(a) << 36)) +#define CPTX_PF_RESET(a) (0x100ll + ((u64)(a) << 36)) +#define CPTX_PF_DIAG(a) (0x120ll + ((u64)(a) << 36)) +#define CPTX_PF_BIST_STATUS(a) (0x160ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_CTL(a) (0x200ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_FLIP(a) (0x210ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT(a) (0x220ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT_W1S(a) (0x230ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1S(a) (0x240ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1C(a) (0x250ll + ((u64)(a) << 36)) +#define CPTX_PF_MBOX_INTX(a, b) \ + (0x400ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_INT_W1SX(a, b) \ + (0x420ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1CX(a, b) \ + (0x440ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1SX(a, b) \ + (0x460ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INT(a) (0x500ll + 0x1000000000ll * ((a) & 0x1)) +#define CPTX_PF_EXEC_INT_W1S(a) (0x520ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1C(a) (0x540ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1S(a) (0x560ll + ((u64)(a) << 36)) +#define CPTX_PF_GX_EN(a, b) \ + (0x600ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INFO(a) (0x700ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_BUSY(a) (0x800ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO0(a) (0x900ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO1(a) (0x910ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_REQ_PC(a) (0x10000ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_LATENCY_PC(a) \ + (0x10020ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_REQ_PC(a) (0x10040ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_LATENCY_PC(a) (0x10060ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_UC_PC(a) (0x10080ll + ((u64)(a) << 36)) +#define CPTX_PF_ACTIVE_CYCLES_PC(a) (0x10100ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CTL(a) (0x4000000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_STATUS(a) (0x4000008ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CLK(a) (0x4000010ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CTL(a) (0x4000018ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_DATA(a) (0x4000020ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_BIST_STATUS(a) (0x4000028ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_REQ_TIMER(a) (0x4000030ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_MEM_CTL(a) (0x4000038ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_PERF_CTL(a) (0x4001000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CNTX(a, b) \ + (0x4001100ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXE_PERF_EVENT_CNT(a) (0x4001180ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_EPCI_INBX_CNT(a, b) \ + (0x4001200ll + ((u64)(a) << 36) + ((b) << 
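The CPTX_* register macros above bake the addressing scheme into the offset itself: the block/node index lands in bits 36 and up ("(a) << 36"), per-queue registers are spaced 1 MiB apart ("(b) << 20") and per-VF mailbox registers 8 bytes apart ("(b) << 3"). Combined with the cpt_read_csr64()/cpt_write_csr64() accessors defined further down in this header, they are used roughly as follows (reg_base and the zero value written are illustrative):

/* Clear the control register of queue 'qno' on CPT block 0 and read it back.
 * 'reg_base' is assumed to be the PF's ioremapped BAR.
 */
static u64 cpt_clear_queue_ctl(u8 __iomem *reg_base, int qno)
{
        cpt_write_csr64(reg_base, CPTX_PF_QX_CTL(0, qno), 0ull);
        return cpt_read_csr64(reg_base, CPTX_PF_QX_CTL(0, qno));
}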
3)) +#define CPTX_PF_EXE_EPCI_OUTBX_CNT(a, b) \ + (0x4001240ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_ENGX_UCODE_BASE(a, b) \ + (0x4002000ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_QX_CTL(a, b) \ + (0x8000000ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_GMCTL(a, b) \ + (0x8000020ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_CTL2(a, b) \ + (0x8000100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_VFX_MBOXX(a, b, c) \ + (0x8001000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 8)) + +/* VF registers */ +#define CPTX_VQX_CTL(a, b) (0x100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_SADDR(a, b) (0x200ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_WAIT(a, b) (0x400ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_INPROG(a, b) (0x410ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE(a, b) (0x420ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ACK(a, b) (0x440ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1S(a, b) (0x460ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1C(a, b) (0x468ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1S(a, b) (0x470ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1C(a, b) (0x478ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT(a, b) (0x500ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT_W1S(a, b) (0x508ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1S(a, b) (0x510ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1C(a, b) (0x518ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DOORBELL(a, b) (0x600ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VFX_PF_MBOXX(a, b, c) \ + (0x1000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 3)) + +enum vftype { + AE_TYPES = 1, + SE_TYPES = 2, + BAD_CPT_TYPES, +}; + +/* Max CPT devices supported */ +enum cpt_mbox_opcode { + CPT_MSG_VF_UP = 1, + CPT_MSG_VF_DOWN, + CPT_MSG_READY, + CPT_MSG_QLEN, + CPT_MSG_QBIND_GRP, + CPT_MSG_VQ_PRIORITY, +}; + +/* CPT mailbox structure */ +struct cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +/* Register read/write APIs */ +static inline void cpt_write_csr64(u8 __iomem *hw_addr, u64 offset, + u64 val) +{ + writeq(val, hw_addr + offset); +} + +static inline u64 cpt_read_csr64(u8 __iomem *hw_addr, u64 offset) +{ + return readq(hw_addr + offset); +} +#endif /* __CPT_COMMON_H */ diff --git a/drivers/crypto/cavium/cpt/cpt_hw_types.h b/drivers/crypto/cavium/cpt/cpt_hw_types.h new file mode 100644 index 000000000000..279669494196 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_hw_types.h @@ -0,0 +1,658 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPT_HW_TYPES_H +#define __CPT_HW_TYPES_H + +#include "cpt_common.h" + +/** + * Enumeration cpt_comp_e + * + * CPT Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_LAST_ENTRY = 0xFF +}; + +/** + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. 
+ * cpt_inst_s_s
+ * Word 0
+ * doneint:1 Done interrupt.
+ *	0 = No interrupts related to this instruction.
+ *	1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be
+ *	incremented, and based on the rules described there an interrupt may
+ *	occur.
+ * Word 1
+ * res_addr [127:64] Result IOVA.
+ *	If nonzero, specifies where to write CPT_RES_S.
+ *	If zero, no result structure will be written.
+ *	Address must be 16-byte aligned.
+ *	Bits <63:49> are ignored by hardware; software should use a
+ *	sign-extended bit <48> for forward compatibility.
+ * Word 2
+ * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when
+ *	CPT submits work to SSO.
+ *	For the SSO to not discard the add-work request, FPA_PF_MAP() must map
+ *	[GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid.
+ * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT
+ *	submits work to SSO.
+ * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT
+ *	submits work to SSO.
+ * Word 3
+ * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a
+ *	work-queue entry that CPT submits work to SSO after all context,
+ *	output data, and result write operations are visible to other
+ *	CNXXXX units and the cores. Bits <2:0> must be zero.
+ *	Bits <63:49> are ignored by hardware; software should
+ *	use a sign-extended bit <48> for forward compatibility.
+ *	Internal:
+ *	Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0.
+ * Word 4
+ * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE.
+ * Word 5
+ * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE.
+ * Word 6
+ * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE.
+ * Word 7
+ * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE.
+ *
+ */
+union cpt_inst_s {
+	u64 u[8];
+	struct cpt_inst_s_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_17_63:47;
+		u64 doneint:1;
+		u64 reserved_0_15:16;
+#else /* Word 0 - Little Endian */
+		u64 reserved_0_15:16;
+		u64 doneint:1;
+		u64 reserved_17_63:47;
+#endif /* Word 0 - End */
+		u64 res_addr;
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */
+		u64 reserved_172_191:20;
+		u64 grp:10;
+		u64 tt:2;
+		u64 tag:32;
+#else /* Word 2 - Little Endian */
+		u64 tag:32;
+		u64 tt:2;
+		u64 grp:10;
+		u64 reserved_172_191:20;
+#endif /* Word 2 - End */
+		u64 wq_ptr;
+		u64 ei0;
+		u64 ei1;
+		u64 ei2;
+		u64 ei3;
+	} s;
+};
+
+/**
+ * Structure cpt_res_s
+ *
+ * CPT Result Structure
+ * The CPT coprocessor writes the result structure after it completes a
+ * CPT_INST_S instruction. The result structure is exactly 16 bytes, and
+ * each instruction completion produces exactly one result structure.
+ *
+ * This structure is stored in memory as little-endian unless
+ * CPT()_PF_Q()_CTL[INST_BE] is set.
+ * cpt_res_s_s
+ * Word 0
+ * doneint:1 [16:16] Done interrupt. This bit is copied from the
+ *	corresponding instruction's CPT_INST_S[DONEINT].
+ * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor
+ *	for the associated instruction, as enumerated by CPT_COMP_E.
+ *	Core software may write the memory location containing [COMPCODE] to
+ *	0x0 before ringing the doorbell, and then poll for completion by
+ *	checking for a nonzero value.
+ *	Once the core observes a nonzero [COMPCODE] value in this case, the
+ *	CPT coprocessor will have also completed L2/DRAM write operations.
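+ *
+ *	A minimal polling sketch of the scheme described above (illustrative
+ *	only; "res" is assumed to point to the 16-byte-aligned result buffer
+ *	passed in CPT_INST_S word 1, and the instruction submission and
+ *	doorbell write are elided):
+ *
+ *		res->s.compcode = CPT_COMP_E_NOTDONE;
+ *		... submit the instruction and ring CPT()_VQ()_DOORBELL ...
+ *		while (READ_ONCE(res->s.compcode) == CPT_COMP_E_NOTDONE)
+ *			cpu_relax();
+ *		if (res->s.compcode != CPT_COMP_E_GOOD)
+ *			... handle CPT_COMP_E_FAULT or CPT_COMP_E_SWERR ...
+ *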
+ * Word 1 + * reserved + * + */ +union cpt_res_s { + u64 u[2]; + struct cpt_res_s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/** + * Register (NCB) cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union cptx_pf_bist_status { + u64 u; + struct cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. + */ +union cptx_pf_constants { + u64 u; + struct cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union cptx_pf_exe_bist_status { + u64 u; + struct cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. 
+ * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */ +union cptx_pf_qx_ctl { + u64 u; + struct cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. 
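+ *
+ *	A minimal programming sketch (illustrative only; "dma_addr" is
+ *	assumed to be the 64-byte-aligned IOVA of the first command chunk
+ *	for queue 0 on node 0, "reg_base" the mapped BAR0):
+ *
+ *		cpt_write_csr64(reg_base, CPTX_VQX_SADDR(0, 0), dma_addr);
+ *
+ *	followed by enabling the queue via CPT()_VQ()_CTL[ENA] and ringing
+ *	CPT()_VQ()_DOORBELL as instructions are queued.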
+ * + */ +union cptx_vqx_saddr { + u64 u; + struct cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union cptx_vqx_misc_ena_w1s { + u64 u; + struct cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8. + */ +union cptx_vqx_doorbell { + u64 u; + struct cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. 
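+ *
+ *	A minimal quiesce sketch for the two steps above (illustrative only;
+ *	queue 0 on node 0, "reg_base" is the mapped BAR0):
+ *
+ *		cpt_write_csr64(reg_base, CPTX_VQX_CTL(0, 0), 0);
+ *		while (cpt_read_csr64(reg_base, CPTX_VQX_INPROG(0, 0)) & 0xff)
+ *			cpu_relax();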
+ */ +union cptx_vqx_inprog { + u64 u; + struct cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. + * + */ +union cptx_vqx_misc_int { + u64 u; + struct cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union cptx_vqx_done_ack { + u64 u; + struct cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. 
+ * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by insuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */ +union cptx_vqx_done { + u64 u; + struct cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union cptx_vqx_done_wait { + u64 u; + struct cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union cptx_vqx_done_ena_w1s { + u64 u; + struct cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. 
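+ *
+ *	For example (illustrative only; queue 0 on node 0, assumes the queue
+ *	is quiescent and CPT()_VQ()_SADDR has been programmed beforehand):
+ *
+ *		union cptx_vqx_ctl ctl = { 0 };
+ *
+ *		ctl.s.ena = 1;
+ *		cpt_write_csr64(reg_base, CPTX_VQX_CTL(0, 0), ctl.u);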
+ */ +union cptx_vqx_ctl { + u64 u; + struct cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; +#endif /*__CPT_HW_TYPES_H*/ diff --git a/drivers/crypto/cavium/cpt/cptpf.h b/drivers/crypto/cavium/cpt/cptpf.h new file mode 100644 index 000000000000..c0556c5f63c9 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPTPF_H +#define __CPTPF_H + +#include "cpt_common.h" + +#define CSR_DELAY 30 +#define CPT_MAX_CORE_GROUPS 8 +#define CPT_MAX_SE_CORES 10 +#define CPT_MAX_AE_CORES 6 +#define CPT_MAX_TOTAL_CORES (CPT_MAX_SE_CORES + CPT_MAX_AE_CORES) +#define CPT_MAX_VF_NUM 16 +#define CPT_PF_MSIX_VECTORS 3 +#define CPT_PF_INT_VEC_E_MBOXX(a) (0x02 + (a)) +#define CPT_UCODE_VERSION_SZ 32 +struct cpt_device; + +struct microcode { + u8 is_mc_valid; + u8 is_ae; + u8 group; + u8 num_cores; + u32 code_size; + u64 core_mask; + u8 version[CPT_UCODE_VERSION_SZ]; + /* Base info */ + dma_addr_t phys_base; + void *code; +}; + +struct cpt_vf_info { + u8 state; + u8 priority; + u8 id; + u32 qlen; +}; + +/** + * cpt device structure + */ +struct cpt_device { + u16 flags; /* Flags to hold device status bits */ + u8 num_vf_en; /* Number of VFs enabled (0...CPT_MAX_VF_NUM) */ + struct cpt_vf_info vfinfo[CPT_MAX_VF_NUM]; /* Per VF info */ + + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* pci device handle */ + + struct microcode mcode[CPT_MAX_CORE_GROUPS]; + u8 next_mc_idx; /* next microcode index */ + u8 next_group; + u8 max_se_cores; + u8 max_ae_cores; +}; + +void cpt_mbox_intr_handler(struct cpt_device *cpt, int mbx); +#endif /* __CPTPF_H */ diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c new file mode 100644 index 000000000000..4119c40e7c4b --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -0,0 +1,670 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/printk.h> +#include <linux/version.h> + +#include "cptpf.h" + +#define DRV_NAME "thunder-cpt" +#define DRV_VERSION "1.0" + +static u32 num_vfs = 4; /* Default 4 VF enabled */ +module_param(num_vfs, uint, 0444); +MODULE_PARM_DESC(num_vfs, "Number of VFs to enable(1-16)"); + +/* + * Disable cores specified by coremask + */ +static void cpt_disable_cores(struct cpt_device *cpt, u64 coremask, + u8 type, u8 grp) +{ + u64 pf_exe_ctl; + u32 timeout = 100; + u64 grpmask = 0; + struct device *dev = &cpt->pdev->dev; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + /* Disengage the cores from groups */ + grpmask = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (grpmask & ~coremask)); + udelay(CSR_DELAY); + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (grp & coremask) { + dev_err(dev, "Cores still busy %llx", coremask); + grp = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + if (timeout--) + break; + + udelay(CSR_DELAY); + } + + /* Disable the cores */ + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl & ~coremask)); + udelay(CSR_DELAY); +} + +/* + * Enable cores specified by coremask + */ +static void cpt_enable_cores(struct cpt_device *cpt, u64 coremask, + u8 type) +{ + u64 pf_exe_ctl; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl | coremask)); + udelay(CSR_DELAY); +} + +static void cpt_configure_group(struct cpt_device *cpt, u8 grp, + u64 coremask, u8 type) +{ + u64 pf_gx_en = 0; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_gx_en = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (pf_gx_en | coremask)); + udelay(CSR_DELAY); +} + +static void cpt_disable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Clear mbox(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1CX(0, 0), ~0ull); +} + +static void cpt_disable_ecc_interrupts(struct cpt_device *cpt) +{ + /* Clear ecc(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_ECC0_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_exec_interrupts(struct cpt_device *cpt) +{ + /* Clear exec interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXEC_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_all_interrupts(struct cpt_device *cpt) +{ + cpt_disable_mbox_interrupts(cpt); + cpt_disable_ecc_interrupts(cpt); + cpt_disable_exec_interrupts(cpt); +} + +static void cpt_enable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Set mbox(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1SX(0, 0), ~0ull); +} + +static int cpt_load_microcode(struct cpt_device *cpt, struct microcode *mcode) +{ + int ret = 0, core = 0, shift = 0; + u32 total_cores = 0; + struct device *dev = &cpt->pdev->dev; + + if (!mcode || !mcode->code) { + dev_err(dev, "Either the mcode is null or data is NULL\n"); + return -EINVAL; + } + + if (mcode->code_size == 0) { + dev_err(dev, "microcode size is 0\n"); + return -EINVAL; + } + + /* Assumes 0-9 are SE cores for 
UCODE_BASE registers and
+	 * AE core bases follow
+	 */
+	if (mcode->is_ae) {
+		core = CPT_MAX_SE_CORES; /* start counting from 10 */
+		total_cores = CPT_MAX_TOTAL_CORES; /* up to 15 */
+	} else {
+		core = 0; /* start counting from 0 */
+		total_cores = CPT_MAX_SE_CORES; /* up to 9 */
+	}
+
+	/* Point to microcode for each core of the group */
+	for (; core < total_cores; core++, shift++) {
+		if (mcode->core_mask & (1 << shift)) {
+			cpt_write_csr64(cpt->reg_base,
+					CPTX_PF_ENGX_UCODE_BASE(0, core),
+					(u64)mcode->phys_base);
+		}
+	}
+	return ret;
+}
+
+static int do_cpt_init(struct cpt_device *cpt, struct microcode *mcode)
+{
+	int ret = 0;
+	struct device *dev = &cpt->pdev->dev;
+
+	/* Make device not ready */
+	cpt->flags &= ~CPT_FLAG_DEVICE_READY;
+	/* Disable all PF interrupts */
+	cpt_disable_all_interrupts(cpt);
+	/* Calculate mcode group and coremasks */
+	if (mcode->is_ae) {
+		if (mcode->num_cores > cpt->max_ae_cores) {
+			dev_err(dev, "Requested more cores than available AE cores\n");
+			ret = -EINVAL;
+			goto cpt_init_fail;
+		}
+
+		if (cpt->next_group >= CPT_MAX_CORE_GROUPS) {
+			dev_err(dev, "Can't load, all eight microcode groups in use");
+			return -ENFILE;
+		}
+
+		mcode->group = cpt->next_group;
+		/* Convert requested cores to mask */
+		mcode->core_mask = GENMASK(mcode->num_cores, 0);
+		cpt_disable_cores(cpt, mcode->core_mask, AE_TYPES,
+				  mcode->group);
+		/* Load microcode for AE engines */
+		ret = cpt_load_microcode(cpt, mcode);
+		if (ret) {
+			dev_err(dev, "Microcode load failed for %s\n",
+				mcode->version);
+			goto cpt_init_fail;
+		}
+		cpt->next_group++;
+		/* Configure group mask for the mcode */
+		cpt_configure_group(cpt, mcode->group, mcode->core_mask,
+				    AE_TYPES);
+		/* Enable AE cores for the group mask */
+		cpt_enable_cores(cpt, mcode->core_mask, AE_TYPES);
+	} else {
+		if (mcode->num_cores > cpt->max_se_cores) {
+			dev_err(dev, "Requested more cores than available SE cores\n");
+			ret = -EINVAL;
+			goto cpt_init_fail;
+		}
+		if (cpt->next_group >= CPT_MAX_CORE_GROUPS) {
+			dev_err(dev, "Can't load, all eight microcode groups in use");
+			return -ENFILE;
+		}
+
+		mcode->group = cpt->next_group;
+		/* Convert requested cores to mask */
+		mcode->core_mask = GENMASK(mcode->num_cores, 0);
+		cpt_disable_cores(cpt, mcode->core_mask, SE_TYPES,
+				  mcode->group);
+		/* Load microcode for SE engines */
+		ret = cpt_load_microcode(cpt, mcode);
+		if (ret) {
+			dev_err(dev, "Microcode load failed for %s\n",
+				mcode->version);
+			goto cpt_init_fail;
+		}
+		cpt->next_group++;
+		/* Configure group mask for the mcode */
+		cpt_configure_group(cpt, mcode->group, mcode->core_mask,
+				    SE_TYPES);
+		/* Enable SE cores for the group mask */
+		cpt_enable_cores(cpt, mcode->core_mask, SE_TYPES);
+	}
+
+	/* Enable PF mailbox interrupts */
+	cpt_enable_mbox_interrupts(cpt);
+	cpt->flags |= CPT_FLAG_DEVICE_READY;
+
+	return ret;
+
+cpt_init_fail:
+	/* Enable PF mailbox interrupts */
+	cpt_enable_mbox_interrupts(cpt);
+
+	return ret;
+}
+
+struct ucode_header {
+	u8 version[CPT_UCODE_VERSION_SZ];
+	u32 code_length;
+	u32 data_length;
+	u64 sram_address;
+};
+
+static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
+{
+	const struct firmware *fw_entry;
+	struct device *dev = &cpt->pdev->dev;
+	struct ucode_header *ucode;
+	struct microcode *mcode;
+	int j, ret = 0;
+
+	ret = request_firmware(&fw_entry, fw, dev);
+	if (ret)
+		return ret;
+
+	ucode = (struct ucode_header *)fw_entry->data;
+	mcode = &cpt->mcode[cpt->next_mc_idx];
+	memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
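+	/*
+	 * The microcode header appears to store code_length big-endian and
+	 * in 16-bit units, hence the ntohl() conversion and the multiply by
+	 * two below when sizing the DMA buffer.
+	 */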
+ mcode->code_size = ntohl(ucode->code_length) * 2; + if (!mcode->code_size) + return -EINVAL; + + mcode->is_ae = is_ae; + mcode->core_mask = 0ULL; + mcode->num_cores = is_ae ? 6 : 10; + + /* Allocate DMAable space */ + mcode->code = dma_zalloc_coherent(&cpt->pdev->dev, mcode->code_size, + &mcode->phys_base, GFP_KERNEL); + if (!mcode->code) { + dev_err(dev, "Unable to allocate space for microcode"); + return -ENOMEM; + } + + memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)), + mcode->code_size); + + /* Byte swap 64-bit */ + for (j = 0; j < (mcode->code_size / 8); j++) + ((u64 *)mcode->code)[j] = cpu_to_be64(((u64 *)mcode->code)[j]); + /* MC needs 16-bit swap */ + for (j = 0; j < (mcode->code_size / 2); j++) + ((u16 *)mcode->code)[j] = cpu_to_be16(((u16 *)mcode->code)[j]); + + dev_dbg(dev, "mcode->code_size = %u\n", mcode->code_size); + dev_dbg(dev, "mcode->is_ae = %u\n", mcode->is_ae); + dev_dbg(dev, "mcode->num_cores = %u\n", mcode->num_cores); + dev_dbg(dev, "mcode->code = %llx\n", (u64)mcode->code); + dev_dbg(dev, "mcode->phys_base = %llx\n", mcode->phys_base); + + ret = do_cpt_init(cpt, mcode); + if (ret) { + dev_err(dev, "do_cpt_init failed with ret: %d\n", ret); + return ret; + } + + dev_info(dev, "Microcode Loaded %s\n", mcode->version); + mcode->is_mc_valid = 1; + cpt->next_mc_idx++; + release_firmware(fw_entry); + + return ret; +} + +static int cpt_ucode_load(struct cpt_device *cpt) +{ + int ret = 0; + struct device *dev = &cpt->pdev->dev; + + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-ae.out", true); + if (ret) { + dev_err(dev, "ae:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-se.out", false); + if (ret) { + dev_err(dev, "se:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + + return ret; +} + +static irqreturn_t cpt_mbx0_intr_handler(int irq, void *cpt_irq) +{ + struct cpt_device *cpt = (struct cpt_device *)cpt_irq; + + cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void cpt_reset(struct cpt_device *cpt) +{ + cpt_write_csr64(cpt->reg_base, CPTX_PF_RESET(0), 1); +} + +static void cpt_find_max_enabled_cores(struct cpt_device *cpt) +{ + union cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_CONSTANTS(0)); + cpt->max_se_cores = pf_cnsts.s.se; + cpt->max_ae_cores = pf_cnsts.s.ae; +} + +static u32 cpt_check_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_BIST_STATUS(0)); + + return bist_sts.u; +} + +static u64 cpt_check_exe_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXE_BIST_STATUS(0)); + + return bist_sts.u; +} + +static void cpt_disable_all_cores(struct cpt_device *cpt) +{ + u32 grp, timeout = 100; + struct device *dev = &cpt->pdev->dev; + + /* Disengage the cores from groups */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), 0); + udelay(CSR_DELAY); + } + + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (grp) { + dev_err(dev, "Cores still busy"); + grp = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + if (timeout--) + break; + + udelay(CSR_DELAY); + } + /* Disable the cores */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0); +} + +/** + * Ensure all cores are disengaged from all groups by + * calling cpt_disable_all_cores() before calling this + * 
function. + */ +static void cpt_unload_microcode(struct cpt_device *cpt) +{ + u32 grp = 0, core; + + /* Free microcode bases and reset group masks */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + struct microcode *mcode = &cpt->mcode[grp]; + + if (cpt->mcode[grp].code) + dma_free_coherent(&cpt->pdev->dev, mcode->code_size, + mcode->code, mcode->phys_base); + mcode->code = NULL; + } + /* Clear UCODE_BASE registers for all engines */ + for (core = 0; core < CPT_MAX_TOTAL_CORES; core++) + cpt_write_csr64(cpt->reg_base, + CPTX_PF_ENGX_UCODE_BASE(0, core), 0ull); +} + +static int cpt_device_init(struct cpt_device *cpt) +{ + u64 bist; + struct device *dev = &cpt->pdev->dev; + + /* Reset the PF when probed first */ + cpt_reset(cpt); + mdelay(100); + + /*Check BIST status*/ + bist = (u64)cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /*Get CLK frequency*/ + /*Get max enabled cores */ + cpt_find_max_enabled_cores(cpt); + /*Disable all cores*/ + cpt_disable_all_cores(cpt); + /*Reset device parameters*/ + cpt->next_mc_idx = 0; + cpt->next_group = 0; + /* PF is ready */ + cpt->flags |= CPT_FLAG_DEVICE_READY; + + return 0; +} + +static int cpt_register_interrupts(struct cpt_device *cpt) +{ + int ret; + struct device *dev = &cpt->pdev->dev; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, CPT_PF_MSIX_VECTORS, + CPT_PF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, "Request for #%d msix vectors failed\n", + CPT_PF_MSIX_VECTORS); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), + cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) + goto fail; + + /* Enable mailbox interrupt */ + cpt_enable_mbox_interrupts(cpt); + return 0; + +fail: + dev_err(dev, "Request irq failed\n"); + pci_disable_msix(cpt->pdev); + return ret; +} + +static void cpt_unregister_interrupts(struct cpt_device *cpt) +{ + free_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), cpt); + pci_disable_msix(cpt->pdev); +} + +static int cpt_sriov_init(struct cpt_device *cpt, int num_vfs) +{ + int pos = 0; + int err; + u16 total_vf_cnt; + struct pci_dev *pdev = cpt->pdev; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); + return -ENODEV; + } + + cpt->num_vf_en = num_vfs; /* User requested VFs */ + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); + if (total_vf_cnt < cpt->num_vf_en) + cpt->num_vf_en = total_vf_cnt; + + if (!total_vf_cnt) + return 0; + + /*Enabled the available VFs */ + err = pci_enable_sriov(pdev, cpt->num_vf_en); + if (err) { + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", + cpt->num_vf_en); + cpt->num_vf_en = 0; + return err; + } + + /* TODO: Optionally enable static VQ priorities feature */ + + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", + cpt->num_vf_en); + + cpt->flags |= CPT_FLAG_SRIOV_ENABLED; + + return 0; +} + +static int cpt_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_device *cpt; + int err; + + if (num_vfs > 16 || num_vfs < 4) { + dev_warn(dev, "Invalid vf count %d, Resetting it to 4(default)\n", + num_vfs); + num_vfs = 4; + } + + 
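+	/*
+	 * Per-device PF state is allocated with devres, so it is released
+	 * automatically when the device is unbound or probe fails.
+	 */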
cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto cpt_err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cpt_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cpt_err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pcim_iomap(pdev, 0, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cpt_err_release_regions; + } + + /* CPT device HW initialization */ + cpt_device_init(cpt); + + /* Register interrupts */ + err = cpt_register_interrupts(cpt); + if (err) + goto cpt_err_release_regions; + + err = cpt_ucode_load(cpt); + if (err) + goto cpt_err_unregister_interrupts; + + /* Configure SRIOV */ + err = cpt_sriov_init(cpt, num_vfs); + if (err) + goto cpt_err_unregister_interrupts; + + return 0; + +cpt_err_unregister_interrupts: + cpt_unregister_interrupts(cpt); +cpt_err_release_regions: + pci_release_regions(pdev); +cpt_err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void cpt_remove(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + /* Disengage SE and AE cores from all groups*/ + cpt_disable_all_cores(cpt); + /* Unload microcodes */ + cpt_unload_microcode(cpt); + cpt_unregister_interrupts(cpt); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static void cpt_shutdown(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + dev_info(&pdev->dev, "Shutdown device %x:%x.\n", + (u32)pdev->vendor, (u32)pdev->device); + + cpt_unregister_interrupts(cpt); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CPT_81XX_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver cpt_pci_driver = { + .name = DRV_NAME, + .id_table = cpt_id_table, + .probe = cpt_probe, + .remove = cpt_remove, + .shutdown = cpt_shutdown, +}; + +module_pci_driver(cpt_pci_driver); + +MODULE_AUTHOR("George Cherian <george.cherian@cavium.com>"); +MODULE_DESCRIPTION("Cavium Thunder CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, cpt_id_table); diff --git a/drivers/crypto/cavium/cpt/cptpf_mbox.c b/drivers/crypto/cavium/cpt/cptpf_mbox.c new file mode 100644 index 000000000000..20f2c6ee46a5 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_mbox.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ +#include <linux/module.h> +#include "cptpf.h" + +static void cpt_send_msg_to_vf(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1), + mbx->data); + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0), mbx->msg); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void cpt_mbox_send_ack(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = CPT_MBOX_MSG_TYPE_ACK; + cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void cpt_clear_mbox_intr(struct cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0), (1 << vf)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void cpt_cfg_qlen_for_vf(struct cpt_device *cpt, int vf, u32 size) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +/* + * Configure VQ priority + */ +static void cpt_cfg_vq_priority(struct cpt_device *cpt, int vf, u32 pri) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.pri = pri; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +static int cpt_bind_vq_to_grp(struct cpt_device *cpt, u8 q, u8 grp) +{ + struct microcode *mcode = cpt->mcode; + union cptx_pf_qx_ctl pf_qx_ctl; + struct device *dev = &cpt->pdev->dev; + + if (q >= CPT_MAX_VF_NUM) { + dev_err(dev, "Queues are more than cores in the group"); + return -EINVAL; + } + if (grp >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Request group is more than possible groups"); + return -EINVAL; + } + if (grp >= cpt->next_mc_idx) { + dev_err(dev, "Request group is higher than available functional groups"); + return -EINVAL; + } + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q)); + pf_qx_ctl.s.grp = mcode[grp].group; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q), pf_qx_ctl.u); + dev_dbg(dev, "VF %d TYPE %s", q, (mcode[grp].is_ae ? "AE" : "SE")); + + return mcode[grp].is_ae ? 
AE_TYPES : SE_TYPES; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void cpt_handle_mbox_intr(struct cpt_device *cpt, int vf) +{ + struct cpt_vf_info *vfx = &cpt->vfinfo[vf]; + struct cpt_mbox mbx = {}; + int vftype; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0)); + mbx.data = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1)); + dev_dbg(dev, "%s: Mailbox msg 0x%llx from VF%d", __func__, mbx.msg, vf); + switch (mbx.msg) { + case CPT_MSG_VF_UP: + vfx->state = VF_STATE_UP; + try_module_get(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_READY: + mbx.msg = CPT_MSG_READY; + mbx.data = vf; + cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + vfx->state = VF_STATE_DOWN; + module_put(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QLEN: + vfx->qlen = mbx.data; + cpt_cfg_qlen_for_vf(cpt, vf, vfx->qlen); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QBIND_GRP: + vftype = cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != AE_TYPES) && (vftype != SE_TYPES)) + dev_err(dev, "Queue %d binding to group %llu failed", + vf, mbx.data); + else { + dev_dbg(dev, "Queue %d binding to group %llu successful", + vf, mbx.data); + mbx.msg = CPT_MSG_QBIND_GRP; + mbx.data = vftype; + cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case CPT_MSG_VQ_PRIORITY: + vfx->priority = mbx.data; + cpt_cfg_vq_priority(cpt, vf, vfx->priority); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void cpt_mbox_intr_handler (struct cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = cpt_read_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0)); + dev_dbg(&cpt->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); + for (vf = 0; vf < CPT_MAX_VF_NUM; vf++) { + if (intr & (1ULL << vf)) { + dev_dbg(&cpt->pdev->dev, "Intr from VF %d\n", vf); + cpt_handle_mbox_intr(cpt, vf); + cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/cavium/cpt/cptvf.h b/drivers/crypto/cavium/cpt/cptvf.h new file mode 100644 index 000000000000..0a835a07d4f2 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPTVF_H +#define __CPTVF_H + +#include <linux/list.h> +#include "cpt_common.h" + +/* Default command queue length */ +#define CPT_CMD_QLEN 2046 +#define CPT_CMD_QCHUNK_SIZE 1023 + +/* Default command timeout in seconds */ +#define CPT_COMMAND_TIMEOUT 4 +#define CPT_TIMER_THOLD 0xFFFF +#define CPT_NUM_QS_PER_VF 1 +#define CPT_INST_SIZE 64 +#define CPT_NEXT_CHUNK_PTR_SIZE 8 + +#define CPT_VF_MSIX_VECTORS 2 +#define CPT_VF_INTR_MBOX_MASK BIT(0) +#define CPT_VF_INTR_DOVF_MASK BIT(1) +#define CPT_VF_INTR_IRDE_MASK BIT(2) +#define CPT_VF_INTR_NWRP_MASK BIT(3) +#define CPT_VF_INTR_SERR_MASK BIT(4) +#define DMA_DIRECT_DIRECT 0 /* Input DIRECT, Output DIRECT */ +#define DMA_GATHER_SCATTER 1 +#define FROM_DPTR 1 + +/** + * Enumeration cpt_vf_int_vec_e + * + * CPT VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. 
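+ *
+ * CPT_VF_INT_VEC_E_MISC carries the mailbox and error interrupts selected
+ * by the CPT_VF_INTR_*_MASK bits above, while CPT_VF_INT_VEC_E_DONE
+ * signals instruction completion (DONEINT).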
+ */
+enum cpt_vf_int_vec_e {
+	CPT_VF_INT_VEC_E_MISC = 0x00,
+	CPT_VF_INT_VEC_E_DONE = 0x01
+};
+
+struct command_chunk {
+	u8 *head;
+	dma_addr_t dma_addr;
+	u32 size; /* Chunk size, max CPT_INST_CHUNK_MAX_SIZE */
+	struct hlist_node nextchunk;
+};
+
+struct command_queue {
+	spinlock_t lock; /* command queue lock */
+	u32 idx; /* Command queue host write idx */
+	u32 nchunks; /* Number of command chunks */
+	struct command_chunk *qhead;	/* Command queue head, instructions
+					 * are inserted here
+					 */
+	struct hlist_head chead;
+};
+
+struct command_qinfo {
+	u32 cmd_size;
+	u32 qchunksize; /* Command queue chunk size */
+	struct command_queue queue[CPT_NUM_QS_PER_VF];
+};
+
+struct pending_entry {
+	u8 busy; /* Entry status (free/busy) */
+
+	volatile u64 *completion_addr; /* Completion address */
+	void *post_arg;
+	void (*callback)(int, void *); /* Kernel ASYNC request callback */
+	void *callback_arg; /* Kernel ASYNC request callback arg */
+};
+
+struct pending_queue {
+	struct pending_entry *head; /* head of the queue */
+	u32 front; /* Process work from here */
+	u32 rear; /* Append new work here */
+	atomic64_t pending_count;
+	spinlock_t lock; /* Queue lock */
+};
+
+struct pending_qinfo {
+	u32 nr_queues;	/* Number of queues supported */
+	u32 qlen; /* Queue length */
+	struct pending_queue queue[CPT_NUM_QS_PER_VF];
+};
+
+#define for_each_pending_queue(qinfo, q, i)	\
+	for (i = 0, q = &qinfo->queue[i]; i < qinfo->nr_queues; i++, \
+	     q = &qinfo->queue[i])
+
+struct cpt_vf {
+	u16 flags; /* Flags to hold device status bits */
+	u8 vfid; /* Device Index 0...CPT_MAX_VF_NUM */
+	u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */
+	u8 vfgrp; /* VF group (0 - 7) */
+	u8 node; /* Operating node: Bits (46:44) in BAR0 address */
+	u8 priority; /* VF priority ring: 1-High priority round
+		      * robin ring; 0-Low priority round robin ring;
+		      */
+	struct pci_dev *pdev; /* pci device handle */
+	void __iomem *reg_base; /* Register start address */
+	void *wqe_info;	/* BH worker info */
+	/* MSI-X */
+	cpumask_var_t affinity_mask[CPT_VF_MSIX_VECTORS];
+	/* Command and Pending queues */
+	u32 qsize;
+	u32 nr_queues;
+	struct command_qinfo cqinfo; /* Command queue information */
+	struct pending_qinfo pqinfo; /* Pending queue information */
+	/* VF-PF mailbox communication */
+	bool pf_acked;
+	bool pf_nacked;
+};
+
+int cptvf_send_vf_up(struct cpt_vf *cptvf);
+int cptvf_send_vf_down(struct cpt_vf *cptvf);
+int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf);
+int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf);
+int cptvf_send_vq_size_msg(struct cpt_vf *cptvf);
+int cptvf_check_pf_ready(struct cpt_vf *cptvf);
+void cptvf_handle_mbox_intr(struct cpt_vf *cptvf);
+void cvm_crypto_exit(void);
+int cvm_crypto_init(struct cpt_vf *cptvf);
+void vq_post_process(struct cpt_vf *cptvf, u32 qno);
+void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val);
+#endif /* __CPTVF_H */
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
new file mode 100644
index 000000000000..cc853f913d4b
--- /dev/null
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -0,0 +1,444 @@
+
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/crypto_wq.h> +#include <crypto/des.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/list.h> +#include <linux/scatterlist.h> + +#include "cptvf.h" +#include "cptvf_algs.h" + +struct cpt_device_handle { + void *cdev[MAX_DEVICES]; + u32 dev_count; +}; + +static struct cpt_device_handle dev_handle; + +static void cvm_callback(u32 status, void *arg) +{ + struct crypto_async_request *req = (struct crypto_async_request *)arg; + + req->complete(req, !status); +} + +static inline void update_input_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->in[*argcnt].vptr = (void *)iv; + req_info->in[*argcnt].size = enc_iv_len; + req_info->req.dlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_output_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->out[*argcnt].vptr = (void *)iv; + req_info->out[*argcnt].size = enc_iv_len; + req_info->rlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_input_data(struct cpt_request_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + + ++(*argcnt); + ++inp_sg; + } +} + +static inline void update_output_data(struct cpt_request_info *req_info, + struct scatterlist *outp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *)ptr; + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + ++outp_sg; + } +} + +static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc, + u32 cipher_type, u32 aes_key_type, + u32 *argcnt) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct fc_context *fctx = &rctx->fctx; + u64 *offset_control = &rctx->control_word; + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct cpt_request_info *req_info = &rctx->cpt_req; + u64 *ctrl_flags = NULL; + + req_info->ctrl.s.grp = 0; + req_info->ctrl.s.dma_mode = DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = SE_CORE_REQ; + + req_info->req.opcode.s.major = MAJOR_OP_FC | + DMA_MODE_FLAG(DMA_GATHER_SCATTER); + if (enc) + req_info->req.opcode.s.minor = 2; + else + req_info->req.opcode.s.minor = 3; + + req_info->req.param1 = req->nbytes; /* Encryption Data length */ + req_info->req.param2 = 0; /*Auth data length */ + + fctx->enc.enc_ctrl.e.enc_cipher = cipher_type; + fctx->enc.enc_ctrl.e.aes_key = aes_key_type; + fctx->enc.enc_ctrl.e.iv_source = FROM_DPTR; + + if (cipher_type == AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + *offset_control = cpu_to_be64(((u64)(enc_iv_len) << 16)); + /* Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)offset_control; + 
req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct fc_context); + req_info->req.dlen += sizeof(struct fc_context); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct ablkcipher_request *req, u32 enc, + u32 cipher_type, u32 aes_key_type, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + create_ctx_hdr(req, enc, cipher_type, aes_key_type, &argcnt); + update_input_iv(req_info, req->info, enc_iv_len, &argcnt); + update_input_data(req_info, req->src, req->nbytes, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void store_cb_info(struct ablkcipher_request *req, + struct cpt_request_info *req_info) +{ + req_info->callback = (void *)cvm_callback; + req_info->callback_arg = (void *)&req->base; +} + +static inline void create_output_list(struct ablkcipher_request *req, + u32 cipher_type, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + /* Reading IV information */ + update_output_iv(req_info, req->info, enc_iv_len, &argcnt); + update_output_data(req_info, req->dst, req->nbytes, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc, + u32 cipher_type) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm); + u32 key_type = AES_128_BIT; + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct fc_context *fctx = &rctx->fctx; + struct cpt_request_info *req_info = &rctx->cpt_req; + void *cdev = NULL; + int status; + + switch (ctx->key_len) { + case 16: + key_type = AES_128_BIT; + break; + case 24: + key_type = AES_192_BIT; + break; + case 32: + if (cipher_type == AES_XTS) + key_type = AES_128_BIT; + else + key_type = AES_256_BIT; + break; + case 64: + if (cipher_type == AES_XTS) + key_type = AES_256_BIT; + else + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (cipher_type == DES3_CBC) + key_type = 0; + + memset(req_info, 0, sizeof(struct cpt_request_info)); + memset(fctx, 0, sizeof(struct fc_context)); + create_input_list(req, enc, cipher_type, key_type, enc_iv_len); + create_output_list(req, cipher_type, enc_iv_len); + store_cb_info(req, req_info); + cdev = dev_handle.cdev[smp_processor_id()]; + status = cptvf_do_request(cdev, req_info); + /* We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + + if (status) + return status; + else + return -EINPROGRESS; +} + +int cvm_des3_encrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, DES3_CBC); +} + +int cvm_des3_decrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, DES3_CBC); +} + +int cvm_aes_encrypt_xts(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, AES_XTS); +} + +int cvm_aes_decrypt_xts(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, AES_XTS); 
+} + +int cvm_aes_encrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, AES_CBC); +} + +int cvm_aes_decrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, AES_CBC); +} + +int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + const u8 *key1 = key; + const u8 *key2 = key + (keylen / 2); + + err = xts_check_key(tfm, key, keylen); + if (err) + return err; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + + return 0; +} + +int cvm_enc_dec_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + if ((keylen == 16) || (keylen == 24) || (keylen == 32)) { + ctx->key_len = keylen; + memcpy(ctx->enc_key, key, keylen); + return 0; + } + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + + return -EINVAL; +} + +int cvm_enc_dec_init(struct crypto_tfm *tfm) +{ + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + tfm->crt_ablkcipher.reqsize = sizeof(struct cvm_req_ctx) + + sizeof(struct ablkcipher_request); + /* Additional memory for ablkcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + + return 0; +} + +struct crypto_alg algs[] = { { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "xts(aes)", + .cra_driver_name = "cavium-xts-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = cvm_xts_setkey, + .encrypt = cvm_aes_encrypt_xts, + .decrypt = cvm_aes_decrypt_xts, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(aes)", + .cra_driver_name = "cavium-cbc-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = cvm_enc_dec_setkey, + .encrypt = cvm_aes_encrypt_cbc, + .decrypt = cvm_aes_decrypt_cbc, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_des3_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cavium-cbc-des3_ede", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = cvm_enc_dec_setkey, + .encrypt = cvm_des3_encrypt_cbc, + .decrypt = cvm_des3_decrypt_cbc, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +} }; + +static inline int cav_register_algs(void) +{ + int err = 0; + + err = crypto_register_algs(algs, ARRAY_SIZE(algs)); + if (err) + return err; + + return 0; +} + +static inline void 
cav_unregister_algs(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +int cvm_crypto_init(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 dev_count; + + dev_count = dev_handle.dev_count; + dev_handle.cdev[dev_count] = cptvf; + dev_handle.dev_count++; + + if (dev_count == 3) { + if (cav_register_algs()) { + dev_err(&pdev->dev, "Error in registering crypto algorithms\n"); + return -EINVAL; + } + } + + return 0; +} + +void cvm_crypto_exit(void) +{ + u32 dev_count; + + dev_count = --dev_handle.dev_count; + if (!dev_count) + cav_unregister_algs(); +} diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.h b/drivers/crypto/cavium/cpt/cptvf_algs.h new file mode 100644 index 000000000000..a12050d11b0c --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_algs.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef _CPTVF_ALGS_H_ +#define _CPTVF_ALGS_H_ + +#include "request_manager.h" + +#define MAX_DEVICES 16 +#define MAJOR_OP_FC 0x33 +#define MAX_ENC_KEY_SIZE 32 +#define MAX_HASH_KEY_SIZE 64 +#define MAX_KEY_SIZE (MAX_ENC_KEY_SIZE + MAX_HASH_KEY_SIZE) +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 + +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +enum req_type { + AE_CORE_REQ, + SE_CORE_REQ, +}; + +enum cipher_type { + DES3_CBC = 0x1, + DES3_ECB = 0x2, + AES_CBC = 0x3, + AES_ECB = 0x4, + AES_CFB = 0x5, + AES_CTR = 0x6, + AES_GCM = 0x7, + AES_XTS = 0x8 +}; + +enum aes_type { + AES_128_BIT = 0x1, + AES_192_BIT = 0x2, + AES_256_BIT = 0x3 +}; + +union encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 hash_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 hash_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct enc_context { + union encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +struct fchmac_context { + u8 ipad[64]; + u8 opad[64]; /* or OPAD */ +}; + +struct fc_context { + struct enc_context enc; + struct fchmac_context hmac; +}; + +struct cvm_enc_ctx { + u32 key_len; + u8 enc_key[MAX_KEY_SIZE]; +}; + +struct cvm_des3_ctx { + u32 key_len; + u8 des3_key[MAX_KEY_SIZE]; +}; + +struct cvm_req_ctx { + struct cpt_request_info cpt_req; + u64 control_word; + struct fc_context fctx; +}; + +int cptvf_do_request(void *cptvf, struct cpt_request_info *req); +#endif /*_CPTVF_ALGS_H_*/ diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c new file mode 100644 index 000000000000..6ffc740c7431 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -0,0 +1,866 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#include <linux/interrupt.h> +#include <linux/module.h> + +#include "cptvf.h" + +#define DRV_NAME "thunder-cptvf" +#define DRV_VERSION "1.0" + +struct cptvf_wqe { + struct tasklet_struct twork; + void *cptvf; + u32 qno; +}; + +struct cptvf_wqe_info { + struct cptvf_wqe vq_wqe[CPT_NUM_QS_PER_VF]; +}; + +static void vq_work_handler(unsigned long data) +{ + struct cptvf_wqe_info *cwqe_info = (struct cptvf_wqe_info *)data; + struct cptvf_wqe *cwqe = &cwqe_info->vq_wqe[0]; + + vq_post_process(cwqe->cptvf, cwqe->qno); +} + +static int init_worker_threads(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].qno = i; + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct cpt_vf *cptvf) +{ + struct cptvf_wqe_info *cwqe_info; + struct pci_dev *pdev = cptvf->pdev; + int i; + + cwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct pending_qinfo *pqinfo) +{ + int i; + struct pending_queue *queue; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + + queue->front = 0; + queue->rear = 0; + + return; + } + + pqinfo->qlen = 0; + pqinfo->nr_queues = 0; +} + +static int alloc_pending_queues(struct pending_qinfo *pqinfo, u32 qlen, + u32 nr_queues) +{ + u32 i; + size_t size; + int ret; + struct pending_queue *queue = NULL; + + pqinfo->nr_queues = nr_queues; + pqinfo->qlen = qlen; + + size = (qlen * sizeof(struct pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->front = 0; + queue->rear = 0; + atomic64_set((&queue->pending_count), (0)); + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!nr_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "failed to setup pending queues (%u)\n", + nr_queues); + return ret; + } + + return 0; +} + +static void cleanup_pending_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->nr_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo) +{ + int i; + struct command_queue *queue = NULL; + struct command_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + struct hlist_node *node; + + /* clean up for each queue */ + for (i = 0; i < 
cptvf->nr_queues; i++) { + queue = &cqinfo->queue[i]; + if (hlist_empty(&cqinfo->queue[i].chead)) + continue; + + hlist_for_each_entry_safe(chunk, node, &cqinfo->queue[i].chead, + nextchunk) { + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + hlist_del(&chunk->nextchunk); + kzfree(chunk); + } + + queue->nchunks = 0; + queue->idx = 0; + } + + /* common cleanup */ + cqinfo->cmd_size = 0; +} + +static int alloc_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo, size_t cmd_size, + u32 qlen) +{ + int i; + size_t q_size; + struct command_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + + /* common init */ + cqinfo->cmd_size = cmd_size; + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * cqinfo->cmd_size; + + /* per queue initialization */ + for (i = 0; i < cptvf->nr_queues; i++) { + size_t c_size = 0; + size_t rem_q_size = q_size; + struct command_chunk *curr = NULL, *first = NULL, *last = NULL; + u32 qcsize_bytes = cqinfo->qchunksize * cqinfo->cmd_size; + + queue = &cqinfo->queue[i]; + INIT_HLIST_HEAD(&cqinfo->queue[i].chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = (u8 *)dma_zalloc_coherent(&pdev->dev, + c_size + CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->nchunks); + kfree(curr); + goto cmd_qfail; + } + + curr->size = c_size; + if (queue->nchunks == 0) { + hlist_add_head(&curr->nextchunk, + &cqinfo->queue[i].chead); + first = curr; + } else { + hlist_add_behind(&curr->nextchunk, + &last->nextchunk); + } + + queue->nchunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* Make the queue circular */ + /* Tie back last chunk entry to head */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + spin_lock_init(&queue->lock); + } + return 0; + +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct cpt_vf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup AE command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, CPT_INST_SIZE, + qlen); + if (ret) { + dev_err(&pdev->dev, "failed to allocate AE command queues (%u)\n", + cptvf->nr_queues); + return ret; + } + + return ret; +} + +static void cleanup_command_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->nr_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct cpt_vf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret = 0; + u32 max_dev_queues = 0; + + max_dev_queues = CPT_NUM_QS_PER_VF; + /* possible cpus */ + nr_queues = min_t(u32, nr_queues, max_dev_queues); + cptvf->nr_queues = nr_queues; + + ret = init_command_queues(cptvf, qlen); + if 
(ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + nr_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + nr_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct cpt_vf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct cpt_vf *cptvf, bool val) +{ + union cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0)); + vqx_ctl.s.ena = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0), vqx_ctl.u); +} + +void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DOORBELL(0, 0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DOORBELL(0, 0), + vqx_dbell.u); +} + +static void cptvf_write_vq_inprog(struct cpt_vf *cptvf, u8 val) +{ + union cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0)); + vqx_inprg.s.inflight = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0), vqx_inprg.u); +} + +static void cptvf_write_vq_done_numwait(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.num_wait = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_write_vq_done_timewait(struct cpt_vf *cptvf, u16 time) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.time_wait = time; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_enable_swerr_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_mbox_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_done_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ENA_W1S(0, 0)); + /* Set DONE interrupt for the requested vf */ + vqx_done_ena.s.done = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ENA_W1S(0, 0), + vqx_done_ena.u); +} + +static void cptvf_clear_dovf_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + 
CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_irde_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_nwrp_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + cpt_write_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u); +} + +static void cptvf_clear_mbox_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_swerr_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static u64 cptvf_read_vf_misc_intr_status(struct cpt_vf *cptvf) +{ + return cpt_read_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /*Check for MISC interrupt types*/ + if (likely(intr & CPT_VF_INTR_MBOX_MASK)) { + dev_err(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /*Clear doorbell count*/ + cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct cptvf_wqe *get_cptvf_vq_wqe(struct cpt_vf *cptvf, + int qno) +{ + struct cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->nr_queues)) + return NULL; + nwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct cpt_vf *cptvf) +{ + union cptx_vqx_done vqx_done; + + vqx_done.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_DONE(0, 0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct cpt_vf *cptvf, + u32 ackcnt) +{ + union cptx_vqx_done_ack 
vqx_dack_cnt; + + vqx_dack_cnt.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ACK(0, 0)); + vqx_dack_cnt.s.done_ack = ackcnt; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ACK(0, 0), + vqx_dack_cnt.u); +} + +static irqreturn_t cptvf_done_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct cptvf_wqe *wqe; + + /* Acknowledge the number of + * scheduled completions for processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct cpt_vf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct cpt_vf *cptvf, u64 val) +{ + union cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_SADDR(0, 0), vqx_saddr.u); +} + +void cptvf_device_init(struct cpt_vf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + /* TODO: for now only one queue, so hard coded */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, CPT_TIMER_THOLD); + cptvf_write_vq_done_numwait(cptvf, 1); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= CPT_FLAG_DEVICE_READY; +} + +static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_vf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto cptvf_err_disable_device; + } + /* Mark as VF driver */ + cptvf->flags |= CPT_FLAG_VF_DRIVER; + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cptvf_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cptvf_err_release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pcim_iomap(pdev, 0, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cptvf_err_release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = 
pci_alloc_irq_vectors(pdev, CPT_VF_MSIX_VECTORS, + CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + CPT_VF_MSIX_VECTORS); + goto cptvf_err_release_regions; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Request misc irq failed"); + goto cptvf_free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check ready with PF */ + /* Gets chip ID / device Id from PF if ready */ + err = cptvf_check_pf_ready(cptvf); + if (err) { + dev_err(dev, "PF not responding to READY msg"); + goto cptvf_free_misc_irq; + } + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, CPT_CMD_QLEN, CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed"); + goto cptvf_free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = cptvf_send_vq_size_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to QLEN msg"); + goto cptvf_free_misc_irq; + } + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + cptvf->vfgrp = 1; + err = cptvf_send_vf_to_grp_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to VF_GRP msg"); + goto cptvf_free_misc_irq; + } + + cptvf->priority = 1; + err = cptvf_send_vf_priority_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to VF_PRIO msg"); + goto cptvf_free_misc_irq; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Request done irq failed\n"); + goto cptvf_free_misc_irq; + } + + /* Enable mailbox interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = cptvf_send_vf_up(cptvf); + if (err) { + dev_err(dev, "PF not responding to UP msg"); + goto cptvf_free_irq_affinity; + } + err = cvm_crypto_init(cptvf); + if (err) { + dev_err(dev, "Algorithm register failed\n"); + goto cptvf_free_irq_affinity; + } + return 0; + +cptvf_free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +cptvf_free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +cptvf_free_vectors: + pci_free_irq_vectors(cptvf->pdev); +cptvf_err_release_regions: + pci_release_regions(pdev); +cptvf_err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void cptvf_remove(struct pci_dev *pdev) +{ + struct cpt_vf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg"); + } else { + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + pci_free_irq_vectors(cptvf->pdev); + cptvf_sw_cleanup(cptvf); + pci_set_drvdata(pdev, NULL); + pci_release_regions(pdev); + pci_disable_device(pdev); + cvm_crypto_exit(); + } +} + +static void cptvf_shutdown(struct 
pci_dev *pdev) +{ + cptvf_remove(pdev); +} + +/* Supported devices */ +static const struct pci_device_id cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, CPT_81XX_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = cptvf_id_table, + .probe = cptvf_probe, + .remove = cptvf_remove, + .shutdown = cptvf_shutdown, +}; + +module_pci_driver(cptvf_pci_driver); + +MODULE_AUTHOR("George Cherian <george.cherian@cavium.com>"); +MODULE_DESCRIPTION("Cavium Thunder CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, cptvf_id_table); diff --git a/drivers/crypto/cavium/cpt/cptvf_mbox.c b/drivers/crypto/cavium/cpt/cptvf_mbox.c new file mode 100644 index 000000000000..d5ec3b8a9e61 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_mbox.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include "cptvf.h" + +static void cptvf_send_msg_to_pf(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0), + mbx->msg); + cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1), + mbx->data); +} + +/* ACKs PF's mailbox message + */ +void cptvf_mbox_send_ack(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + mbx->msg = CPT_MBOX_MSG_TYPE_ACK; + cptvf_send_msg_to_pf(cptvf, mbx); +} + +/* NACKs PF's mailbox message when the VF is not able to + * complete the action + */ +void cptvf_mbox_send_nack(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + mbx->msg = CPT_MBOX_MSG_TYPE_NACK; + cptvf_send_msg_to_pf(cptvf, mbx); +} + +/* Interrupt handler to handle mailbox messages from the PF */ +void cptvf_handle_mbox_intr(struct cpt_vf *cptvf) +{ + struct cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0)); + mbx.data = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1)); + dev_dbg(&cptvf->pdev->dev, "%s: Mailbox msg 0x%llx from PF\n", + __func__, mbx.msg); + switch (mbx.msg) { + case CPT_MSG_READY: + { + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + } + case CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, ((mbx.data == SE_TYPES) ? "SE" : "AE"), + cptvf->vfgrp); + break; + case CPT_MBOX_MSG_TYPE_ACK: + cptvf->pf_acked = true; + break; + case CPT_MBOX_MSG_TYPE_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct cpt_vf *cptvf, + struct cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, "PF didn't ack to mbox msg %llx from VF%u\n", + (mbx->msg & 0xFF), cptvf->vfid); + return -EBUSY; + } + } + + return 0; +} + +/* + * Checks if the VF is able to communicate with the PF + * and also gets the CPT number this VF is associated with. + */ +int cptvf_check_pf_ready(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_READY; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to READY msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. + */ +int cptvf_send_vq_size_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vq_size msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate the VF group required to the PF and get the VQ bound to that group + */ +int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = cptvf->vfgrp; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vf_type msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate this VF's queue priority to the PF so the VQ is scheduled accordingly + */ +int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VQ_PRIORITY; + /* Convey priority of the VF */ + mbx.data = cptvf->priority; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vf_prio msg\n"); + return -EBUSY; + } + return 0; +} + +/* + * Communicate to PF that VF is UP and running + */ +int cptvf_send_vf_up(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VF_UP; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to UP msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate to the PF that the VF is going DOWN + */ +int cptvf_send_vf_down(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VF_DOWN; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to DOWN msg\n"); + return -EBUSY; + } + + return 0; +} diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c new file mode 100644 index 000000000000..169e66231bcf --- /dev/null +++
b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -0,0 +1,593 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include "cptvf.h" +#include "request_manager.h" + +/** + * get_free_pending_entry - get free entry from pending queue + * @q: pending queue to pick the entry from + * @qlen: length of the pending queue + */ +static struct pending_entry *get_free_pending_entry(struct pending_queue *q, + int qlen) +{ + struct pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) { + ent = NULL; + goto no_free_entry; + } + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + +no_free_entry: + return ent; +} + +static inline void pending_queue_inc_front(struct pending_qinfo *pqinfo, + int qno) +{ + struct pending_queue *queue = &pqinfo->queue[qno]; + + queue->front++; + if (unlikely(queue->front == pqinfo->qlen)) + queue->front = 0; +} + +static int setup_sgio_components(struct cpt_vf *cptvf, struct buf_ptr *list, + int buf_count, u8 *buffer) +{ + int ret = 0, i, j; + int components; + struct sglist_component *sg_ptr = NULL; + struct pci_dev *pdev = cptvf->pdev; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input List pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "DMA map kernel buffer failed for component: %d\n", + i); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + /* Fall through */ + case 2: + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + /* Fall through */ + case 1: + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[j].dma_addr, + list[j].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + + return ret; +} + +static inline int setup_sgio_list(struct cpt_vf *cptvf, + struct cpt_info_buffer *info, + struct cpt_request_info *req) +{ + u16 g_sz_bytes = 0, s_sz_bytes = 0; + int ret = 0; + struct pci_dev *pdev = cptvf->pdev; + + if (req->incnt > MAX_SG_IN_CNT || req->outcnt > MAX_SG_OUT_CNT) { + dev_err(&pdev->dev, "Request SG components are higher than supported\n"); + ret = -EINVAL; + goto scatter_gather_clean; + } + + /* Setup gather (input) components */ + g_sz_bytes = ((req->incnt + 3) /
4) * sizeof(struct sglist_component); + info->gather_components = kzalloc(g_sz_bytes, GFP_KERNEL); + if (!info->gather_components) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ret = setup_sgio_components(cptvf, req->in, + req->incnt, + info->gather_components); + if (ret) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + ret = -EFAULT; + goto scatter_gather_clean; + } + + /* Setup scatter (output) components */ + s_sz_bytes = ((req->outcnt + 3) / 4) * sizeof(struct sglist_component); + info->scatter_components = kzalloc(s_sz_bytes, GFP_KERNEL); + if (!info->scatter_components) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ret = setup_sgio_components(cptvf, req->out, + req->outcnt, + info->scatter_components); + if (ret) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + ret = -EFAULT; + goto scatter_gather_clean; + } + + /* Create and initialize DPTR */ + info->dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + info->in_buffer = kzalloc(info->dlen, GFP_KERNEL); + if (!info->in_buffer) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ((u16 *)info->in_buffer)[0] = req->outcnt; + ((u16 *)info->in_buffer)[1] = req->incnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer); + + memcpy(&info->in_buffer[8], info->gather_components, + g_sz_bytes); + memcpy(&info->in_buffer[8 + g_sz_bytes], + info->scatter_components, s_sz_bytes); + + info->dptr_baddr = dma_map_single(&pdev->dev, + (void *)info->in_buffer, + info->dlen, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->dptr_baddr)) { + dev_err(&pdev->dev, "Mapping DPTR Failed %d\n", info->dlen); + ret = -EIO; + goto scatter_gather_clean; + } + + /* Create and initialize RPTR */ + info->out_buffer = kzalloc(COMPLETION_CODE_SIZE, GFP_KERNEL); + if (!info->out_buffer) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + *((u64 *)info->out_buffer) = ~((u64)COMPLETION_CODE_INIT); + info->alternate_caddr = (u64 *)info->out_buffer; + info->rptr_baddr = dma_map_single(&pdev->dev, + (void *)info->out_buffer, + COMPLETION_CODE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->rptr_baddr)) { + dev_err(&pdev->dev, "Mapping RPTR Failed %d\n", + COMPLETION_CODE_SIZE); + ret = -EIO; + goto scatter_gather_clean; + } + + return 0; + +scatter_gather_clean: + return ret; +} + +int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd, + u32 qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct command_qinfo *qinfo = NULL; + struct command_queue *queue; + struct command_chunk *chunk; + u8 *ent; + int ret = 0; + + if (unlikely(qno >= cptvf->nr_queues)) { + dev_err(&pdev->dev, "Invalid queue (qno: %d, nr_queues: %d)\n", + qno, cptvf->nr_queues); + return -EINVAL; + } + + qinfo = &cptvf->cqinfo; + queue = &qinfo->queue[qno]; + /* lock command queue */ + spin_lock(&queue->lock); + ent = &queue->qhead->head[queue->idx * qinfo->cmd_size]; + memcpy(ent, (void *)cmd, qinfo->cmd_size); + + if (++queue->idx >= queue->qhead->size / 64) { + struct hlist_node *node; + + hlist_for_each(node, &queue->chead) { + chunk = hlist_entry(node, struct command_chunk, + nextchunk); + if (chunk == queue->qhead) { + continue; + } else { + queue->qhead = chunk; + break; + } + } + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + cptvf_write_vq_doorbell(cptvf, 1); + /* unlock command queue */ + spin_unlock(&queue->lock); + + return ret; +} + +void
do_request_cleanup(struct cpt_vf *cptvf, + struct cpt_info_buffer *info) +{ + int i; + struct pci_dev *pdev = cptvf->pdev; + struct cpt_request_info *req; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dlen, DMA_BIDIRECTIONAL); + + if (info->rptr_baddr) + dma_unmap_single(&pdev->dev, info->rptr_baddr, + COMPLETION_CODE_SIZE, DMA_BIDIRECTIONAL); + + if (info->comp_baddr) + dma_unmap_single(&pdev->dev, info->comp_baddr, + sizeof(union cpt_res_s), DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + + if (info->scatter_components) + kzfree(info->scatter_components); + + if (info->gather_components) + kzfree(info->gather_components); + + if (info->out_buffer) + kzfree(info->out_buffer); + + if (info->in_buffer) + kzfree(info->in_buffer); + + if (info->completion_addr) + kzfree((void *)info->completion_addr); + + kzfree(info); +} + +void do_post_process(struct cpt_vf *cptvf, struct cpt_info_buffer *info) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!info) { + dev_err(&pdev->dev, "incorrect cpt_info_buffer for post processing\n"); + return; + } + + do_request_cleanup(cptvf, info); +} + +static inline void process_pending_queue(struct cpt_vf *cptvf, + struct pending_qinfo *pqinfo, + int qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct pending_queue *pqueue = &pqinfo->queue[qno]; + struct pending_entry *pentry = NULL; + struct cpt_info_buffer *info = NULL; + union cpt_res_s *status = NULL; + unsigned char ccode; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + info = (struct cpt_info_buffer *)pentry->post_arg; + if (unlikely(!info)) { + dev_err(&pdev->dev, "Pending Entry post arg NULL\n"); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + continue; + } + + status = (union cpt_res_s *)pentry->completion_addr; + ccode = status->s.compcode; + if ((status->s.compcode == CPT_COMP_E_FAULT) || + (status->s.compcode == CPT_COMP_E_SWERR)) { + dev_err(&pdev->dev, "Request failed with %s\n", + (status->s.compcode == CPT_COMP_E_FAULT) ? 
+ "DMA Fault" : "Software error"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if (status->s.compcode == COMPLETION_CODE_INIT) { + /* check for timeout */ + if (time_after_eq(jiffies, + (info->time_in + + (CPT_COMMAND_TIMEOUT * HZ)))) { + dev_err(&pdev->dev, "Request timed out"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if ((*info->alternate_caddr == + (~COMPLETION_CODE_INIT)) && + (info->extra_time < TIME_IN_RESET_COUNT)) { + info->time_in = jiffies; + info->extra_time++; + spin_unlock_bh(&pqueue->lock); + break; + } + } + + pentry->completion_addr = NULL; + pentry->busy = false; + pentry->post_arg = NULL; + atomic64_dec((&pqueue->pending_count)); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + + do_post_process(info->cptvf, info); + /* + * Calling callback after we find + * that the request has been serviced + */ + pentry->callback(ccode, pentry->callback_arg); + } +} + +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req) +{ + int ret = 0, clear = 0, queue = 0; + struct cpt_info_buffer *info = NULL; + struct cptvf_request *cpt_req = NULL; + union ctrl_info *ctrl = NULL; + union cpt_res_s *result = NULL; + struct pending_entry *pentry = NULL; + struct pending_queue *pqueue = NULL; + struct pci_dev *pdev = cptvf->pdev; + u8 group = 0; + struct cpt_vq_command vq_cmd; + union cpt_inst_s cptinst; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Unable to allocate memory for info_buffer\n"); + return -ENOMEM; + } + + cpt_req = (struct cptvf_request *)&req->req; + ctrl = (union ctrl_info *)&req->ctrl; + + info->cptvf = cptvf; + group = ctrl->s.grp; + ret = setup_sgio_list(cptvf, info, req); + if (ret) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + + cpt_req->dlen = info->dlen; + /* + * Get buffer for union cpt_res_s response + * structure and its physical address + */ + info->completion_addr = kzalloc(sizeof(union cpt_res_s), GFP_KERNEL); + if (unlikely(!info->completion_addr)) { + dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n"); + return -ENOMEM; + } + + result = (union cpt_res_s *)info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + info->comp_baddr = dma_map_single(&pdev->dev, + (void *)info->completion_addr, + sizeof(union cpt_res_s), + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->comp_baddr)) { + dev_err(&pdev->dev, "mapping compptr Failed %lu\n", + sizeof(union cpt_res_s)); + ret = -EFAULT; + goto request_cleanup; + } + + /* Fill the VQ command */ + vq_cmd.cmd.u64 = 0; + vq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + vq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + vq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + vq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + vq_cmd.cmd.u64 = cpu_to_be64(vq_cmd.cmd.u64); + vq_cmd.dptr = info->dptr_baddr; + vq_cmd.rptr = info->rptr_baddr; + vq_cmd.cptr.u64 = 0; + vq_cmd.cptr.s.grp = group; + /* Get Pending Entry to submit command */ + /* Always queue 0, because 1 
queue per VF */ + queue = 0; + pqueue = &cptvf->pqinfo.queue[queue]; + + if (atomic64_read(&pqueue->pending_count) > PENDING_THOLD) { + dev_err(&pdev->dev, "pending threshold reached\n"); + process_pending_queue(cptvf, &cptvf->pqinfo, queue); + } + +get_pending_entry: + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, cptvf->pqinfo.qlen); + if (unlikely(!pentry)) { + spin_unlock_bh(&pqueue->lock); + if (clear == 0) { + process_pending_queue(cptvf, &cptvf->pqinfo, queue); + clear = 1; + goto get_pending_entry; + } + dev_err(&pdev->dev, "Get free entry failed\n"); + dev_err(&pdev->dev, "queue: %d, rear: %d, front: %d\n", + queue, pqueue->rear, pqueue->front); + ret = -EFAULT; + goto request_cleanup; + } + + pentry->completion_addr = info->completion_addr; + pentry->post_arg = (void *)info; + pentry->callback = req->callback; + pentry->callback_arg = req->callback_arg; + info->pentry = pentry; + pentry->busy = true; + atomic64_inc(&pqueue->pending_count); + + /* Send CPT command */ + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Create the CPT_INST_S type command for HW intrepretation */ + cptinst.s.doneint = true; + cptinst.s.res_addr = (u64)info->comp_baddr; + cptinst.s.tag = 0; + cptinst.s.grp = 0; + cptinst.s.wq_ptr = 0; + cptinst.s.ei0 = vq_cmd.cmd.u64; + cptinst.s.ei1 = vq_cmd.dptr; + cptinst.s.ei2 = vq_cmd.rptr; + cptinst.s.ei3 = vq_cmd.cptr.u64; + + ret = send_cpt_command(cptvf, &cptinst, queue); + spin_unlock_bh(&pqueue->lock); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Send command failed for AE\n"); + ret = -EFAULT; + goto request_cleanup; + } + + return 0; + +request_cleanup: + dev_dbg(&pdev->dev, "Failed to submit CPT command\n"); + do_request_cleanup(cptvf, info); + + return ret; +} + +void vq_post_process(struct cpt_vf *cptvf, u32 qno) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (unlikely(qno > cptvf->nr_queues)) { + dev_err(&pdev->dev, "Request for post processing on invalid pending queue: %u\n", + qno); + return; + } + + process_pending_queue(cptvf, &cptvf->pqinfo, qno); +} + +int cptvf_do_request(void *vfdev, struct cpt_request_info *req) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)vfdev; + struct pci_dev *pdev = cptvf->pdev; + + if (!cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if ((cptvf->vftype == SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == AE_TYPES) && (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(cptvf, req); +} diff --git a/drivers/crypto/cavium/cpt/request_manager.h b/drivers/crypto/cavium/cpt/request_manager.h new file mode 100644 index 000000000000..80ee074c6e0c --- /dev/null +++ b/drivers/crypto/cavium/cpt/request_manager.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#ifndef __REQUEST_MANAGER_H +#define __REQUEST_MANAGER_H + +#include "cpt_common.h" + +#define TIME_IN_RESET_COUNT 5 +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 +#define PENDING_THOLD 100 +#define MAX_SG_IN_CNT 12 +#define MAX_SG_OUT_CNT 13 +#define SG_LIST_HDR_SIZE 8 +#define MAX_BUF_CNT 16 + +union ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1;/* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +union opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct cptvf_request { + union opcode_info opcode; + u16 param1; + u16 param2; + u16 dlen; +}; + +struct buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +struct cpt_request_info { + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u16 rlen; /* Output length */ + union ctrl_info ctrl; /* User control information */ + struct cptvf_request req; /* Request Information (Core specific) */ + + struct buf_ptr in[MAX_BUF_CNT]; + struct buf_ptr out[MAX_BUF_CNT]; + + void (*callback)(int, void *); /* Kernel ASYNC request callabck */ + void *callback_arg; /* Kernel ASYNC request callabck arg */ +}; + +struct sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct cpt_info_buffer { + struct cpt_vf *cptvf; + unsigned long time_in; + u8 extra_time; + + struct cpt_request_info *req; + dma_addr_t dptr_baddr; + u32 dlen; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + u8 *in_buffer; + u8 *out_buffer; + u8 *gather_components; + u8 *scatter_components; + + struct pending_entry *pentry; + volatile u64 *completion_addr; + volatile u64 *alternate_caddr; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union vq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union vq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct cpt_vq_command { + union vq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union vq_cmd_word3 cptr; +}; + +void vq_post_process(struct cpt_vf *cptvf, u32 qno); +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req); +#endif /* __REQUEST_MANAGER_H */ diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 8d2dbacc6161..7bc09989e18a 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -404,10 +404,6 @@ static int ccp_init(struct ccp_device *ccp) goto e_pool; } - /* Initialize the queues used to wait for KSB space and suspend */ - init_waitqueue_head(&ccp->sb_queue); - init_waitqueue_head(&ccp->suspend_queue); - dev_dbg(dev, "Starting threads...\n"); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index faf3cb3ddce2..fc08b4ed69d9 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -21,6 +21,12 @@ #include "ccp-dev.h" +/* Allocate the requested number of contiguous LSB slots + * from the LSB bitmap. Look in the private range for this + * queue first; failing that, check the public area. 
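+ * The value handed back is a slot index rather than a byte offset; callers that need a byte offset scale it by LSB_ITEM_SIZE (see the RSA key setup below).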
+ * If no space is available, wait around. + * Return: first slot number + */ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) { struct ccp_device *ccp; @@ -50,7 +56,7 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) bitmap_set(ccp->lsbmap, start, count); mutex_unlock(&ccp->sb_mutex); - return start * LSB_ITEM_SIZE; + return start; } ccp->sb_avail = 0; @@ -63,17 +69,18 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) } } +/* Free a number of LSB slots from the bitmap, starting at + * the indicated starting slot number. + */ static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start, unsigned int count) { - int lsbno = start / LSB_SIZE; - if (!start) return; - if (cmd_q->lsb == lsbno) { + if (cmd_q->lsb == start) { /* An entry from the private LSB */ - bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count); + bitmap_clear(cmd_q->lsbmap, start, count); } else { /* From the shared LSBs */ struct ccp_device *ccp = cmd_q->ccp; @@ -243,17 +250,20 @@ static int ccp5_do_cmd(struct ccp5_desc *desc, ret = wait_event_interruptible(cmd_q->int_queue, cmd_q->int_rcvd); if (ret || cmd_q->cmd_error) { + /* Log the error and flush the queue by + * moving the head pointer + */ if (cmd_q->cmd_error) ccp_log_error(cmd_q->ccp, cmd_q->cmd_error); - /* A version 5 device doesn't use Job IDs... */ + iowrite32(tail, cmd_q->reg_head_lo); if (!ret) ret = -EIO; } cmd_q->int_rcvd = 0; } - return 0; + return ret; } static int ccp5_perform_aes(struct ccp_op *op) @@ -277,8 +287,7 @@ static int ccp5_perform_aes(struct ccp_op *op) CCP_AES_ENCRYPT(&function) = op->u.aes.action; CCP_AES_MODE(&function) = op->u.aes.mode; CCP_AES_TYPE(&function) = op->u.aes.type; - if (op->u.aes.mode == CCP_AES_MODE_CFB) - CCP_AES_SIZE(&function) = 0x7f; + CCP_AES_SIZE(&function) = op->u.aes.size; CCP5_CMD_FUNCTION(&desc) = function.raw; @@ -396,7 +405,7 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_PROT(&desc) = 0; function.raw = 0; - CCP_RSA_SIZE(&function) = op->u.rsa.mod_size; + CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3; CCP5_CMD_FUNCTION(&desc) = function.raw; CCP5_CMD_LEN(&desc) = op->u.rsa.input_len; @@ -411,10 +420,10 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; - /* Key (Exponent) is in external memory */ - CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma); - CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma); - CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + /* Exponent is in LSB memory */ + CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE; + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; return ccp5_do_cmd(&desc, op->cmd_q); } @@ -525,7 +534,7 @@ static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status) status >>= LSB_REGION_WIDTH; } queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); - dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", + dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", cmd_q->id, queues); return queues ? 
0 : -EINVAL; @@ -567,7 +576,7 @@ static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp, */ cmd_q->lsb = bitno; bitmap_clear(lsb_pub, bitno, 1); - dev_info(ccp->dev, + dev_dbg(ccp->dev, "Queue %d gets LSB %d\n", i, bitno); break; @@ -725,7 +734,6 @@ static int ccp5_init(struct ccp_device *ccp) ret = -EIO; goto e_pool; } - dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); /* Turn off the queues and disable interrupts until ready */ for (i = 0; i < ccp->cmd_q_count; i++) { @@ -751,9 +759,6 @@ static int ccp5_init(struct ccp_device *ccp) goto e_pool; } - /* Initialize the queue used to suspend */ - init_waitqueue_head(&ccp->suspend_queue); - dev_dbg(dev, "Loading LSB map...\n"); /* Copy the private LSB mask to the public registers */ status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); @@ -955,7 +960,7 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data) static void ccp5_config(struct ccp_device *ccp) { /* Public side */ - iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); + iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); } static void ccp5other_config(struct ccp_device *ccp) @@ -1010,6 +1015,7 @@ const struct ccp_vdata ccpv5a = { const struct ccp_vdata ccpv5b = { .version = CCP_VERSION(5, 0), + .dma_chan_attr = DMA_PRIVATE, .setup = ccp5other_config, .perform = &ccp5_actions, .bar = 2, diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index cafa633aae10..92d1c6959f08 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -41,7 +41,7 @@ struct ccp_tasklet_data { }; /* Human-readable error strings */ -char *ccp_error_codes[] = { +static char *ccp_error_codes[] = { "", "ERR 01: ILLEGAL_ENGINE", "ERR 02: ILLEGAL_KEY_ID", @@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd) { - struct ccp_device *ccp = ccp_get_device(); + struct ccp_device *ccp; unsigned long flags; unsigned int i; int ret; + /* Some commands might need to be sent to a specific device */ + ccp = cmd->ccp ? cmd->ccp : ccp_get_device(); + if (!ccp) return -ENODEV; @@ -478,6 +481,10 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) ccp->sb_count = KSB_COUNT; ccp->sb_start = 0; + /* Initialize the wait queues */ + init_waitqueue_head(&ccp->sb_queue); + init_waitqueue_head(&ccp->suspend_queue); + ccp->ord = ccp_increment_unit_ordinal(); snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord); snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index da5f4a678083..aa36f3f81860 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -179,6 +179,10 @@ /* ------------------------ General CCP Defines ------------------------ */ +#define CCP_DMA_DFLT 0x0 +#define CCP_DMA_PRIV 0x1 +#define CCP_DMA_PUB 0x2 + #define CCP_DMAPOOL_MAX_SIZE 64 #define CCP_DMAPOOL_ALIGN BIT(5) @@ -238,6 +242,7 @@ struct ccp_dma_chan { struct ccp_device *ccp; spinlock_t lock; + struct list_head created; struct list_head pending; struct list_head active; struct list_head complete; @@ -278,7 +283,7 @@ struct ccp_cmd_queue { /* Private LSB that is assigned to this queue, or -1 if none. 
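* (Kept as a signed value so that -1 can genuinely be stored when no private LSB is assigned.)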
* Bitmap for my private LSB, unused otherwise */ - unsigned int lsb; + int lsb; DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE); /* Queue processing thread */ @@ -466,6 +471,7 @@ struct ccp_aes_op { enum ccp_aes_type type; enum ccp_aes_mode mode; enum ccp_aes_action action; + unsigned int size; }; struct ccp_xts_aes_op { @@ -515,7 +521,6 @@ struct ccp_op { struct ccp_passthru_op passthru; struct ccp_ecc_op ecc; } u; - struct ccp_mem key; }; static inline u32 ccp_addr_lo(struct ccp_dma_info *info) @@ -541,23 +546,23 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info) * word 7: upper 16 bits of key pointer; key memory type */ struct dword0 { - __le32 soc:1; - __le32 ioc:1; - __le32 rsvd1:1; - __le32 init:1; - __le32 eom:1; /* AES/SHA only */ - __le32 function:15; - __le32 engine:4; - __le32 prot:1; - __le32 rsvd2:7; + unsigned int soc:1; + unsigned int ioc:1; + unsigned int rsvd1:1; + unsigned int init:1; + unsigned int eom:1; /* AES/SHA only */ + unsigned int function:15; + unsigned int engine:4; + unsigned int prot:1; + unsigned int rsvd2:7; }; struct dword3 { - __le32 src_hi:16; - __le32 src_mem:2; - __le32 lsb_cxt_id:8; - __le32 rsvd1:5; - __le32 fixed:1; + unsigned int src_hi:16; + unsigned int src_mem:2; + unsigned int lsb_cxt_id:8; + unsigned int rsvd1:5; + unsigned int fixed:1; }; union dword4 { @@ -567,18 +572,18 @@ union dword4 { union dword5 { struct { - __le32 dst_hi:16; - __le32 dst_mem:2; - __le32 rsvd1:13; - __le32 fixed:1; + unsigned int dst_hi:16; + unsigned int dst_mem:2; + unsigned int rsvd1:13; + unsigned int fixed:1; } fields; __le32 sha_len_hi; }; struct dword7 { - __le32 key_hi:16; - __le32 key_mem:2; - __le32 rsvd1:14; + unsigned int key_hi:16; + unsigned int key_mem:2; + unsigned int rsvd1:14; }; struct ccp5_desc { @@ -635,6 +640,7 @@ struct ccp_actions { /* Structure to hold CCP version-specific values */ struct ccp_vdata { const unsigned int version; + const unsigned int dma_chan_attr; void (*setup)(struct ccp_device *); const struct ccp_actions *perform; const unsigned int bar; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 6553912804f7..e00be01fbf5a 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include <linux/module.h> #include <linux/kernel.h> #include <linux/dmaengine.h> #include <linux/spinlock.h> @@ -25,6 +26,37 @@ (mask == 0) ? 64 : fls64(mask); \ }) +/* The CCP as a DMA provider can be configured for public or private + * channels. Default is specified in the vdata for the device (PCI ID). 
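Dropping the __le32 bit-field types from dword0/dword3/dword5/dword7 reflects the fact that bit-fields carry no endianness of their own: the descriptor words are assembled in host order and only the complete 32-bit value is byte-swapped when handed to the device. As a hedged illustration, the same word can be packed with explicit shifts and masks; the offsets below are derived from the bit widths in the struct, not from a hardware datasheet:

#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout for a "dword0"-style word:
 * soc:1 ioc:1 rsvd:1 init:1 eom:1 function:15 engine:4 prot:1 rsvd:7 */
#define D0_SOC(x)	(((uint32_t)(x) & 0x1)    << 0)
#define D0_IOC(x)	(((uint32_t)(x) & 0x1)    << 1)
#define D0_INIT(x)	(((uint32_t)(x) & 0x1)    << 3)
#define D0_EOM(x)	(((uint32_t)(x) & 0x1)    << 4)
#define D0_FUNCTION(x)	(((uint32_t)(x) & 0x7fff) << 5)
#define D0_ENGINE(x)	(((uint32_t)(x) & 0xf)    << 20)
#define D0_PROT(x)	(((uint32_t)(x) & 0x1)    << 24)

int main(void)
{
	uint32_t dword0 = D0_SOC(0) | D0_IOC(1) | D0_INIT(1) | D0_EOM(1) |
			  D0_FUNCTION(0x1234) | D0_ENGINE(2) | D0_PROT(0);

	/* convert to little endian only at the point of writing to hardware */
	printf("dword0 = 0x%08x\n", (unsigned int)dword0);
	return 0;
}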
+ * This module parameter will override for all channels on all devices: + * dma_chan_attr = 0x2 to force all channels public + * = 0x1 to force all channels private + * = 0x0 to defer to the vdata setting + * = any other value: warning, revert to 0x0 + */ +static unsigned int dma_chan_attr = CCP_DMA_DFLT; +module_param(dma_chan_attr, uint, 0444); +MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public"); + +unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp) +{ + switch (dma_chan_attr) { + case CCP_DMA_DFLT: + return ccp->vdata->dma_chan_attr; + + case CCP_DMA_PRIV: + return DMA_PRIVATE; + + case CCP_DMA_PUB: + return 0; + + default: + dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n", + dma_chan_attr); + return ccp->vdata->dma_chan_attr; + } +} + static void ccp_free_cmd_resources(struct ccp_device *ccp, struct list_head *list) { @@ -63,6 +95,7 @@ static void ccp_free_chan_resources(struct dma_chan *dma_chan) ccp_free_desc_resources(chan->ccp, &chan->complete); ccp_free_desc_resources(chan->ccp, &chan->active); ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); spin_unlock_irqrestore(&chan->lock, flags); } @@ -273,6 +306,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) spin_lock_irqsave(&chan->lock, flags); cookie = dma_cookie_assign(tx_desc); + list_del(&desc->entry); list_add_tail(&desc->entry, &chan->pending); spin_unlock_irqrestore(&chan->lock, flags); @@ -388,6 +422,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, goto err; ccp_cmd = &cmd->ccp_cmd; + ccp_cmd->ccp = chan->ccp; ccp_pt = &ccp_cmd->u.passthru_nomap; ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; @@ -426,7 +461,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, spin_lock_irqsave(&chan->lock, sflags); - list_add_tail(&desc->entry, &chan->pending); + list_add_tail(&desc->entry, &chan->created); spin_unlock_irqrestore(&chan->lock, sflags); @@ -610,6 +645,7 @@ static int ccp_terminate_all(struct dma_chan *dma_chan) /*TODO: Purge the complete list? */ ccp_free_desc_resources(chan->ccp, &chan->active); ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); spin_unlock_irqrestore(&chan->lock, flags); @@ -671,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp) dma_cap_set(DMA_SG, dma_dev->cap_mask); dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + /* The DMA channels for this device can be set to public or private, + * and overridden by the module parameter dma_chan_attr. 
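The new dma_chan_attr module parameter overrides the per-device default carried in the vdata: 0 keeps the vdata setting, 1 forces every channel DMA_PRIVATE, 2 forces them public, and anything else warns once and falls back to the default. A DMA_PRIVATE channel is hidden from the general dmaengine pool but can still be obtained explicitly; the sketch below shows a consumer doing that through the standard dmaengine API (the name-prefix filter is an assumption for illustration, not how the driver matches channels):

#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/string.h>

/* Sketch of a consumer-side filter: accept channels whose provider
 * device name starts with "ccp". */
static bool ccp_chan_filter(struct dma_chan *chan, void *param)
{
	return strncmp(dev_name(chan->device->dev), "ccp", 3) == 0;
}

static struct dma_chan *get_ccp_chan(void)
{
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	/* dma_request_channel() can hand out DMA_PRIVATE channels that the
	 * generic allocator would never return. */
	return dma_request_channel(mask, ccp_chan_filter, NULL);
}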
+ * Default: according to the value in vdata (dma_chan_attr=0) + * dma_chan_attr=0x1: all channels private (override vdata) + * dma_chan_attr=0x2: all channels public (override vdata) + */ + if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE) + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + INIT_LIST_HEAD(&dma_dev->channels); for (i = 0; i < ccp->cmd_q_count; i++) { chan = ccp->ccp_dma_chan + i; @@ -679,6 +724,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp) chan->ccp = ccp; spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->created); INIT_LIST_HEAD(&chan->pending); INIT_LIST_HEAD(&chan->active); INIT_LIST_HEAD(&chan->complete); diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 50fae4442801..f1396c3aedac 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -184,62 +184,46 @@ static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, } static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, struct scatterlist *sg, - unsigned int len, unsigned int se_len, - bool sign_extend) + unsigned int sg_offset, + unsigned int len) { - unsigned int nbytes, sg_offset, dm_offset, sb_len, i; - u8 buffer[CCP_REVERSE_BUF_SIZE]; - - if (WARN_ON(se_len > sizeof(buffer))) - return -EINVAL; - - sg_offset = len; - dm_offset = 0; - nbytes = len; - while (nbytes) { - sb_len = min_t(unsigned int, nbytes, se_len); - sg_offset -= sb_len; - - scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0); - for (i = 0; i < sb_len; i++) - wa->address[dm_offset + i] = buffer[sb_len - i - 1]; - - dm_offset += sb_len; - nbytes -= sb_len; - - if ((sb_len != se_len) && sign_extend) { - /* Must sign-extend to nearest sign-extend length */ - if (wa->address[dm_offset - 1] & 0x80) - memset(wa->address + dm_offset, 0xff, - se_len - sb_len); - } + u8 *p, *q; + + ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len); + + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; } - return 0; } static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, struct scatterlist *sg, + unsigned int sg_offset, unsigned int len) { - unsigned int nbytes, sg_offset, dm_offset, sb_len, i; - u8 buffer[CCP_REVERSE_BUF_SIZE]; - - sg_offset = 0; - dm_offset = len; - nbytes = len; - while (nbytes) { - sb_len = min_t(unsigned int, nbytes, sizeof(buffer)); - dm_offset -= sb_len; - - for (i = 0; i < sb_len; i++) - buffer[sb_len - i - 1] = wa->address[dm_offset + i]; - scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1); - - sg_offset += sb_len; - nbytes -= sb_len; + u8 *p, *q; + + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; } + + ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len); } static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) @@ -692,6 +676,14 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_ctx; } } + switch (aes->mode) { + case CCP_AES_MODE_CFB: /* CFB128 only */ + case CCP_AES_MODE_CTR: + op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1; + break; + default: + op.u.aes.size = 0; + } /* Prepare the input and output data workareas. 
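ccp_reverse_set_dm_area() and ccp_reverse_get_dm_area() now copy the buffer once and reverse it in place; the XOR swap is safe only because the while (p < q) condition guarantees the two pointers never alias. A standalone equivalent using a temporary variable (functionally identical, arguably clearer):

#include <stdio.h>
#include <string.h>

/* Reverse 'len' bytes of 'buf' in place. */
static void reverse_bytes(unsigned char *buf, size_t len)
{
	unsigned char *p = buf, *q = buf + len - 1;

	while (p < q) {		/* the middle byte of an odd length is untouched */
		unsigned char tmp = *p;

		*p++ = *q;
		*q-- = tmp;
	}
}

int main(void)
{
	unsigned char n[4] = { 0x12, 0x34, 0x56, 0x78 };

	reverse_bytes(n, sizeof(n));
	printf("%02x%02x%02x%02x\n", n[0], n[1], n[2], n[3]);	/* 78563412 */
	return 0;
}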
For in-place * operations we need to set the dma direction to BIDIRECTIONAL @@ -1261,8 +1253,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_sb; - ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len); if (ret) goto e_exp; ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, @@ -1280,16 +1271,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_exp; - ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len); if (ret) goto e_src; - src.address += o_len; /* Adjust the address for the copy operation */ - ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len); if (ret) goto e_src; - src.address -= o_len; /* Reset the address to original value */ /* Prepare the output area for the operation */ ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len, @@ -1314,7 +1301,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_dst; } - ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len); + ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len); e_dst: ccp_free_data(&dst, cmd_q); @@ -1566,25 +1553,22 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = src.address; /* Copy the ECC modulus */ - ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; /* Copy the first operand */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1, - ecc->u.mm.operand_1_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0, + ecc->u.mm.operand_1_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) { /* Copy the second operand */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2, - ecc->u.mm.operand_2_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0, + ecc->u.mm.operand_2_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1623,7 +1607,8 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) } /* Save the ECC result */ - ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES); + ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0, + CCP_ECC_MODULUS_BYTES); e_dst: ccp_dm_free(&dst); @@ -1691,22 +1676,19 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = src.address; /* Copy the ECC modulus */ - ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; /* Copy the first point X and Y coordinate */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x, - ecc->u.pm.point_1.x_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0, + ecc->u.pm.point_1.x_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y, - ecc->u.pm.point_1.y_len, - 
CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0, + ecc->u.pm.point_1.y_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1717,15 +1699,13 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { /* Copy the second point X and Y coordinate */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x, - ecc->u.pm.point_2.x_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0, + ecc->u.pm.point_2.x_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y, - ecc->u.pm.point_2.y_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0, + ecc->u.pm.point_2.y_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1735,19 +1715,17 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) src.address += CCP_ECC_OPERAND_SIZE; } else { /* Copy the Domain "a" parameter */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a, - ecc->u.pm.domain_a_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0, + ecc->u.pm.domain_a_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) { /* Copy the scalar value */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar, - ecc->u.pm.scalar_len, - CCP_ECC_OPERAND_SIZE, - false); + ret = ccp_reverse_set_dm_area(&src, 0, + ecc->u.pm.scalar, 0, + ecc->u.pm.scalar_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1792,10 +1770,10 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = dst.address; /* Save the ECC result X and Y coordinates */ - ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x, + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0, CCP_ECC_MODULUS_BYTES); dst.address += CCP_ECC_OUTPUT_SIZE; - ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y, + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0, CCP_ECC_MODULUS_BYTES); dst.address += CCP_ECC_OUTPUT_SIZE; diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 4ce67fb9a880..3e104f5aa0c2 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + select CRYPTO_AUTHENC ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. 
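CRYPTO_AUTHENC is selected because the chcr driver below starts registering authenc(hmac(shaX),cbc(aes)) AEAD transforms and relies on the authenc helpers for key parsing. For context, here is a hedged sketch of how a kernel consumer would request such a transform through the generic AEAD API; the crypto core picks the hardware implementation when it is registered with a higher priority:

#include <linux/err.h>
#include <crypto/aead.h>
#include <crypto/sha.h>

/* Sketch only: allocate the AEAD and set the ICV length. */
static struct crypto_aead *get_authenc_tfm(void)
{
	struct crypto_aead *tfm;

	tfm = crypto_alloc_aead("authenc(hmac(sha1),cbc(aes))", 0, 0);
	if (IS_ERR(tfm))
		return tfm;

	/* authenc() keys are passed as an RTA-wrapped blob; see
	 * crypto_authenc_extractkeys() for the expected layout. */
	crypto_aead_setauthsize(tfm, SHA1_DIGEST_SIZE);
	return tfm;
}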
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index e4ddb921d7b3..41bc7f4f58cd 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -54,6 +54,12 @@ #include <crypto/algapi.h> #include <crypto/hash.h> #include <crypto/sha.h> +#include <crypto/authenc.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> #include <crypto/internal/hash.h> #include "t4fw_api.h" @@ -62,6 +68,11 @@ #include "chcr_algo.h" #include "chcr_crypto.h" +static inline struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->aeadctx; +} + static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) { return ctx->crypto_ctx->ablkctx; @@ -72,6 +83,16 @@ static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) return ctx->crypto_ctx->hmacctx; } +static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx) +{ + return gctx->ctx->gcm; +} + +static inline struct chcr_authenc_ctx *AUTHENC_CTX(struct chcr_aead_ctx *gctx) +{ + return gctx->ctx->authenc; +} + static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx) { return ctx->dev->u_ctx; @@ -94,12 +115,37 @@ static inline unsigned int sgl_len(unsigned int n) return (3 * n) / 2 + (n & 1) + 2; } +static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err) +{ + u8 temp[SHA512_DIGEST_SIZE]; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + int authsize = crypto_aead_authsize(tfm); + struct cpl_fw6_pld *fw6_pld; + int cmp = 0; + + fw6_pld = (struct cpl_fw6_pld *)input; + if ((get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) || + (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_GCM)) { + cmp = memcmp(&fw6_pld->data[2], (fw6_pld + 1), authsize); + } else { + + sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp, + authsize, req->assoclen + + req->cryptlen - authsize); + cmp = memcmp(temp, (fw6_pld + 1), authsize); + } + if (cmp) + *err = -EBADMSG; + else + *err = 0; +} + /* * chcr_handle_resp - Unmap the DMA buffers associated with the request * @req: crypto request */ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, - int error_status) + int err) { struct crypto_tfm *tfm = req->tfm; struct chcr_context *ctx = crypto_tfm_ctx(tfm); @@ -109,17 +155,33 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, unsigned int digestsize, updated_digestsize; switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_BLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + ctx_req.req.aead_req = (struct aead_request *)req; + ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req); + dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.ctx.reqctx->dst, + ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE); + if (ctx_req.ctx.reqctx->skb) { + kfree_skb(ctx_req.ctx.reqctx->skb); + ctx_req.ctx.reqctx->skb = NULL; + } + if (ctx_req.ctx.reqctx->verify == VERIFY_SW) { + chcr_verify_tag(ctx_req.req.aead_req, input, + &err); + ctx_req.ctx.reqctx->verify = VERIFY_HW; + } + break; + + case CRYPTO_ALG_TYPE_ABLKCIPHER: ctx_req.req.ablk_req = (struct ablkcipher_request *)req; ctx_req.ctx.ablk_ctx = ablkcipher_request_ctx(ctx_req.req.ablk_req); - if (!error_status) { + if (!err) { fw6_pld = (struct cpl_fw6_pld *)input; memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2], AES_BLOCK_SIZE); } dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst, - ABLK_CTX(ctx)->dst_nents, 
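chcr_verify_tag() is the VERIFY_SW path: the ICV computed by the hardware arrives in the CPL_FW6_PLD and is compared against the tag sitting in the last authsize bytes of the request, with -EBADMSG reported on mismatch. A standalone sketch of that check follows (inside the kernel, crypto_memneq() is the usual constant-time choice for comparing authentication tags):

#include <stdio.h>
#include <string.h>

/* Compare a computed ICV against the received one.
 * Returns 0 on match, -1 (standing in for -EBADMSG) otherwise. */
static int verify_tag(const unsigned char *computed,
		      const unsigned char *received, size_t authsize)
{
	return memcmp(computed, received, authsize) ? -1 : 0;
}

int main(void)
{
	unsigned char a[12] = "abcdefghijk", b[12] = "abcdefghijk";

	printf("match=%d\n", verify_tag(a, b, sizeof(a)));
	b[5] ^= 1;				/* corrupt one tag byte */
	printf("mismatch=%d\n", verify_tag(a, b, sizeof(a)));
	return 0;
}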
DMA_FROM_DEVICE); + ctx_req.ctx.ablk_ctx->dst_nents, DMA_FROM_DEVICE); if (ctx_req.ctx.ablk_ctx->skb) { kfree_skb(ctx_req.ctx.ablk_ctx->skb); ctx_req.ctx.ablk_ctx->skb = NULL; @@ -138,8 +200,10 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, updated_digestsize = SHA256_DIGEST_SIZE; else if (digestsize == SHA384_DIGEST_SIZE) updated_digestsize = SHA512_DIGEST_SIZE; - if (ctx_req.ctx.ahash_ctx->skb) + if (ctx_req.ctx.ahash_ctx->skb) { + kfree_skb(ctx_req.ctx.ahash_ctx->skb); ctx_req.ctx.ahash_ctx->skb = NULL; + } if (ctx_req.ctx.ahash_ctx->result == 1) { ctx_req.ctx.ahash_ctx->result = 0; memcpy(ctx_req.req.ahash_req->result, input + @@ -150,11 +214,9 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, sizeof(struct cpl_fw6_pld), updated_digestsize); } - kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr); - ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL; break; } - return 0; + return err; } /* @@ -178,40 +240,81 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) return flits + sgl_len(cnt); } -static struct shash_desc *chcr_alloc_shash(unsigned int ds) +static inline void get_aes_decrypt_key(unsigned char *dec_key, + const unsigned char *key, + unsigned int keylength) +{ + u32 temp; + u32 w_ring[MAX_NK]; + int i, j, k; + u8 nr, nk; + + switch (keylength) { + case AES_KEYLENGTH_128BIT: + nk = KEYLENGTH_4BYTES; + nr = NUMBER_OF_ROUNDS_10; + break; + case AES_KEYLENGTH_192BIT: + nk = KEYLENGTH_6BYTES; + nr = NUMBER_OF_ROUNDS_12; + break; + case AES_KEYLENGTH_256BIT: + nk = KEYLENGTH_8BYTES; + nr = NUMBER_OF_ROUNDS_14; + break; + default: + return; + } + for (i = 0; i < nk; i++) + w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); + + i = 0; + temp = w_ring[nk - 1]; + while (i + nk < (nr + 1) * 4) { + if (!(i % nk)) { + /* RotWord(temp) */ + temp = (temp << 8) | (temp >> 24); + temp = aes_ks_subword(temp); + temp ^= round_constant[i / nk]; + } else if (nk == 8 && (i % 4 == 0)) { + temp = aes_ks_subword(temp); + } + w_ring[i % nk] ^= temp; + temp = w_ring[i % nk]; + i++; + } + i--; + for (k = 0, j = i % nk; k < nk; k++) { + *((u32 *)dec_key + k) = htonl(w_ring[j]); + j--; + if (j < 0) + j += nk; + } +} + +static struct crypto_shash *chcr_alloc_shash(unsigned int ds) { struct crypto_shash *base_hash = NULL; - struct shash_desc *desc; switch (ds) { case SHA1_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha1-generic", 0, 0); + base_hash = crypto_alloc_shash("sha1", 0, 0); break; case SHA224_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha224-generic", 0, 0); + base_hash = crypto_alloc_shash("sha224", 0, 0); break; case SHA256_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha256-generic", 0, 0); + base_hash = crypto_alloc_shash("sha256", 0, 0); break; case SHA384_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha384-generic", 0, 0); + base_hash = crypto_alloc_shash("sha384", 0, 0); break; case SHA512_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha512-generic", 0, 0); + base_hash = crypto_alloc_shash("sha512", 0, 0); break; } - if (IS_ERR(base_hash)) { - pr_err("Can not allocate sha-generic algo.\n"); - return (void *)base_hash; - } - desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash), - GFP_KERNEL); - if (!desc) - return ERR_PTR(-ENOMEM); - desc->tfm = base_hash; - desc->flags = crypto_shash_get_flags(base_hash); - return desc; + return base_hash; } static int chcr_compute_partial_hash(struct shash_desc *desc, @@ -279,31 +382,18 @@ static inline int is_hmac(struct crypto_tfm *tfm) struct 
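chcr_alloc_shash() now asks for "sha1"/"sha256"/... rather than the "-generic" names, letting the crypto API hand back the best available implementation, and it returns the bare crypto_shash; the descriptor is placed on the caller's stack where it is used (see the SHASH_DESC_ON_STACK use in chcr_ahash_setkey further down). A hedged kernel-style sketch of that allocate-and-digest pattern:

#include <linux/err.h>
#include <linux/types.h>
#include <crypto/hash.h>

/* Sketch: one-shot digest over a buffer using a stack descriptor. */
static int digest_buf(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);	/* not "sha256-generic" */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* shash_desc still has a flags field in this kernel generation */
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}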
chcr_alg_template *chcr_crypto_alg = container_of(__crypto_ahash_alg(alg), struct chcr_alg_template, alg.hash); - if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) == - CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + if (chcr_crypto_alg->type == CRYPTO_ALG_TYPE_HMAC) return 1; return 0; } -static inline unsigned int ch_nents(struct scatterlist *sg, - unsigned int *total_size) -{ - unsigned int nents; - - for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) { - nents++; - *total_size += sg->length; - } - return nents; -} - static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, struct scatterlist *sg, struct phys_sge_parm *sg_param) { struct phys_sge_pairs *to; - unsigned int out_buf_size = sg_param->obsize; - unsigned int nents = sg_param->nents, i, j, tot_len = 0; + int out_buf_size = sg_param->obsize; + unsigned int nents = sg_param->nents, i, j = 0; phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL) | CPL_RX_PHYS_DSGL_ISRDMA_V(0)); @@ -321,25 +411,24 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, sizeof(struct cpl_rx_phys_dsgl)); for (i = 0; nents; to++) { - for (j = i; (nents && (j < (8 + i))); j++, nents--) { - to->len[j] = htons(sg->length); + for (j = 0; j < 8 && nents; j++, nents--) { + out_buf_size -= sg_dma_len(sg); + to->len[j] = htons(sg_dma_len(sg)); to->addr[j] = cpu_to_be64(sg_dma_address(sg)); - if (out_buf_size) { - if (tot_len + sg_dma_len(sg) >= out_buf_size) { - to->len[j] = htons(out_buf_size - - tot_len); - return; - } - tot_len += sg_dma_len(sg); - } sg = sg_next(sg); } } + if (out_buf_size) { + j--; + to--; + to->len[j] = htons(ntohs(to->len[j]) + (out_buf_size)); + } } -static inline unsigned -int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, - struct scatterlist *sg, struct phys_sge_parm *sg_param) +static inline int map_writesg_phys_cpl(struct device *dev, + struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, + struct phys_sge_parm *sg_param) { if (!sg || !sg_param->nents) return 0; @@ -353,6 +442,14 @@ int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, return 0; } +static inline int get_aead_subtype(struct crypto_aead *aead) +{ + struct aead_alg *alg = crypto_aead_alg(aead); + struct chcr_alg_template *chcr_crypto_alg = + container_of(alg, struct chcr_alg_template, alg.aead); + return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; +} + static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) { struct crypto_alg *alg = tfm->__crt_alg; @@ -362,8 +459,23 @@ static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; } +static inline void write_buffer_to_skb(struct sk_buff *skb, + unsigned int *frags, + char *bfr, + u8 bfr_len) +{ + skb->len += bfr_len; + skb->data_len += bfr_len; + skb->truesize += bfr_len; + get_page(virt_to_page(bfr)); + skb_fill_page_desc(skb, *frags, virt_to_page(bfr), + offset_in_page(bfr), bfr_len); + (*frags)++; +} + + static inline void -write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, +write_sg_to_skb(struct sk_buff *skb, unsigned int *frags, struct scatterlist *sg, unsigned int count) { struct page *spage; @@ -372,8 +484,9 @@ write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, skb->len += count; skb->data_len += count; skb->truesize += count; + while (count > 0) { - if (sg && (!(sg->length))) + if (!sg || (!(sg->length))) break; spage = sg_page(sg); get_page(spage); @@ -389,29 +502,25 @@ static int generate_copy_rrkey(struct ablk_ctx 
*ablkctx, struct _key_ctx *key_ctx) { if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { - get_aes_decrypt_key(key_ctx->key, ablkctx->key, - ablkctx->enckey_len << 3); - memset(key_ctx->key + ablkctx->enckey_len, 0, - CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len); + memcpy(key_ctx->key, ablkctx->rrkey, ablkctx->enckey_len); } else { memcpy(key_ctx->key, ablkctx->key + (ablkctx->enckey_len >> 1), ablkctx->enckey_len >> 1); - get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1), - ablkctx->key, ablkctx->enckey_len << 2); + memcpy(key_ctx->key + (ablkctx->enckey_len >> 1), + ablkctx->rrkey, ablkctx->enckey_len >> 1); } return 0; } static inline void create_wreq(struct chcr_context *ctx, - struct fw_crypto_lookaside_wr *wreq, + struct chcr_wr *chcr_req, void *req, struct sk_buff *skb, int kctx_len, int hash_sz, - unsigned int phys_dsgl) + int is_iv, + unsigned int sc_len) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1); - struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1); int iv_loc = IV_DSGL; int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id]; unsigned int immdatalen = 0, nr_frags = 0; @@ -423,27 +532,28 @@ static inline void create_wreq(struct chcr_context *ctx, nr_frags = skb_shinfo(skb)->nr_frags; } - wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, - (kctx_len >> 4)); - wreq->pld_size_hash_size = + chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, + ((sizeof(chcr_req->key_ctx) + kctx_len) >> 4)); + chcr_req->wreq.pld_size_hash_size = htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) | FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz)); - wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( + chcr_req->wreq.len16_pkd = + htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( (calc_tx_flits_ofld(skb) * 8), 16))); - wreq->cookie = cpu_to_be64((uintptr_t)req); - wreq->rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, - (hash_sz) ? IV_NOP : iv_loc); - - ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); - ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), - 16) - ((sizeof(*wreq)) >> 4))); - - sc_imm->cmd_more = FILL_CMD_MORE(immdatalen); - sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len + - ((hash_sz) ? DUMMY_BYTES : - (sizeof(struct cpl_rx_phys_dsgl) + - phys_dsgl)) + immdatalen); + chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); + chcr_req->wreq.rx_chid_to_rx_q_id = + FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, + is_iv ? 
iv_loc : IV_NOP, ctx->tx_channel_id); + + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + qid); + chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), + 16) - ((sizeof(chcr_req->wreq)) >> 4))); + + chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(immdatalen); + chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + + sizeof(chcr_req->key_ctx) + + kctx_len + sc_len + immdatalen); } /** @@ -454,86 +564,83 @@ static inline void create_wreq(struct chcr_context *ctx, * @op_type: encryption or decryption */ static struct sk_buff -*create_cipher_wr(struct crypto_async_request *req_base, - struct chcr_context *ctx, unsigned short qid, +*create_cipher_wr(struct ablkcipher_request *req, + unsigned short qid, unsigned short op_type) { - struct ablkcipher_request *req = (struct ablkcipher_request *)req_base; struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff *skb = NULL; - struct _key_ctx *key_ctx; - struct fw_crypto_lookaside_wr *wreq; - struct cpl_tx_sec_pdu *sec_cpl; + struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; - struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req); + struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); struct phys_sge_parm sg_param; - unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0; + unsigned int frags = 0, transhdr_len, phys_dsgl; unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; if (!req->info) return ERR_PTR(-EINVAL); - ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize); - ablkctx->enc = op_type; - + reqctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes); + if (reqctx->dst_nents <= 0) { + pr_err("AES:Invalid Destination sg lists\n"); + return ERR_PTR(-EINVAL); + } if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || - (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) + (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) { + pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n", + ablkctx->enckey_len, req->nbytes, ivsize); return ERR_PTR(-EINVAL); + } - phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents); + phys_dsgl = get_space_for_phys_dsgl(reqctx->dst_nents); - kctx_len = sizeof(*key_ctx) + - (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); + kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl); - skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), - GFP_ATOMIC); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); if (!skb) return ERR_PTR(-ENOMEM); skb_reserve(skb, sizeof(struct sge_opaque_hdr)); - wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); - - sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); - sec_cpl->op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1); - - sec_cpl->pldlen = htonl(ivsize + req->nbytes); - sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, - ivsize + 1, 0); - - sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 0, - 0, 0); - sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + chcr_req->sec_cpl.op_ivinsrtofst = + 
FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1); + + chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes); + chcr_req->sec_cpl.aadstart_cipherstop_hi = + FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, ivsize + 1, 0); + + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0); + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, ablkctx->ciph_mode, - 0, 0, ivsize >> 1, 1); - sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, + 0, 0, ivsize >> 1); + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, 0, 1, phys_dsgl); - key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); - key_ctx->ctx_hdr = ablkctx->key_ctx_hdr; + chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr; if (op_type == CHCR_DECRYPT_OP) { - if (generate_copy_rrkey(ablkctx, key_ctx)) - goto map_fail1; + generate_copy_rrkey(ablkctx, &chcr_req->key_ctx); } else { if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { - memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len); + memcpy(chcr_req->key_ctx.key, ablkctx->key, + ablkctx->enckey_len); } else { - memcpy(key_ctx->key, ablkctx->key + + memcpy(chcr_req->key_ctx.key, ablkctx->key + (ablkctx->enckey_len >> 1), ablkctx->enckey_len >> 1); - memcpy(key_ctx->key + + memcpy(chcr_req->key_ctx.key + (ablkctx->enckey_len >> 1), ablkctx->key, ablkctx->enckey_len >> 1); } } - phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len); - - memcpy(ablkctx->iv, req->info, ivsize); - sg_init_table(&ablkctx->iv_sg, 1); - sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize); - sg_param.nents = ablkctx->dst_nents; - sg_param.obsize = dst_bufsize; + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->nbytes; sg_param.qid = qid; sg_param.align = 1; if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst, @@ -541,10 +648,12 @@ static struct sk_buff goto map_fail1; skb_set_transport_header(skb, transhdr_len); - write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize); - write_sg_data_page_desc(skb, &frags, req->src, req->nbytes); - create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl); - req_ctx->skb = skb; + memcpy(reqctx->iv, req->info, ivsize); + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + write_sg_to_skb(skb, &frags, req->src, req->nbytes); + create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1, + sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl); + reqctx->skb = skb; skb_get(skb); return skb; map_fail1: @@ -557,15 +666,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, { struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); - struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm); unsigned int ck_size, context_size; u16 alignment = 0; - if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize)) - goto badkey_err; - - memcpy(ablkctx->key, key, keylen); - ablkctx->enckey_len = keylen; if (keylen == AES_KEYSIZE_128) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; } else if (keylen == AES_KEYSIZE_192) { @@ -576,7 +679,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, } else { goto badkey_err; } - + memcpy(ablkctx->key, key, keylen); + ablkctx->enckey_len = keylen; + get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3); context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + keylen + alignment) >> 4; @@ -592,16 +697,18 @@ badkey_err: static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int 
idx) { - int ret = 0; - struct sge_ofld_txq *q; struct adapter *adap = netdev2adap(dev); + struct sge_uld_txq_info *txq_info = + adap->sge.uld_txq_info[CXGB4_TX_CRYPTO]; + struct sge_uld_txq *txq; + int ret = 0; local_bh_disable(); - q = &adap->sge.ofldtxq[idx]; - spin_lock(&q->sendq.lock); - if (q->full) + txq = &txq_info->uldtxq[idx]; + spin_lock(&txq->sendq.lock); + if (txq->full) ret = -1; - spin_unlock(&q->sendq.lock); + spin_unlock(&txq->sendq.lock); local_bh_enable(); return ret; } @@ -610,7 +717,6 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_async_request *req_base = &req->base; struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; @@ -620,8 +726,7 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req) return -EBUSY; } - skb = create_cipher_wr(req_base, ctx, - u_ctx->lldi.rxq_ids[ctx->tx_channel_id], + skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id], CHCR_ENCRYPT_OP); if (IS_ERR(skb)) { pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); @@ -637,7 +742,6 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_async_request *req_base = &req->base; struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; @@ -647,7 +751,7 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req) return -EBUSY; } - skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0], + skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[0], CHCR_DECRYPT_OP); if (IS_ERR(skb)) { pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); @@ -674,11 +778,12 @@ static int chcr_device_init(struct chcr_context *ctx) } u_ctx = ULD_CTX(ctx); rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; - ctx->dev->tx_channel_id = 0; rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; rxq_idx += id % rxq_perchan; spin_lock(&ctx->dev->lock_chcr_dev); ctx->tx_channel_id = rxq_idx; + ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; + ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); } out: @@ -727,50 +832,33 @@ static int get_alg_config(struct algo_param *params, return 0; } -static inline int -write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx, - struct sk_buff *skb, unsigned int *frags, char *bfr, - u8 bfr_len) +static inline void chcr_free_shash(struct crypto_shash *base_hash) { - void *page_ptr = NULL; - - skb->len += bfr_len; - skb->data_len += bfr_len; - skb->truesize += bfr_len; - page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA); - if (!page_ptr) - return -ENOMEM; - get_page(virt_to_page(page_ptr)); - req_ctx->dummy_payload_ptr = page_ptr; - memcpy(page_ptr, bfr, bfr_len); - skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr), - offset_in_page(page_ptr), bfr_len); - (*frags)++; - return 0; + crypto_free_shash(base_hash); } /** - * create_final_hash_wr - Create hash work request + * create_hash_wr - Create hash work request * @req - Cipher req base */ -static struct sk_buff *create_final_hash_wr(struct ahash_request *req, - struct hash_wr_param *param) +static struct sk_buff *create_hash_wr(struct ahash_request *req, + struct hash_wr_param *param) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); 
struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct _key_ctx *key_ctx; - struct fw_crypto_lookaside_wr *wreq; - struct cpl_tx_sec_pdu *sec_cpl; + struct chcr_wr *chcr_req; unsigned int frags = 0, transhdr_len, iopad_alignment = 0; unsigned int digestsize = crypto_ahash_digestsize(tfm); - unsigned int kctx_len = sizeof(*key_ctx); + unsigned int kctx_len = 0; u8 hash_size_in_response = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; iopad_alignment = KEYCTX_ALIGN_PAD(digestsize); - kctx_len += param->alg_prm.result_size + iopad_alignment; + kctx_len = param->alg_prm.result_size + iopad_alignment; if (param->opad_needed) kctx_len += param->alg_prm.result_size + iopad_alignment; @@ -779,54 +867,54 @@ static struct sk_buff *create_final_hash_wr(struct ahash_request *req, else hash_size_in_response = param->alg_prm.result_size; transhdr_len = HASH_TRANSHDR_SIZE(kctx_len); - skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), - GFP_ATOMIC); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); if (!skb) return skb; skb_reserve(skb, sizeof(struct sge_opaque_hdr)); - wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); - memset(wreq, 0, transhdr_len); + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); - sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); - sec_cpl->op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0); - sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len); + chcr_req->sec_cpl.op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); - sec_cpl->aadstart_cipherstop_hi = + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0); - sec_cpl->cipherstop_lo_authinsert = + chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0); - sec_cpl->seqno_numivs = + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode, - param->opad_needed, 0, 0); + param->opad_needed, 0); - sec_cpl->ivgen_hdrlen = + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0); - key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); - memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size); + memcpy(chcr_req->key_ctx.key, req_ctx->partial_hash, + param->alg_prm.result_size); if (param->opad_needed) - memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 : - CHCR_HASH_MAX_DIGEST_SIZE), + memcpy(chcr_req->key_ctx.key + + ((param->alg_prm.result_size <= 32) ? 
32 : + CHCR_HASH_MAX_DIGEST_SIZE), hmacctx->opad, param->alg_prm.result_size); - key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, + chcr_req->key_ctx.ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, param->alg_prm.mk_size, 0, param->opad_needed, - (kctx_len >> 4)); - sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1); + ((kctx_len + + sizeof(chcr_req->key_ctx)) >> 4)); + chcr_req->sec_cpl.scmd1 = cpu_to_be64((u64)param->scmd1); skb_set_transport_header(skb, transhdr_len); if (param->bfr_len != 0) - write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr, - param->bfr_len); + write_buffer_to_skb(skb, &frags, req_ctx->reqbfr, + param->bfr_len); if (param->sg_len != 0) - write_sg_data_page_desc(skb, &frags, req->src, param->sg_len); + write_sg_to_skb(skb, &frags, req->src, param->sg_len); - create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response, - 0); + create_wreq(ctx, chcr_req, req, skb, kctx_len, hash_size_in_response, 0, + DUMMY_BYTES); req_ctx->skb = skb; skb_get(skb); return skb; @@ -852,34 +940,40 @@ static int chcr_ahash_update(struct ahash_request *req) return -EBUSY; } - if (nbytes + req_ctx->bfr_len >= bs) { - remainder = (nbytes + req_ctx->bfr_len) % bs; - nbytes = nbytes + req_ctx->bfr_len - remainder; + if (nbytes + req_ctx->reqlen >= bs) { + remainder = (nbytes + req_ctx->reqlen) % bs; + nbytes = nbytes + req_ctx->reqlen - remainder; } else { - sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr + - req_ctx->bfr_len, nbytes, 0); - req_ctx->bfr_len += nbytes; + sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->reqbfr + + req_ctx->reqlen, nbytes, 0); + req_ctx->reqlen += nbytes; return 0; } params.opad_needed = 0; params.more = 1; params.last = 0; - params.sg_len = nbytes - req_ctx->bfr_len; - params.bfr_len = req_ctx->bfr_len; + params.sg_len = nbytes - req_ctx->reqlen; + params.bfr_len = req_ctx->reqlen; params.scmd1 = 0; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->result = 0; req_ctx->data_len += params.sg_len + params.bfr_len; - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; - req_ctx->bfr_len = remainder; - if (remainder) + if (remainder) { + u8 *temp; + /* Swap buffers */ + temp = req_ctx->reqbfr; + req_ctx->reqbfr = req_ctx->skbfr; + req_ctx->skbfr = temp; sg_pcopy_to_buffer(req->src, sg_nents(req->src), - req_ctx->bfr, remainder, req->nbytes - + req_ctx->reqbfr, remainder, req->nbytes - remainder); + } + req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -915,10 +1009,10 @@ static int chcr_ahash_final(struct ahash_request *req) params.sg_len = 0; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->result = 1; - params.bfr_len = req_ctx->bfr_len; + params.bfr_len = req_ctx->reqlen; req_ctx->data_len += params.bfr_len + params.sg_len; - if (req_ctx->bfr && (req_ctx->bfr_len == 0)) { - create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + if (req_ctx->reqlen == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len); params.last = 0; params.more = 1; params.scmd1 = 0; @@ -929,7 +1023,10 @@ static int chcr_ahash_final(struct ahash_request *req) params.last = 1; params.more = 0; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -961,12 +1058,12 @@ static int 
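chcr_ahash_update() now keeps two partial-data buffers (bfr1/bfr2) and swaps reqbfr/skbfr after posting a work request: the buffer that was just attached to the skb stays untouched until the hardware is done with it, while the leftover bytes are copied into the other one. A tiny standalone sketch of the ping-pong idea:

#include <stdio.h>
#include <string.h>

#define BLK 8

struct hctx {
	char bfr1[BLK], bfr2[BLK];
	char *reqbfr;		/* new partial bytes accumulate here */
	char *skbfr;		/* owned by the in-flight request */
	unsigned int reqlen;
};

static void submit_and_swap(struct hctx *c)
{
	char *tmp;

	/* ... attach c->reqbfr (c->reqlen bytes) to the work request ... */
	tmp = c->reqbfr;	/* ping-pong: the posted buffer is off-limits */
	c->reqbfr = c->skbfr;	/* until completion, the remainder goes into */
	c->skbfr = tmp;		/* the other buffer */
	c->reqlen = 0;
}

int main(void)
{
	struct hctx c;

	c.reqbfr = c.bfr1;
	c.skbfr = c.bfr2;
	c.reqlen = 0;

	memcpy(c.reqbfr, "partial", 7);
	c.reqlen = 7;
	submit_and_swap(&c);
	printf("next partial data goes into %s\n",
	       c.reqbfr == c.bfr2 ? "bfr2" : "bfr1");
	return 0;
}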
chcr_ahash_finup(struct ahash_request *req) params.opad_needed = 0; params.sg_len = req->nbytes; - params.bfr_len = req_ctx->bfr_len; + params.bfr_len = req_ctx->reqlen; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->data_len += params.bfr_len + params.sg_len; req_ctx->result = 1; - if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) { - create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + if ((req_ctx->reqlen + req->nbytes) == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len); params.last = 0; params.more = 1; params.scmd1 = 0; @@ -977,9 +1074,10 @@ static int chcr_ahash_finup(struct ahash_request *req) params.more = 0; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -1021,13 +1119,13 @@ static int chcr_ahash_digest(struct ahash_request *req) req_ctx->result = 1; req_ctx->data_len += params.bfr_len + params.sg_len; - if (req_ctx->bfr && req->nbytes == 0) { - create_last_hash_block(req_ctx->bfr, bs, 0); + if (req->nbytes == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, 0); params.more = 1; params.bfr_len = bs; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; @@ -1042,12 +1140,12 @@ static int chcr_ahash_export(struct ahash_request *areq, void *out) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct chcr_ahash_req_ctx *state = out; - state->bfr_len = req_ctx->bfr_len; + state->reqlen = req_ctx->reqlen; state->data_len = req_ctx->data_len; - memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(state->bfr1, req_ctx->reqbfr, req_ctx->reqlen); memcpy(state->partial_hash, req_ctx->partial_hash, CHCR_HASH_MAX_DIGEST_SIZE); - return 0; + return 0; } static int chcr_ahash_import(struct ahash_request *areq, const void *in) @@ -1055,10 +1153,11 @@ static int chcr_ahash_import(struct ahash_request *areq, const void *in) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in; - req_ctx->bfr_len = state->bfr_len; + req_ctx->reqlen = state->reqlen; req_ctx->data_len = state->data_len; - req_ctx->dummy_payload_ptr = NULL; - memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + req_ctx->reqbfr = req_ctx->bfr1; + req_ctx->skbfr = req_ctx->bfr2; + memcpy(req_ctx->bfr1, state->bfr1, CHCR_HASH_MAX_BLOCK_SIZE_128); memcpy(req_ctx->partial_hash, state->partial_hash, CHCR_HASH_MAX_DIGEST_SIZE); return 0; @@ -1073,15 +1172,16 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); unsigned int i, err = 0, updated_digestsize; - /* - * use the key to calculate the ipad and opad. ipad will sent with the + SHASH_DESC_ON_STACK(shash, hmacctx->base_hash); + + /* use the key to calculate the ipad and opad. ipad will sent with the * first request's data. 
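chcr_ahash_setkey() digests an over-long key down to the digest size, zero-pads it to the block size, and derives the HMAC inner and outer pads by XORing with 0x36 and 0x5c before feeding them to chcr_compute_partial_hash(). The pad derivation itself is plain RFC 2104; a standalone sketch:

#include <stdio.h>
#include <string.h>

#define IPAD 0x36
#define OPAD 0x5c

/* Derive ipad/opad from an already block-sized (zero-padded) key. */
static void hmac_pads(const unsigned char *key, size_t bs,
		      unsigned char *ipad, unsigned char *opad)
{
	size_t i;

	for (i = 0; i < bs; i++) {
		ipad[i] = key[i] ^ IPAD;
		opad[i] = key[i] ^ OPAD;
	}
}

int main(void)
{
	unsigned char key[64] = "secret";	/* rest is zero padding */
	unsigned char ipad[64], opad[64];

	hmac_pads(key, sizeof(key), ipad, opad);
	printf("ipad[0]=%02x opad[0]=%02x\n", ipad[0], opad[0]);
	return 0;
}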
opad will be sent with the final hash result * ipad in hmacctx->ipad and opad in hmacctx->opad location */ - if (!hmacctx->desc) - return -EINVAL; + shash->tfm = hmacctx->base_hash; + shash->flags = crypto_shash_get_flags(hmacctx->base_hash); if (keylen > bs) { - err = crypto_shash_digest(hmacctx->desc, key, keylen, + err = crypto_shash_digest(shash, key, keylen, hmacctx->ipad); if (err) goto out; @@ -1102,13 +1202,13 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, updated_digestsize = SHA256_DIGEST_SIZE; else if (digestsize == SHA384_DIGEST_SIZE) updated_digestsize = SHA512_DIGEST_SIZE; - err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad, + err = chcr_compute_partial_hash(shash, hmacctx->ipad, hmacctx->ipad, digestsize); if (err) goto out; chcr_change_order(hmacctx->ipad, updated_digestsize); - err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad, + err = chcr_compute_partial_hash(shash, hmacctx->opad, hmacctx->opad, digestsize); if (err) goto out; @@ -1122,28 +1222,29 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, { struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); - int status = 0; unsigned short context_size = 0; - if ((key_len == (AES_KEYSIZE_128 << 1)) || - (key_len == (AES_KEYSIZE_256 << 1))) { - memcpy(ablkctx->key, key, key_len); - ablkctx->enckey_len = key_len; - context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; - ablkctx->key_ctx_hdr = - FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? - CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : - CHCR_KEYCTX_CIPHER_KEY_SIZE_256, - CHCR_KEYCTX_NO_KEY, 1, - 0, context_size); - ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; - } else { + if ((key_len != (AES_KEYSIZE_128 << 1)) && + (key_len != (AES_KEYSIZE_256 << 1))) { crypto_tfm_set_flags((struct crypto_tfm *)tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); ablkctx->enckey_len = 0; - status = -EINVAL; + return -EINVAL; + } - return status; + + memcpy(ablkctx->key, key, key_len); + ablkctx->enckey_len = key_len; + get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, key_len << 2); + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; + ablkctx->key_ctx_hdr = + FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? 
+ CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : + CHCR_KEYCTX_CIPHER_KEY_SIZE_256, + CHCR_KEYCTX_NO_KEY, 1, + 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; + return 0; } static int chcr_sha_init(struct ahash_request *areq) @@ -1153,8 +1254,9 @@ static int chcr_sha_init(struct ahash_request *areq) int digestsize = crypto_ahash_digestsize(tfm); req_ctx->data_len = 0; - req_ctx->dummy_payload_ptr = NULL; - req_ctx->bfr_len = 0; + req_ctx->reqlen = 0; + req_ctx->reqbfr = req_ctx->bfr1; + req_ctx->skbfr = req_ctx->bfr2; req_ctx->skb = NULL; req_ctx->result = 0; copy_hash_init_values(req_ctx->partial_hash, digestsize); @@ -1202,29 +1304,1186 @@ static int chcr_hmac_cra_init(struct crypto_tfm *tfm) crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct chcr_ahash_req_ctx)); - hmacctx->desc = chcr_alloc_shash(digestsize); - if (IS_ERR(hmacctx->desc)) - return PTR_ERR(hmacctx->desc); + hmacctx->base_hash = chcr_alloc_shash(digestsize); + if (IS_ERR(hmacctx->base_hash)) + return PTR_ERR(hmacctx->base_hash); return chcr_device_init(crypto_tfm_ctx(tfm)); } -static void chcr_free_shash(struct shash_desc *desc) -{ - crypto_free_shash(desc->tfm); - kfree(desc); -} - static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) { struct chcr_context *ctx = crypto_tfm_ctx(tfm); struct hmac_ctx *hmacctx = HMAC_CTX(ctx); - if (hmacctx->desc) { - chcr_free_shash(hmacctx->desc); - hmacctx->desc = NULL; + if (hmacctx->base_hash) { + chcr_free_shash(hmacctx->base_hash); + hmacctx->base_hash = NULL; + } +} + +static int chcr_copy_assoc(struct aead_request *req, + struct chcr_aead_ctx *ctx) +{ + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); + + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, + NULL); + + return crypto_skcipher_encrypt(skreq); +} + +static unsigned char get_hmac(unsigned int authsize) +{ + switch (authsize) { + case ICV_8: + return CHCR_SCMD_HMAC_CTRL_PL1; + case ICV_10: + return CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + case ICV_12: + return CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + } + return CHCR_SCMD_HMAC_CTRL_NO_TRUNC; +} + + +static struct sk_buff *create_authenc_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src; + unsigned int frags = 0, transhdr_len; + unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0; + unsigned int kctx_len = 0; + unsigned short stop_offset = 0; + unsigned int assoclen = req->assoclen; + unsigned int authsize = crypto_aead_authsize(tfm); + int err = 0; + int null = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
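chcr_copy_assoc() copies the associated data from req->src to req->dst for out-of-place AEAD requests by "encrypting" it with the null skcipher kept in aeadctx->null, which amounts to a scatterlist-aware memcpy done entirely through the crypto API. A hedged kernel-style sketch of the same trick with a locally allocated ecb(cipher_null) transform:

#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/skcipher.h>

/* Sketch: copy 'len' bytes from 'src' to 'dst' scatterlists by running
 * them through the null cipher (a plain copy). */
static int sg_copy_via_null(struct scatterlist *src,
			    struct scatterlist *dst, unsigned int len)
{
	struct crypto_skcipher *null;
	int err;

	null = crypto_alloc_skcipher("ecb(cipher_null)", 0, 0);
	if (IS_ERR(null))
		return PTR_ERR(null);

	{
		SKCIPHER_REQUEST_ON_STACK(req, null);

		skcipher_request_set_tfm(req, null);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, src, dst, len, NULL);
		err = crypto_skcipher_encrypt(req);
	}

	crypto_free_skcipher(null);
	return err;
}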
GFP_KERNEL : + GFP_ATOMIC; + + if (aeadctx->enckey_len == 0 || (req->cryptlen == 0)) + goto err; + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; + + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) + return ERR_PTR(err); + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); + } + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) { + null = 1; + assoclen = 0; + } + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("AUTHENC:Invalid Destination sg entries\n"); + goto err; + } + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + - sizeof(chcr_req->key_ctx); + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + if (!skb) + goto err; + + /* LLD is going to write the sge hdr. */ + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + /* Write WR */ + chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + stop_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; + + /* + * Input order is AAD,IV and Payload. where IV should be included as + * the part of authdata. All other fields should be filled according + * to the hardware spec + */ + chcr_req->sec_cpl.op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, + (ivsize ? (assoclen + 1) : 0)); + chcr_req->sec_cpl.pldlen = htonl(assoclen + ivsize + req->cryptlen); + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + assoclen ? 1 : 0, assoclen, + assoclen + ivsize + 1, + (stop_offset & 0x1F0) >> 4); + chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT( + stop_offset & 0xF, + null ? 0 : assoclen + ivsize + 1, + stop_offset, stop_offset); + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? 1 : 0, + CHCR_SCMD_CIPHER_MODE_AES_CBC, + actx->auth_mode, aeadctx->hmac_ctrl, + ivsize >> 1); + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, 1, dst_size); + + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + if (op_type == CHCR_ENCRYPT_OP) + memcpy(chcr_req->key_ctx.key, aeadctx->key, + aeadctx->enckey_len); + else + memcpy(chcr_req->key_ctx.key, actx->dec_rrkey, + aeadctx->enckey_len); + + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) << + 4), actx->h_iopad, kctx_len - + (DIV_ROUND_UP(aeadctx->enckey_len, 16) << 4)); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? 
-authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + + if (assoclen) { + /* AAD buffer in */ + write_sg_to_skb(skb, &frags, req->src, assoclen); + + } + write_buffer_to_skb(skb, &frags, req->iv, ivsize); + write_sg_to_skb(skb, &frags, src, req->cryptlen); + create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + + return skb; +dstmap_fail: + /* ivmap_fail: */ + kfree_skb(skb); +err: + return ERR_PTR(-EINVAL); +} + +static void aes_gcm_empty_pld_pad(struct scatterlist *sg, + unsigned short offset) +{ + struct page *spage; + unsigned char *addr; + + spage = sg_page(sg); + get_page(spage); /* so that it is not freed by NIC */ +#ifdef KMAP_ATOMIC_ARGS + addr = kmap_atomic(spage, KM_SOFTIRQ0); +#else + addr = kmap_atomic(spage); +#endif + memset(addr + sg->offset, 0, offset + 1); + + kunmap_atomic(addr); +} + +static int set_msg_len(u8 *block, unsigned int msglen, int csize) +{ + __be32 data; + + memset(block, 0, csize); + block += csize; + + if (csize >= 4) + csize = 4; + else if (msglen > (unsigned int)(1 << (8 * csize))) + return -EOVERFLOW; + + data = cpu_to_be32(msglen); + memcpy(block - csize, (u8 *)&data + 4 - csize, csize); + + return 0; +} + +static void generate_b0(struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned short op_type) +{ + unsigned int l, lp, m; + int rc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + u8 *b0 = reqctx->scratch_pad; + + m = crypto_aead_authsize(aead); + + memcpy(b0, reqctx->iv, 16); + + lp = b0[0]; + l = lp + 1; + + /* set m, bits 3-5 */ + *b0 |= (8 * ((m - 2) / 2)); + + /* set adata, bit 6, if associated data is used */ + if (req->assoclen) + *b0 |= 64; + rc = set_msg_len(b0 + 16 - l, + (op_type == CHCR_DECRYPT_OP) ? + req->cryptlen - m : req->cryptlen, l); +} + +static inline int crypto_ccm_check_iv(const u8 *iv) +{ + /* 2 <= L <= 8, so 1 <= L' <= 7. */ + if (iv[0] < 1 || iv[0] > 7) + return -EINVAL; + + return 0; +} + +static int ccm_format_packet(struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned int sub_type, + unsigned short op_type) +{ + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + int rc = 0; + + if (req->assoclen > T5_MAX_AAD_SIZE) { + pr_err("CCM: Unsupported AAD data. 
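generate_b0() and set_msg_len() build the CCM B0 block as in RFC 3610: the flags byte (whose low bits already carry L' = L - 1 from the IV) gains the encoded tag length in bits 3..5 and the Adata bit when AAD is present, and the last L bytes hold the payload length big-endian. A standalone sketch of that encoding (the RFC 4309 salt/nonce handling done by the driver is omitted here):

#include <stdio.h>
#include <string.h>

/* Build the 16-byte CCM B0 block from an IV whose first byte is L',
 * the tag size m and the payload length. */
static int build_b0(unsigned char b0[16], const unsigned char iv[16],
		    unsigned int m, unsigned long long msglen, int have_aad)
{
	unsigned int l = iv[0] + 1;	/* size of the length field */
	unsigned int i;

	memcpy(b0, iv, 16);
	b0[0] |= (unsigned char)(8 * ((m - 2) / 2));	/* tag length, bits 3..5 */
	if (have_aad)
		b0[0] |= 64;				/* Adata flag, bit 6 */

	if (l < 4 && msglen >= (1ULL << (8 * l)))
		return -1;				/* length does not fit */

	for (i = 0; i < l; i++)				/* big-endian length in */
		b0[15 - i] = (unsigned char)(msglen >> (8 * i));	/* the last L bytes */
	return 0;
}

int main(void)
{
	unsigned char iv[16] = { 3 };	/* L' = 3, i.e. an 11-byte nonce */
	unsigned char b0[16];

	build_b0(b0, iv, 12, 4096, 1);
	printf("flags=0x%02x len=%02x%02x\n", b0[0], b0[14], b0[15]);
	return 0;
}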
It should be < %d\n", + T5_MAX_AAD_SIZE); + return -EINVAL; + } + if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) { + reqctx->iv[0] = 3; + memcpy(reqctx->iv + 1, &aeadctx->salt[0], 3); + memcpy(reqctx->iv + 4, req->iv, 8); + memset(reqctx->iv + 12, 0, 4); + *((unsigned short *)(reqctx->scratch_pad + 16)) = + htons(req->assoclen - 8); + } else { + memcpy(reqctx->iv, req->iv, 16); + *((unsigned short *)(reqctx->scratch_pad + 16)) = + htons(req->assoclen); + } + generate_b0(req, aeadctx, op_type); + /* zero the ctr value */ + memset(reqctx->iv + 15 - reqctx->iv[0], 0, reqctx->iv[0] + 1); + return rc; +} + +static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, + unsigned int dst_size, + struct aead_request *req, + unsigned short op_type, + struct chcr_context *chcrctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int ivsize = AES_BLOCK_SIZE; + unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; + unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; + unsigned int c_id = chcrctx->dev->rx_channel_id; + unsigned int ccm_xtra; + unsigned char tag_offset = 0, auth_offset = 0; + unsigned char hmac_ctrl = get_hmac(crypto_aead_authsize(tfm)); + unsigned int assoclen; + + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) + assoclen = req->assoclen - 8; + else + assoclen = req->assoclen; + ccm_xtra = CCM_B0_SIZE + + ((assoclen) ? CCM_AAD_FIELD_SIZE : 0); + + auth_offset = req->cryptlen ? + (assoclen + ivsize + 1 + ccm_xtra) : 0; + if (op_type == CHCR_DECRYPT_OP) { + if (crypto_aead_authsize(tfm) != req->cryptlen) + tag_offset = crypto_aead_authsize(tfm); + else + auth_offset = 0; + } + + + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, + 2, (ivsize ? (assoclen + 1) : 0) + + ccm_xtra); + sec_cpl->pldlen = + htonl(assoclen + ivsize + req->cryptlen + ccm_xtra); + /* For CCM there wil be b0 always. So AAD start will be 1 always */ + sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + 1, assoclen + ccm_xtra, assoclen + + ivsize + 1 + ccm_xtra, 0); + + sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, + auth_offset, tag_offset, + (op_type == CHCR_ENCRYPT_OP) ? 0 : + crypto_aead_authsize(tfm)); + sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? 0 : 1, + cipher_mode, mac_mode, hmac_ctrl, + ivsize >> 1); + + sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, 0, + 1, dst_size); +} + +int aead_ccm_validate_input(unsigned short op_type, + struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned int sub_type) +{ + if (sub_type != CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) { + if (crypto_ccm_check_iv(req->iv)) { + pr_err("CCM: IV check fails\n"); + return -EINVAL; + } + } else { + if (req->assoclen != 16 && req->assoclen != 20) { + pr_err("RFC4309: Invalid AAD length %d\n", + req->assoclen); + return -EINVAL; + } + } + if (aeadctx->enckey_len == 0) { + pr_err("CCM: Encryption key not set\n"); + return -EINVAL; + } + return 0; +} + +unsigned int fill_aead_req_fields(struct sk_buff *skb, + struct aead_request *req, + struct scatterlist *src, + unsigned int ivsize, + struct chcr_aead_ctx *aeadctx) +{ + unsigned int frags = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + /* b0 and aad length(if available) */ + + write_buffer_to_skb(skb, &frags, reqctx->scratch_pad, CCM_B0_SIZE + + (req->assoclen ? 
CCM_AAD_FIELD_SIZE : 0)); + if (req->assoclen) { + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) + write_sg_to_skb(skb, &frags, req->src, + req->assoclen - 8); + else + write_sg_to_skb(skb, &frags, req->src, req->assoclen); + } + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + if (req->cryptlen) + write_sg_to_skb(skb, &frags, src, req->cryptlen); + + return frags; +} + +static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src; + unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE; + unsigned int dst_size = 0, kctx_len; + unsigned int sub_type; + unsigned int authsize = crypto_aead_authsize(tfm); + int err = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + sub_type = get_aead_subtype(tfm); + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; + + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) { + pr_err("AAD copy to destination buffer fails\n"); + return ERR_PTR(err); + } + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); + } + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("CCM:Invalid Destination sg entries\n"); + goto err; + } + + + if (aead_ccm_validate_input(op_type, req, aeadctx, sub_type)) + goto err; + + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) * 2; + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + + if (!skb) + goto err; + + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type, ctx); + + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len); + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) * + 16), aeadctx->key, aeadctx->enckey_len); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + if (ccm_format_packet(req, aeadctx, sub_type, op_type)) + goto dstmap_fail; + + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? 
-authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx); + create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + return skb; +dstmap_fail: + kfree_skb(skb); + skb = NULL; +err: + return ERR_PTR(-EINVAL); +} + +static struct sk_buff *create_gcm_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src; + unsigned int frags = 0, transhdr_len; + unsigned int ivsize = AES_BLOCK_SIZE; + unsigned int dst_size = 0, kctx_len; + unsigned char tag_offset = 0; + unsigned int crypt_len = 0; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned char hmac_ctrl = get_hmac(authsize); + int err = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + + /* validate key size */ + if (aeadctx->enckey_len == 0) + goto err; + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) + return ERR_PTR(err); + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); + } + + if (!req->cryptlen) + /* null-payload is not supported in the hardware. + * software is sending block size + */ + crypt_len = AES_BLOCK_SIZE; + else + crypt_len = req->cryptlen; + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("GCM:Invalid Destination sg entries\n"); + goto err; + } + + + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) + + AEAD_H_SIZE; + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + if (!skb) + goto err; + + /* NIC driver is going to write the sge hdr. */ + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) + req->assoclen -= 8; + + tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; + chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( + ctx->dev->rx_channel_id, 2, (ivsize ? + (req->assoclen + 1) : 0)); + chcr_req->sec_cpl.pldlen = htonl(req->assoclen + ivsize + crypt_len); + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + req->assoclen ? 
1 : 0, req->assoclen, + req->assoclen + ivsize + 1, 0); + if (req->cryptlen) { + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + ivsize + 1, + tag_offset, tag_offset); + chcr_req->sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type == + CHCR_ENCRYPT_OP) ? 1 : 0, + CHCR_SCMD_CIPHER_MODE_AES_GCM, + CHCR_SCMD_AUTH_MODE_GHASH, hmac_ctrl, + ivsize >> 1); + } else { + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0); + chcr_req->sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? + 1 : 0, CHCR_SCMD_CIPHER_MODE_AES_CBC, + 0, 0, ivsize >> 1); + } + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, 1, dst_size); + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len); + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) * + 16), GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE); + + /* prepare a 16 byte iv */ + /* S A L T | IV | 0x00000001 */ + if (get_aead_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) { + memcpy(reqctx->iv, aeadctx->salt, 4); + memcpy(reqctx->iv + 4, req->iv, 8); + } else { + memcpy(reqctx->iv, req->iv, 12); + } + *((unsigned int *)(reqctx->iv + 12)) = htonl(0x01); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + + write_sg_to_skb(skb, &frags, req->src, req->assoclen); + + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + + if (req->cryptlen) { + write_sg_to_skb(skb, &frags, src, req->cryptlen); + } else { + aes_gcm_empty_pld_pad(req->dst, authsize - 1); + write_sg_to_skb(skb, &frags, reqctx->dst, crypt_len); + + } + + create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + return skb; + +dstmap_fail: + /* ivmap_fail: */ + kfree_skb(skb); + skb = NULL; +err: + return skb; +} + + + +static int chcr_aead_cra_init(struct crypto_aead *tfm) +{ + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + + crypto_aead_set_reqsize(tfm, sizeof(struct chcr_aead_reqctx)); + aeadctx->null = crypto_get_default_null_skcipher(); + if (IS_ERR(aeadctx->null)) + return PTR_ERR(aeadctx->null); + return chcr_device_init(ctx); +} + +static void chcr_aead_cra_exit(struct crypto_aead *tfm) +{ + crypto_put_default_null_skcipher(); +} + +static int chcr_authenc_null_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NOP; + aeadctx->mayverify = VERIFY_HW; + return 0; +} +static int chcr_authenc_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + u32 maxauth = crypto_aead_maxauthsize(tfm); + + /*SHA1 authsize in ipsec is 12 instead of 10 i.e maxauthsize / 2 is not + * true for sha1. 
authsize == 12 condition should be before + * authsize == (maxauth >> 1) + */ + if (authsize == ICV_4) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_6) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_10) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_12) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_14) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == (maxauth >> 1)) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == maxauth) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + } else { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_SW; + } + return 0; +} + + +static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_4: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_14: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_13: + case ICV_15: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_SW; + break; + default: + + crypto_tfm_set_flags((struct crypto_tfm *) tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + default: + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_ccm_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_4: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_6: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_10: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_14: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = 
CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + default: + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_aead_ccm_setkey(struct crypto_aead *aead, + const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + unsigned char ck_size, mk_size; + int key_ctx_size = 0; + + memcpy(aeadctx->key, key, keylen); + aeadctx->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) * 2; + if (keylen == AES_KEYSIZE_128) { + mk_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + return -EINVAL; + } + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, mk_size, 0, 0, + key_ctx_size >> 4); + return 0; +} + +static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + + if (keylen < 3) { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + return -EINVAL; + } + keylen -= 3; + memcpy(aeadctx->salt, key + keylen, 3); + return chcr_aead_ccm_setkey(aead, key, keylen); +} + +static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx); + struct crypto_cipher *cipher; + unsigned int ck_size; + int ret = 0, key_ctx_size = 0; + + if (get_aead_subtype(aead) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 && + keylen > 3) { + keylen -= 4; /* nonce/salt is present in the last 4 bytes */ + memcpy(aeadctx->salt, key + keylen, 4); + } + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + pr_err("GCM: Invalid key length %d", keylen); + ret = -EINVAL; + goto out; + } + + memcpy(aeadctx->key, key, keylen); + aeadctx->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) + + AEAD_H_SIZE; + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + CHCR_KEYCTX_MAC_KEY_SIZE_128, + 0, 0, + key_ctx_size >> 4); + /* Calculate the H = CIPH(K, 0 repeated 16 times). 
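For reference, a minimal sketch of the two GCM-specific values handled here and in create_gcm_wr above: the GHASH hash subkey H = CIPH_K(0^128) and, for the rfc4106 case, the 16-byte counter block assembled as salt | per-request IV | 0x00000001. The one-block helper aes_encrypt_block() is an assumed stand-in for illustration only, not a kernel API and not part of this driver.

    /* Illustrative only: derive the GHASH subkey and the rfc4106 counter block.
     * aes_encrypt_block(key, keylen, in, out) is an assumed one-block AES helper.
     */
    #include <linux/string.h>
    #include <linux/types.h>

    static void sketch_gcm_material(const u8 *key, unsigned int keylen,
                                    const u8 salt[4], const u8 req_iv[8],
                                    u8 hash_subkey[16], u8 ctr_block[16])
    {
            static const u8 zeroes[16];

            /* H = CIPH_K(0^128); the hardware uses it for GHASH. */
            aes_encrypt_block(key, keylen, zeroes, hash_subkey);

            /* rfc4106 counter block: SALT | IV | 0x00000001, as create_gcm_wr
             * builds it from the 4-byte salt kept at setkey time and the
             * 8-byte per-request IV.
             */
            memcpy(ctr_block, salt, 4);
            memcpy(ctr_block + 4, req_iv, 8);
            ctr_block[12] = 0;
            ctr_block[13] = 0;
            ctr_block[14] = 0;
            ctr_block[15] = 1;
    }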
+ * It will go in key context + */ + cipher = crypto_alloc_cipher("aes-generic", 0, 0); + if (IS_ERR(cipher)) { + aeadctx->enckey_len = 0; + ret = -ENOMEM; + goto out; + } + + ret = crypto_cipher_setkey(cipher, key, keylen); + if (ret) { + aeadctx->enckey_len = 0; + goto out1; + } + memset(gctx->ghash_h, 0, AEAD_H_SIZE); + crypto_cipher_encrypt_one(cipher, gctx->ghash_h, gctx->ghash_h); + +out1: + crypto_free_cipher(cipher); +out: + return ret; +} + +static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(authenc); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + /* it contains auth and cipher key both*/ + struct crypto_authenc_keys keys; + unsigned int bs; + unsigned int max_authsize = crypto_aead_alg(authenc)->maxauthsize; + int err = 0, i, key_ctx_len = 0; + unsigned char ck_size = 0; + unsigned char pad[CHCR_HASH_MAX_BLOCK_SIZE_128] = { 0 }; + struct crypto_shash *base_hash = NULL; + struct algo_param param; + int align; + u8 *o_ptr = NULL; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) { + crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN); + goto out; + } + + if (get_alg_config(&param, max_authsize)) { + pr_err("chcr : Unsupported digest size\n"); + goto out; + } + if (keys.enckeylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keys.enckeylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keys.enckeylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("chcr : Unsupported cipher key\n"); + goto out; + } + + /* Copy only encryption key. We use authkey to generate h(ipad) and + * h(opad) so authkey is not needed again. authkeylen has the + * size of the hash digest. 
+ */ + memcpy(aeadctx->key, keys.enckey, keys.enckeylen); + aeadctx->enckey_len = keys.enckeylen; + get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key, + aeadctx->enckey_len << 3); + + base_hash = chcr_alloc_shash(max_authsize); + if (IS_ERR(base_hash)) { + pr_err("chcr : Base driver cannot be loaded\n"); + goto out; + } + { + SHASH_DESC_ON_STACK(shash, base_hash); + shash->tfm = base_hash; + shash->flags = crypto_shash_get_flags(base_hash); + bs = crypto_shash_blocksize(base_hash); + align = KEYCTX_ALIGN_PAD(max_authsize); + o_ptr = actx->h_iopad + param.result_size + align; + + if (keys.authkeylen > bs) { + err = crypto_shash_digest(shash, keys.authkey, + keys.authkeylen, + o_ptr); + if (err) { + pr_err("chcr : Base driver cannot be loaded\n"); + goto out; + } + keys.authkeylen = max_authsize; + } else + memcpy(o_ptr, keys.authkey, keys.authkeylen); + + /* Compute the ipad-digest*/ + memset(pad + keys.authkeylen, 0, bs - keys.authkeylen); + memcpy(pad, o_ptr, keys.authkeylen); + for (i = 0; i < bs >> 2; i++) + *((unsigned int *)pad + i) ^= IPAD_DATA; + + if (chcr_compute_partial_hash(shash, pad, actx->h_iopad, + max_authsize)) + goto out; + /* Compute the opad-digest */ + memset(pad + keys.authkeylen, 0, bs - keys.authkeylen); + memcpy(pad, o_ptr, keys.authkeylen); + for (i = 0; i < bs >> 2; i++) + *((unsigned int *)pad + i) ^= OPAD_DATA; + + if (chcr_compute_partial_hash(shash, pad, o_ptr, max_authsize)) + goto out; + + /* convert the ipad and opad digest to network order */ + chcr_change_order(actx->h_iopad, param.result_size); + chcr_change_order(o_ptr, param.result_size); + key_ctx_len = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4) + + (param.result_size + align) * 2; + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, param.mk_size, + 0, 1, key_ctx_len >> 4); + actx->auth_mode = param.auth_mode; + chcr_free_shash(base_hash); + + return 0; + } +out: + aeadctx->enckey_len = 0; + if (base_hash) + chcr_free_shash(base_hash); + return -EINVAL; +} + +static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc, + const u8 *key, unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(authenc); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + struct crypto_authenc_keys keys; + + /* it contains auth and cipher key both*/ + int key_ctx_len = 0; + unsigned char ck_size = 0; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) { + crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN); + goto out; + } + if (keys.enckeylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keys.enckeylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keys.enckeylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("chcr : Unsupported cipher key\n"); + goto out; + } + memcpy(aeadctx->key, keys.enckey, keys.enckeylen); + aeadctx->enckey_len = keys.enckeylen; + get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key, + aeadctx->enckey_len << 3); + key_ctx_len = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4); + + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, 0, + 0, key_ctx_len >> 4); + actx->auth_mode = CHCR_SCMD_AUTH_MODE_NOP; + return 0; +out: + aeadctx->enckey_len = 0; + return -EINVAL; +} +static int chcr_aead_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + + 
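For context, a minimal sketch (not part of this patch) of how an in-kernel caller would reach these encrypt/decrypt entry points through the generic crypto API once an AEAD such as gcm(aes) is registered below; the completion plumbing is the standard synchronous-wait pattern, and the helper and buffer names are illustrative assumptions.

    /* Illustrative only: a synchronous in-kernel user of gcm(aes).
     * buf holds AAD followed by the plaintext, with 16 spare bytes for the tag.
     */
    #include <crypto/aead.h>
    #include <linux/completion.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    struct sketch_result {
            struct completion completion;
            int err;
    };

    static void sketch_aead_done(struct crypto_async_request *req, int err)
    {
            struct sketch_result *res = req->data;

            if (err == -EINPROGRESS)
                    return;
            res->err = err;
            complete(&res->completion);
    }

    static int sketch_gcm_encrypt(const u8 *key, unsigned int keylen,
                                  u8 *buf, unsigned int assoclen,
                                  unsigned int cryptlen, u8 iv[12])
    {
            struct crypto_aead *tfm;
            struct aead_request *req;
            struct scatterlist sg;
            struct sketch_result res;
            int err;

            tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_aead_setkey(tfm, key, keylen);
            if (!err)
                    err = crypto_aead_setauthsize(tfm, 16);
            if (err)
                    goto out_free_tfm;

            req = aead_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    err = -ENOMEM;
                    goto out_free_tfm;
            }

            sg_init_one(&sg, buf, assoclen + cryptlen + 16);
            init_completion(&res.completion);
            aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      sketch_aead_done, &res);
            aead_request_set_ad(req, assoclen);
            aead_request_set_crypt(req, &sg, &sg, cryptlen, iv);

            err = crypto_aead_encrypt(req);
            if (err == -EINPROGRESS || err == -EBUSY) {
                    wait_for_completion(&res.completion);
                    err = res.err;
            }

            aead_request_free(req);
    out_free_tfm:
            crypto_free_aead(tfm);
            return err;
    }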
reqctx->verify = VERIFY_HW; + + switch (get_aead_subtype(tfm)) { + case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC: + case CRYPTO_ALG_SUB_TYPE_AEAD_NULL: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_authenc_wr); + case CRYPTO_ALG_SUB_TYPE_AEAD_CCM: + case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_aead_ccm_wr); + default: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_gcm_wr); + } +} + +static int chcr_aead_decrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + int size; + + if (aeadctx->mayverify == VERIFY_SW) { + size = crypto_aead_maxauthsize(tfm); + reqctx->verify = VERIFY_SW; + } else { + size = 0; + reqctx->verify = VERIFY_HW; + } + + switch (get_aead_subtype(tfm)) { + case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC: + case CRYPTO_ALG_SUB_TYPE_AEAD_NULL: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_authenc_wr); + case CRYPTO_ALG_SUB_TYPE_AEAD_CCM: + case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_aead_ccm_wr); + default: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_gcm_wr); } } +static int chcr_aead_op(struct aead_request *req, + unsigned short op_type, + int size, + create_wr_t create_wr_fn) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx; + struct sk_buff *skb; + + if (!ctx->dev) { + pr_err("chcr : %s : No crypto device.\n", __func__); + return -ENXIO; + } + u_ctx = ULD_CTX(ctx); + if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id)) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + /* Form a WR from req */ + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id], size, + op_type); + + if (IS_ERR(skb) || skb == NULL) { + pr_err("chcr : %s : failed to form WR. 
No memory\n", __func__); + return PTR_ERR(skb); + } + + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} static struct chcr_alg_template driver_algs[] = { /* AES-CBC */ { @@ -1232,9 +2491,9 @@ static struct chcr_alg_template driver_algs[] = { .is_registered = 0, .alg.crypto = { .cra_name = "cbc(aes)", - .cra_driver_name = "cbc(aes-chcr)", + .cra_driver_name = "cbc-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) @@ -1259,9 +2518,9 @@ static struct chcr_alg_template driver_algs[] = { .is_registered = 0, .alg.crypto = { .cra_name = "xts(aes)", - .cra_driver_name = "xts(aes-chcr)", + .cra_driver_name = "xts-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) + @@ -1352,7 +2611,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA1_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha1)", - .cra_driver_name = "hmac(sha1-chcr)", + .cra_driver_name = "hmac-sha1-chcr", .cra_blocksize = SHA1_BLOCK_SIZE, } } @@ -1364,7 +2623,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA224_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha224)", - .cra_driver_name = "hmac(sha224-chcr)", + .cra_driver_name = "hmac-sha224-chcr", .cra_blocksize = SHA224_BLOCK_SIZE, } } @@ -1376,7 +2635,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA256_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha256)", - .cra_driver_name = "hmac(sha256-chcr)", + .cra_driver_name = "hmac-sha256-chcr", .cra_blocksize = SHA256_BLOCK_SIZE, } } @@ -1388,7 +2647,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA384_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha384)", - .cra_driver_name = "hmac(sha384-chcr)", + .cra_driver_name = "hmac-sha384-chcr", .cra_blocksize = SHA384_BLOCK_SIZE, } } @@ -1400,11 +2659,205 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA512_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha512)", - .cra_driver_name = "hmac(sha512-chcr)", + .cra_driver_name = "hmac-sha512-chcr", .cra_blocksize = SHA512_BLOCK_SIZE, } } }, + /* Add AEAD Algorithms */ + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_GCM, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_gcm_ctx), + }, + .ivsize = 12, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_gcm_setkey, + .setauthsize = chcr_gcm_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_gcm_ctx), + + }, + .ivsize = 8, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_gcm_setkey, + .setauthsize = chcr_4106_4309_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_CCM, + .is_registered = 0, + .alg.aead = { + .base = 
{ + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_aead_ccm_setkey, + .setauthsize = chcr_ccm_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "rfc4309(ccm(aes))", + .cra_driver_name = "rfc4309-ccm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx), + + }, + .ivsize = 8, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_aead_rfc4309_setkey, + .setauthsize = chcr_4106_4309_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha1-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha256-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha224-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha384-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha512-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_NULL, + .is_registered = 0, + .alg.aead = { + .base 
= { + .cra_name = "authenc(digest_null,cbc(aes))", + .cra_driver_name = + "authenc-digest_null-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = 0, + .setkey = chcr_aead_digest_null_setkey, + .setauthsize = chcr_authenc_null_setauthsize, + } + }, }; /* @@ -1422,6 +2875,11 @@ static int chcr_unregister_alg(void) crypto_unregister_alg( &driver_algs[i].alg.crypto); break; + case CRYPTO_ALG_TYPE_AEAD: + if (driver_algs[i].is_registered) + crypto_unregister_aead( + &driver_algs[i].alg.aead); + break; case CRYPTO_ALG_TYPE_AHASH: if (driver_algs[i].is_registered) crypto_unregister_ahash( @@ -1456,6 +2914,19 @@ static int chcr_register_alg(void) err = crypto_register_alg(&driver_algs[i].alg.crypto); name = driver_algs[i].alg.crypto.cra_driver_name; break; + case CRYPTO_ALG_TYPE_AEAD: + driver_algs[i].alg.aead.base.cra_priority = + CHCR_CRA_PRIORITY; + driver_algs[i].alg.aead.base.cra_flags = + CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; + driver_algs[i].alg.aead.encrypt = chcr_aead_encrypt; + driver_algs[i].alg.aead.decrypt = chcr_aead_decrypt; + driver_algs[i].alg.aead.init = chcr_aead_cra_init; + driver_algs[i].alg.aead.exit = chcr_aead_cra_exit; + driver_algs[i].alg.aead.base.cra_module = THIS_MODULE; + err = crypto_register_aead(&driver_algs[i].alg.aead); + name = driver_algs[i].alg.aead.base.cra_driver_name; + break; case CRYPTO_ALG_TYPE_AHASH: a_hash = &driver_algs[i].alg.hash; a_hash->update = chcr_ahash_update; diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index 199b0bb69b89..ba38bae7ce80 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -108,30 +108,24 @@ #define IPAD_DATA 0x36363636 #define OPAD_DATA 0x5c5c5c5c -#define TRANSHDR_SIZE(alignedkctx_len)\ - (sizeof(struct ulptx_idata) +\ - sizeof(struct ulp_txpkt) +\ - sizeof(struct fw_crypto_lookaside_wr) +\ - sizeof(struct cpl_tx_sec_pdu) +\ - (alignedkctx_len)) -#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \ - (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\ +#define TRANSHDR_SIZE(kctx_len)\ + (sizeof(struct chcr_wr) +\ + kctx_len) +#define CIPHER_TRANSHDR_SIZE(kctx_len, sge_pairs) \ + (TRANSHDR_SIZE((kctx_len)) + (sge_pairs) +\ sizeof(struct cpl_rx_phys_dsgl)) -#define HASH_TRANSHDR_SIZE(alignedkctx_len)\ - (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES) +#define HASH_TRANSHDR_SIZE(kctx_len)\ + (TRANSHDR_SIZE(kctx_len) + DUMMY_BYTES) -#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \ - sizeof(struct ulp_txpkt) + \ - sizeof(struct ulptx_idata)) -#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst) \ +#define FILL_SEC_CPL_OP_IVINSR(id, len, ofst) \ htonl( \ CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \ CPL_TX_SEC_PDU_RXCHID_V((id)) | \ CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \ CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \ CPL_TX_SEC_PDU_CPLLEN_V((len)) | \ - CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \ + CPL_TX_SEC_PDU_PLACEHOLDER_V(0) | \ CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst))) #define FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \ @@ -148,7 +142,7 @@ CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \ CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst))) -#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs) \ +#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size) \ htonl( \ SCMD_SEQ_NO_CTRL_V(0) | \ SCMD_STATUS_PRESENT_V(0) | \ @@ -159,7 +153,7 @@ 
SCMD_AUTH_MODE_V((amode)) | \ SCMD_HMAC_CTRL_V((opad)) | \ SCMD_IV_SIZE_V((size)) | \ - SCMD_NUM_IVS_V((nivs))) + SCMD_NUM_IVS_V(0)) #define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \ SCMD_ENB_DBGID_V(0) | \ @@ -191,20 +185,21 @@ FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \ FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len))) -#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \ +#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, fid) \ htonl( \ FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \ FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \ FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \ - FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv))) + FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \ + FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid)) -#define FILL_ULPTX_CMD_DEST(cid) \ +#define FILL_ULPTX_CMD_DEST(cid, qid) \ htonl(ULPTX_CMD_V(ULP_TX_PKT) | \ ULP_TXPKT_DEST_V(0) | \ ULP_TXPKT_DATAMODIFY_V(0) | \ ULP_TXPKT_CHANNELID_V((cid)) | \ ULP_TXPKT_RO_V(1) | \ - ULP_TXPKT_FID_V(0)) + ULP_TXPKT_FID_V(qid)) #define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\ _bs == SHA1_DIGEST_SIZE ? 12 : 0; }) @@ -264,13 +259,15 @@ enum { * where they indicate the size of the integrity check value (ICV) */ enum { - AES_CCM_ICV_4 = 4, - AES_CCM_ICV_6 = 6, - AES_CCM_ICV_8 = 8, - AES_CCM_ICV_10 = 10, - AES_CCM_ICV_12 = 12, - AES_CCM_ICV_14 = 14, - AES_CCM_ICV_16 = 16 + ICV_4 = 4, + ICV_6 = 6, + ICV_8 = 8, + ICV_10 = 10, + ICV_12 = 12, + ICV_13 = 13, + ICV_14 = 14, + ICV_15 = 15, + ICV_16 = 16 }; struct hash_op_params { @@ -394,7 +391,7 @@ static const u8 aes_sbox[256] = { 187, 22 }; -static u32 aes_ks_subword(const u32 w) +static inline u32 aes_ks_subword(const u32 w) { u8 bytes[4]; @@ -412,61 +409,4 @@ static u32 round_constant[11] = { 0x1B000000, 0x36000000, 0x6C000000 }; -/* dec_key - OUTPUT - Reverse round key - * key - INPUT - key - * keylength - INPUT - length of the key in number of bits - */ -static inline void get_aes_decrypt_key(unsigned char *dec_key, - const unsigned char *key, - unsigned int keylength) -{ - u32 temp; - u32 w_ring[MAX_NK]; - int i, j, k; - u8 nr, nk; - - switch (keylength) { - case AES_KEYLENGTH_128BIT: - nk = KEYLENGTH_4BYTES; - nr = NUMBER_OF_ROUNDS_10; - break; - - case AES_KEYLENGTH_192BIT: - nk = KEYLENGTH_6BYTES; - nr = NUMBER_OF_ROUNDS_12; - break; - case AES_KEYLENGTH_256BIT: - nk = KEYLENGTH_8BYTES; - nr = NUMBER_OF_ROUNDS_14; - break; - default: - return; - } - for (i = 0; i < nk; i++ ) - w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); - - i = 0; - temp = w_ring[nk - 1]; - while(i + nk < (nr + 1) * 4) { - if(!(i % nk)) { - /* RotWord(temp) */ - temp = (temp << 8) | (temp >> 24); - temp = aes_ks_subword(temp); - temp ^= round_constant[i / nk]; - } - else if (nk == 8 && (i % 4 == 0)) - temp = aes_ks_subword(temp); - w_ring[i % nk] ^= temp; - temp = w_ring[i % nk]; - i++; - } - i--; - for (k = 0, j = i % nk; k < nk; k++) { - *((u32 *)dec_key + k) = htonl(w_ring[j]); - j--; - if(j < 0) - j += nk; - } -} - #endif /* __CHCR_ALGO_H__ */ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index fb5f9bbfa09c..c28e018e0773 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -42,6 +42,7 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { static struct cxgb4_uld_info chcr_uld_info = { .name = DRV_MODULE_NAME, .nrxq = MAX_ULD_QSETS, + .ntxq = MAX_ULD_QSETS, .rxq_size = 1024, .add = chcr_uld_add, .state_change = chcr_uld_state_change, @@ -51,6 +52,7 @@ static struct cxgb4_uld_info chcr_uld_info = { int assign_chcr_device(struct chcr_dev **dev) { struct uld_ctx 
*u_ctx; + int ret = -ENXIO; /* * Which device to use if multiple devices are available TODO @@ -58,15 +60,14 @@ int assign_chcr_device(struct chcr_dev **dev) * must go to the same device to maintain the ordering. */ mutex_lock(&dev_mutex); /* TODO ? */ - u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry); - if (!u_ctx) { - mutex_unlock(&dev_mutex); - return -ENXIO; + list_for_each_entry(u_ctx, &uld_ctx_list, entry) + if (u_ctx->dev) { + *dev = u_ctx->dev; + ret = 0; + break; } - - *dev = u_ctx->dev; mutex_unlock(&dev_mutex); - return 0; + return ret; } static int chcr_dev_add(struct uld_ctx *u_ctx) @@ -109,14 +110,12 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev, if (ack_err_status) { if (CHK_MAC_ERR_BIT(ack_err_status) || CHK_PAD_ERR_BIT(ack_err_status)) - error_status = -EINVAL; + error_status = -EBADMSG; } /* call completion callback with failure status */ if (req) { - if (!chcr_handle_resp(req, input, error_status)) - req->complete(req, error_status); - else - return -EINVAL; + error_status = chcr_handle_resp(req, input, error_status); + req->complete(req, error_status); } else { pr_err("Incorrect request address from the firmware\n"); return -EFAULT; @@ -126,7 +125,7 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev, int chcr_send_wr(struct sk_buff *skb) { - return cxgb4_ofld_send(skb->dev, skb); + return cxgb4_crypto_send(skb->dev, skb); } static void *chcr_uld_add(const struct cxgb4_lld_info *lld) @@ -152,18 +151,17 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp, { struct uld_ctx *u_ctx = (struct uld_ctx *)handle; struct chcr_dev *dev = u_ctx->dev; - const struct cpl_act_establish *rpl = (struct cpl_act_establish - *)rsp; + const struct cpl_fw6_pld *rpl = (struct cpl_fw6_pld *)rsp; - if (rpl->ot.opcode != CPL_FW6_PLD) { + if (rpl->opcode != CPL_FW6_PLD) { pr_err("Unsupported opcode\n"); return 0; } if (!pgl) - work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]); + work_handlers[rpl->opcode](dev, (unsigned char *)&rsp[1]); else - work_handlers[rpl->ot.opcode](dev, pgl->va); + work_handlers[rpl->opcode](dev, pgl->va); return 0; } @@ -203,10 +201,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) pr_err("ULD register fail: No chcr crypto support in cxgb4"); - return -1; - } return 0; } diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index 2a5c671a4232..79da22b5cdc9 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -52,15 +52,30 @@ #define MAC_ERROR_BIT 0 #define CHK_MAC_ERR_BIT(x) (((x) >> MAC_ERROR_BIT) & 1) +#define MAX_SALT 4 struct uld_ctx; +struct _key_ctx { + __be32 ctx_hdr; + u8 salt[MAX_SALT]; + __be64 reserverd; + unsigned char key[0]; +}; + +struct chcr_wr { + struct fw_crypto_lookaside_wr wreq; + struct ulp_txpkt ulptx; + struct ulptx_idata sc_imm; + struct cpl_tx_sec_pdu sec_cpl; + struct _key_ctx key_ctx; +}; + struct chcr_dev { - /* Request submited to h/w and waiting for response. 
*/ spinlock_t lock_chcr_dev; - struct crypto_queue pending_queue; struct uld_ctx *u_ctx; unsigned char tx_channel_id; + unsigned char rx_channel_id; }; struct uld_ctx { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index d7d75605da8b..81cfd0ba132e 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -36,11 +36,19 @@ #ifndef __CHCR_CRYPTO_H__ #define __CHCR_CRYPTO_H__ +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +#define CCM_B0_SIZE 16 +#define CCM_AAD_FIELD_SIZE 2 +#define T5_MAX_AAD_SIZE 512 + + /* Define following if h/w is not dropping the AAD and IV data before * giving the processed data */ -#define CHCR_CRA_PRIORITY 300 +#define CHCR_CRA_PRIORITY 3000 #define CHCR_AES_MAX_KEY_LEN (2 * (AES_MAX_KEY_SIZE)) /* consider xts */ #define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */ @@ -63,22 +71,36 @@ #define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0 #define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1 -#define CHCR_SCMD_CIPHER_MODE_NOP 0 -#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 -#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 -#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 +#define CHCR_SCMD_CIPHER_MODE_NOP 0 +#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 +#define CHCR_SCMD_CIPHER_MODE_AES_GCM 2 +#define CHCR_SCMD_CIPHER_MODE_AES_CTR 3 +#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 +#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 +#define CHCR_SCMD_CIPHER_MODE_AES_CCM 7 #define CHCR_SCMD_AUTH_MODE_NOP 0 #define CHCR_SCMD_AUTH_MODE_SHA1 1 #define CHCR_SCMD_AUTH_MODE_SHA224 2 #define CHCR_SCMD_AUTH_MODE_SHA256 3 +#define CHCR_SCMD_AUTH_MODE_GHASH 4 #define CHCR_SCMD_AUTH_MODE_SHA512_224 5 #define CHCR_SCMD_AUTH_MODE_SHA512_256 6 #define CHCR_SCMD_AUTH_MODE_SHA512_384 7 #define CHCR_SCMD_AUTH_MODE_SHA512_512 8 +#define CHCR_SCMD_AUTH_MODE_CBCMAC 9 +#define CHCR_SCMD_AUTH_MODE_CMAC 10 #define CHCR_SCMD_HMAC_CTRL_NOP 0 #define CHCR_SCMD_HMAC_CTRL_NO_TRUNC 1 +#define CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366 2 +#define CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT 3 +#define CHCR_SCMD_HMAC_CTRL_PL1 4 +#define CHCR_SCMD_HMAC_CTRL_PL2 5 +#define CHCR_SCMD_HMAC_CTRL_PL3 6 +#define CHCR_SCMD_HMAC_CTRL_DIV2 7 +#define VERIFY_HW 0 +#define VERIFY_SW 1 #define CHCR_SCMD_IVGEN_CTRL_HW 0 #define CHCR_SCMD_IVGEN_CTRL_SW 1 @@ -106,39 +128,77 @@ #define IV_IMMEDIATE 1 #define IV_DSGL 2 +#define AEAD_H_SIZE 16 + #define CRYPTO_ALG_SUB_TYPE_MASK 0x0f000000 #define CRYPTO_ALG_SUB_TYPE_HASH_HMAC 0x01000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 0x02000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_GCM 0x03000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC 0x04000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_CCM 0x05000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309 0x06000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_NULL 0x07000000 +#define CRYPTO_ALG_SUB_TYPE_CTR 0x08000000 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\ CRYPTO_ALG_SUB_TYPE_HASH_HMAC) -#define MAX_SALT 4 #define MAX_SCRATCH_PAD_SIZE 32 #define CHCR_HASH_MAX_BLOCK_SIZE_64 64 #define CHCR_HASH_MAX_BLOCK_SIZE_128 128 /* Aligned to 128 bit boundary */ -struct _key_ctx { - __be32 ctx_hdr; - u8 salt[MAX_SALT]; - __be64 reserverd; - unsigned char key[0]; -}; struct ablk_ctx { - u8 enc; - unsigned int processed_len; __be32 key_ctx_hdr; unsigned int enckey_len; - unsigned int dst_nents; - struct scatterlist iv_sg; u8 key[CHCR_AES_MAX_KEY_LEN]; - u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; unsigned char ciph_mode; + u8 rrkey[AES_MAX_KEY_SIZE]; +}; +struct chcr_aead_reqctx { + struct sk_buff *skb; + struct scatterlist *dst; + struct scatterlist 
srcffwd[2]; + struct scatterlist dstffwd[2]; + short int dst_nents; + u16 verify; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + unsigned char scratch_pad[MAX_SCRATCH_PAD_SIZE]; +}; + +struct chcr_gcm_ctx { + u8 ghash_h[AEAD_H_SIZE]; }; +struct chcr_authenc_ctx { + u8 dec_rrkey[AES_MAX_KEY_SIZE]; + u8 h_iopad[2 * CHCR_HASH_MAX_DIGEST_SIZE]; + unsigned char auth_mode; +}; + +struct __aead_ctx { + struct chcr_gcm_ctx gcm[0]; + struct chcr_authenc_ctx authenc[0]; +}; + + + +struct chcr_aead_ctx { + __be32 key_ctx_hdr; + unsigned int enckey_len; + struct crypto_skcipher *null; + u8 salt[MAX_SALT]; + u8 key[CHCR_AES_MAX_KEY_LEN]; + u16 hmac_ctrl; + u16 mayverify; + struct __aead_ctx ctx[0]; +}; + + + struct hmac_ctx { - struct shash_desc *desc; + struct crypto_shash *base_hash; u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128]; u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128]; }; @@ -146,6 +206,7 @@ struct hmac_ctx { struct __crypto_ctx { struct hmac_ctx hmacctx[0]; struct ablk_ctx ablkctx[0]; + struct chcr_aead_ctx aeadctx[0]; }; struct chcr_context { @@ -156,18 +217,22 @@ struct chcr_context { struct chcr_ahash_req_ctx { u32 result; - char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128]; - u8 bfr_len; + u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 *reqbfr; + u8 *skbfr; + u8 reqlen; /* DMA the partial hash in it */ u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u64 data_len; /* Data len till time */ - void *dummy_payload_ptr; /* SKB which is being sent to the hardware for processing */ struct sk_buff *skb; }; struct chcr_blkcipher_req_ctx { struct sk_buff *skb; + unsigned int dst_nents; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; }; struct chcr_alg_template { @@ -176,16 +241,19 @@ struct chcr_alg_template { union { struct crypto_alg crypto; struct ahash_alg hash; + struct aead_alg aead; } alg; }; struct chcr_req_ctx { union { struct ahash_request *ahash_req; + struct aead_request *aead_req; struct ablkcipher_request *ablk_req; } req; union { struct chcr_ahash_req_ctx *ahash_ctx; + struct chcr_aead_reqctx *reqctx; struct chcr_blkcipher_req_ctx *ablk_ctx; } ctx; }; @@ -195,9 +263,15 @@ struct sge_opaque_hdr { dma_addr_t addr[MAX_SKB_FRAGS + 1]; }; -typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req, - struct chcr_context *ctx, +typedef struct sk_buff *(*create_wr_t)(struct aead_request *req, unsigned short qid, + int size, unsigned short op_type); +static int chcr_aead_op(struct aead_request *req_base, + unsigned short op_type, + int size, + create_wr_t create_wr_fn); +static inline int get_aead_subtype(struct crypto_aead *aead); + #endif /* __CHCR_CRYPTO_H__ */ diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index a2e77b87485b..9b07f3d88feb 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -226,7 +226,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) struct dma_async_tx_descriptor *desc; struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); - ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); if (ctx->dma_ct == 0) { dev_err(hdev->dev, "Invalid DMA sg\n"); hdev->err = -EINVAL; @@ -241,7 +241,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) if (!desc) { dev_err(hdev->dev, "Null DMA descriptor\n"); hdev->err = -EINVAL; - dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); return -EINVAL; } desc->callback = img_hash_dma_callback; diff --git 
a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 37dadb2a4feb..6e7a5c77a00a 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -375,10 +375,6 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; - dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); - if (!dma->iv_pool) - return -ENOMEM; - cesa->dma = dma; return 0; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index e423d33decd4..b7872f62f674 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -273,11 +273,12 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_SRC_IN_SRAM BIT(30) #define CESA_TDMA_END_OF_REQ BIT(29) #define CESA_TDMA_BREAK_CHAIN BIT(28) -#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 -#define CESA_TDMA_IV 3 +#define CESA_TDMA_RESULT 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -393,7 +394,6 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; - struct dma_pool *iv_pool; }; /** @@ -839,7 +839,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, u32 size, u32 flags, gfp_t gfp_flags); struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index d19dc9614e6e..098871a22a54 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -212,7 +212,8 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) struct mv_cesa_req *basereq; basereq = &creq->base; - memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + memcpy(ablkreq->info, basereq->chain.last->op->ctx.blkcipher.iv, + ivsize); } else { memcpy_fromio(ablkreq->info, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, @@ -373,8 +374,9 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, /* Add output data for IV */ ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); - ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, - ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 77712b375b84..77c0fb936f47 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -280,13 +280,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) sreq->offset = 0; } +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. 
*/ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + static void mv_cesa_ahash_step(struct crypto_async_request *req) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->base); + mv_cesa_ahash_dma_step(ahashreq); else mv_cesa_ahash_std_step(ahashreq); } @@ -311,24 +330,40 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i)); - if (creq->last_req) { + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + __le32 *data = NULL; + /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format + * Result is already in the correct endianess when the SA is + * used */ - if (creq->algo_le) { - __le32 *result = (void *)ahashreq->result; + data = creq->base.chain.last->op->ctx.hash.hash; + for (i = 0; i < digsize / 4; i++) + creq->state[i] = cpu_to_le32(data[i]); - for (i = 0; i < digsize / 4; i++) - result[i] = cpu_to_le32(creq->state[i]); - } else { - __be32 *result = (void *)ahashreq->result; + memcpy(ahashreq->result, data, digsize); + } else { + for (i = 0; i < digsize / 4; i++) + creq->state[i] = readl_relaxed(engine->regs + + CESA_IVDIG(i)); + if (creq->last_req) { + /* + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ + if (creq->algo_le) { + __le32 *result = (void *)ahashreq->result; + + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_le32(creq->state[i]); + } else { + __be32 *result = (void *)ahashreq->result; - for (i = 0; i < digsize / 4; i++) - result[i] = cpu_to_be32(creq->state[i]); + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_be32(creq->state[i]); + } } } @@ -503,6 +538,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain, CESA_SA_DESC_CFG_LAST_FRAG, CESA_SA_DESC_CFG_FRAG_MSK); + ret = mv_cesa_dma_add_result_op(chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); + if (ret) + return ERR_PTR(-ENOMEM); return op; } @@ -562,11 +603,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; + bool set_state = false; int ret; + u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); @@ -634,7 +680,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) goto err_free_tdma; } - if (op) { + /* + * If results are copied via DMA, this means that this + * request can be directly processed by the engine, + * without partial updates. So we can chain it at the + * DMA level with other requests. 
+ */ + type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK; + + if (op && type != CESA_TDMA_RESULT) { /* Add dummy desc to wait for crypto operation end */ ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) @@ -647,8 +701,19 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; - basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | - CESA_TDMA_BREAK_CHAIN); + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; + + if (type != CESA_TDMA_RESULT) + basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. + */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } return 0; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 9fd7a5fbaa1b..c76375ff376d 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -69,9 +69,6 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); - else if (type == CESA_TDMA_IV) - dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, - le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -112,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, last->next = dreq->chain.first; engine->chain.last = dreq->chain.last; - if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) last->next_dma = dreq->chain.first->cur_dma; } } @@ -209,29 +213,37 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, u32 size, u32 flags, gfp_t gfp_flags) { - - struct mv_cesa_tdma_desc *tdma; - u8 *iv; - dma_addr_t dma_handle; + struct mv_cesa_tdma_desc *tdma, *op_desc; tdma = mv_cesa_dma_add_desc(chain, gfp_flags); if (IS_ERR(tdma)) return PTR_ERR(tdma); - iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); - if (!iv) - return -ENOMEM; + /* We re-use an existing op_desc object to retrieve the context + * and result instead of allocating a new one. + * There is at least one object of this type in a CESA crypto + * req, just pick the first one in the chain. 
+ */ + for (op_desc = chain->first; op_desc; op_desc = op_desc->next) { + u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK; + + if (type == CESA_TDMA_OP) + break; + } + + if (!op_desc) + return -EIO; tdma->byte_cnt = cpu_to_le32(size | BIT(31)); tdma->src = src; - tdma->dst = cpu_to_le32(dma_handle); - tdma->data = iv; + tdma->dst = op_desc->src; + tdma->op = op_desc->op; flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); - tdma->flags = flags | CESA_TDMA_IV; + tdma->flags = flags | CESA_TDMA_RESULT; return 0; } diff --git a/drivers/crypto/mediatek/Makefile b/drivers/crypto/mediatek/Makefile new file mode 100644 index 000000000000..187be79c7f3e --- /dev/null +++ b/drivers/crypto/mediatek/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mtk-crypto.o +mtk-crypto-objs:= mtk-platform.o mtk-aes.o mtk-sha.o diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c new file mode 100644 index 000000000000..3a47cdb8f0c8 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -0,0 +1,1299 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 AES acceleration. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-aes.c drivers. + */ + +#include <crypto/aes.h> +#include "mtk-platform.h" + +#define AES_QUEUE_SIZE 512 +#define AES_BUF_ORDER 2 +#define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \ + & ~(AES_BLOCK_SIZE - 1)) + +/* AES command token size */ +#define AES_CT_SIZE_ECB 2 +#define AES_CT_SIZE_CBC 3 +#define AES_CT_SIZE_CTR 3 +#define AES_CT_SIZE_GCM_OUT 5 +#define AES_CT_SIZE_GCM_IN 6 +#define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) + +/* AES-CBC/ECB/CTR command token */ +#define AES_CMD0 cpu_to_le32(0x05000000) +#define AES_CMD1 cpu_to_le32(0x2d060000) +#define AES_CMD2 cpu_to_le32(0xe4a63806) +/* AES-GCM command token */ +#define AES_GCM_CMD0 cpu_to_le32(0x0b000000) +#define AES_GCM_CMD1 cpu_to_le32(0xa0800000) +#define AES_GCM_CMD2 cpu_to_le32(0x25000010) +#define AES_GCM_CMD3 cpu_to_le32(0x0f020000) +#define AES_GCM_CMD4 cpu_to_le32(0x21e60000) +#define AES_GCM_CMD5 cpu_to_le32(0x40e60000) +#define AES_GCM_CMD6 cpu_to_le32(0xd0070000) + +/* AES transform information word 0 fields */ +#define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0) +#define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0) +#define AES_TFM_GCM_OUT cpu_to_le32(0x6 << 0) +#define AES_TFM_GCM_IN cpu_to_le32(0xf << 0) +#define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define AES_TFM_128BITS cpu_to_le32(0xb << 16) +#define AES_TFM_192BITS cpu_to_le32(0xd << 16) +#define AES_TFM_256BITS cpu_to_le32(0xf << 16) +/* AES transform information word 1 fields */ +#define AES_TFM_ECB cpu_to_le32(0x0 << 0) +#define AES_TFM_CBC cpu_to_le32(0x1 << 0) +#define AES_TFM_CTR_INIT cpu_to_le32(0x2 << 0) /* init counter to 1 */ +#define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */ +#define AES_TFM_3IV cpu_to_le32(0x7 << 5) /* using IV 0-2 */ +#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */ +#define AES_TFM_IV_CTR_MODE cpu_to_le32(0x1 << 10) +#define AES_TFM_ENC_HASH cpu_to_le32(0x1 << 17) +#define AES_TFM_GHASH_DIG cpu_to_le32(0x2 << 21) +#define AES_TFM_GHASH cpu_to_le32(0x4 << 23) + +/* AES flags */ +#define AES_FLAGS_ECB BIT(0) +#define AES_FLAGS_CBC BIT(1) +#define AES_FLAGS_CTR BIT(2) +#define AES_FLAGS_GCM BIT(3) +#define AES_FLAGS_ENCRYPT 
BIT(4) +#define AES_FLAGS_BUSY BIT(5) + +/** + * Command token(CT) is a set of hardware instructions that + * are used to control engine's processing flow of AES. + * + * Transform information(TFM) is used to define AES state and + * contains all keys and initial vectors. + * + * The engine requires CT and TFM to do: + * - Commands decoding and control of the engine's data path. + * - Coordinating hardware data fetch and store operations. + * - Result token construction and output. + * + * Memory map of GCM's TFM: + * /-----------\ + * | AES KEY | 128/196/256 bits + * |-----------| + * | HASH KEY | a string 128 zero bits encrypted using the block cipher + * |-----------| + * | IVs | 4 * 4 bytes + * \-----------/ + */ +struct mtk_aes_ct { + __le32 cmd[AES_CT_SIZE_GCM_IN]; +}; + +struct mtk_aes_tfm { + __le32 ctrl[2]; + __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)]; +}; + +struct mtk_aes_reqctx { + u64 mode; +}; + +struct mtk_aes_base_ctx { + struct mtk_cryp *cryp; + u32 keylen; + mtk_aes_fn start; + + struct mtk_aes_ct ct; + dma_addr_t ct_dma; + struct mtk_aes_tfm tfm; + dma_addr_t tfm_dma; + + __le32 ct_hdr; + u32 ct_size; +}; + +struct mtk_aes_ctx { + struct mtk_aes_base_ctx base; +}; + +struct mtk_aes_ctr_ctx { + struct mtk_aes_base_ctx base; + + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + size_t offset; + struct scatterlist src[2]; + struct scatterlist dst[2]; +}; + +struct mtk_aes_gcm_ctx { + struct mtk_aes_base_ctx base; + + u32 authsize; + size_t textlen; + + struct crypto_skcipher *ctr; +}; + +struct mtk_aes_gcm_setkey_result { + int err; + struct completion completion; +}; + +struct mtk_aes_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_aes_drv mtk_aes = { + .dev_list = LIST_HEAD_INIT(mtk_aes.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_aes.lock), +}; + +static inline u32 mtk_aes_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_aes_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_base_ctx *ctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_aes.lock); + if (!ctx->cryp) { + list_for_each_entry(tmp, &mtk_aes.dev_list, aes_list) { + cryp = tmp; + break; + } + ctx->cryp = cryp; + } else { + cryp = ctx->cryp; + } + spin_unlock_bh(&mtk_aes.lock); + + return cryp; +} + +static inline size_t mtk_aes_padlen(size_t len) +{ + len &= AES_BLOCK_SIZE - 1; + return len ? AES_BLOCK_SIZE - len : 0; +} + +static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len, + struct mtk_aes_dma *dma) +{ + int nents; + + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + if (len <= sg->length) { + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + dma->nents = nents + 1; + dma->remainder = sg->length - len; + sg->length = len; + return true; + } + + if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) + return false; + + len -= sg->length; + } + + return false; +} + +static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes, + const struct mtk_aes_reqctx *rctx) +{ + /* Clear all but persistent flags and set request flags. 
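Relating this back to the GCM transform-state memory map in the header comment above: all of the key material lives in one flat array of 32-bit words, and the driver only ever computes word offsets into it. The sketch below is illustrative (the helper names are not part of the driver); for the plain CBC/CTR modes the IV is stored directly after the key instead, as mtk_aes_info_init() below shows.

```c
#include <stdint.h>
#include <stddef.h>

#define AES_BLOCK_SIZE		16
#define AES_KEYSIZE_256		32
#define SIZE_IN_WORDS(x)	((x) >> 2)

/*
 * Flat word array backing the transform state, sized like mtk_aes_tfm:
 * big enough for a 256-bit key plus two further 16-byte blocks.
 */
struct tfm_state_sketch {
	uint32_t state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)];
};

/* GCM: hash key H = E_K(0^128) sits directly after the key words ... */
static inline size_t gcm_hash_key_off(size_t keylen_words)
{
	return keylen_words;
}

/* ... and the four IV/counter words follow the hash key. */
static inline size_t gcm_iv_off(size_t keylen_words)
{
	return keylen_words + SIZE_IN_WORDS(AES_BLOCK_SIZE);
}

/*
 * Example for AES-128 GCM: key in words 0..3, H in words 4..7, IVs in
 * words 8..11.  For CBC/CTR the IV is placed at gcm_hash_key_off(),
 * i.e. immediately after the key.
 */
```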
*/ + aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode; +} + +static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) +{ + struct scatterlist *sg = dma->sg; + int nents = dma->nents; + + if (!dma->remainder) + return; + + while (--nents > 0 && sg) + sg = sg_next(sg); + + if (!sg) + return; + + sg->length += dma->remainder; +} + +/* + * Write descriptors for processing. This will configure the engine, load + * the transform information and then start the packet processing. + */ +static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_ring *ring = cryp->ring[aes->id]; + struct mtk_desc *cmd = NULL, *res = NULL; + struct scatterlist *ssg = aes->src.sg, *dsg = aes->dst.sg; + u32 slen = aes->src.sg_len, dlen = aes->dst.sg_len; + int nents; + + /* Write command descriptors */ + for (nents = 0; nents < slen; ++nents, ssg = sg_next(ssg)) { + cmd = ring->cmd_base + ring->cmd_pos; + cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); + cmd->buf = cpu_to_le32(sg_dma_address(ssg)); + + if (nents == 0) { + cmd->hdr |= MTK_DESC_FIRST | + MTK_DESC_CT_LEN(aes->ctx->ct_size); + cmd->ct = cpu_to_le32(aes->ctx->ct_dma); + cmd->ct_hdr = aes->ctx->ct_hdr; + cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); + } + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + } + cmd->hdr |= MTK_DESC_LAST; + + /* Prepare result descriptors */ + for (nents = 0; nents < dlen; ++nents, dsg = sg_next(dsg)) { + res = ring->res_base + ring->res_pos; + res->hdr = MTK_DESC_BUF_LEN(dsg->length); + res->buf = cpu_to_le32(sg_dma_address(dsg)); + + if (nents == 0) + res->hdr |= MTK_DESC_FIRST; + + if (++ring->res_pos == MTK_DESC_NUM) + ring->res_pos = 0; + } + res->hdr |= MTK_DESC_LAST; + + /* Prepare enough space for authenticated tag */ + if (aes->flags & AES_FLAGS_GCM) + res->hdr += AES_BLOCK_SIZE; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. 
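Before the barrier and the PREP_COUNT writes that follow, mtk_aes_xmit() has walked the source and destination scatterlists and laid out one command descriptor per segment. The stripped-down sketch below shows only the command-ring half of that bookkeeping: FIRST marks the descriptor that also carries the command-token pointer, LAST marks the end of the packet, and the write position simply wraps at MTK_DESC_NUM. The structures are simplified stand-ins, not the driver's own.

```c
#include <stdint.h>

#define MTK_DESC_NUM	512
#define DESC_FIRST	(1u << 23)	/* same bit as MTK_DESC_FIRST */
#define DESC_LAST	(1u << 22)	/* same bit as MTK_DESC_LAST */

/* Reduced command descriptor: only the fields needed for the sketch. */
struct cmd_desc {
	uint32_t hdr;	/* segment length plus FIRST/LAST markers */
	uint32_t buf;	/* DMA address of the data segment */
	uint32_t ct;	/* DMA address of the command token (first desc only) */
};

struct cmd_ring {
	struct cmd_desc desc[MTK_DESC_NUM];
	uint32_t pos;	/* next free slot, wraps at MTK_DESC_NUM */
};

/*
 * Queue one packet made of @nsegs DMA segments: the first descriptor also
 * points at the command token, the last one is tagged so the engine knows
 * where the packet ends.
 */
static void ring_queue_cmd(struct cmd_ring *ring, const uint32_t *seg_dma,
			   const uint32_t *seg_len, int nsegs, uint32_t ct_dma)
{
	struct cmd_desc *d = NULL;
	int i;

	for (i = 0; i < nsegs; i++) {
		d = &ring->desc[ring->pos];
		d->hdr = seg_len[i];
		d->buf = seg_dma[i];
		if (i == 0) {
			d->hdr |= DESC_FIRST;
			d->ct = ct_dma;
		}
		ring->pos = (ring->pos + 1) % MTK_DESC_NUM;
	}
	if (d)
		d->hdr |= DESC_LAST;
}
```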
+ */ + wmb(); + /* Start DMA transfer */ + mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(dlen)); + mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(slen)); + + return -EINPROGRESS; +} + +static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); + + if (aes->src.sg == aes->dst.sg) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_BIDIRECTIONAL); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } else { + dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents, + DMA_FROM_DEVICE); + + if (aes->dst.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->dst); + + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } + + if (aes->dst.sg == &aes->aligned_sg) + sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst), + aes->buf, aes->total); +} + +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) + return -EINVAL; + + ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) + goto tfm_map_err; + + if (aes->src.sg == aes->dst.sg) { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, + DMA_BIDIRECTIONAL); + aes->dst.sg_len = aes->src.sg_len; + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + } else { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + + aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + if (unlikely(!aes->dst.sg_len)) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + goto sg_map_err; + } + } + + return mtk_aes_xmit(cryp, aes); + +sg_map_err: + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); +tfm_map_err: + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + + return -EINVAL; +} + +/* Initialize transform information of CBC/ECB/CTR mode */ +static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + size_t len) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); + ctx->ct.cmd[1] = AES_CMD1; + + if (aes->flags & AES_FLAGS_ENCRYPT) + ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; + else + ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + if (aes->flags & AES_FLAGS_CBC) { + const u32 *iv = (const u32 *)req->info; + u32 *iv_state = ctx->tfm.state + ctx->keylen; + int i; + + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; + + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = 
cpu_to_le32(iv[i]); + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CBC; + } else if (aes->flags & AES_FLAGS_ECB) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); + ctx->tfm.ctrl[1] = AES_TFM_ECB; + + ctx->ct_size = AES_CT_SIZE_ECB; + } else if (aes->flags & AES_FLAGS_CTR) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_LOAD | AES_TFM_FULL_IV; + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CTR; + } +} + +static int mtk_aes_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + size_t padlen = 0; + bool src_aligned, dst_aligned; + + aes->total = len; + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + padlen = mtk_aes_padlen(len); + + if (len + padlen > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); + } + + mtk_aes_info_init(cryp, aes, len + padlen); + + return mtk_aes_map(cryp, aes); +} + +static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, + struct crypto_async_request *new_areq) +{ + struct mtk_aes_rec *aes = cryp->aes[id]; + struct crypto_async_request *areq, *backlog; + struct mtk_aes_base_ctx *ctx; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&aes->lock, flags); + if (new_areq) + ret = crypto_enqueue_request(&aes->queue, new_areq); + if (aes->flags & AES_FLAGS_BUSY) { + spin_unlock_irqrestore(&aes->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&aes->queue); + areq = crypto_dequeue_request(&aes->queue); + if (areq) + aes->flags |= AES_FLAGS_BUSY; + spin_unlock_irqrestore(&aes->lock, flags); + + if (!areq) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + ctx = crypto_tfm_ctx(areq->tfm); + + aes->areq = areq; + aes->ctx = ctx; + + return ctx->start(cryp, aes); +} + +static int mtk_aes_complete(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + aes->flags &= ~AES_FLAGS_BUSY; + aes->areq->complete(aes->areq, 0); + + /* Handle new request */ + return mtk_aes_handle_queue(cryp, aes->id, NULL); +} + +static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + aes->resume = mtk_aes_complete; + + return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes); +} + +static inline struct mtk_aes_ctr_ctx * +mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_ctr_ctx, base); +} + +static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct scatterlist *src, *dst; + int i; + u32 start, end, ctr, blocks, *iv_state; + size_t datalen; + bool fragmented = false; + + 
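The body of mtk_aes_ctr_transfer(), which continues just below, has to cope with the engine's 32-bit block counter wrapping in the middle of a request. As a standalone illustration of that split (a hypothetical helper, not driver code): process only the blocks that fit before the wrap, flag the request as fragmented, then carry the wrap into the upper IV words in software and issue the remainder as a second transfer.

```c
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

#define AES_BLOCK_SIZE	16

/*
 * Return how many bytes can be handed to the engine before the 32-bit
 * counter @ctr (the last IV word, in host order) wraps.  When the request
 * does not fit, *fragmented is set and the caller must advance the IV past
 * the wrap in software before issuing the rest.
 */
static size_t ctr_chunk_len(uint32_t ctr, size_t datalen, bool *fragmented)
{
	uint64_t blocks = (datalen + AES_BLOCK_SIZE - 1) / AES_BLOCK_SIZE;
	uint64_t before_wrap = 0x100000000ULL - ctr;	/* blocks until wrap */

	*fragmented = false;
	if (blocks <= before_wrap)
		return datalen;

	*fragmented = true;
	return (size_t)(before_wrap * AES_BLOCK_SIZE);
}

/*
 * Example: ctr == 0xfffffffe and datalen == 64 (four blocks) -> only two
 * blocks (32 bytes) are programmed first; the remaining 32 bytes go out as
 * a second, resumed transfer, exactly as mtk_aes_ctr_transfer() does by
 * reinstalling itself as the resume handler.
 */
```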
/* Check for transfer completion. */ + cctx->offset += aes->total; + if (cctx->offset >= req->nbytes) + return mtk_aes_complete(cryp, aes); + + /* Compute data length. */ + datalen = req->nbytes - cctx->offset; + blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctr = be32_to_cpu(cctx->iv[3]); + + /* Check 32bit counter overflow. */ + start = ctr; + end = start + blocks - 1; + if (end < start) { + ctr |= 0xffffffff; + datalen = AES_BLOCK_SIZE * -start; + fragmented = true; + } + + /* Jump to offset. */ + src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset); + dst = ((req->src == req->dst) ? src : + scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset)); + + /* Write IVs into transform state buffer. */ + iv_state = ctx->tfm.state + ctx->keylen; + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(cctx->iv[i]); + + if (unlikely(fragmented)) { + /* + * Increment the counter manually to cope with the hardware + * counter overflow. + */ + cctx->iv[3] = cpu_to_be32(ctr); + crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE); + } + aes->resume = mtk_aes_ctr_transfer; + + return mtk_aes_dma(cryp, aes, src, dst, datalen); +} + +static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + + memcpy(cctx->iv, req->info, AES_BLOCK_SIZE); + cctx->offset = 0; + aes->total = 0; + + return mtk_aes_ctr_transfer(cryp, aes); +} + +/* Check and set the AES key to transform state buffer */ +static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, + const u8 *key, u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm); + const u32 *aes_key = (const u32 *)key; + u32 *key_state = ctx->tfm.state; + int i; + + if (keylen != AES_KEYSIZE_128 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_256) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + key_state[i] = cpu_to_le32(aes_key[i]); + + return 0; +} + +static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_reqctx *rctx; + + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx = ablkcipher_request_ctx(req); + rctx->mode = mode; + + return mtk_aes_handle_queue(ctx->cryp, !(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static int mtk_aes_ecb_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); +} + +static int mtk_aes_ecb_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ECB); +} + +static int mtk_aes_cbc_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); +} + +static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CBC); +} + +static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); +} + +static int mtk_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CTR); +} + +static int mtk_aes_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto 
device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_start; + return 0; +} + +static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_ctr_start; + return 0; +} + +static struct crypto_alg aes_algs[] = { +{ + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_cbc_encrypt, + .decrypt = mtk_aes_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, + } +}, +{ + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ecb_encrypt, + .decrypt = mtk_aes_ecb_decrypt, + } +}, +{ + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_ctr_cra_init, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_ctr_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ctr_encrypt, + .decrypt = mtk_aes_ctr_decrypt, + } +}, +}; + +static inline struct mtk_aes_gcm_ctx * +mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_gcm_ctx, base); +} + +/* Initialize transform information of GCM mode */ +static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes, + size_t len) +{ + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + const u32 *iv = (const u32 *)req->iv; + u32 *iv_state = ctx->tfm.state + ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE); + u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req)); + int i; + + ctx->ct_hdr = AES_CT_CTRL_HDR | len; + + ctx->ct.cmd[0] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[1] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[2] = AES_GCM_CMD2; + ctx->ct.cmd[3] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + ctx->ct.cmd[4] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_OUT; + ctx->tfm.ctrl[0] = AES_TFM_GCM_OUT; + } else { + ctx->ct.cmd[4] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize); + ctx->ct.cmd[5] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize); + ctx->ct_size = 
AES_CT_SIZE_GCM_IN; + ctx->tfm.ctrl[0] = AES_TFM_GCM_IN; + } + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + ctx->tfm.ctrl[0] |= AES_TFM_GHASH_DIG | AES_TFM_GHASH | + AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS( + AES_BLOCK_SIZE + ivsize)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE | + AES_TFM_3IV | AES_TFM_ENC_HASH; + + for (i = 0; i < SIZE_IN_WORDS(ivsize); i++) + iv_state[i] = cpu_to_le32(iv[i]); +} + +static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + bool src_aligned, dst_aligned; + + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + if (aes->total > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, aes->total); + } + + mtk_aes_gcm_info_init(cryp, aes, len); + + return mtk_aes_map(cryp, aes); +} + +/* Todo: GMAC */ +static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx); + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + u32 len = req->assoclen + req->cryptlen; + + mtk_aes_set_mode(aes, rctx); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + u32 tag[4]; + /* Compute total process length. */ + aes->total = len + gctx->authsize; + /* Compute text length. */ + gctx->textlen = req->cryptlen; + /* Hardware will append authenticated tag to output buffer */ + scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1); + } else { + aes->total = len; + gctx->textlen = req->cryptlen - gctx->authsize; + } + aes->resume = mtk_aes_complete; + + return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len); +} + +static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + + rctx->mode = AES_FLAGS_GCM | mode; + + return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static void mtk_gcm_setkey_done(struct crypto_async_request *req, int err) +{ + struct mtk_aes_gcm_setkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + + result->err = err; + complete(&result->completion); +} + +/* + * Because of the hardware limitation, we need to pre-calculate key(H) + * for the GHASH operation. The result of the encryption operation + * need to be stored in the transform state buffer. 
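In other words, the setkey path that follows computes the GHASH key H = E_K(0^128) and stores it right after the AES key in the transform state. The driver does this with an asynchronous ctr(aes) skcipher, mirroring crypto_gcm_setkey(); purely as an illustration of the value being produced, the same block could be computed with the kernel's synchronous single-block cipher API. This is a minimal sketch with error handling trimmed, not the driver's actual code:

```c
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/err.h>

/* Illustration only: compute the GHASH key H = AES_K(0^128). */
static int compute_ghash_key(const u8 *key, unsigned int keylen,
			     u8 hash_key[AES_BLOCK_SIZE])
{
	static const u8 zeroes[AES_BLOCK_SIZE];
	struct crypto_cipher *aes;
	int err;

	aes = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(aes))
		return PTR_ERR(aes);

	err = crypto_cipher_setkey(aes, key, keylen);
	if (!err)
		crypto_cipher_encrypt_one(aes, hash_key, zeroes);

	crypto_free_cipher(aes);
	return err;
}
```

The driver keeps the asynchronous ctr(aes) route so its flow matches crypto_gcm_setkey(); the computed block is then stored big-endian into the state words right after the key, as the hash_state[] loop below shows.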
+ */ +static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, + u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + struct crypto_skcipher *ctr = gctx->ctr; + struct { + u32 hash[4]; + u8 iv[8]; + + struct mtk_aes_gcm_setkey_result result; + + struct scatterlist sg[1]; + struct skcipher_request req; + } *data; + const u32 *aes_key; + u32 *key_state, *hash_state; + int err, i; + + if (keylen != AES_KEYSIZE_256 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_128) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + key_state = ctx->tfm.state; + aes_key = (u32 *)key; + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + ctx->tfm.state[i] = cpu_to_le32(aes_key[i]); + + /* Same as crypto_gcm_setkey() from crypto/gcm.c */ + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + init_completion(&data->result.completion); + sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + mtk_gcm_setkey_done, &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + AES_BLOCK_SIZE, data->iv); + + err = crypto_skcipher_encrypt(&data->req); + if (err == -EINPROGRESS || err == -EBUSY) { + err = wait_for_completion_interruptible( + &data->result.completion); + if (!err) + err = data->result.err; + } + if (err) + goto out; + + hash_state = key_state + ctx->keylen; + + for (i = 0; i < 4; i++) + hash_state[i] = cpu_to_be32(data->hash[i]); +out: + kzfree(data); + return err; +} + +static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead, + u32 authsize) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + + /* Same as crypto_gcm_authsize() from crypto/gcm.c */ + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + + gctx->authsize = authsize; + return 0; +} + +static int mtk_aes_gcm_encrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT); +} + +static int mtk_aes_gcm_decrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, 0); +} + +static int mtk_aes_gcm_init(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->ctr)) { + pr_err("Error allocating ctr(aes)\n"); + return PTR_ERR(ctx->ctr); + } + + crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx)); + ctx->base.start = mtk_aes_gcm_start; + return 0; +} + +static void mtk_aes_gcm_exit(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + + crypto_free_skcipher(ctx->ctr); +} + +static struct aead_alg aes_gcm_alg = { + .setkey = mtk_aes_gcm_setkey, + .setauthsize = mtk_aes_gcm_setauthsize, + .encrypt = mtk_aes_gcm_encrypt, + 
.decrypt = mtk_aes_gcm_decrypt, + .init = mtk_aes_gcm_init, + .exit = mtk_aes_gcm_exit, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_gcm_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}; + +static void mtk_aes_enc_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[0]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static void mtk_aes_dec_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[1]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static irqreturn_t mtk_aes_enc_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[0]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING0)); + + mtk_aes_write(cryp, RDR_STAT(RING0), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING0), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING0), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_aes_dec_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[1]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING1)); + + mtk_aes_write(cryp, RDR_STAT(RING1), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING1), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING1), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of creating encryption and decryption records is + * to process outbound/inbound data in parallel, it can improve + * performance in most use cases, such as IPSec VPN, especially + * under heavy network traffic. 
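Concretely, the record split is wired up through the record id passed to mtk_aes_handle_queue(): in the ablkcipher path above, mtk_aes_crypt() uses !(mode & AES_FLAGS_ENCRYPT), so encryption requests land on record/ring 0 and decryptions on record/ring 1, each with its own IRQ and tasklet. A one-line sketch of that mapping (an illustrative helper, not driver code):

```c
#include <stdbool.h>

enum { RING0 = 0, RING1, RING2, RING3 };	/* rings 0/1: AES, 2/3: SHA */

/*
 * mtk_aes_crypt() passes !(mode & AES_FLAGS_ENCRYPT) as the record id to
 * mtk_aes_handle_queue(), so the two directions never compete for the same
 * ring, tasklet or interrupt.
 */
static inline int aes_record_id(bool encrypt)
{
	return encrypt ? RING0 : RING1;
}
```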
+ */ +static int mtk_aes_record_init(struct mtk_cryp *cryp) +{ + struct mtk_aes_rec **aes = cryp->aes; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + aes[i] = kzalloc(sizeof(**aes), GFP_KERNEL); + if (!aes[i]) + goto err_cleanup; + + aes[i]->buf = (void *)__get_free_pages(GFP_KERNEL, + AES_BUF_ORDER); + if (!aes[i]->buf) + goto err_cleanup; + + aes[i]->id = i; + + spin_lock_init(&aes[i]->lock); + crypto_init_queue(&aes[i]->queue, AES_QUEUE_SIZE); + } + + tasklet_init(&aes[0]->task, mtk_aes_enc_task, (unsigned long)cryp); + tasklet_init(&aes[1]->task, mtk_aes_dec_task, (unsigned long)cryp); + + return 0; + +err_cleanup: + for (; i--; ) { + free_page((unsigned long)aes[i]->buf); + kfree(aes[i]); + } + + return err; +} + +static void mtk_aes_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->aes[i]->task); + free_page((unsigned long)cryp->aes[i]->buf); + kfree(cryp->aes[i]); + } +} + +static void mtk_aes_unregister_algs(void) +{ + int i; + + crypto_unregister_aead(&aes_gcm_alg); + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) + crypto_unregister_alg(&aes_algs[i]); +} + +static int mtk_aes_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + err = crypto_register_alg(&aes_algs[i]); + if (err) + goto err_aes_algs; + } + + err = crypto_register_aead(&aes_gcm_alg); + if (err) + goto err_aes_algs; + + return 0; + +err_aes_algs: + for (; i--; ) + crypto_unregister_alg(&aes_algs[i]); + + return err; +} + +int mtk_cipher_alg_register(struct mtk_cryp *cryp) +{ + int ret; + + INIT_LIST_HEAD(&cryp->aes_list); + + /* Initialize two cipher records */ + ret = mtk_aes_record_init(cryp); + if (ret) + goto err_record; + + /* Ring0 is use by encryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING0], mtk_aes_enc_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES encryption irq.\n"); + goto err_res; + } + + /* Ring1 is use by decryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING1], mtk_aes_dec_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES decryption irq.\n"); + goto err_res; + } + + /* Enable ring0 and ring1 interrupt */ + mtk_aes_write(cryp, AIC_ENABLE_SET(RING0), MTK_IRQ_RDR0); + mtk_aes_write(cryp, AIC_ENABLE_SET(RING1), MTK_IRQ_RDR1); + + spin_lock(&mtk_aes.lock); + list_add_tail(&cryp->aes_list, &mtk_aes.dev_list); + spin_unlock(&mtk_aes.lock); + + ret = mtk_aes_register_algs(); + if (ret) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); +err_res: + mtk_aes_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-aes initialization failed.\n"); + return ret; +} + +void mtk_cipher_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); + + mtk_aes_unregister_algs(); + mtk_aes_record_free(cryp); +} diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c new file mode 100644 index 000000000000..a9c713d4c733 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.c @@ -0,0 +1,604 @@ +/* + * Driver for EIP97 cryptographic accelerator. 
+ * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include "mtk-platform.h" + +#define MTK_BURST_SIZE_MSK GENMASK(7, 4) +#define MTK_BURST_SIZE(x) ((x) << 4) +#define MTK_DESC_SIZE(x) ((x) << 0) +#define MTK_DESC_OFFSET(x) ((x) << 16) +#define MTK_DESC_FETCH_SIZE(x) ((x) << 0) +#define MTK_DESC_FETCH_THRESH(x) ((x) << 16) +#define MTK_DESC_OVL_IRQ_EN BIT(25) +#define MTK_DESC_ATP_PRESENT BIT(30) + +#define MTK_DFSE_IDLE GENMASK(3, 0) +#define MTK_DFSE_THR_CTRL_EN BIT(30) +#define MTK_DFSE_THR_CTRL_RESET BIT(31) +#define MTK_DFSE_RING_ID(x) (((x) >> 12) & GENMASK(3, 0)) +#define MTK_DFSE_MIN_DATA(x) ((x) << 0) +#define MTK_DFSE_MAX_DATA(x) ((x) << 8) +#define MTK_DFE_MIN_CTRL(x) ((x) << 16) +#define MTK_DFE_MAX_CTRL(x) ((x) << 24) + +#define MTK_IN_BUF_MIN_THRESH(x) ((x) << 8) +#define MTK_IN_BUF_MAX_THRESH(x) ((x) << 12) +#define MTK_OUT_BUF_MIN_THRESH(x) ((x) << 0) +#define MTK_OUT_BUF_MAX_THRESH(x) ((x) << 4) +#define MTK_IN_TBUF_SIZE(x) (((x) >> 4) & GENMASK(3, 0)) +#define MTK_IN_DBUF_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_OUT_DBUF_SIZE(x) (((x) >> 16) & GENMASK(3, 0)) +#define MTK_CMD_FIFO_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_RES_FIFO_SIZE(x) (((x) >> 12) & GENMASK(3, 0)) + +#define MTK_PE_TK_LOC_AVL BIT(2) +#define MTK_PE_PROC_HELD BIT(14) +#define MTK_PE_TK_TIMEOUT_EN BIT(22) +#define MTK_PE_INPUT_DMA_ERR BIT(0) +#define MTK_PE_OUTPUT_DMA_ERR BIT(1) +#define MTK_PE_PKT_PORC_ERR BIT(2) +#define MTK_PE_PKT_TIMEOUT BIT(3) +#define MTK_PE_FATAL_ERR BIT(14) +#define MTK_PE_INPUT_DMA_ERR_EN BIT(16) +#define MTK_PE_OUTPUT_DMA_ERR_EN BIT(17) +#define MTK_PE_PKT_PORC_ERR_EN BIT(18) +#define MTK_PE_PKT_TIMEOUT_EN BIT(19) +#define MTK_PE_FATAL_ERR_EN BIT(30) +#define MTK_PE_INT_OUT_EN BIT(31) + +#define MTK_HIA_SIGNATURE ((u16)0x35ca) +#define MTK_HIA_DATA_WIDTH(x) (((x) >> 25) & GENMASK(1, 0)) +#define MTK_HIA_DMA_LENGTH(x) (((x) >> 20) & GENMASK(4, 0)) +#define MTK_CDR_STAT_CLR GENMASK(4, 0) +#define MTK_RDR_STAT_CLR GENMASK(7, 0) + +#define MTK_AIC_INT_MSK GENMASK(5, 0) +#define MTK_AIC_VER_MSK (GENMASK(15, 0) | GENMASK(27, 20)) +#define MTK_AIC_VER11 0x011036c9 +#define MTK_AIC_VER12 0x012036c9 +#define MTK_AIC_G_CLR GENMASK(30, 20) + +/** + * EIP97 is an integrated security subsystem to accelerate cryptographic + * functions and protocols to offload the host processor. + * Some important hardware modules are briefly introduced below: + * + * Host Interface Adapter(HIA) - the main interface between the host + * system and the hardware subsystem. It is responsible for attaching + * processing engine to the specific host bus interface and provides a + * standardized software view for off loading tasks to the engine. + * + * Command Descriptor Ring Manager(CDR Manager) - keeps track of how many + * CD the host has prepared in the CDR. It monitors the fill level of its + * CD-FIFO and if there's sufficient space for the next block of descriptors, + * then it fires off a DMA request to fetch a block of CDs. 
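A practical consequence of this fill-level tracking is that the driver never pokes the hardware per descriptor; after queuing a packet it only adds the newly prepared amount to CDR_PREP_COUNT (and to RDR_PREP_COUNT for results), scaled by the descriptor size. A sketch of that scaling, using the descriptor geometry from mtk-platform.h (the unit interpretation, words times four per descriptor, is taken from the MTK_DESC_CNT() macro itself):

```c
#include <stdint.h>

#define MTK_DESC_OFF	8	/* descriptor slot size in 32-bit words (32 bytes) */
#define MTK_DESC_CNT(x)	((MTK_DESC_OFF * (x)) << 2)

/*
 * Value added to CDR_PREP_COUNT (or RDR_PREP_COUNT) after preparing
 * @ndesc descriptors: the descriptor count scaled to words and then
 * shifted left by two, i.e. 32 per descriptor.
 */
static inline uint32_t prep_count_value(int ndesc)
{
	return MTK_DESC_CNT(ndesc);
}
/* prep_count_value(3) == 96; mtk_aes_xmit() writes this after queuing a packet */
```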
+ * + * Data fetch engine(DFE) - It is responsible for parsing the CD and + * setting up the required control and packet data DMA transfers from + * system memory to the processing engine. + * + * Result Descriptor Ring Manager(RDR Manager) - same as CDR Manager, + * but target is result descriptors, Moreover, it also handles the RD + * updates under control of the DSE. For each packet data segment + * processed, the DSE triggers the RDR Manager to write the updated RD. + * If triggered to update, the RDR Manager sets up a DMA operation to + * copy the RD from the DSE to the correct location in the RDR. + * + * Data Store Engine(DSE) - It is responsible for parsing the prepared RD + * and setting up the required control and packet data DMA transfers from + * the processing engine to system memory. + * + * Advanced Interrupt Controllers(AICs) - receive interrupt request signals + * from various sources and combine them into one interrupt output. + * The AICs are used by: + * - One for the HIA global and processing engine interrupts. + * - The others for the descriptor ring interrupts. + */ + +/* Cryptographic engine capabilities */ +struct mtk_sys_cap { + /* host interface adapter */ + u32 hia_ver; + u32 hia_opt; + /* packet engine */ + u32 pkt_eng_opt; + /* global hardware */ + u32 hw_opt; +}; + +static void mtk_desc_ring_link(struct mtk_cryp *cryp, u32 mask) +{ + /* Assign rings to DFE/DSE thread and enable it */ + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DFE_THR_CTRL); + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DSE_THR_CTRL); +} + +static void mtk_dfe_dse_buf_setup(struct mtk_cryp *cryp, + struct mtk_sys_cap *cap) +{ + u32 width = MTK_HIA_DATA_WIDTH(cap->hia_opt) + 2; + u32 len = MTK_HIA_DMA_LENGTH(cap->hia_opt) - 1; + u32 ipbuf = min((u32)MTK_IN_DBUF_SIZE(cap->hw_opt) + width, len); + u32 opbuf = min((u32)MTK_OUT_DBUF_SIZE(cap->hw_opt) + width, len); + u32 itbuf = min((u32)MTK_IN_TBUF_SIZE(cap->hw_opt) + width, len); + + writel(MTK_DFSE_MIN_DATA(ipbuf - 1) | + MTK_DFSE_MAX_DATA(ipbuf) | + MTK_DFE_MIN_CTRL(itbuf - 1) | + MTK_DFE_MAX_CTRL(itbuf), + cryp->base + DFE_CFG); + + writel(MTK_DFSE_MIN_DATA(opbuf - 1) | + MTK_DFSE_MAX_DATA(opbuf), + cryp->base + DSE_CFG); + + writel(MTK_IN_BUF_MIN_THRESH(ipbuf - 1) | + MTK_IN_BUF_MAX_THRESH(ipbuf), + cryp->base + PE_IN_DBUF_THRESH); + + writel(MTK_IN_BUF_MIN_THRESH(itbuf - 1) | + MTK_IN_BUF_MAX_THRESH(itbuf), + cryp->base + PE_IN_TBUF_THRESH); + + writel(MTK_OUT_BUF_MIN_THRESH(opbuf - 1) | + MTK_OUT_BUF_MAX_THRESH(opbuf), + cryp->base + PE_OUT_DBUF_THRESH); + + writel(0, cryp->base + PE_OUT_TBUF_THRESH); + writel(0, cryp->base + PE_OUT_BUF_CTRL); +} + +static int mtk_dfe_dse_state_check(struct mtk_cryp *cryp) +{ + int ret = -EINVAL; + u32 val; + + /* Check for completion of all DMA transfers */ + val = readl(cryp->base + DFE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) { + val = readl(cryp->base + DSE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) + ret = 0; + } + + if (!ret) { + /* Take DFE/DSE thread out of reset */ + writel(0, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DSE_THR_CTRL); + } else { + return -EBUSY; + } + + return 0; +} + +static int mtk_dfe_dse_reset(struct mtk_cryp *cryp) +{ + int err; + + /* Reset DSE/DFE and correct system priorities for all rings. 
*/ + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DFE_PRIO_0); + writel(0, cryp->base + DFE_PRIO_1); + writel(0, cryp->base + DFE_PRIO_2); + writel(0, cryp->base + DFE_PRIO_3); + + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DSE_THR_CTRL); + writel(0, cryp->base + DSE_PRIO_0); + writel(0, cryp->base + DSE_PRIO_1); + writel(0, cryp->base + DSE_PRIO_2); + writel(0, cryp->base + DSE_PRIO_3); + + err = mtk_dfe_dse_state_check(cryp); + if (err) + return err; + + return 0; +} + +static void mtk_cmd_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + /* Full descriptor that fits FIFO minus one */ + u32 count = + ((1 << MTK_CMD_FIFO_SIZE(cap->hia_opt)) / MTK_DESC_SZ) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + CDR_CFG(i)); + + /* Clear CDR count */ + writel(MTK_CNT_RST, cryp->base + CDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + CDR_PROC_COUNT(i)); + + writel(0, cryp->base + CDR_PREP_PNTR(i)); + writel(0, cryp->base + CDR_PROC_PNTR(i)); + writel(0, cryp->base + CDR_DMA_CFG(i)); + + /* Configure CDR host address space */ + writel(0, cryp->base + CDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->cmd_dma, cryp->base + CDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + CDR_RING_SIZE(i)); + + /* Clear and disable all CDR interrupts */ + writel(MTK_CDR_STAT_CLR, cryp->base + CDR_STAT(i)); + + /* + * Set command descriptor offset and enable additional + * token present in descriptor. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | + MTK_DESC_OFFSET(MTK_DESC_OFF) | + MTK_DESC_ATP_PRESENT, + cryp->base + CDR_DESC_SIZE(i)); + + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * MTK_DESC_SZ), + cryp->base + CDR_CFG(i)); +} + +static void mtk_res_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + u32 rndup = 2; + u32 count = ((1 << MTK_RES_FIFO_SIZE(cap->hia_opt)) / rndup) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + RDR_CFG(i)); + + /* Clear RDR count */ + writel(MTK_CNT_RST, cryp->base + RDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + RDR_PROC_COUNT(i)); + + writel(0, cryp->base + RDR_PREP_PNTR(i)); + writel(0, cryp->base + RDR_PROC_PNTR(i)); + writel(0, cryp->base + RDR_DMA_CFG(i)); + + /* Configure RDR host address space */ + writel(0, cryp->base + RDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->res_dma, cryp->base + RDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + RDR_RING_SIZE(i)); + writel(MTK_RDR_STAT_CLR, cryp->base + RDR_STAT(i)); + + /* + * RDR manager generates update interrupts on a per-completed-packet, + * and the rd_proc_thresh_irq interrupt is fired when proc_pkt_count + * for the RDR exceeds the number of packets. + */ + writel(MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE, + cryp->base + RDR_THRESH(i)); + + /* + * Configure a threshold and time-out value for the processed + * result descriptors (or complete packets) that are written to + * the RDR. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | MTK_DESC_OFFSET(MTK_DESC_OFF), + cryp->base + RDR_DESC_SIZE(i)); + + /* + * Configure HIA fetch size and fetch threshold that are used to + * fetch blocks of multiple descriptors. 
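The same "whole descriptors that fit the FIFO, minus one" arithmetic drives the command-ring fetch size in mtk_cmd_desc_ring_setup() above. A worked example with the descriptor geometry from mtk-platform.h; the FIFO-size field value of 7 is purely hypothetical, the real one is read out of HIA_OPTIONS at probe time:

```c
#include <stdint.h>

#define MTK_DESC_OFF	8			/* slot size of struct mtk_desc, in words */
#define MTK_DESC_SZ	(MTK_DESC_OFF - 2)	/* size programmed into the ring */

/*
 * "Full descriptors that fit the FIFO, minus one", as computed in
 * mtk_cmd_desc_ring_setup().  @fifo_size_log2 is the field extracted from
 * HIA_OPTIONS with MTK_CMD_FIFO_SIZE().
 */
static inline uint32_t cdr_fetch_count(uint32_t fifo_size_log2)
{
	return ((1u << fifo_size_log2) / MTK_DESC_SZ) - 1;
}

/*
 * Hypothetical example: fifo_size_log2 == 7 -> a 128-word FIFO, so
 * cdr_fetch_count() == 128 / 6 - 1 == 20, giving a fetch size of
 * 20 * MTK_DESC_OFF == 160 words and a fetch threshold of
 * 20 * MTK_DESC_SZ == 120 words in CDR_CFG.
 */
```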
+ */ + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * rndup) | + MTK_DESC_OVL_IRQ_EN, + cryp->base + RDR_CFG(i)); +} + +static int mtk_packet_engine_setup(struct mtk_cryp *cryp) +{ + struct mtk_sys_cap cap; + int i, err; + u32 val; + + cap.hia_ver = readl(cryp->base + HIA_VERSION); + cap.hia_opt = readl(cryp->base + HIA_OPTIONS); + cap.hw_opt = readl(cryp->base + EIP97_OPTIONS); + + if (!(((u16)cap.hia_ver) == MTK_HIA_SIGNATURE)) + return -EINVAL; + + /* Configure endianness conversion method for master (DMA) interface */ + writel(0, cryp->base + EIP97_MST_CTRL); + + /* Set HIA burst size */ + val = readl(cryp->base + HIA_MST_CTRL); + val &= ~MTK_BURST_SIZE_MSK; + val |= MTK_BURST_SIZE(5); + writel(val, cryp->base + HIA_MST_CTRL); + + err = mtk_dfe_dse_reset(cryp); + if (err) { + dev_err(cryp->dev, "Failed to reset DFE and DSE.\n"); + return err; + } + + mtk_dfe_dse_buf_setup(cryp, &cap); + + /* Enable the 4 rings for the packet engines. */ + mtk_desc_ring_link(cryp, 0xf); + + for (i = 0; i < RING_MAX; i++) { + mtk_cmd_desc_ring_setup(cryp, i, &cap); + mtk_res_desc_ring_setup(cryp, i, &cap); + } + + writel(MTK_PE_TK_LOC_AVL | MTK_PE_PROC_HELD | MTK_PE_TK_TIMEOUT_EN, + cryp->base + PE_TOKEN_CTRL_STAT); + + /* Clear all pending interrupts */ + writel(MTK_AIC_G_CLR, cryp->base + AIC_G_ACK); + writel(MTK_PE_INPUT_DMA_ERR | MTK_PE_OUTPUT_DMA_ERR | + MTK_PE_PKT_PORC_ERR | MTK_PE_PKT_TIMEOUT | + MTK_PE_FATAL_ERR | MTK_PE_INPUT_DMA_ERR_EN | + MTK_PE_OUTPUT_DMA_ERR_EN | MTK_PE_PKT_PORC_ERR_EN | + MTK_PE_PKT_TIMEOUT_EN | MTK_PE_FATAL_ERR_EN | + MTK_PE_INT_OUT_EN, + cryp->base + PE_INTERRUPT_CTRL_STAT); + + return 0; +} + +static int mtk_aic_cap_check(struct mtk_cryp *cryp, int hw) +{ + u32 val; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_VERSION); + else + val = readl(cryp->base + AIC_VERSION(hw)); + + val &= MTK_AIC_VER_MSK; + if (val != MTK_AIC_VER11 && val != MTK_AIC_VER12) + return -ENXIO; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_OPTIONS); + else + val = readl(cryp->base + AIC_OPTIONS(hw)); + + val &= MTK_AIC_INT_MSK; + if (!val || val > 32) + return -ENXIO; + + return 0; +} + +static int mtk_aic_init(struct mtk_cryp *cryp, int hw) +{ + int err; + + err = mtk_aic_cap_check(cryp, hw); + if (err) + return err; + + /* Disable all interrupts and set initial configuration */ + if (hw == RING_MAX) { + writel(0, cryp->base + AIC_G_ENABLE_CTRL); + writel(0, cryp->base + AIC_G_POL_CTRL); + writel(0, cryp->base + AIC_G_TYPE_CTRL); + writel(0, cryp->base + AIC_G_ENABLE_SET); + } else { + writel(0, cryp->base + AIC_ENABLE_CTRL(hw)); + writel(0, cryp->base + AIC_POL_CTRL(hw)); + writel(0, cryp->base + AIC_TYPE_CTRL(hw)); + writel(0, cryp->base + AIC_ENABLE_SET(hw)); + } + + return 0; +} + +static int mtk_accelerator_init(struct mtk_cryp *cryp) +{ + int i, err; + + /* Initialize advanced interrupt controller(AIC) */ + for (i = 0; i < MTK_IRQ_NUM; i++) { + err = mtk_aic_init(cryp, i); + if (err) { + dev_err(cryp->dev, "Failed to initialize AIC.\n"); + return err; + } + } + + /* Initialize packet engine */ + err = mtk_packet_engine_setup(cryp); + if (err) { + dev_err(cryp->dev, "Failed to configure packet engine.\n"); + return err; + } + + return 0; +} + +static void mtk_desc_dma_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < RING_MAX; i++) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->res_base, + cryp->ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->cmd_base, + 
cryp->ring[i]->cmd_dma); + kfree(cryp->ring[i]); + } +} + +static int mtk_desc_ring_alloc(struct mtk_cryp *cryp) +{ + struct mtk_ring **ring = cryp->ring; + int i, err = ENOMEM; + + for (i = 0; i < RING_MAX; i++) { + ring[i] = kzalloc(sizeof(**ring), GFP_KERNEL); + if (!ring[i]) + goto err_cleanup; + + ring[i]->cmd_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->cmd_dma, + GFP_KERNEL); + if (!ring[i]->cmd_base) + goto err_cleanup; + + ring[i]->res_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->res_dma, + GFP_KERNEL); + if (!ring[i]->res_base) + goto err_cleanup; + } + return 0; + +err_cleanup: + for (; i--; ) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->res_base, ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->cmd_base, ring[i]->cmd_dma); + kfree(ring[i]); + } + return err; +} + +static int mtk_crypto_probe(struct platform_device *pdev) +{ + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct mtk_cryp *cryp; + int i, err; + + cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); + if (!cryp) + return -ENOMEM; + + cryp->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cryp->base)) + return PTR_ERR(cryp->base); + + for (i = 0; i < MTK_IRQ_NUM; i++) { + cryp->irq[i] = platform_get_irq(pdev, i); + if (cryp->irq[i] < 0) { + dev_err(cryp->dev, "no IRQ:%d resource info\n", i); + return -ENXIO; + } + } + + cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif"); + cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp"); + if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp)) + return -EPROBE_DEFER; + + cryp->dev = &pdev->dev; + pm_runtime_enable(cryp->dev); + pm_runtime_get_sync(cryp->dev); + + err = clk_prepare_enable(cryp->clk_ethif); + if (err) + goto err_clk_ethif; + + err = clk_prepare_enable(cryp->clk_cryp); + if (err) + goto err_clk_cryp; + + /* Allocate four command/result descriptor rings */ + err = mtk_desc_ring_alloc(cryp); + if (err) { + dev_err(cryp->dev, "Unable to allocate descriptor rings.\n"); + goto err_resource; + } + + /* Initialize hardware modules */ + err = mtk_accelerator_init(cryp); + if (err) { + dev_err(cryp->dev, "Failed to initialize cryptographic engine.\n"); + goto err_engine; + } + + err = mtk_cipher_alg_register(cryp); + if (err) { + dev_err(cryp->dev, "Unable to register cipher algorithm.\n"); + goto err_cipher; + } + + err = mtk_hash_alg_register(cryp); + if (err) { + dev_err(cryp->dev, "Unable to register hash algorithm.\n"); + goto err_hash; + } + + platform_set_drvdata(pdev, cryp); + return 0; + +err_hash: + mtk_cipher_alg_release(cryp); +err_cipher: + mtk_dfe_dse_reset(cryp); +err_engine: + mtk_desc_dma_free(cryp); +err_resource: + clk_disable_unprepare(cryp->clk_cryp); +err_clk_cryp: + clk_disable_unprepare(cryp->clk_ethif); +err_clk_ethif: + pm_runtime_put_sync(cryp->dev); + pm_runtime_disable(cryp->dev); + + return err; +} + +static int mtk_crypto_remove(struct platform_device *pdev) +{ + struct mtk_cryp *cryp = platform_get_drvdata(pdev); + + mtk_hash_alg_release(cryp); + mtk_cipher_alg_release(cryp); + mtk_desc_dma_free(cryp); + + clk_disable_unprepare(cryp->clk_cryp); + clk_disable_unprepare(cryp->clk_ethif); + + pm_runtime_put_sync(cryp->dev); + pm_runtime_disable(cryp->dev); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const struct of_device_id of_crypto_id[] = { + { .compatible = "mediatek,eip97-crypto" }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_crypto_id); + +static struct platform_driver mtk_crypto_driver 
= { + .probe = mtk_crypto_probe, + .remove = mtk_crypto_remove, + .driver = { + .name = "mtk-crypto", + .owner = THIS_MODULE, + .of_match_table = of_crypto_id, + }, +}; +module_platform_driver(mtk_crypto_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ryder Lee <ryder.lee@mediatek.com>"); +MODULE_DESCRIPTION("Cryptographic accelerator driver for EIP97"); diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h new file mode 100644 index 000000000000..ed6d8717f7f4 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -0,0 +1,231 @@ +/* + * Driver for EIP97 cryptographic accelerator. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __MTK_PLATFORM_H_ +#define __MTK_PLATFORM_H_ + +#include <crypto/algapi.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> +#include <linux/crypto.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/scatterlist.h> +#include "mtk-regs.h" + +#define MTK_RDR_PROC_THRESH BIT(0) +#define MTK_RDR_PROC_MODE BIT(23) +#define MTK_CNT_RST BIT(31) +#define MTK_IRQ_RDR0 BIT(1) +#define MTK_IRQ_RDR1 BIT(3) +#define MTK_IRQ_RDR2 BIT(5) +#define MTK_IRQ_RDR3 BIT(7) + +#define SIZE_IN_WORDS(x) ((x) >> 2) + +/** + * Ring 0/1 are used by AES encrypt and decrypt. + * Ring 2/3 are used by SHA. + */ +enum { + RING0 = 0, + RING1, + RING2, + RING3, + RING_MAX, +}; + +#define MTK_REC_NUM (RING_MAX / 2) +#define MTK_IRQ_NUM 5 + +/** + * struct mtk_desc - DMA descriptor + * @hdr: the descriptor control header + * @buf: DMA address of input buffer segment + * @ct: DMA address of command token that control operation flow + * @ct_hdr: the command token control header + * @tag: the user-defined field + * @tfm: DMA address of transform state + * @bound: align descriptors offset boundary + * + * Structure passed to the crypto engine to describe where source + * data needs to be fetched and how it needs to be processed. + */ +struct mtk_desc { + __le32 hdr; + __le32 buf; + __le32 ct; + __le32 ct_hdr; + __le32 tag; + __le32 tfm; + __le32 bound[2]; +}; + +#define MTK_DESC_NUM 512 +#define MTK_DESC_OFF SIZE_IN_WORDS(sizeof(struct mtk_desc)) +#define MTK_DESC_SZ (MTK_DESC_OFF - 2) +#define MTK_DESC_RING_SZ ((sizeof(struct mtk_desc) * MTK_DESC_NUM)) +#define MTK_DESC_CNT(x) ((MTK_DESC_OFF * (x)) << 2) +#define MTK_DESC_LAST cpu_to_le32(BIT(22)) +#define MTK_DESC_FIRST cpu_to_le32(BIT(23)) +#define MTK_DESC_BUF_LEN(x) cpu_to_le32(x) +#define MTK_DESC_CT_LEN(x) cpu_to_le32((x) << 24) + +/** + * struct mtk_ring - Descriptor ring + * @cmd_base: pointer to command descriptor ring base + * @cmd_dma: DMA address of command descriptor ring + * @cmd_pos: current position in the command descriptor ring + * @res_base: pointer to result descriptor ring base + * @res_dma: DMA address of result descriptor ring + * @res_pos: current position in the result descriptor ring + * + * A descriptor ring is a circular buffer that is used to manage + * one or more descriptors. There are two type of descriptor rings; + * the command descriptor ring and result descriptor ring. 
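The geometry implied by these macros is easy to sanity-check: struct mtk_desc is eight 32-bit words, so MTK_DESC_OFF is 8, MTK_DESC_SZ is 6 (the two trailing alignment words are excluded), and a ring of 512 slots occupies 16 KiB of coherent memory per direction. A standalone compile-time check (the struct here is a local mirror, not the header's own definition):

```c
#include <stdint.h>

/* Local mirror of struct mtk_desc, used only for the size checks below. */
struct mtk_desc_layout {
	uint32_t hdr, buf, ct, ct_hdr, tag, tfm, bound[2];
};

#define DESC_NUM	512
#define DESC_OFF	(sizeof(struct mtk_desc_layout) / 4)	/* words per slot */
#define DESC_SZ		(DESC_OFF - 2)				/* minus the two alignment words */
#define DESC_RING_SZ	(sizeof(struct mtk_desc_layout) * DESC_NUM)

_Static_assert(sizeof(struct mtk_desc_layout) == 32, "a descriptor is 8 words");
_Static_assert(DESC_OFF == 8 && DESC_SZ == 6, "matches MTK_DESC_OFF / MTK_DESC_SZ");
_Static_assert(DESC_RING_SZ == 16384, "each ring direction needs 16 KiB of coherent memory");
```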
+ */ +struct mtk_ring { + struct mtk_desc *cmd_base; + dma_addr_t cmd_dma; + u32 cmd_pos; + struct mtk_desc *res_base; + dma_addr_t res_dma; + u32 res_pos; +}; + +/** + * struct mtk_aes_dma - Structure that holds sg list info + * @sg: pointer to scatter-gather list + * @nents: number of entries in the sg list + * @remainder: remainder of sg list + * @sg_len: number of entries in the sg mapped list + */ +struct mtk_aes_dma { + struct scatterlist *sg; + int nents; + u32 remainder; + u32 sg_len; +}; + +struct mtk_aes_base_ctx; +struct mtk_aes_rec; +struct mtk_cryp; + +typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); + +/** + * struct mtk_aes_rec - AES operation record + * @queue: crypto request queue + * @areq: pointer to async request + * @task: the tasklet used to handle the AES completion interrupt + * @ctx: pointer to current context + * @src: the structure that holds source sg list info + * @dst: the structure that holds destination sg list info + * @aligned_sg: the scatterlist used for alignment fix-up + * @real_dst: pointer to the destination sg list + * @resume: pointer to resume function + * @total: request buffer length + * @buf: pointer to page buffer + * @id: record identification + * @flags: flags describing the AES operation state + * @lock: the async queue lock + * + * Structure used to record AES execution state. + */ +struct mtk_aes_rec { + struct crypto_queue queue; + struct crypto_async_request *areq; + struct tasklet_struct task; + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_dma src; + struct mtk_aes_dma dst; + + struct scatterlist aligned_sg; + struct scatterlist *real_dst; + + mtk_aes_fn resume; + + size_t total; + void *buf; + + u8 id; + unsigned long flags; + /* queue lock */ + spinlock_t lock; +}; + +/** + * struct mtk_sha_rec - SHA operation record + * @queue: crypto request queue + * @req: pointer to ahash request + * @task: the tasklet used to handle the SHA completion interrupt + * @id: record identification + * @flags: flags describing the SHA operation state + * @lock: the async queue lock + * + * Structure used to record SHA execution state. + */ +struct mtk_sha_rec { + struct crypto_queue queue; + struct ahash_request *req; + struct tasklet_struct task; + + u8 id; + unsigned long flags; + /* queue lock */ + spinlock_t lock; +}; + +/** + * struct mtk_cryp - Cryptographic device + * @base: pointer to mapped register I/O base + * @dev: pointer to device + * @clk_ethif: pointer to ethif clock + * @clk_cryp: pointer to crypto clock + * @irq: global system and ring IRQs + * @ring: the command/result descriptor rings; each execution record maps to a ring + * @aes: the AES execution records + * @sha: the SHA execution records + * @aes_list: device list of AES + * @sha_list: device list of SHA + * @tmp: pointer to temporary buffer for internal use + * @tmp_dma: DMA address of temporary buffer + * @rec: used to assign a SHA record to a tfm in round-robin fashion + * + * Structure storing cryptographic device information.
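+ * Ring 0/1 belong to the two AES records and ring 2/3 to the two SHA records, + * so every record owns exactly one command/result descriptor ring pair.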
+ */ +struct mtk_cryp { + void __iomem *base; + struct device *dev; + struct clk *clk_ethif; + struct clk *clk_cryp; + int irq[MTK_IRQ_NUM]; + + struct mtk_ring *ring[RING_MAX]; + struct mtk_aes_rec *aes[MTK_REC_NUM]; + struct mtk_sha_rec *sha[MTK_REC_NUM]; + + struct list_head aes_list; + struct list_head sha_list; + + void *tmp; + dma_addr_t tmp_dma; + bool rec; +}; + +int mtk_cipher_alg_register(struct mtk_cryp *cryp); +void mtk_cipher_alg_release(struct mtk_cryp *cryp); +int mtk_hash_alg_register(struct mtk_cryp *cryp); +void mtk_hash_alg_release(struct mtk_cryp *cryp); + +#endif /* __MTK_PLATFORM_H_ */ diff --git a/drivers/crypto/mediatek/mtk-regs.h b/drivers/crypto/mediatek/mtk-regs.h new file mode 100644 index 000000000000..94f4eb85be3f --- /dev/null +++ b/drivers/crypto/mediatek/mtk-regs.h @@ -0,0 +1,194 @@ +/* + * Support for MediaTek cryptographic accelerator. + * + * Copyright (c) 2016 MediaTek Inc. + * Author: Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + */ + +#ifndef __MTK_REGS_H__ +#define __MTK_REGS_H__ + +/* HIA, Command Descriptor Ring Manager */ +#define CDR_BASE_ADDR_LO(x) (0x0 + ((x) << 12)) +#define CDR_BASE_ADDR_HI(x) (0x4 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_LO(x) (0x8 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_HI(x) (0xC + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_LO(x) (0x10 + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_HI(x) (0x14 + ((x) << 12)) +#define CDR_RING_SIZE(x) (0x18 + ((x) << 12)) +#define CDR_DESC_SIZE(x) (0x1C + ((x) << 12)) +#define CDR_CFG(x) (0x20 + ((x) << 12)) +#define CDR_DMA_CFG(x) (0x24 + ((x) << 12)) +#define CDR_THRESH(x) (0x28 + ((x) << 12)) +#define CDR_PREP_COUNT(x) (0x2C + ((x) << 12)) +#define CDR_PROC_COUNT(x) (0x30 + ((x) << 12)) +#define CDR_PREP_PNTR(x) (0x34 + ((x) << 12)) +#define CDR_PROC_PNTR(x) (0x38 + ((x) << 12)) +#define CDR_STAT(x) (0x3C + ((x) << 12)) + +/* HIA, Result Descriptor Ring Manager */ +#define RDR_BASE_ADDR_LO(x) (0x800 + ((x) << 12)) +#define RDR_BASE_ADDR_HI(x) (0x804 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_LO(x) (0x808 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_HI(x) (0x80C + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_LO(x) (0x810 + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_HI(x) (0x814 + ((x) << 12)) +#define RDR_RING_SIZE(x) (0x818 + ((x) << 12)) +#define RDR_DESC_SIZE(x) (0x81C + ((x) << 12)) +#define RDR_CFG(x) (0x820 + ((x) << 12)) +#define RDR_DMA_CFG(x) (0x824 + ((x) << 12)) +#define RDR_THRESH(x) (0x828 + ((x) << 12)) +#define RDR_PREP_COUNT(x) (0x82C + ((x) << 12)) +#define RDR_PROC_COUNT(x) (0x830 + ((x) << 12)) +#define RDR_PREP_PNTR(x) (0x834 + ((x) << 12)) +#define RDR_PROC_PNTR(x) (0x838 + ((x) << 12)) +#define RDR_STAT(x) (0x83C + ((x) << 12)) + +/* HIA, Ring AIC */ +#define AIC_POL_CTRL(x) (0xE000 - ((x) << 12)) +#define AIC_TYPE_CTRL(x) (0xE004 - ((x) << 12)) +#define AIC_ENABLE_CTRL(x) (0xE008 - ((x) << 12)) +#define AIC_RAW_STAL(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLE_SET(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLED_STAT(x) (0xE010 - ((x) << 12)) +#define AIC_ACK(x) (0xE010 - ((x) << 12)) +#define AIC_ENABLE_CLR(x) (0xE014 - ((x) << 12)) +#define AIC_OPTIONS(x) (0xE018 - ((x) << 12)) +#define AIC_VERSION(x) (0xE01C - ((x) << 12)) + +/* HIA, Global AIC */ +#define AIC_G_POL_CTRL 0xF800 +#define AIC_G_TYPE_CTRL 0xF804 +#define AIC_G_ENABLE_CTRL 0xF808 +#define AIC_G_RAW_STAT 
0xF80C +#define AIC_G_ENABLE_SET 0xF80C +#define AIC_G_ENABLED_STAT 0xF810 +#define AIC_G_ACK 0xF810 +#define AIC_G_ENABLE_CLR 0xF814 +#define AIC_G_OPTIONS 0xF818 +#define AIC_G_VERSION 0xF81C + +/* HIA, Data Fetch Engine */ +#define DFE_CFG 0xF000 +#define DFE_PRIO_0 0xF010 +#define DFE_PRIO_1 0xF014 +#define DFE_PRIO_2 0xF018 +#define DFE_PRIO_3 0xF01C + +/* HIA, Data Fetch Engine access monitoring for CDR */ +#define DFE_RING_REGION_LO(x) (0xF080 + ((x) << 3)) +#define DFE_RING_REGION_HI(x) (0xF084 + ((x) << 3)) + +/* HIA, Data Fetch Engine thread control and status for thread */ +#define DFE_THR_CTRL 0xF200 +#define DFE_THR_STAT 0xF204 +#define DFE_THR_DESC_CTRL 0xF208 +#define DFE_THR_DESC_DPTR_LO 0xF210 +#define DFE_THR_DESC_DPTR_HI 0xF214 +#define DFE_THR_DESC_ACDPTR_LO 0xF218 +#define DFE_THR_DESC_ACDPTR_HI 0xF21C + +/* HIA, Data Store Engine */ +#define DSE_CFG 0xF400 +#define DSE_PRIO_0 0xF410 +#define DSE_PRIO_1 0xF414 +#define DSE_PRIO_2 0xF418 +#define DSE_PRIO_3 0xF41C + +/* HIA, Data Store Engine access monitoring for RDR */ +#define DSE_RING_REGION_LO(x) (0xF480 + ((x) << 3)) +#define DSE_RING_REGION_HI(x) (0xF484 + ((x) << 3)) + +/* HIA, Data Store Engine thread control and status for thread */ +#define DSE_THR_CTRL 0xF600 +#define DSE_THR_STAT 0xF604 +#define DSE_THR_DESC_CTRL 0xF608 +#define DSE_THR_DESC_DPTR_LO 0xF610 +#define DSE_THR_DESC_DPTR_HI 0xF614 +#define DSE_THR_DESC_S_DPTR_LO 0xF618 +#define DSE_THR_DESC_S_DPTR_HI 0xF61C +#define DSE_THR_ERROR_STAT 0xF620 + +/* HIA Global */ +#define HIA_MST_CTRL 0xFFF4 +#define HIA_OPTIONS 0xFFF8 +#define HIA_VERSION 0xFFFC + +/* Processing Engine Input Side, Processing Engine */ +#define PE_IN_DBUF_THRESH 0x10000 +#define PE_IN_TBUF_THRESH 0x10100 + +/* Packet Engine Configuration / Status Registers */ +#define PE_TOKEN_CTRL_STAT 0x11000 +#define PE_FUNCTION_EN 0x11004 +#define PE_CONTEXT_CTRL 0x11008 +#define PE_INTERRUPT_CTRL_STAT 0x11010 +#define PE_CONTEXT_STAT 0x1100C +#define PE_OUT_TRANS_CTRL_STAT 0x11018 +#define PE_OUT_BUF_CTRL 0x1101C + +/* Packet Engine PRNG Registers */ +#define PE_PRNG_STAT 0x11040 +#define PE_PRNG_CTRL 0x11044 +#define PE_PRNG_SEED_L 0x11048 +#define PE_PRNG_SEED_H 0x1104C +#define PE_PRNG_KEY_0_L 0x11050 +#define PE_PRNG_KEY_0_H 0x11054 +#define PE_PRNG_KEY_1_L 0x11058 +#define PE_PRNG_KEY_1_H 0x1105C +#define PE_PRNG_RES_0 0x11060 +#define PE_PRNG_RES_1 0x11064 +#define PE_PRNG_RES_2 0x11068 +#define PE_PRNG_RES_3 0x1106C +#define PE_PRNG_LFSR_L 0x11070 +#define PE_PRNG_LFSR_H 0x11074 + +/* Packet Engine AIC */ +#define PE_EIP96_AIC_POL_CTRL 0x113C0 +#define PE_EIP96_AIC_TYPE_CTRL 0x113C4 +#define PE_EIP96_AIC_ENABLE_CTRL 0x113C8 +#define PE_EIP96_AIC_RAW_STAT 0x113CC +#define PE_EIP96_AIC_ENABLE_SET 0x113CC +#define PE_EIP96_AIC_ENABLED_STAT 0x113D0 +#define PE_EIP96_AIC_ACK 0x113D0 +#define PE_EIP96_AIC_ENABLE_CLR 0x113D4 +#define PE_EIP96_AIC_OPTIONS 0x113D8 +#define PE_EIP96_AIC_VERSION 0x113DC + +/* Packet Engine Options & Version Registers */ +#define PE_EIP96_OPTIONS 0x113F8 +#define PE_EIP96_VERSION 0x113FC + +/* Processing Engine Output Side */ +#define PE_OUT_DBUF_THRESH 0x11C00 +#define PE_OUT_TBUF_THRESH 0x11D00 + +/* Processing Engine Local AIC */ +#define PE_AIC_POL_CTRL 0x11F00 +#define PE_AIC_TYPE_CTRL 0x11F04 +#define PE_AIC_ENABLE_CTRL 0x11F08 +#define PE_AIC_RAW_STAT 0x11F0C +#define PE_AIC_ENABLE_SET 0x11F0C +#define PE_AIC_ENABLED_STAT 0x11F10 +#define PE_AIC_ENABLE_CLR 0x11F14 +#define PE_AIC_OPTIONS 0x11F18 +#define PE_AIC_VERSION 0x11F1C + +/* Processing Engine 
General Configuration and Version */ +#define PE_IN_FLIGHT 0x11FF0 +#define PE_OPTIONS 0x11FF8 +#define PE_VERSION 0x11FFC + +/* EIP-97 - Global */ +#define EIP97_CLOCK_STATE 0x1FFE4 +#define EIP97_FORCE_CLOCK_ON 0x1FFE8 +#define EIP97_FORCE_CLOCK_OFF 0x1FFEC +#define EIP97_MST_CTRL 0x1FFF4 +#define EIP97_OPTIONS 0x1FFF8 +#define EIP97_VERSION 0x1FFFC +#endif /* __MTK_REGS_H__ */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c new file mode 100644 index 000000000000..55e3805fba07 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -0,0 +1,1435 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 SHA1/SHA2(HMAC) acceleration. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-sha.c and omap-sham.c drivers. + */ + +#include <crypto/sha.h> +#include "mtk-platform.h" + +#define SHA_ALIGN_MSK (sizeof(u32) - 1) +#define SHA_QUEUE_SIZE 512 +#define SHA_TMP_BUF_SIZE 512 +#define SHA_BUF_SIZE ((u32)PAGE_SIZE) + +#define SHA_OP_UPDATE 1 +#define SHA_OP_FINAL 2 + +#define SHA_DATA_LEN_MSK cpu_to_le32(GENMASK(16, 0)) + +/* SHA command token */ +#define SHA_CT_SIZE 5 +#define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) +#define SHA_CMD0 cpu_to_le32(0x03020000) +#define SHA_CMD1 cpu_to_le32(0x21060000) +#define SHA_CMD2 cpu_to_le32(0xe0e63802) + +/* SHA transform information */ +#define SHA_TFM_HASH cpu_to_le32(0x2 << 0) +#define SHA_TFM_INNER_DIG cpu_to_le32(0x1 << 21) +#define SHA_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define SHA_TFM_START cpu_to_le32(0x1 << 4) +#define SHA_TFM_CONTINUE cpu_to_le32(0x1 << 5) +#define SHA_TFM_HASH_STORE cpu_to_le32(0x1 << 19) +#define SHA_TFM_SHA1 cpu_to_le32(0x2 << 23) +#define SHA_TFM_SHA256 cpu_to_le32(0x3 << 23) +#define SHA_TFM_SHA224 cpu_to_le32(0x4 << 23) +#define SHA_TFM_SHA512 cpu_to_le32(0x5 << 23) +#define SHA_TFM_SHA384 cpu_to_le32(0x6 << 23) +#define SHA_TFM_DIGEST(x) cpu_to_le32(((x) & GENMASK(3, 0)) << 24) + +/* SHA flags */ +#define SHA_FLAGS_BUSY BIT(0) +#define SHA_FLAGS_FINAL BIT(1) +#define SHA_FLAGS_FINUP BIT(2) +#define SHA_FLAGS_SG BIT(3) +#define SHA_FLAGS_ALGO_MSK GENMASK(8, 4) +#define SHA_FLAGS_SHA1 BIT(4) +#define SHA_FLAGS_SHA224 BIT(5) +#define SHA_FLAGS_SHA256 BIT(6) +#define SHA_FLAGS_SHA384 BIT(7) +#define SHA_FLAGS_SHA512 BIT(8) +#define SHA_FLAGS_HMAC BIT(9) +#define SHA_FLAGS_PAD BIT(10) + +/** + * mtk_sha_ct is a set of hardware instructions(command token) + * that are used to control engine's processing flow of SHA, + * and it contains the first two words of transform state. + */ +struct mtk_sha_ct { + __le32 ctrl[2]; + __le32 cmd[3]; +}; + +/** + * mtk_sha_tfm is used to define SHA transform state + * and store result digest that produced by engine. + */ +struct mtk_sha_tfm { + __le32 ctrl[2]; + __le32 digest[SIZE_IN_WORDS(SHA512_DIGEST_SIZE)]; +}; + +/** + * mtk_sha_info consists of command token and transform state + * of SHA, its role is similar to mtk_aes_info. 
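+ * The command token and the transform state are mapped to DMA as one block; + * the transform state immediately follows the token, so tfm_dma is simply + * ct_dma + sizeof(struct mtk_sha_ct) (see mtk_sha_info_update()).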
+ */ +struct mtk_sha_info { + struct mtk_sha_ct ct; + struct mtk_sha_tfm tfm; +}; + +struct mtk_sha_reqctx { + struct mtk_sha_info info; + unsigned long flags; + unsigned long op; + + u64 digcnt; + bool start; + size_t bufcnt; + dma_addr_t dma_addr; + + __le32 ct_hdr; + u32 ct_size; + dma_addr_t ct_dma; + dma_addr_t tfm_dma; + + /* Walk state */ + struct scatterlist *sg; + u32 offset; /* Offset in current sg */ + u32 total; /* Total request */ + size_t ds; + size_t bs; + + u8 *buffer; +}; + +struct mtk_sha_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); + u8 opad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); +}; + +struct mtk_sha_ctx { + struct mtk_cryp *cryp; + unsigned long flags; + u8 id; + u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); + + struct mtk_sha_hmac_ctx base[0]; +}; + +struct mtk_sha_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_sha_drv mtk_sha = { + .dev_list = LIST_HEAD_INIT(mtk_sha.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_sha.lock), +}; + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req); + +static inline u32 mtk_sha_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_sha_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_sha_find_dev(struct mtk_sha_ctx *tctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_sha.lock); + if (!tctx->cryp) { + list_for_each_entry(tmp, &mtk_sha.dev_list, sha_list) { + cryp = tmp; + break; + } + tctx->cryp = cryp; + } else { + cryp = tctx->cryp; + } + + /* + * Assign record id to tfm in round-robin fashion, and this + * will help tfm to bind to corresponding descriptor rings. + */ + tctx->id = cryp->rec; + cryp->rec = !cryp->rec; + + spin_unlock_bh(&mtk_sha.lock); + + return cryp; +} + +static int mtk_sha_append_sg(struct mtk_sha_reqctx *ctx) +{ + size_t count; + + while ((ctx->bufcnt < SHA_BUF_SIZE) && ctx->total) { + count = min(ctx->sg->length - ctx->offset, ctx->total); + count = min(count, SHA_BUF_SIZE - ctx->bufcnt); + + if (count <= 0) { + /* + * Check if count <= 0 because the buffer is full or + * because the sg length is 0. In the latest case, + * check if there is another sg in the list, a 0 length + * sg doesn't necessarily mean the end of the sg list. + */ + if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { + ctx->sg = sg_next(ctx->sg); + continue; + } else { + break; + } + } + + scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, + ctx->offset, count, 0); + + ctx->bufcnt += count; + ctx->offset += count; + ctx->total -= count; + + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +/* + * The purpose of this padding is to ensure that the padded message is a + * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). + * The bit "1" is appended at the end of the message followed by + * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or + * 128 bits block (SHA384/SHA512) equals to the message length in bits + * is appended. 
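+ * + * For example, a 3-byte SHA-256 message gets 0x80, 52 zero bytes and the + * 8-byte big-endian bit length appended (padlen = 56 - 3 = 53, using the + * rules below), filling exactly one 64-byte block.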
+ * + * For SHA1/SHA224/SHA256, padlen is calculated as followed: + * - if message length < 56 bytes then padlen = 56 - message length + * - else padlen = 64 + 56 - message length + * + * For SHA384/SHA512, padlen is calculated as followed: + * - if message length < 112 bytes then padlen = 112 - message length + * - else padlen = 128 + 112 - message length + */ +static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) +{ + u32 index, padlen; + u64 bits[2]; + u64 size = ctx->digcnt; + + size += ctx->bufcnt; + size += len; + + bits[1] = cpu_to_be64(size << 3); + bits[0] = cpu_to_be64(size >> 61); + + if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + index = ctx->bufcnt & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128 + 112) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); + ctx->bufcnt += padlen + 16; + ctx->flags |= SHA_FLAGS_PAD; + } else { + index = ctx->bufcnt & 0x3f; + padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); + ctx->bufcnt += padlen + 8; + ctx->flags |= SHA_FLAGS_PAD; + } +} + +/* Initialize basic transform information of SHA */ +static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) +{ + struct mtk_sha_ct *ct = &ctx->info.ct; + struct mtk_sha_tfm *tfm = &ctx->info.tfm; + + ctx->ct_hdr = SHA_CT_CTRL_HDR; + ctx->ct_size = SHA_CT_SIZE; + + tfm->ctrl[0] = SHA_TFM_HASH | SHA_TFM_INNER_DIG | + SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); + + switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { + case SHA_FLAGS_SHA1: + tfm->ctrl[0] |= SHA_TFM_SHA1; + break; + case SHA_FLAGS_SHA224: + tfm->ctrl[0] |= SHA_TFM_SHA224; + break; + case SHA_FLAGS_SHA256: + tfm->ctrl[0] |= SHA_TFM_SHA256; + break; + case SHA_FLAGS_SHA384: + tfm->ctrl[0] |= SHA_TFM_SHA384; + break; + case SHA_FLAGS_SHA512: + tfm->ctrl[0] |= SHA_TFM_SHA512; + break; + + default: + /* Should not happen... */ + return; + } + + tfm->ctrl[1] = SHA_TFM_HASH_STORE; + ct->ctrl[0] = tfm->ctrl[0] | SHA_TFM_CONTINUE | SHA_TFM_START; + ct->ctrl[1] = tfm->ctrl[1]; + + ct->cmd[0] = SHA_CMD0; + ct->cmd[1] = SHA_CMD1; + ct->cmd[2] = SHA_CMD2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); +} + +/* + * Update input data length field of transform information and + * map it to DMA region. + */ +static int mtk_sha_info_update(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_sha_info *info = &ctx->info; + struct mtk_sha_ct *ct = &info->ct; + + if (ctx->start) + ctx->start = false; + else + ct->ctrl[0] &= ~SHA_TFM_START; + + ctx->ct_hdr &= ~SHA_DATA_LEN_MSK; + ctx->ct_hdr |= cpu_to_le32(len); + ct->cmd[0] &= ~SHA_DATA_LEN_MSK; + ct->cmd[0] |= cpu_to_le32(len); + + ctx->digcnt += len; + + ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { + dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); + return -EINVAL; + } + ctx->tfm_dma = ctx->ct_dma + sizeof(*ct); + + return 0; +} + +/* + * Because of hardware limitation, we must pre-calculate the inner + * and outer digest that need to be processed firstly by engine, then + * apply the result digest to the input message. These complex hashing + * procedures limits HMAC performance, so we use fallback SW encoding. 
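+ * Concretely, mtk_sha_init() seeds the hardware hash with the ipad block, and + * mtk_sha_finish_hmac() below computes the outer hash H(opad || inner digest) + * through a software shash fallback.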
+ */ +static int mtk_sha_finish_hmac(struct ahash_request *req) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(shash) ?: + crypto_shash_update(shash, bctx->opad, ctx->bs) ?: + crypto_shash_finup(shash, req->result, ctx->ds, req->result); +} + +/* Initialize request context */ +static int mtk_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags = 0; + ctx->ds = crypto_ahash_digestsize(tfm); + + switch (ctx->ds) { + case SHA1_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA1; + ctx->bs = SHA1_BLOCK_SIZE; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA224; + ctx->bs = SHA224_BLOCK_SIZE; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA256; + ctx->bs = SHA256_BLOCK_SIZE; + break; + case SHA384_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA384; + ctx->bs = SHA384_BLOCK_SIZE; + break; + case SHA512_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA512; + ctx->bs = SHA512_BLOCK_SIZE; + break; + default: + return -EINVAL; + } + + ctx->bufcnt = 0; + ctx->digcnt = 0; + ctx->buffer = tctx->buf; + ctx->start = true; + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, ctx->bs); + ctx->bufcnt = ctx->bs; + ctx->flags |= SHA_FLAGS_HMAC; + } + + return 0; +} + +static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, + dma_addr_t addr, size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len); + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len) | + MTK_DESC_CT_LEN(ctx->ct_size); + + cmd->buf = cpu_to_le32(addr); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. 
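+ * That is, the descriptor and command-token writes above must reach memory + * before the CDR/RDR_PREP_COUNT writes below hand the new descriptors to + * the engine.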
+ */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + + return -EINPROGRESS; +} + +static int mtk_sha_xmit2(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t len1, size_t len2) +{ + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len1 + len2); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST | + MTK_DESC_CT_LEN(ctx->ct_size); + cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + cmd = ring->cmd_base + ring->cmd_pos; + res = ring->res_base + ring->res_pos; + + res->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + cmd->buf = cpu_to_le32(ctx->dma_addr); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + + return -EINPROGRESS; +} + +static int mtk_sha_dma_map(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t count) +{ + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map error\n"); + return -EINVAL; + } + + ctx->flags &= ~SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); +} + +static int mtk_sha_update_slow(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + u32 final; + + mtk_sha_append_sg(ctx); + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt); + + if (final) { + sha->flags |= SHA_FLAGS_FINAL; + mtk_sha_fill_padding(ctx, 0); + } + + if (final || (ctx->bufcnt == SHA_BUF_SIZE && ctx->total)) { + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); + } + return 0; +} + +static int mtk_sha_update_start(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + u32 len, final, tail; + struct scatterlist *sg; + + if (!ctx->total) + return 0; + + if (ctx->bufcnt || ctx->offset) + return mtk_sha_update_slow(cryp, sha); + + sg = ctx->sg; + + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return mtk_sha_update_slow(cryp, sha); + + if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->bs)) + /* size is not ctx->bs aligned */ + return mtk_sha_update_slow(cryp, sha); + + len = min(ctx->total, sg->length); + + if (sg_is_last(sg)) { + if (!(ctx->flags & SHA_FLAGS_FINUP)) { + /* not last sg must be ctx->bs aligned */ + tail = len & (ctx->bs - 1); + len -= tail; + } + } + + ctx->total -= len; + ctx->offset 
= len; /* offset where to start slow */ + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + /* Add padding */ + if (final) { + size_t count; + + tail = len & (ctx->bs - 1); + len -= tail; + ctx->total += tail; + ctx->offset = len; /* offset where to start slow */ + + sg = ctx->sg; + mtk_sha_append_sg(ctx); + mtk_sha_fill_padding(ctx, len); + + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map bytes error\n"); + return -EINVAL; + } + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + if (len == 0) { + ctx->flags &= ~SHA_FLAGS_SG; + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); + + } else { + ctx->sg = sg; + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + return mtk_sha_xmit2(cryp, sha, ctx, len, count); + } + } + + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), len); +} + +static int mtk_sha_final_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + + mtk_sha_fill_padding(ctx, 0); + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); +} + +/* Copy ready hash (+ finalize hmac) */ +static int mtk_sha_finish(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + u32 *digest = ctx->info.tfm.digest; + u32 *result = (u32 *)req->result; + int i; + + /* Get the hash from the digest buffer */ + for (i = 0; i < SIZE_IN_WORDS(ctx->ds); i++) + result[i] = le32_to_cpu(digest[i]); + + if (ctx->flags & SHA_FLAGS_HMAC) + return mtk_sha_finish_hmac(req); + + return 0; +} + +static void mtk_sha_finish_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + int err) +{ + if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) + err = mtk_sha_finish(sha->req); + + sha->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL); + + sha->req->base.complete(&sha->req->base, err); + + /* Handle new request */ + mtk_sha_handle_queue(cryp, sha->id - RING2, NULL); +} + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req) +{ + struct mtk_sha_rec *sha = cryp->sha[id]; + struct crypto_async_request *async_req, *backlog; + struct mtk_sha_reqctx *ctx; + unsigned long flags; + int err = 0, ret = 0; + + spin_lock_irqsave(&sha->lock, flags); + if (req) + ret = ahash_enqueue_request(&sha->queue, req); + + if (SHA_FLAGS_BUSY & sha->flags) { + spin_unlock_irqrestore(&sha->lock, flags); + return ret; + } + + backlog = crypto_get_backlog(&sha->queue); + async_req = crypto_dequeue_request(&sha->queue); + if (async_req) + sha->flags |= SHA_FLAGS_BUSY; + spin_unlock_irqrestore(&sha->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + ctx = ahash_request_ctx(req); + + sha->req = req; + + mtk_sha_info_init(ctx); + + if (ctx->op == SHA_OP_UPDATE) { + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + /* No final() after finup() */ + err = mtk_sha_final_req(cryp, sha); + } else if (ctx->op == SHA_OP_FINAL) { + err = 
mtk_sha_final_req(cryp, sha); + } + + if (unlikely(err != -EINPROGRESS)) + /* Task will not finish it, so do it here */ + mtk_sha_finish_req(cryp, sha, err); + + return ret; +} + +static int mtk_sha_enqueue(struct ahash_request *req, u32 op) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + + ctx->op = op; + + return mtk_sha_handle_queue(tctx->cryp, tctx->id, req); +} + +static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), + DMA_BIDIRECTIONAL); + + if (ctx->flags & SHA_FLAGS_SG) { + dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + if (ctx->flags & SHA_FLAGS_PAD) { + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); + } + } else + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); +} + +static void mtk_sha_complete(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + int err = 0; + + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS) + mtk_sha_finish_req(cryp, sha, err); +} + +static int mtk_sha_update(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if ((ctx->bufcnt + ctx->total < SHA_BUF_SIZE) && + !(ctx->flags & SHA_FLAGS_FINUP)) + return mtk_sha_append_sg(ctx); + + return mtk_sha_enqueue(req, SHA_OP_UPDATE); +} + +static int mtk_sha_final(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags |= SHA_FLAGS_FINUP; + + if (ctx->flags & SHA_FLAGS_PAD) + return mtk_sha_finish(req); + + return mtk_sha_enqueue(req, SHA_OP_FINAL); +} + +static int mtk_sha_finup(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= SHA_FLAGS_FINUP; + + err1 = mtk_sha_update(req); + if (err1 == -EINPROGRESS || err1 == -EBUSY) + return err1; + /* + * final() has to be always called to cleanup resources + * even if update() failed + */ + err2 = mtk_sha_final(req); + + return err1 ?: err2; +} + +static int mtk_sha_digest(struct ahash_request *req) +{ + return mtk_sha_init(req) ?: mtk_sha_finup(req); +} + +static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + u32 keylen) +{ + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + size_t bs = crypto_shash_blocksize(bctx->shash); + size_t ds = crypto_shash_digestsize(bctx->shash); + int err, i; + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = crypto_shash_get_flags(bctx->shash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + err = crypto_shash_digest(shash, key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return 0; +} + +static int mtk_sha_export(struct ahash_request *req, void *out) +{ + const struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(out, ctx, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_import(struct ahash_request *req, const void *in) +{ + struct mtk_sha_reqctx *ctx = 
ahash_request_ctx(req); + + memcpy(ctx, in, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_cra_init_alg(struct crypto_tfm *tfm, + const char *alg_base) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_sha_find_dev(tctx); + if (!cryp) + return -ENODEV; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mtk_sha_reqctx)); + + if (alg_base) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + tctx->flags |= SHA_FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("base driver %s could not be loaded.\n", + alg_base); + + return PTR_ERR(bctx->shash); + } + } + return 0; +} + +static int mtk_sha_cra_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, NULL); +} + +static int mtk_sha_cra_sha1_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha1"); +} + +static int mtk_sha_cra_sha224_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha224"); +} + +static int mtk_sha_cra_sha256_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha256"); +} + +static int mtk_sha_cra_sha384_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha384"); +} + +static int mtk_sha_cra_sha512_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha512"); +} + +static void mtk_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs_sha1_sha224_sha256[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "mtk-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "mtk-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "mtk-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = 
mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mtk-hmac-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha1_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "mtk-hmac-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha224_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mtk-hmac-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha256_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static struct ahash_alg algs_sha384_sha512[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "mtk-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "mtk-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = 
mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "mtk-hmac-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha384_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "mtk-hmac-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha512_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static void mtk_sha_task0(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[0]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static void mtk_sha_task1(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[1]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static irqreturn_t mtk_sha_ring2_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[0]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING2)); + + mtk_sha_write(cryp, RDR_STAT(RING2), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING2), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING2), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "SHA interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_sha_ring3_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[1]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING3)); + + mtk_sha_write(cryp, RDR_STAT(RING3), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING3), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING3), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "SHA interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * Two SHA records are used to get extra performance. + * It is similar to mtk_aes_record_init().
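+ * Record 0 is driven by ring 2 and record 1 by ring 3 (id = i + RING2), each + * with its own IRQ and tasklet, so two hash requests can be processed in + * parallel.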
+ */ +static int mtk_sha_record_init(struct mtk_cryp *cryp) +{ + struct mtk_sha_rec **sha = cryp->sha; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + sha[i] = kzalloc(sizeof(**sha), GFP_KERNEL); + if (!sha[i]) + goto err_cleanup; + + sha[i]->id = i + RING2; + + spin_lock_init(&sha[i]->lock); + crypto_init_queue(&sha[i]->queue, SHA_QUEUE_SIZE); + } + + tasklet_init(&sha[0]->task, mtk_sha_task0, (unsigned long)cryp); + tasklet_init(&sha[1]->task, mtk_sha_task1, (unsigned long)cryp); + + cryp->rec = 1; + + return 0; + +err_cleanup: + for (; i--; ) + kfree(sha[i]); + return err; +} + +static void mtk_sha_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->sha[i]->task); + kfree(cryp->sha[i]); + } +} + +static void mtk_sha_unregister_algs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) + crypto_unregister_ahash(&algs_sha384_sha512[i]); +} + +static int mtk_sha_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) { + err = crypto_register_ahash(&algs_sha1_sha224_sha256[i]); + if (err) + goto err_sha_224_256_algs; + } + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) { + err = crypto_register_ahash(&algs_sha384_sha512[i]); + if (err) + goto err_sha_384_512_algs; + } + + return 0; + +err_sha_384_512_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha384_sha512[i]); + i = ARRAY_SIZE(algs_sha1_sha224_sha256); +err_sha_224_256_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + return err; +} + +int mtk_hash_alg_register(struct mtk_cryp *cryp) +{ + int err; + + INIT_LIST_HEAD(&cryp->sha_list); + + /* Initialize two hash records */ + err = mtk_sha_record_init(cryp); + if (err) + goto err_record; + + /* Ring2 is use by SHA record0 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING2], + mtk_sha_ring2_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq0.\n"); + goto err_res; + } + + /* Ring3 is use by SHA record1 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING3], + mtk_sha_ring3_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq1.\n"); + goto err_res; + } + + /* Enable ring2 and ring3 interrupt for hash */ + mtk_sha_write(cryp, AIC_ENABLE_SET(RING2), MTK_IRQ_RDR2); + mtk_sha_write(cryp, AIC_ENABLE_SET(RING3), MTK_IRQ_RDR3); + + cryp->tmp = dma_alloc_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + &cryp->tmp_dma, GFP_KERNEL); + if (!cryp->tmp) { + dev_err(cryp->dev, "unable to allocate tmp buffer.\n"); + err = -EINVAL; + goto err_res; + } + + spin_lock(&mtk_sha.lock); + list_add_tail(&cryp->sha_list, &mtk_sha.dev_list); + spin_unlock(&mtk_sha.lock); + + err = mtk_sha_register_algs(); + if (err) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); +err_res: + mtk_sha_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-sha initialization failed.\n"); + return err; +} + +void mtk_hash_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + + mtk_sha_unregister_algs(); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); + 
mtk_sha_record_free(cryp); +} diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 104e9ce9400a..451fa18c1c7b 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1073,7 +1073,7 @@ static int mv_probe(struct platform_device *pdev) if (!res) return -ENXIO; - cp = kzalloc(sizeof(*cp), GFP_KERNEL); + cp = devm_kzalloc(&pdev->dev, sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -1163,7 +1163,6 @@ err_irq: err_thread: kthread_stop(cp->queue_th); err: - kfree(cp); cpg = NULL; return ret; } @@ -1187,7 +1186,6 @@ static int mv_remove(struct platform_device *pdev) clk_put(cp->clk); } - kfree(cp); cpg = NULL; return 0; } diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 42f0f229f7f7..036057abb257 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -32,7 +32,6 @@ #include <linux/scatterlist.h> #include <linux/device.h> #include <linux/of.h> -#include <linux/types.h> #include <asm/hvcall.h> #include <asm/vio.h> diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 441e86b23571..b3869748cc6b 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -183,8 +183,8 @@ static inline void padlock_store_cword(struct cword *cword) /* * While the padlock instructions don't use FP/SSE registers, they - * generate a spurious DNA fault when cr0.ts is '1'. These instructions - * should be used only inside the irq_ts_save/restore() context + * generate a spurious DNA fault when CR0.TS is '1'. Fortunately, + * the kernel doesn't use CR0.TS. */ static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, @@ -298,24 +298,18 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); - int ts_state; padlock_reset_key(&ctx->cword.encrypt); - ts_state = irq_ts_save(); ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); - int ts_state; padlock_reset_key(&ctx->cword.encrypt); - ts_state = irq_ts_save(); ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); } @@ -346,14 +340,12 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, &ctx->cword.encrypt, @@ -361,7 +353,6 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); @@ -375,14 +366,12 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.decrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, &ctx->cword.decrypt, @@ -390,7 +379,6 @@ static int 
ecb_aes_decrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); @@ -425,14 +413,12 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, @@ -442,7 +428,6 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.decrypt); @@ -456,14 +441,12 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, walk.iv, &ctx->cword.decrypt, @@ -472,8 +455,6 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); - padlock_store_cword(&ctx->cword.encrypt); return err; diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 8c5f90647b7a..bc72d20c32c3 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -89,7 +89,6 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, struct sha1_state state; unsigned int space; unsigned int leftover; - int ts_state; int err; dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -120,14 +119,11 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, memcpy(result, &state.state, SHA1_DIGEST_SIZE); - /* prevent taking the spurious DNA fault with padlock. */ - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ : \ : "c"((unsigned long)state.count + count), \ "a"((unsigned long)state.count), \ "S"(in), "D"(result)); - irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); @@ -155,7 +151,6 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, struct sha256_state state; unsigned int space; unsigned int leftover; - int ts_state; int err; dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -186,14 +181,11 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, memcpy(result, &state.state, SHA256_DIGEST_SIZE); - /* prevent taking the spurious DNA fault with padlock. 
*/ - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ : \ : "c"((unsigned long)state.count + count), \ "a"((unsigned long)state.count), \ "S"(in), "D"(result)); - irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); @@ -312,7 +304,6 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ ((aligned(STACK_ALIGN))); u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); - int ts_state; partial = sctx->count & 0x3f; sctx->count += len; @@ -328,23 +319,19 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, memcpy(sctx->buffer + partial, data, done + SHA1_BLOCK_SIZE); src = sctx->buffer; - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" : "+S"(src), "+D"(dst) \ : "a"((long)-1), "c"((unsigned long)1)); - irq_ts_restore(ts_state); done += SHA1_BLOCK_SIZE; src = data + done; } /* Process the left bytes from the input data */ if (len - done >= SHA1_BLOCK_SIZE) { - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); - irq_ts_restore(ts_state); done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); src = data + done; } @@ -401,7 +388,6 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ ((aligned(STACK_ALIGN))); u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); - int ts_state; partial = sctx->count & 0x3f; sctx->count += len; @@ -417,23 +403,19 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, memcpy(sctx->buf + partial, data, done + SHA256_BLOCK_SIZE); src = sctx->buf; - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)1)); - irq_ts_restore(ts_state); done += SHA256_BLOCK_SIZE; src = data + done; } /* Process the left bytes from input data*/ if (len - done >= SHA256_BLOCK_SIZE) { - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)((len - done) / 64))); - irq_ts_restore(ts_state); done += ((len - done) - (len - done) % 64); src = data + done; } diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 47576098831f..b6f14844702e 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,32 +1616,17 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ -static bool spacc_is_compatible(struct platform_device *pdev, - const char *spacc_type) -{ - const struct platform_device_id *platid = platform_get_device_id(pdev); - - if (platid && !strcmp(platid->name, spacc_type)) - return true; - -#ifdef CONFIG_OF - if (of_device_is_compatible(pdev->dev.of_node, spacc_type)) - return true; -#endif /* CONFIG_OF */ - - return false; -} - static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; struct resource *mem, *irq; + struct device_node *np = pdev->dev.of_node; struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; - if (spacc_is_compatible(pdev, "picochip,spacc-ipsec")) { + if (of_device_is_compatible(np, "picochip,spacc-ipsec")) { engine->max_ctxs = SPACC_CRYPTO_IPSEC_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ; 
engine->hash_pg_sz = SPACC_CRYPTO_IPSEC_HASH_PG_SZ; @@ -1650,7 +1635,7 @@ static int spacc_probe(struct platform_device *pdev) engine->num_algs = ARRAY_SIZE(ipsec_engine_algs); engine->aeads = ipsec_engine_aeads; engine->num_aeads = ARRAY_SIZE(ipsec_engine_aeads); - } else if (spacc_is_compatible(pdev, "picochip,spacc-l2")) { + } else if (of_device_is_compatible(np, "picochip,spacc-l2")) { engine->max_ctxs = SPACC_CRYPTO_L2_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_L2_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_L2_HASH_PG_SZ; @@ -1803,12 +1788,6 @@ static int spacc_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id spacc_id_table[] = { - { "picochip,spacc-ipsec", }, - { "picochip,spacc-l2", }, - { } -}; - static struct platform_driver spacc_driver = { .probe = spacc_probe, .remove = spacc_remove, @@ -1819,7 +1798,6 @@ static struct platform_driver spacc_driver = { #endif /* CONFIG_PM */ .of_match_table = of_match_ptr(spacc_of_id_table), }, - .id_table = spacc_id_table, }; module_platform_driver(spacc_driver); diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 640c3fc870fd..f172171668ee 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 949d77b79fbe..24ec908eb26c 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index bc5cbc193aae..58a984c9c3ec 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -233,7 +233,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) &hw_data->accel_capabilities_mask); /* Find and map all the device's BARS */ - i = 0; + i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 
1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, ADF_PCI_MAX_BARS * 2) { diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 7540ce13b0d0..b9f3e0e4fde9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index e8822536530b..33f0a6251e38 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -69,6 +69,7 @@ #define ADF_ERRSOU5 (0x3A000 + 0xD8) #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C +#define ADF_DEVICE_FUSECTL_MASK 0x80000000 #define ADF_PCI_MAX_BARS 3 #define ADF_DEVICE_NAME_LENGTH 32 #define ADF_ETR_MAX_RINGS_PER_BANK 16 diff --git a/drivers/crypto/qat/qat_common/adf_cfg_common.h b/drivers/crypto/qat/qat_common/adf_cfg_common.h index 8c4f6573ce59..1211261de7c2 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_common.h @@ -61,6 +61,7 @@ #define ADF_CFG_AFFINITY_WHATEVER 0xFF #define MAX_DEVICE_NAME_SIZE 32 #define ADF_MAX_DEVICES (32 * 32) +#define ADF_DEVS_ARRAY_SIZE BITS_TO_LONGS(ADF_MAX_DEVICES) enum adf_cfg_val_type { ADF_DEC, diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 980e07475012..5c4c0a253129 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -87,8 +87,8 @@ enum adf_event { struct service_hndl { int (*event_hld)(struct adf_accel_dev *accel_dev, enum adf_event event); - unsigned long init_status; - unsigned long start_status; + unsigned long init_status[ADF_DEVS_ARRAY_SIZE]; + unsigned long start_status[ADF_DEVS_ARRAY_SIZE]; char *name; struct list_head list; }; diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index b3ebb25f9ca7..8afac52677a6 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -152,7 +152,7 @@ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) ptr->hw_device->instance_id = i++; if (i == class->instances) - break; + break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 888c6675e7e5..26556c713049 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -64,8 +64,8 @@ static void adf_service_add(struct service_hndl *service) int adf_service_register(struct service_hndl *service) { - service->init_status = 0; - service->start_status = 0; + memset(service->init_status, 0, sizeof(service->init_status)); + memset(service->start_status, 0, sizeof(service->start_status)); adf_service_add(service); return 0; } @@ -79,9 +79,13 @@ static void adf_service_remove(struct service_hndl *service) int adf_service_unregister(struct 
service_hndl *service) { - if (service->init_status || service->start_status) { - pr_err("QAT: Could not remove active service\n"); - return -EFAULT; + int i; + + for (i = 0; i < ARRAY_SIZE(service->init_status); i++) { + if (service->init_status[i] || service->start_status[i]) { + pr_err("QAT: Could not remove active service\n"); + return -EFAULT; + } } adf_service_remove(service); return 0; @@ -163,7 +167,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->init_status); + set_bit(accel_dev->accel_id, service->init_status); } hw_data->enable_error_correction(accel_dev); @@ -210,7 +214,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->start_status); + set_bit(accel_dev->accel_id, service->start_status); } clear_bit(ADF_STATUS_STARTING, &accel_dev->status); @@ -259,14 +263,14 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->start_status)) + if (!test_bit(accel_dev->accel_id, service->start_status)) continue; ret = service->event_hld(accel_dev, ADF_EVENT_STOP); if (!ret) { - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } else if (ret == -EAGAIN) { wait = true; - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } } @@ -317,14 +321,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->init_status)) + if (!test_bit(accel_dev->accel_id, service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) dev_err(&GET_DEV(accel_dev), "Failed to shutdown service %s\n", service->name); else - clear_bit(accel_dev->accel_id, &service->init_status); + clear_bit(accel_dev->accel_id, service->init_status); } hw_data->disable_iov(accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 9320ae1d005b..b36d8653b1ba 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -162,9 +162,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /** * adf_disable_sriov() - Disable SRIOV for the device - * @pdev: Pointer to pci device. + * @accel_dev: Pointer to accel device. * - * Function disables SRIOV for the pci device. + * Function disables SRIOV for the accel device. * * Return: 0 on success, error code otherwise. 
*/ diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index bf99e11a3403..4a73fc70f7a9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -148,7 +148,7 @@ static void adf_pf2vf_bh_handler(void *data) INIT_WORK(&stop_data->work, adf_dev_stop_async); queue_work(adf_vf_stop_wq, &stop_data->work); /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); return; } @@ -168,7 +168,7 @@ static void adf_pf2vf_bh_handler(void *data) } /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); /* Re-enable PF2VF interrupts */ diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 1e480f140663..8c4fd255a601 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -456,7 +456,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) unsigned int csr_val; int times = 30; - if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID) + if (handle->pci_dev->device != ADF_DH895XCC_PCI_DEVICE_ID) return 0; csr_val = ADF_CSR_RD(csr_addr, 0); @@ -716,7 +716,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + LOCAL_TO_XFER_REG_OFFSET); handle->pci_dev = pci_info->pci_dev; - if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) { + if (handle->pci_dev->device == ADF_DH895XCC_PCI_DEVICE_ID) { sram_bar = &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)]; handle->hal_sram_addr_v = sram_bar->virt_addr; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 4d2de2838451..2ce01f010c74 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 60df98632fa2..26ab17bfc6da 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85c..1b9da3dc799b 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg, scatterwalk_done(&walk, out, 0); } -static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +static void s5p_sg_done(struct s5p_aes_dev *dev) { if (dev->sg_dst_cpy) { dev_dbg(dev->dev, @@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) } s5p_free_sg_cpy(dev, 
&dev->sg_src_cpy); s5p_free_sg_cpy(dev, &dev->sg_dst_cpy); +} - /* holding a lock outside */ +/* Calls the completion. Cannot be called with dev->lock hold. */ +static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +{ dev->req->base.complete(&dev->req->base, err); dev->busy = false; } @@ -368,51 +371,44 @@ exit: } /* - * Returns true if new transmitting (output) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_outdata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new transmitting (output) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_outdata()). */ -static bool s5p_aes_tx(struct s5p_aes_dev *dev) +static int s5p_aes_tx(struct s5p_aes_dev *dev) { - int err = 0; - bool ret = false; + int ret = 0; s5p_unset_outdata(dev); if (!sg_is_last(dev->sg_dst)) { - err = s5p_set_outdata(dev, sg_next(dev->sg_dst)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; - } else { - s5p_aes_complete(dev, err); - - dev->busy = true; - tasklet_schedule(&dev->tasklet); + ret = s5p_set_outdata(dev, sg_next(dev->sg_dst)); + if (!ret) + ret = 1; } return ret; } /* - * Returns true if new receiving (input) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_indata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new receiving (input) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_indata()). */ -static bool s5p_aes_rx(struct s5p_aes_dev *dev) +static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/) { - int err; - bool ret = false; + int ret = 0; s5p_unset_indata(dev); if (!sg_is_last(dev->sg_src)) { - err = s5p_set_indata(dev, sg_next(dev->sg_src)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; + ret = s5p_set_indata(dev, sg_next(dev->sg_src)); + if (!ret) + ret = 1; } return ret; @@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - bool set_dma_tx = false; - bool set_dma_rx = false; + int err_dma_tx = 0; + int err_dma_rx = 0; + bool tx_end = false; unsigned long flags; uint32_t status; + int err; spin_lock_irqsave(&dev->lock, flags); + /* + * Handle rx or tx interrupt. If there is still data (scatterlist did not + * reach end), then map next scatterlist entry. + * In case of such mapping error, s5p_aes_complete() should be called. + * + * If there is no more data in tx scatter list, call s5p_aes_complete() + * and schedule new tasklet. + */ status = SSS_READ(dev, FCINTSTAT); if (status & SSS_FCINTSTAT_BRDMAINT) - set_dma_rx = s5p_aes_rx(dev); - if (status & SSS_FCINTSTAT_BTDMAINT) - set_dma_tx = s5p_aes_tx(dev); + err_dma_rx = s5p_aes_rx(dev); + + if (status & SSS_FCINTSTAT_BTDMAINT) { + if (sg_is_last(dev->sg_dst)) + tx_end = true; + err_dma_tx = s5p_aes_tx(dev); + } SSS_WRITE(dev, FCINTPEND, status); - /* - * Writing length of DMA block (either receiving or transmitting) - * will start the operation immediately, so this should be done - * at the end (even after clearing pending interrupts to not miss the - * interrupt). 
- */ - if (set_dma_tx) - s5p_set_dma_outdata(dev, dev->sg_dst); - if (set_dma_rx) - s5p_set_dma_indata(dev, dev->sg_src); + if (err_dma_rx < 0) { + err = err_dma_rx; + goto error; + } + if (err_dma_tx < 0) { + err = err_dma_tx; + goto error; + } + + if (tx_end) { + s5p_sg_done(dev); + + spin_unlock_irqrestore(&dev->lock, flags); + + s5p_aes_complete(dev, 0); + dev->busy = true; + tasklet_schedule(&dev->tasklet); + } else { + /* + * Writing length of DMA block (either receiving or + * transmitting) will start the operation immediately, so this + * should be done at the end (even after clearing pending + * interrupts to not miss the interrupt). + */ + if (err_dma_tx == 1) + s5p_set_dma_outdata(dev, dev->sg_dst); + if (err_dma_rx == 1) + s5p_set_dma_indata(dev, dev->sg_src); + spin_unlock_irqrestore(&dev->lock, flags); + } + + return IRQ_HANDLED; + +error: + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); return IRQ_HANDLED; } @@ -597,8 +633,9 @@ outdata_error: s5p_unset_indata(dev); indata_error: - s5p_aes_complete(dev, err); + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); } static void s5p_tasklet_cb(unsigned long data) @@ -805,8 +842,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 0c49956ee0ce..1d9ecd368b5b 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -390,7 +390,7 @@ static void sahara_decode_status(struct sahara_dev *dev, unsigned int status) if (status & SAHARA_STATUS_MODE_BATCH) dev_dbg(dev->device, " - Batch Mode.\n"); else if (status & SAHARA_STATUS_MODE_DEDICATED) - dev_dbg(dev->device, " - Decidated Mode.\n"); + dev_dbg(dev->device, " - Dedicated Mode.\n"); else if (status & SAHARA_STATUS_MODE_DEBUG) dev_dbg(dev->device, " - Debug Mode.\n"); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 0418a2f41dc0..0bba6a19d36a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -590,7 +590,7 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) if (v_lo & TALITOS_CCPSR_LO_MDTE) dev_err(dev, "master data transfer error\n"); if (v_lo & TALITOS_CCPSR_LO_SGDLZ) - dev_err(dev, is_sec1 ? "pointeur not complete error\n" + dev_err(dev, is_sec1 ? "pointer not complete error\n" : "s/g data length zero error\n"); if (v_lo & TALITOS_CCPSR_LO_FPZ) dev_err(dev, is_sec1 ? "parity error\n" diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index 43a0c8a26ab0..00a16ab601cb 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data, void cryp_flush_inoutfifo(struct cryp_device_data *device_data) { /* - * We always need to disble the hardware before trying to flush the + * We always need to disable the hardware before trying to flush the * FIFO. This is something that isn't written in the design * specification, but we have been informed by the hardware designers * that this must be done. 
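
A note on the QAT hunks above (adf_cfg_common.h, adf_common_drv.h, adf_init.c): each service's init_status/start_status changes from a single unsigned long into a bitmap array sized by ADF_DEVS_ARRAY_SIZE = BITS_TO_LONGS(ADF_MAX_DEVICES), because accel_id can range up to ADF_MAX_DEVICES (32 * 32 = 1024) and no longer fits in the bits of one word. Dropping the & in the set_bit()/test_bit() calls follows directly: the field is now an array, which already decays to a pointer to its first long. The following is a minimal sketch of the resulting pattern, not code from the patch; example_service and example_mark_inited are hypothetical names used purely for illustration.

    /*
     * Sketch only: shows why the per-service status word becomes a bitmap.
     * Constants mirror the values visible in the diff above; the struct and
     * function names here are invented for the example.
     */
    #include <linux/bitops.h>
    #include <linux/types.h>

    #define EX_ADF_MAX_DEVICES     (32 * 32)                     /* 1024 device ids */
    #define EX_ADF_DEVS_ARRAY_SIZE BITS_TO_LONGS(EX_ADF_MAX_DEVICES)

    struct example_service {
    	/* one bit per accel device, not a single word */
    	unsigned long init_status[EX_ADF_DEVS_ARRAY_SIZE];
    };

    static void example_mark_inited(struct example_service *svc, u32 accel_id)
    {
    	/*
    	 * Pass the array itself. With the old single-word field this was
    	 * set_bit(accel_id, &svc->init_status), which only works while
    	 * accel_id < BITS_PER_LONG.
    	 */
    	set_bit(accel_id, svc->init_status);
    }
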
diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig new file mode 100644 index 000000000000..5db07495ddc5 --- /dev/null +++ b/drivers/crypto/virtio/Kconfig @@ -0,0 +1,11 @@ +config CRYPTO_DEV_VIRTIO + tristate "VirtIO crypto driver" + depends on VIRTIO + select CRYPTO_AEAD + select CRYPTO_AUTHENC + select CRYPTO_BLKCIPHER + select CRYPTO_ENGINE + default m + help + This driver provides support for virtio crypto device. If you + choose 'M' here, this module will be called virtio_crypto. diff --git a/drivers/crypto/virtio/Makefile b/drivers/crypto/virtio/Makefile new file mode 100644 index 000000000000..dd342c947ff9 --- /dev/null +++ b/drivers/crypto/virtio/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio_crypto.o +virtio_crypto-objs := \ + virtio_crypto_algs.o \ + virtio_crypto_mgr.o \ + virtio_crypto_core.o diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c new file mode 100644 index 000000000000..49defda4e03d --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -0,0 +1,558 @@ + /* Algorithms supported by virtio crypto device + * + * Authors: Gonglei <arei.gonglei@huawei.com> + * + * Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/scatterlist.h> +#include <crypto/algapi.h> +#include <linux/err.h> +#include <crypto/scatterwalk.h> +#include <linux/atomic.h> + +#include <uapi/linux/virtio_crypto.h> +#include "virtio_crypto_common.h" + +/* + * The algs_lock protects the below global virtio_crypto_active_devs + * and crypto algorithms registion. + */ +static DEFINE_MUTEX(algs_lock); +static unsigned int virtio_crypto_active_devs; + +static u64 virtio_crypto_alg_sg_nents_length(struct scatterlist *sg) +{ + u64 total = 0; + + for (total = 0; sg; sg = sg_next(sg)) + total += sg->length; + + return total; +} + +static int +virtio_crypto_alg_validate_key(int key_len, uint32_t *alg) +{ + switch (key_len) { + case AES_KEYSIZE_128: + case AES_KEYSIZE_192: + case AES_KEYSIZE_256: + *alg = VIRTIO_CRYPTO_CIPHER_AES_CBC; + break; + default: + pr_err("virtio_crypto: Unsupported key length: %d\n", + key_len); + return -EINVAL; + } + return 0; +} + +static int virtio_crypto_alg_ablkcipher_init_session( + struct virtio_crypto_ablkcipher_ctx *ctx, + uint32_t alg, const uint8_t *key, + unsigned int keylen, + int encrypt) +{ + struct scatterlist outhdr, key_sg, inhdr, *sgs[3]; + unsigned int tmp; + struct virtio_crypto *vcrypto = ctx->vcrypto; + int op = encrypt ? 
VIRTIO_CRYPTO_OP_ENCRYPT : VIRTIO_CRYPTO_OP_DECRYPT; + int err; + unsigned int num_out = 0, num_in = 0; + + /* + * Avoid to do DMA from the stack, switch to using + * dynamically-allocated for the key + */ + uint8_t *cipher_key = kmalloc(keylen, GFP_ATOMIC); + + if (!cipher_key) + return -ENOMEM; + + memcpy(cipher_key, key, keylen); + + spin_lock(&vcrypto->ctrl_lock); + /* Pad ctrl header */ + vcrypto->ctrl.header.opcode = + cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); + vcrypto->ctrl.header.algo = cpu_to_le32(alg); + /* Set the default dataqueue id to 0 */ + vcrypto->ctrl.header.queue_id = 0; + + vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + /* Pad cipher's parameters */ + vcrypto->ctrl.u.sym_create_session.op_type = + cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); + vcrypto->ctrl.u.sym_create_session.u.cipher.para.algo = + vcrypto->ctrl.header.algo; + vcrypto->ctrl.u.sym_create_session.u.cipher.para.keylen = + cpu_to_le32(keylen); + vcrypto->ctrl.u.sym_create_session.u.cipher.para.op = + cpu_to_le32(op); + + sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sgs[num_out++] = &outhdr; + + /* Set key */ + sg_init_one(&key_sg, cipher_key, keylen); + sgs[num_out++] = &key_sg; + + /* Return status and session id back */ + sg_init_one(&inhdr, &vcrypto->input, sizeof(vcrypto->input)); + sgs[num_out + num_in++] = &inhdr; + + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, + num_in, vcrypto, GFP_ATOMIC); + if (err < 0) { + spin_unlock(&vcrypto->ctrl_lock); + kzfree(cipher_key); + return err; + } + virtqueue_kick(vcrypto->ctrl_vq); + + /* + * Trapping into the hypervisor, so the request should be + * handled immediately. + */ + while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && + !virtqueue_is_broken(vcrypto->ctrl_vq)) + cpu_relax(); + + if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { + spin_unlock(&vcrypto->ctrl_lock); + pr_err("virtio_crypto: Create session failed status: %u\n", + le32_to_cpu(vcrypto->input.status)); + kzfree(cipher_key); + return -EINVAL; + } + + if (encrypt) + ctx->enc_sess_info.session_id = + le64_to_cpu(vcrypto->input.session_id); + else + ctx->dec_sess_info.session_id = + le64_to_cpu(vcrypto->input.session_id); + + spin_unlock(&vcrypto->ctrl_lock); + + kzfree(cipher_key); + return 0; +} + +static int virtio_crypto_alg_ablkcipher_close_session( + struct virtio_crypto_ablkcipher_ctx *ctx, + int encrypt) +{ + struct scatterlist outhdr, status_sg, *sgs[2]; + unsigned int tmp; + struct virtio_crypto_destroy_session_req *destroy_session; + struct virtio_crypto *vcrypto = ctx->vcrypto; + int err; + unsigned int num_out = 0, num_in = 0; + + spin_lock(&vcrypto->ctrl_lock); + vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; + /* Pad ctrl header */ + vcrypto->ctrl.header.opcode = + cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); + /* Set the default virtqueue id to 0 */ + vcrypto->ctrl.header.queue_id = 0; + + destroy_session = &vcrypto->ctrl.u.destroy_session; + + if (encrypt) + destroy_session->session_id = + cpu_to_le64(ctx->enc_sess_info.session_id); + else + destroy_session->session_id = + cpu_to_le64(ctx->dec_sess_info.session_id); + + sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sgs[num_out++] = &outhdr; + + /* Return status and session id back */ + sg_init_one(&status_sg, &vcrypto->ctrl_status.status, + sizeof(vcrypto->ctrl_status.status)); + sgs[num_out + num_in++] = &status_sg; + + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, + num_in, vcrypto, GFP_ATOMIC); + if (err < 0) { + 
spin_unlock(&vcrypto->ctrl_lock); + return err; + } + virtqueue_kick(vcrypto->ctrl_vq); + + while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && + !virtqueue_is_broken(vcrypto->ctrl_vq)) + cpu_relax(); + + if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { + spin_unlock(&vcrypto->ctrl_lock); + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", + vcrypto->ctrl_status.status, + destroy_session->session_id); + + return -EINVAL; + } + spin_unlock(&vcrypto->ctrl_lock); + + return 0; +} + +static int virtio_crypto_alg_ablkcipher_init_sessions( + struct virtio_crypto_ablkcipher_ctx *ctx, + const uint8_t *key, unsigned int keylen) +{ + uint32_t alg; + int ret; + struct virtio_crypto *vcrypto = ctx->vcrypto; + + if (keylen > vcrypto->max_cipher_key_len) { + pr_err("virtio_crypto: the key is too long\n"); + goto bad_key; + } + + if (virtio_crypto_alg_validate_key(keylen, &alg)) + goto bad_key; + + /* Create encryption session */ + ret = virtio_crypto_alg_ablkcipher_init_session(ctx, + alg, key, keylen, 1); + if (ret) + return ret; + /* Create decryption session */ + ret = virtio_crypto_alg_ablkcipher_init_session(ctx, + alg, key, keylen, 0); + if (ret) { + virtio_crypto_alg_ablkcipher_close_session(ctx, 1); + return ret; + } + return 0; + +bad_key: + crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +/* Note: kernel crypto API realization */ +static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, + const uint8_t *key, + unsigned int keylen) +{ + struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); + int ret; + + if (!ctx->vcrypto) { + /* New key */ + int node = virtio_crypto_get_current_node(); + struct virtio_crypto *vcrypto = + virtcrypto_get_dev_node(node); + if (!vcrypto) { + pr_err("virtio_crypto: Could not find a virtio device in the system"); + return -ENODEV; + } + + ctx->vcrypto = vcrypto; + } else { + /* Rekeying, we should close the created sessions previously */ + virtio_crypto_alg_ablkcipher_close_session(ctx, 1); + virtio_crypto_alg_ablkcipher_close_session(ctx, 0); + } + + ret = virtio_crypto_alg_ablkcipher_init_sessions(ctx, key, keylen); + if (ret) { + virtcrypto_dev_put(ctx->vcrypto); + ctx->vcrypto = NULL; + + return ret; + } + + return 0; +} + +static int +__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + struct data_queue *data_vq) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); + struct virtio_crypto_ablkcipher_ctx *ctx = vc_req->ablkcipher_ctx; + struct virtio_crypto *vcrypto = ctx->vcrypto; + struct virtio_crypto_op_data_req *req_data; + int src_nents, dst_nents; + int err; + unsigned long flags; + struct scatterlist outhdr, iv_sg, status_sg, **sgs; + int i; + u64 dst_len; + unsigned int num_out = 0, num_in = 0; + int sg_total; + uint8_t *iv; + + src_nents = sg_nents_for_len(req->src, req->nbytes); + dst_nents = sg_nents(req->dst); + + pr_debug("virtio_crypto: Number of sgs (src_nents: %d, dst_nents: %d)\n", + src_nents, dst_nents); + + /* Why 3? 
outhdr + iv + inhdr */ + sg_total = src_nents + dst_nents + 3; + sgs = kzalloc_node(sg_total * sizeof(*sgs), GFP_ATOMIC, + dev_to_node(&vcrypto->vdev->dev)); + if (!sgs) + return -ENOMEM; + + req_data = kzalloc_node(sizeof(*req_data), GFP_ATOMIC, + dev_to_node(&vcrypto->vdev->dev)); + if (!req_data) { + kfree(sgs); + return -ENOMEM; + } + + vc_req->req_data = req_data; + vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER; + /* Head of operation */ + if (vc_req->encrypt) { + req_data->header.session_id = + cpu_to_le64(ctx->enc_sess_info.session_id); + req_data->header.opcode = + cpu_to_le32(VIRTIO_CRYPTO_CIPHER_ENCRYPT); + } else { + req_data->header.session_id = + cpu_to_le64(ctx->dec_sess_info.session_id); + req_data->header.opcode = + cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DECRYPT); + } + req_data->u.sym_req.op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); + req_data->u.sym_req.u.cipher.para.iv_len = cpu_to_le32(ivsize); + req_data->u.sym_req.u.cipher.para.src_data_len = + cpu_to_le32(req->nbytes); + + dst_len = virtio_crypto_alg_sg_nents_length(req->dst); + if (unlikely(dst_len > U32_MAX)) { + pr_err("virtio_crypto: The dst_len is beyond U32_MAX\n"); + err = -EINVAL; + goto free; + } + + pr_debug("virtio_crypto: src_len: %u, dst_len: %llu\n", + req->nbytes, dst_len); + + if (unlikely(req->nbytes + dst_len + ivsize + + sizeof(vc_req->status) > vcrypto->max_size)) { + pr_err("virtio_crypto: The length is too big\n"); + err = -EINVAL; + goto free; + } + + req_data->u.sym_req.u.cipher.para.dst_data_len = + cpu_to_le32((uint32_t)dst_len); + + /* Outhdr */ + sg_init_one(&outhdr, req_data, sizeof(*req_data)); + sgs[num_out++] = &outhdr; + + /* IV */ + + /* + * Avoid to do DMA from the stack, switch to using + * dynamically-allocated for the IV + */ + iv = kzalloc_node(ivsize, GFP_ATOMIC, + dev_to_node(&vcrypto->vdev->dev)); + if (!iv) { + err = -ENOMEM; + goto free; + } + memcpy(iv, req->info, ivsize); + sg_init_one(&iv_sg, iv, ivsize); + sgs[num_out++] = &iv_sg; + vc_req->iv = iv; + + /* Source data */ + for (i = 0; i < src_nents; i++) + sgs[num_out++] = &req->src[i]; + + /* Destination data */ + for (i = 0; i < dst_nents; i++) + sgs[num_out + num_in++] = &req->dst[i]; + + /* Status */ + sg_init_one(&status_sg, &vc_req->status, sizeof(vc_req->status)); + sgs[num_out + num_in++] = &status_sg; + + vc_req->sgs = sgs; + + spin_lock_irqsave(&data_vq->lock, flags); + err = virtqueue_add_sgs(data_vq->vq, sgs, num_out, + num_in, vc_req, GFP_ATOMIC); + virtqueue_kick(data_vq->vq); + spin_unlock_irqrestore(&data_vq->lock, flags); + if (unlikely(err < 0)) + goto free_iv; + + return 0; + +free_iv: + kzfree(iv); +free: + kzfree(req_data); + kfree(sgs); + return err; +} + +static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req); + struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); + struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct virtio_crypto *vcrypto = ctx->vcrypto; + /* Use the first data virtqueue as default */ + struct data_queue *data_vq = &vcrypto->data_vq[0]; + + vc_req->ablkcipher_ctx = ctx; + vc_req->ablkcipher_req = req; + vc_req->encrypt = true; + vc_req->dataq = data_vq; + + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); +} + +static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req); + struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); + 
struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct virtio_crypto *vcrypto = ctx->vcrypto; + /* Use the first data virtqueue as default */ + struct data_queue *data_vq = &vcrypto->data_vq[0]; + + vc_req->ablkcipher_ctx = ctx; + vc_req->ablkcipher_req = req; + + vc_req->encrypt = false; + vc_req->dataq = data_vq; + + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); +} + +static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm) +{ + struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm); + + tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_request); + ctx->tfm = tfm; + + return 0; +} + +static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm) +{ + struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!ctx->vcrypto) + return; + + virtio_crypto_alg_ablkcipher_close_session(ctx, 1); + virtio_crypto_alg_ablkcipher_close_session(ctx, 0); + virtcrypto_dev_put(ctx->vcrypto); + ctx->vcrypto = NULL; +} + +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct data_queue *data_vq = vc_req->dataq; + int ret; + + ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq); + if (ret < 0) + return ret; + + virtqueue_kick(data_vq->vq); + + return 0; +} + +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err) +{ + crypto_finalize_cipher_request(vc_req->dataq->engine, req, err); + + virtcrypto_clear_request(vc_req); +} + +static struct crypto_alg virtio_crypto_algs[] = { { + .cra_name = "cbc(aes)", + .cra_driver_name = "virtio_crypto_aes_cbc", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct virtio_crypto_ablkcipher_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_type = &crypto_ablkcipher_type, + .cra_init = virtio_crypto_ablkcipher_init, + .cra_exit = virtio_crypto_ablkcipher_exit, + .cra_u = { + .ablkcipher = { + .setkey = virtio_crypto_ablkcipher_setkey, + .decrypt = virtio_crypto_ablkcipher_decrypt, + .encrypt = virtio_crypto_ablkcipher_encrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + }, + }, +} }; + +int virtio_crypto_algs_register(void) +{ + int ret = 0; + + mutex_lock(&algs_lock); + if (++virtio_crypto_active_devs != 1) + goto unlock; + + ret = crypto_register_algs(virtio_crypto_algs, + ARRAY_SIZE(virtio_crypto_algs)); + if (ret) + virtio_crypto_active_devs--; + +unlock: + mutex_unlock(&algs_lock); + return ret; +} + +void virtio_crypto_algs_unregister(void) +{ + mutex_lock(&algs_lock); + if (--virtio_crypto_active_devs != 0) + goto unlock; + + crypto_unregister_algs(virtio_crypto_algs, + ARRAY_SIZE(virtio_crypto_algs)); + +unlock: + mutex_unlock(&algs_lock); +} diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h new file mode 100644 index 000000000000..da6d8c0ea407 --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -0,0 +1,144 @@ +/* Common header for Virtio crypto device. + * + * Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _VIRTIO_CRYPTO_COMMON_H +#define _VIRTIO_CRYPTO_COMMON_H + +#include <linux/virtio.h> +#include <linux/crypto.h> +#include <linux/spinlock.h> +#include <crypto/aead.h> +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/engine.h> + + +/* Internal representation of a data virtqueue */ +struct data_queue { + /* Virtqueue associated with this send _queue */ + struct virtqueue *vq; + + /* To protect the vq operations for the dataq */ + spinlock_t lock; + + /* Name of the tx queue: dataq.$index */ + char name[32]; + + struct crypto_engine *engine; +}; + +struct virtio_crypto { + struct virtio_device *vdev; + struct virtqueue *ctrl_vq; + struct data_queue *data_vq; + + /* To protect the vq operations for the controlq */ + spinlock_t ctrl_lock; + + /* Maximum of data queues supported by the device */ + u32 max_data_queues; + + /* Number of queue currently used by the driver */ + u32 curr_queue; + + /* Maximum length of cipher key */ + u32 max_cipher_key_len; + /* Maximum length of authenticated key */ + u32 max_auth_key_len; + /* Maximum size of per request */ + u64 max_size; + + /* Control VQ buffers: protected by the ctrl_lock */ + struct virtio_crypto_op_ctrl_req ctrl; + struct virtio_crypto_session_input input; + struct virtio_crypto_inhdr ctrl_status; + + unsigned long status; + atomic_t ref_count; + struct list_head list; + struct module *owner; + uint8_t dev_id; + + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; +}; + +struct virtio_crypto_sym_session_info { + /* Backend session id, which come from the host side */ + __u64 session_id; +}; + +struct virtio_crypto_ablkcipher_ctx { + struct virtio_crypto *vcrypto; + struct crypto_tfm *tfm; + + struct virtio_crypto_sym_session_info enc_sess_info; + struct virtio_crypto_sym_session_info dec_sess_info; +}; + +struct virtio_crypto_request { + /* Cipher or aead */ + uint32_t type; + uint8_t status; + struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx; + struct ablkcipher_request *ablkcipher_req; + struct virtio_crypto_op_data_req *req_data; + struct scatterlist **sgs; + uint8_t *iv; + /* Encryption? 
*/ + bool encrypt; + struct data_queue *dataq; +}; + +int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); +struct list_head *virtcrypto_devmgr_get_head(void); +void virtcrypto_devmgr_rm_dev(struct virtio_crypto *vcrypto_dev); +struct virtio_crypto *virtcrypto_devmgr_get_first(void); +int virtcrypto_dev_in_use(struct virtio_crypto *vcrypto_dev); +int virtcrypto_dev_get(struct virtio_crypto *vcrypto_dev); +void virtcrypto_dev_put(struct virtio_crypto *vcrypto_dev); +int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev); +struct virtio_crypto *virtcrypto_get_dev_node(int node); +int virtcrypto_dev_start(struct virtio_crypto *vcrypto); +void virtcrypto_dev_stop(struct virtio_crypto *vcrypto); +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req); +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err); + +void +virtcrypto_clear_request(struct virtio_crypto_request *vc_req); + +static inline int virtio_crypto_get_current_node(void) +{ + int cpu, node; + + cpu = get_cpu(); + node = topology_physical_package_id(cpu); + put_cpu(); + + return node; +} + +int virtio_crypto_algs_register(void); +void virtio_crypto_algs_unregister(void); + +#endif /* _VIRTIO_CRYPTO_COMMON_H */ diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c new file mode 100644 index 000000000000..21472e427f6f --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -0,0 +1,540 @@ + /* Driver for Virtio crypto device. + * + * Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/virtio_config.h> +#include <linux/cpu.h> + +#include <uapi/linux/virtio_crypto.h> +#include "virtio_crypto_common.h" + + +void +virtcrypto_clear_request(struct virtio_crypto_request *vc_req) +{ + if (vc_req) { + kzfree(vc_req->iv); + kzfree(vc_req->req_data); + kfree(vc_req->sgs); + } +} + +static void virtcrypto_dataq_callback(struct virtqueue *vq) +{ + struct virtio_crypto *vcrypto = vq->vdev->priv; + struct virtio_crypto_request *vc_req; + unsigned long flags; + unsigned int len; + struct ablkcipher_request *ablk_req; + int error; + unsigned int qid = vq->index; + + spin_lock_irqsave(&vcrypto->data_vq[qid].lock, flags); + do { + virtqueue_disable_cb(vq); + while ((vc_req = virtqueue_get_buf(vq, &len)) != NULL) { + if (vc_req->type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + switch (vc_req->status) { + case VIRTIO_CRYPTO_OK: + error = 0; + break; + case VIRTIO_CRYPTO_INVSESS: + case VIRTIO_CRYPTO_ERR: + error = -EINVAL; + break; + case VIRTIO_CRYPTO_BADMSG: + error = -EBADMSG; + break; + default: + error = -EIO; + break; + } + ablk_req = vc_req->ablkcipher_req; + + spin_unlock_irqrestore( + &vcrypto->data_vq[qid].lock, flags); + /* Finish the encrypt or decrypt process */ + virtio_crypto_ablkcipher_finalize_req(vc_req, + ablk_req, error); + spin_lock_irqsave( + &vcrypto->data_vq[qid].lock, flags); + } + } + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&vcrypto->data_vq[qid].lock, flags); +} + +static int virtcrypto_find_vqs(struct virtio_crypto *vi) +{ + vq_callback_t **callbacks; + struct virtqueue **vqs; + int ret = -ENOMEM; + int i, total_vqs; + const char **names; + struct device *dev = &vi->vdev->dev; + + /* + * We expect 1 data virtqueue, followed by + * possible N-1 data queues used in multiqueue mode, + * followed by control vq. 
+ */ + total_vqs = vi->max_data_queues + 1; + + /* Allocate space for find_vqs parameters */ + vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_vq; + callbacks = kcalloc(total_vqs, sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kcalloc(total_vqs, sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + /* Parameters for control virtqueue */ + callbacks[total_vqs - 1] = NULL; + names[total_vqs - 1] = "controlq"; + + /* Allocate/initialize parameters for data virtqueues */ + for (i = 0; i < vi->max_data_queues; i++) { + callbacks[i] = virtcrypto_dataq_callback; + snprintf(vi->data_vq[i].name, sizeof(vi->data_vq[i].name), + "dataq.%d", i); + names[i] = vi->data_vq[i].name; + } + + ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, + names, NULL); + if (ret) + goto err_find; + + vi->ctrl_vq = vqs[total_vqs - 1]; + + for (i = 0; i < vi->max_data_queues; i++) { + spin_lock_init(&vi->data_vq[i].lock); + vi->data_vq[i].vq = vqs[i]; + /* Initialize crypto engine */ + vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + if (!vi->data_vq[i].engine) { + ret = -ENOMEM; + goto err_engine; + } + + vi->data_vq[i].engine->cipher_one_request = + virtio_crypto_ablkcipher_crypt_req; + } + + kfree(names); + kfree(callbacks); + kfree(vqs); + + return 0; + +err_engine: +err_find: + kfree(names); +err_names: + kfree(callbacks); +err_callback: + kfree(vqs); +err_vq: + return ret; +} + +static int virtcrypto_alloc_queues(struct virtio_crypto *vi) +{ + vi->data_vq = kcalloc(vi->max_data_queues, sizeof(*vi->data_vq), + GFP_KERNEL); + if (!vi->data_vq) + return -ENOMEM; + + return 0; +} + +static void virtcrypto_clean_affinity(struct virtio_crypto *vi, long hcpu) +{ + int i; + + if (vi->affinity_hint_set) { + for (i = 0; i < vi->max_data_queues; i++) + virtqueue_set_affinity(vi->data_vq[i].vq, -1); + + vi->affinity_hint_set = false; + } +} + +static void virtcrypto_set_affinity(struct virtio_crypto *vcrypto) +{ + int i = 0; + int cpu; + + /* + * In single queue mode, we don't set the cpu affinity. + */ + if (vcrypto->curr_queue == 1 || vcrypto->max_data_queues == 1) { + virtcrypto_clean_affinity(vcrypto, -1); + return; + } + + /* + * In multiqueue mode, we let the queue to be private to one cpu + * by setting the affinity hint to eliminate the contention. + * + * TODO: adds cpu hotplug support by register cpu notifier. + * + */ + for_each_online_cpu(cpu) { + virtqueue_set_affinity(vcrypto->data_vq[i].vq, cpu); + if (++i >= vcrypto->max_data_queues) + break; + } + + vcrypto->affinity_hint_set = true; +} + +static void virtcrypto_free_queues(struct virtio_crypto *vi) +{ + kfree(vi->data_vq); +} + +static int virtcrypto_init_vqs(struct virtio_crypto *vi) +{ + int ret; + + /* Allocate send & receive queues */ + ret = virtcrypto_alloc_queues(vi); + if (ret) + goto err; + + ret = virtcrypto_find_vqs(vi); + if (ret) + goto err_free; + + get_online_cpus(); + virtcrypto_set_affinity(vi); + put_online_cpus(); + + return 0; + +err_free: + virtcrypto_free_queues(vi); +err: + return ret; +} + +static int virtcrypto_update_status(struct virtio_crypto *vcrypto) +{ + u32 status; + int err; + + virtio_cread(vcrypto->vdev, + struct virtio_crypto_config, status, &status); + + /* + * Unknown status bits would be a host error and the driver + * should consider the device to be broken. 
+ */ + if (status & (~VIRTIO_CRYPTO_S_HW_READY)) { + dev_warn(&vcrypto->vdev->dev, + "Unknown status bits: 0x%x\n", status); + + virtio_break_device(vcrypto->vdev); + return -EPERM; + } + + if (vcrypto->status == status) + return 0; + + vcrypto->status = status; + + if (vcrypto->status & VIRTIO_CRYPTO_S_HW_READY) { + err = virtcrypto_dev_start(vcrypto); + if (err) { + dev_err(&vcrypto->vdev->dev, + "Failed to start virtio crypto device.\n"); + + return -EPERM; + } + dev_info(&vcrypto->vdev->dev, "Accelerator is ready\n"); + } else { + virtcrypto_dev_stop(vcrypto); + dev_info(&vcrypto->vdev->dev, "Accelerator is not ready\n"); + } + + return 0; +} + +static int virtcrypto_start_crypto_engines(struct virtio_crypto *vcrypto) +{ + int32_t i; + int ret; + + for (i = 0; i < vcrypto->max_data_queues; i++) { + if (vcrypto->data_vq[i].engine) { + ret = crypto_engine_start(vcrypto->data_vq[i].engine); + if (ret) + goto err; + } + } + + return 0; + +err: + while (--i >= 0) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); + + return ret; +} + +static void virtcrypto_clear_crypto_engines(struct virtio_crypto *vcrypto) +{ + u32 i; + + for (i = 0; i < vcrypto->max_data_queues; i++) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); +} + +static void virtcrypto_del_vqs(struct virtio_crypto *vcrypto) +{ + struct virtio_device *vdev = vcrypto->vdev; + + virtcrypto_clean_affinity(vcrypto, -1); + + vdev->config->del_vqs(vdev); + + virtcrypto_free_queues(vcrypto); +} + +static int virtcrypto_probe(struct virtio_device *vdev) +{ + int err = -EFAULT; + struct virtio_crypto *vcrypto; + u32 max_data_queues = 0, max_cipher_key_len = 0; + u32 max_auth_key_len = 0; + u64 max_size = 0; + + if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) + return -ENODEV; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (num_possible_nodes() > 1 && dev_to_node(&vdev->dev) < 0) { + /* + * If the accelerator is connected to a node with no memory + * there is no point in using the accelerator since the remote + * memory transaction will be very slow. 
+ */ + dev_err(&vdev->dev, "Invalid NUMA configuration.\n"); + return -EINVAL; + } + + vcrypto = kzalloc_node(sizeof(*vcrypto), GFP_KERNEL, + dev_to_node(&vdev->dev)); + if (!vcrypto) + return -ENOMEM; + + virtio_cread(vdev, struct virtio_crypto_config, + max_dataqueues, &max_data_queues); + if (max_data_queues < 1) + max_data_queues = 1; + + virtio_cread(vdev, struct virtio_crypto_config, + max_cipher_key_len, &max_cipher_key_len); + virtio_cread(vdev, struct virtio_crypto_config, + max_auth_key_len, &max_auth_key_len); + virtio_cread(vdev, struct virtio_crypto_config, + max_size, &max_size); + + /* Add virtio crypto device to global table */ + err = virtcrypto_devmgr_add_dev(vcrypto); + if (err) { + dev_err(&vdev->dev, "Failed to add new virtio crypto device.\n"); + goto free; + } + vcrypto->owner = THIS_MODULE; + vcrypto = vdev->priv = vcrypto; + vcrypto->vdev = vdev; + + spin_lock_init(&vcrypto->ctrl_lock); + + /* Use single data queue as default */ + vcrypto->curr_queue = 1; + vcrypto->max_data_queues = max_data_queues; + vcrypto->max_cipher_key_len = max_cipher_key_len; + vcrypto->max_auth_key_len = max_auth_key_len; + vcrypto->max_size = max_size; + + dev_info(&vdev->dev, + "max_queues: %u, max_cipher_key_len: %u, max_auth_key_len: %u, max_size 0x%llx\n", + vcrypto->max_data_queues, + vcrypto->max_cipher_key_len, + vcrypto->max_auth_key_len, + vcrypto->max_size); + + err = virtcrypto_init_vqs(vcrypto); + if (err) { + dev_err(&vdev->dev, "Failed to initialize vqs.\n"); + goto free_dev; + } + + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + + virtio_device_ready(vdev); + + err = virtcrypto_update_status(vcrypto); + if (err) + goto free_engines; + + return 0; + +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); +free_vqs: + vcrypto->vdev->config->reset(vdev); + virtcrypto_del_vqs(vcrypto); +free_dev: + virtcrypto_devmgr_rm_dev(vcrypto); +free: + kfree(vcrypto); + return err; +} + +static void virtcrypto_free_unused_reqs(struct virtio_crypto *vcrypto) +{ + struct virtio_crypto_request *vc_req; + int i; + struct virtqueue *vq; + + for (i = 0; i < vcrypto->max_data_queues; i++) { + vq = vcrypto->data_vq[i].vq; + while ((vc_req = virtqueue_detach_unused_buf(vq)) != NULL) { + kfree(vc_req->req_data); + kfree(vc_req->sgs); + } + } +} + +static void virtcrypto_remove(struct virtio_device *vdev) +{ + struct virtio_crypto *vcrypto = vdev->priv; + + dev_info(&vdev->dev, "Start virtcrypto_remove.\n"); + + if (virtcrypto_dev_started(vcrypto)) + virtcrypto_dev_stop(vcrypto); + vdev->config->reset(vdev); + virtcrypto_free_unused_reqs(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); + virtcrypto_del_vqs(vcrypto); + virtcrypto_devmgr_rm_dev(vcrypto); + kfree(vcrypto); +} + +static void virtcrypto_config_changed(struct virtio_device *vdev) +{ + struct virtio_crypto *vcrypto = vdev->priv; + + virtcrypto_update_status(vcrypto); +} + +#ifdef CONFIG_PM_SLEEP +static int virtcrypto_freeze(struct virtio_device *vdev) +{ + struct virtio_crypto *vcrypto = vdev->priv; + + vdev->config->reset(vdev); + virtcrypto_free_unused_reqs(vcrypto); + if (virtcrypto_dev_started(vcrypto)) + virtcrypto_dev_stop(vcrypto); + + virtcrypto_clear_crypto_engines(vcrypto); + virtcrypto_del_vqs(vcrypto); + return 0; +} + +static int virtcrypto_restore(struct virtio_device *vdev) +{ + struct virtio_crypto *vcrypto = vdev->priv; + int err; + + err = virtcrypto_init_vqs(vcrypto); + if (err) + return err; + + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + + 
virtio_device_ready(vdev); + + err = virtcrypto_dev_start(vcrypto); + if (err) { + dev_err(&vdev->dev, "Failed to start virtio crypto device.\n"); + goto free_engines; + } + + return 0; + +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); +free_vqs: + vcrypto->vdev->config->reset(vdev); + virtcrypto_del_vqs(vcrypto); + return err; +} +#endif + +static unsigned int features[] = { + /* none */ +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_CRYPTO, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct virtio_driver virtio_crypto_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .id_table = id_table, + .probe = virtcrypto_probe, + .remove = virtcrypto_remove, + .config_changed = virtcrypto_config_changed, +#ifdef CONFIG_PM_SLEEP + .freeze = virtcrypto_freeze, + .restore = virtcrypto_restore, +#endif +}; + +module_virtio_driver(virtio_crypto_driver); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("virtio crypto device driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gonglei <arei.gonglei@huawei.com>"); diff --git a/drivers/crypto/virtio/virtio_crypto_mgr.c b/drivers/crypto/virtio/virtio_crypto_mgr.c new file mode 100644 index 000000000000..a69ff71de2c4 --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_mgr.c @@ -0,0 +1,264 @@ + /* Management for virtio crypto devices (refer to adf_dev_mgr.c) + * + * Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/module.h> + +#include <uapi/linux/virtio_crypto.h> +#include "virtio_crypto_common.h" + +static LIST_HEAD(virtio_crypto_table); +static uint32_t num_devices; + +/* The table_lock protects the above global list and num_devices */ +static DEFINE_MUTEX(table_lock); + +#define VIRTIO_CRYPTO_MAX_DEVICES 32 + + +/* + * virtcrypto_devmgr_add_dev() - Add vcrypto_dev to the acceleration + * framework. + * @vcrypto_dev: Pointer to virtio crypto device. + * + * Function adds virtio crypto device to the global list. + * To be used by virtio crypto device specific drivers. + * + * Return: 0 on success, error code othewise. 
+ */ +int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev) +{ + struct list_head *itr; + + mutex_lock(&table_lock); + if (num_devices == VIRTIO_CRYPTO_MAX_DEVICES) { + pr_info("virtio_crypto: only support up to %d devices\n", + VIRTIO_CRYPTO_MAX_DEVICES); + mutex_unlock(&table_lock); + return -EFAULT; + } + + list_for_each(itr, &virtio_crypto_table) { + struct virtio_crypto *ptr = + list_entry(itr, struct virtio_crypto, list); + + if (ptr == vcrypto_dev) { + mutex_unlock(&table_lock); + return -EEXIST; + } + } + atomic_set(&vcrypto_dev->ref_count, 0); + list_add_tail(&vcrypto_dev->list, &virtio_crypto_table); + vcrypto_dev->dev_id = num_devices++; + mutex_unlock(&table_lock); + return 0; +} + +struct list_head *virtcrypto_devmgr_get_head(void) +{ + return &virtio_crypto_table; +} + +/* + * virtcrypto_devmgr_rm_dev() - Remove vcrypto_dev from the acceleration + * framework. + * @vcrypto_dev: Pointer to virtio crypto device. + * + * Function removes virtio crypto device from the acceleration framework. + * To be used by virtio crypto device specific drivers. + * + * Return: void + */ +void virtcrypto_devmgr_rm_dev(struct virtio_crypto *vcrypto_dev) +{ + mutex_lock(&table_lock); + list_del(&vcrypto_dev->list); + num_devices--; + mutex_unlock(&table_lock); +} + +/* + * virtcrypto_devmgr_get_first() + * + * Function returns the first virtio crypto device from the acceleration + * framework. + * + * To be used by virtio crypto device specific drivers. + * + * Return: pointer to vcrypto_dev or NULL if not found. + */ +struct virtio_crypto *virtcrypto_devmgr_get_first(void) +{ + struct virtio_crypto *dev = NULL; + + mutex_lock(&table_lock); + if (!list_empty(&virtio_crypto_table)) + dev = list_first_entry(&virtio_crypto_table, + struct virtio_crypto, + list); + mutex_unlock(&table_lock); + return dev; +} + +/* + * virtcrypto_dev_in_use() - Check whether vcrypto_dev is currently in use + * @vcrypto_dev: Pointer to virtio crypto device. + * + * To be used by virtio crypto device specific drivers. + * + * Return: 1 when device is in use, 0 otherwise. + */ +int virtcrypto_dev_in_use(struct virtio_crypto *vcrypto_dev) +{ + return atomic_read(&vcrypto_dev->ref_count) != 0; +} + +/* + * virtcrypto_dev_get() - Increment vcrypto_dev reference count + * @vcrypto_dev: Pointer to virtio crypto device. + * + * Increment the vcrypto_dev refcount and if this is the first time + * incrementing it during this period the vcrypto_dev is in use, + * increment the module refcount too. + * To be used by virtio crypto device specific drivers. + * + * Return: 0 when successful, EFAULT when fail to bump module refcount + */ +int virtcrypto_dev_get(struct virtio_crypto *vcrypto_dev) +{ + if (atomic_add_return(1, &vcrypto_dev->ref_count) == 1) + if (!try_module_get(vcrypto_dev->owner)) + return -EFAULT; + return 0; +} + +/* + * virtcrypto_dev_put() - Decrement vcrypto_dev reference count + * @vcrypto_dev: Pointer to virtio crypto device. + * + * Decrement the vcrypto_dev refcount and if this is the last time + * decrementing it during this period the vcrypto_dev is in use, + * decrement the module refcount too. + * To be used by virtio crypto device specific drivers. + * + * Return: void + */ +void virtcrypto_dev_put(struct virtio_crypto *vcrypto_dev) +{ + if (atomic_sub_return(1, &vcrypto_dev->ref_count) == 0) + module_put(vcrypto_dev->owner); +} + +/* + * virtcrypto_dev_started() - Check whether device has started + * @vcrypto_dev: Pointer to virtio crypto device. 
+ * + * To be used by virtio crypto device specific drivers. + * + * Return: 1 when the device has started, 0 otherwise + */ +int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev) +{ + return (vcrypto_dev->status & VIRTIO_CRYPTO_S_HW_READY); +} + +/* + * virtcrypto_get_dev_node() - Get vcrypto_dev on the node. + * @node: Node id the driver works on. + * + * Function returns the least-used started virtio crypto device on the node. + * + * To be used by virtio crypto device specific drivers. + * + * Return: pointer to vcrypto_dev or NULL if not found. + */ +struct virtio_crypto *virtcrypto_get_dev_node(int node) +{ + struct virtio_crypto *vcrypto_dev = NULL, *tmp_dev; + unsigned long best = ~0; + unsigned long ctr; + + mutex_lock(&table_lock); + list_for_each_entry(tmp_dev, virtcrypto_devmgr_get_head(), list) { + + if ((node == dev_to_node(&tmp_dev->vdev->dev) || + dev_to_node(&tmp_dev->vdev->dev) < 0) && + virtcrypto_dev_started(tmp_dev)) { + ctr = atomic_read(&tmp_dev->ref_count); + if (best > ctr) { + vcrypto_dev = tmp_dev; + best = ctr; + } + } + } + + if (!vcrypto_dev) { + pr_info("virtio_crypto: Could not find a device on node %d\n", + node); + /* Get any started device */ + list_for_each_entry(tmp_dev, + virtcrypto_devmgr_get_head(), list) { + if (virtcrypto_dev_started(tmp_dev)) { + vcrypto_dev = tmp_dev; + break; + } + } + } + mutex_unlock(&table_lock); + if (!vcrypto_dev) + return NULL; + + virtcrypto_dev_get(vcrypto_dev); + return vcrypto_dev; +} + +/* + * virtcrypto_dev_start() - Start virtio crypto device + * @vcrypto: Pointer to virtio crypto device. + * + * Function notifies all the registered services that the virtio crypto device + * is ready to be used. + * To be used by virtio crypto device specific drivers. + * + * Return: 0 on success, -EFAULT when the crypto algorithms cannot be registered + */ +int virtcrypto_dev_start(struct virtio_crypto *vcrypto) +{ + if (virtio_crypto_algs_register()) { + pr_err("virtio_crypto: Failed to register crypto algs\n"); + return -EFAULT; + } + + return 0; +} + +/* + * virtcrypto_dev_stop() - Stop virtio crypto device + * @vcrypto: Pointer to virtio crypto device. + * + * Function notifies all the registered services that the virtio crypto device + * is about to be stopped. + * To be used by virtio crypto device specific drivers.
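An aside on intended usage, as far as it can be inferred from the functions above: a consumer of a virtio crypto device (for instance the algorithm glue) calls virtcrypto_get_dev_node() to pick the least-used started device on or near its NUMA node, which already takes a reference via virtcrypto_dev_get(), and later releases that reference with virtcrypto_dev_put(). The sketch below illustrates only that pairing; the example_ctx structure, its field and the init/exit function names are illustrative assumptions, not part of this patch.

/* Illustrative sketch only -- not part of the virtio_crypto patch. */
#include <linux/errno.h>
#include <linux/topology.h>
#include "virtio_crypto_common.h"

struct example_ctx {
	struct virtio_crypto *vcrypto;	/* device backing this user */
};

static int example_ctx_init(struct example_ctx *ctx)
{
	/* Least-used started device on (or near) the local node;
	 * virtcrypto_get_dev_node() already bumps the device refcount. */
	ctx->vcrypto = virtcrypto_get_dev_node(numa_node_id());
	if (!ctx->vcrypto)
		return -ENODEV;
	return 0;
}

static void example_ctx_exit(struct example_ctx *ctx)
{
	/* Drop the reference taken in example_ctx_init(); the last put
	 * also releases the reference on the owning module. */
	virtcrypto_dev_put(ctx->vcrypto);
	ctx->vcrypto = NULL;
}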
+ * + * Return: void + */ +void virtcrypto_dev_stop(struct virtio_crypto *vcrypto) +{ + virtio_crypto_algs_unregister(); +} diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index de6e241b0866..55f7c392582f 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -10,10 +10,12 @@ endif quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) $(TARGET) > $(@) -$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl - $(call cmd,perl) +targets += aesp8-ppc.S ghashp8-ppc.S + +$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE + $(call if_changed,perl) -$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl - $(call cmd,perl) +$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE + $(call if_changed,perl) -.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S +clean-files := aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 94ad5c0adbcb..72a26eb4e954 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -27,11 +27,12 @@ #include <asm/switch_to.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> #include "aesp8-ppc.h" struct p8_aes_cbc_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; }; @@ -39,7 +40,7 @@ struct p8_aes_cbc_ctx { static int p8_aes_cbc_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -47,8 +48,9 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -56,11 +58,12 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); + - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -71,7 +74,7 @@ static void p8_aes_cbc_exit(struct crypto_tfm *tfm) struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -91,7 +94,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -103,15 +106,14 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = 
crypto_skcipher_encrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); @@ -144,15 +146,14 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 38ed10d761d0..7cf6d31c1123 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -80,11 +80,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); return ret; @@ -99,11 +101,13 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); crypto_xor(keystream, src, nbytes); memcpy(dst, keystream, nbytes); @@ -132,6 +136,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, @@ -143,6 +148,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, walk.iv); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); /* We need to update IV mostly for last bytes/round */ inc = (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 24353ec336c5..6adc9290557a 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -28,11 +28,12 @@ #include <crypto/aes.h> #include <crypto/scatterwalk.h> #include <crypto/xts.h> +#include <crypto/skcipher.h> #include "aesp8-ppc.h" struct p8_aes_xts_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; struct aes_key tweak_key; @@ -41,7 +42,7 @@ struct p8_aes_xts_ctx { static int p8_aes_xts_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -49,8 +50,8 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ 
-58,11 +59,11 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -73,7 +74,7 @@ static void p8_aes_xts_exit(struct crypto_tfm *tfm) struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -98,7 +99,7 @@ static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -113,15 +114,14 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : - crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = enc ? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable();
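The aes_cbc and aes_xts conversions above repeat one pattern for the in_interrupt() fallback path: allocate a synchronous skcipher as the fallback (passing CRYPTO_ALG_ASYNC in the mask to crypto_alloc_skcipher()), build a request on the stack, hand it the same scatterlists, length and IV that the blkcipher descriptor carried, run it, and zero the on-stack request afterwards. Below is a condensed sketch of just that pattern; run_fallback() is an illustrative helper name and not a kernel API, while the skcipher calls themselves are the ones used in the hunks above.

/* Illustrative sketch only -- not part of the vmx patches above. */
#include <linux/types.h>
#include <linux/scatterlist.h>
#include <crypto/skcipher.h>

static int run_fallback(struct crypto_skcipher *fallback,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes, void *iv, u32 flags, bool enc)
{
	int ret;
	/* On-stack request: only safe because the fallback was allocated
	 * with CRYPTO_ALG_ASYNC in the mask, i.e. it is synchronous. */
	SKCIPHER_REQUEST_ON_STACK(req, fallback);

	skcipher_request_set_tfm(req, fallback);
	skcipher_request_set_callback(req, flags, NULL, NULL);
	skcipher_request_set_crypt(req, src, dst, nbytes, iv);

	ret = enc ? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req);

	/* Wipe the request state off the stack before returning. */
	skcipher_request_zero(req);
	return ret;
}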