22 files changed, 1249 insertions, 687 deletions
diff --git a/Documentation/padata.txt b/Documentation/padata.txt
index 269d7d0d8335..93dd4e6db975 100644
--- a/Documentation/padata.txt
+++ b/Documentation/padata.txt
@@ -22,12 +22,15 @@ actually be done; it should be a multithreaded queue, naturally.
 
 There are functions for enabling and disabling the instance:
 
-    void padata_start(struct padata_instance *pinst);
+    int padata_start(struct padata_instance *pinst);
     void padata_stop(struct padata_instance *pinst);
 
-These functions literally do nothing beyond setting or clearing the
-"padata_start() was called" flag; if that flag is not set, other functions
-will refuse to work.
+These functions are setting or clearing the "PADATA_INIT" flag;
+if that flag is not set, other functions will refuse to work.
+padata_start returns zero on success (flag set) or -EINVAL if the
+padata cpumask contains no active cpu (flag not set).
+padata_stop clears the flag and blocks until the padata instance
+is unused.
 
 The list of CPUs to be used can be adjusted with these functions:
 
@@ -63,12 +66,10 @@ The submission of work is done with:
 The pinst and padata structures must be set up as described above; cb_cpu
 specifies which CPU will be used for the final callback when the work is
 done; it must be in the current instance's CPU mask.  The return value from
-padata_do_parallel() is a little strange; zero is an error return
-indicating that the caller forgot the padata_start() formalities.  -EBUSY
-means that somebody, somewhere else is messing with the instance's CPU
-mask, while -EINVAL is a complaint about cb_cpu not being in that CPU mask.
-If all goes well, this function will return -EINPROGRESS, indicating that
-the work is in progress.
+padata_do_parallel() is zero on success, indicating that the work is in
+progress. -EBUSY means that somebody, somewhere else is messing with the
+instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being
+in that CPU mask or about a not running instance.
 
 Each task submitted to padata_do_parallel() will, in turn, be passed to
 exactly one call to the above-mentioned parallel() function, on one CPU, so
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 6a1157fa4f98..1cf81d77c5a5 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -5,6 +5,6 @@
 obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o des_check_key.o
+obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h
index c964b64111dd..6210457ceebb 100644
--- a/arch/s390/crypto/crypto_des.h
+++ b/arch/s390/crypto/crypto_des.h
@@ -15,4 +15,4 @@
 
 extern int crypto_des_check_key(const u8*, unsigned int, u32*);
 
-#endif //__CRYPTO_DES_H__
+#endif /*__CRYPTO_DES_H__*/
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 2bc479ab3a66..cc5420118393 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -14,32 +14,21 @@
  *
  */
 
-#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
 
 #include "crypt_s390.h"
-#include "crypto_des.h"
-
-#define DES_BLOCK_SIZE 8
-#define DES_KEY_SIZE 8
-
-#define DES3_128_KEY_SIZE	(2 * DES_KEY_SIZE)
-#define DES3_128_BLOCK_SIZE	DES_BLOCK_SIZE
 
 #define DES3_192_KEY_SIZE	(3 * DES_KEY_SIZE)
-#define DES3_192_BLOCK_SIZE	DES_BLOCK_SIZE
 
 struct crypt_s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES_KEY_SIZE];
 };
 
-struct crypt_s390_des3_128_ctx {
-	u8 iv[DES_BLOCK_SIZE];
-	u8 key[DES3_128_KEY_SIZE];
-};
-
 struct crypt_s390_des3_192_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES3_192_KEY_SIZE];
@@ -50,13 +39,16 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 {
 	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
-	int ret;
+	u32 tmp[DES_EXPKEY_WORDS];
 
-	/* test if key is valid (not a weak key) */
-	ret = crypto_des_check_key(key, keylen, flags);
-	if (ret == 0)
-		memcpy(dctx->key, key, keylen);
-	return ret;
+	/* check for weak keys */
+	if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	memcpy(dctx->key, key, keylen);
+	return 0;
 }
 
 static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
@@ -237,165 +229,6 @@ static struct crypto_alg cbc_des_alg = {
  *   complementation keys.  Any weakness is obviated by the use of
  *   multiple keys.
  *
- *   However, if the two  independent 64-bit keys are equal,
- *   then the DES3 operation is simply the same as DES.
- *   Implementers MUST reject keys that exhibit this property.
- *
- */
-static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
-{
-	int i, ret;
-	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8 *temp_key = key;
-	u32 *flags = &tfm->crt_flags;
-
-	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE)) &&
-	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
-	for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) {
-		ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
-		if (ret < 0)
-			return ret;
-	}
-	memcpy(dctx->key, key, keylen);
-	return 0;
-}
-
-static void des3_128_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
-
-	crypt_s390_km(KM_TDEA_128_ENCRYPT, dctx->key, dst, (void*)src,
-		      DES3_128_BLOCK_SIZE);
-}
-
-static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
-
-	crypt_s390_km(KM_TDEA_128_DECRYPT, dctx->key, dst, (void*)src,
-		      DES3_128_BLOCK_SIZE);
-}
-
-static struct crypto_alg des3_128_alg = {
-	.cra_name		=	"des3_ede128",
-	.cra_driver_name	=	"des3_ede128-s390",
-	.cra_priority		=	CRYPT_S390_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(des3_128_alg.cra_list),
-	.cra_u			=	{
-		.cipher = {
-			.cia_min_keysize	=	DES3_128_KEY_SIZE,
-			.cia_max_keysize	=	DES3_128_KEY_SIZE,
-			.cia_setkey		=	des3_128_setkey,
-			.cia_encrypt		=	des3_128_encrypt,
-			.cia_decrypt		=	des3_128_decrypt,
-		}
-	}
-};
-
-static int ecb_des3_128_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk);
-}
-
-static int ecb_des3_128_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk);
-}
-
-static struct crypto_alg ecb_des3_128_alg = {
-	.cra_name		=	"ecb(des3_ede128)",
-	.cra_driver_name	=	"ecb-des3_ede128-s390",
-	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(
-						ecb_des3_128_alg.cra_list),
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_128_KEY_SIZE,
-			.max_keysize		=	DES3_128_KEY_SIZE,
-			.setkey			=	des3_128_setkey,
-			.encrypt		=	ecb_des3_128_encrypt,
-			.decrypt		=	ecb_des3_128_decrypt,
-		}
-	}
-};
-
-static int cbc_des3_128_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk);
-}
-
-static int cbc_des3_128_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk);
-}
-
-static struct crypto_alg cbc_des3_128_alg = {
-	.cra_name		=	"cbc(des3_ede128)",
-	.cra_driver_name	=	"cbc-des3_ede128-s390",
-	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(
-						cbc_des3_128_alg.cra_list),
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_128_KEY_SIZE,
-			.max_keysize		=	DES3_128_KEY_SIZE,
-			.ivsize			=	DES3_128_BLOCK_SIZE,
-			.setkey			=	des3_128_setkey,
-			.encrypt		=	cbc_des3_128_encrypt,
-			.decrypt		=	cbc_des3_128_decrypt,
-		}
-	}
-};
-
-/*
- * RFC2451:
- *
- *   For DES-EDE3, there is no known need to reject weak or
- *   complementation keys.  Any weakness is obviated by the use of
- *   multiple keys.
- *
  *   However, if the first two or last two independent 64-bit keys are
  *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
  *   same as DES.  Implementers MUST reject keys that exhibit this
@@ -405,9 +238,7 @@ static struct crypto_alg cbc_des3_128_alg = {
 static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	int i, ret;
 	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8 *temp_key = key;
 	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
@@ -417,11 +248,6 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
-	for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) {
-		ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
-		if (ret < 0)
-			return ret;
-	}
 	memcpy(dctx->key, key, keylen);
 	return 0;
 }
@@ -431,7 +257,7 @@ static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
-		      DES3_192_BLOCK_SIZE);
+		      DES_BLOCK_SIZE);
 }
 
 static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -439,7 +265,7 @@ static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
-		      DES3_192_BLOCK_SIZE);
+		      DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des3_192_alg = {
@@ -447,7 +273,7 @@ static struct crypto_alg des3_192_alg = {
 	.cra_driver_name	=	"des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(des3_192_alg.cra_list),
@@ -489,7 +315,7 @@ static struct crypto_alg ecb_des3_192_alg = {
 	.cra_driver_name	=	"ecb-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
@@ -533,7 +359,7 @@ static struct crypto_alg cbc_des3_192_alg = {
 	.cra_driver_name	=	"cbc-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
@@ -543,7 +369,7 @@ static struct crypto_alg cbc_des3_192_alg = {
 		.blkcipher = {
 			.min_keysize		=	DES3_192_KEY_SIZE,
 			.max_keysize		=	DES3_192_KEY_SIZE,
-			.ivsize			=	DES3_192_BLOCK_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
 			.setkey			=	des3_192_setkey,
 			.encrypt		=	cbc_des3_192_encrypt,
 			.decrypt		=	cbc_des3_192_decrypt,
@@ -553,10 +379,9 @@ static struct crypto_alg cbc_des3_192_alg = {
 
 static int des_s390_init(void)
 {
-	int ret = 0;
+	int ret;
 
 	if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
-	    !crypt_s390_func_available(KM_TDEA_128_ENCRYPT) ||
 	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
 		return -EOPNOTSUPP;
 
@@ -569,17 +394,6 @@ static int des_s390_init(void)
 	ret = crypto_register_alg(&cbc_des_alg);
 	if (ret)
 		goto cbc_des_err;
-
-	ret = crypto_register_alg(&des3_128_alg);
-	if (ret)
-		goto des3_128_err;
-	ret = crypto_register_alg(&ecb_des3_128_alg);
-	if (ret)
-		goto ecb_des3_128_err;
-	ret = crypto_register_alg(&cbc_des3_128_alg);
-	if (ret)
-		goto cbc_des3_128_err;
-
 	ret = crypto_register_alg(&des3_192_alg);
 	if (ret)
 		goto des3_192_err;
@@ -589,7 +403,6 @@ static int des_s390_init(void)
 	ret = crypto_register_alg(&cbc_des3_192_alg);
 	if (ret)
 		goto cbc_des3_192_err;
-
 out:
 	return ret;
 
@@ -598,12 +411,6 @@ cbc_des3_192_err:
 ecb_des3_192_err:
 	crypto_unregister_alg(&des3_192_alg);
 des3_192_err:
-	crypto_unregister_alg(&cbc_des3_128_alg);
-cbc_des3_128_err:
-	crypto_unregister_alg(&ecb_des3_128_alg);
-ecb_des3_128_err:
-	crypto_unregister_alg(&des3_128_alg);
-des3_128_err:
 	crypto_unregister_alg(&cbc_des_alg);
 cbc_des_err:
 	crypto_unregister_alg(&ecb_des_alg);
@@ -613,21 +420,18 @@ des_err:
 	goto out;
 }
 
-static void __exit des_s390_fini(void)
+static void __exit des_s390_exit(void)
 {
 	crypto_unregister_alg(&cbc_des3_192_alg);
 	crypto_unregister_alg(&ecb_des3_192_alg);
 	crypto_unregister_alg(&des3_192_alg);
-	crypto_unregister_alg(&cbc_des3_128_alg);
-	crypto_unregister_alg(&ecb_des3_128_alg);
-	crypto_unregister_alg(&des3_128_alg);
 	crypto_unregister_alg(&cbc_des_alg);
 	crypto_unregister_alg(&ecb_des_alg);
 	crypto_unregister_alg(&des_alg);
 }
 
 module_init(des_s390_init);
-module_exit(des_s390_fini);
+module_exit(des_s390_exit);
 
 MODULE_ALIAS("des");
 MODULE_ALIAS("des3_ede");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 9d9434f08c92..1cd497d7a15a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -80,6 +80,11 @@ config CRYPTO_RNG2
 
 config CRYPTO_PCOMP
 	tristate
+	select CRYPTO_PCOMP2
+	select CRYPTO_ALGAPI
+
+config CRYPTO_PCOMP2
+	tristate
 	select CRYPTO_ALGAPI2
 
 config CRYPTO_MANAGER
@@ -94,7 +99,15 @@ config CRYPTO_MANAGER2
 	select CRYPTO_AEAD2
 	select CRYPTO_HASH2
 	select CRYPTO_BLKCIPHER2
-	select CRYPTO_PCOMP
+	select CRYPTO_PCOMP2
+
+config CRYPTO_MANAGER_TESTS
+	bool "Run algolithms' self-tests"
+	default y
+	depends on CRYPTO_MANAGER2
+	help
+	  Run cryptomanager's tests for the new crypto algorithms being
+	  registered.
 
 config CRYPTO_GF128MUL
 	tristate "GF(2^128) multiplication functions (EXPERIMENTAL)"
diff --git a/crypto/Makefile b/crypto/Makefile
index d7e6441df7fe..423b7de61f93 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -26,7 +26,7 @@ crypto_hash-objs += ahash.o
 crypto_hash-objs += shash.o
 obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
 
-obj-$(CONFIG_CRYPTO_PCOMP) += pcompress.o
+obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
 
 cryptomgr-objs := algboss.o testmgr.o
 
@@ -61,7 +61,7 @@ obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
-obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
+obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
 obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
diff --git a/crypto/algboss.c b/crypto/algboss.c
index c3c196b5823a..40bd391f34d9 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -206,6 +206,7 @@ err:
 	return NOTIFY_OK;
 }
 
+#ifdef CONFIG_CRYPTO_MANAGER_TESTS
 static int cryptomgr_test(void *data)
 {
 	struct crypto_test_param *param = data;
@@ -266,6 +267,7 @@ err_put_module:
 err:
 	return NOTIFY_OK;
 }
+#endif /* CONFIG_CRYPTO_MANAGER_TESTS */
 
 static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
 			    void *data)
@@ -273,8 +275,10 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
 	switch (msg) {
 	case CRYPTO_MSG_ALG_REQUEST:
 		return cryptomgr_schedule_probe(data);
+#ifdef CONFIG_CRYPTO_MANAGER_TESTS
 	case CRYPTO_MSG_ALG_REGISTER:
 		return cryptomgr_schedule_test(data);
+#endif
 	}
 
 	return NOTIFY_DONE;
diff --git a/crypto/authenc.c b/crypto/authenc.c
index b9884ee0adb6..a5a22cfcd07b 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -616,7 +616,7 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
 	auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH,
 			       CRYPTO_ALG_TYPE_AHASH_MASK);
 	if (IS_ERR(auth))
-		return ERR_PTR(PTR_ERR(auth));
+		return ERR_CAST(auth);
 
 	auth_base = &auth->base;
 
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 6c3bfabb9d1d..4ca7222cfeb6 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -185,7 +185,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
 				  CRYPTO_ALG_TYPE_MASK);
 	if (IS_ERR(alg))
-		return ERR_PTR(PTR_ERR(alg));
+		return ERR_CAST(alg);
 
 	/* Block size must be >= 4 bytes. */
 	err = -EINVAL;
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 247178cb98ec..7153a50bce27 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -24,12 +24,40 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/notifier.h>
+#include <linux/kobject.h>
 #include <crypto/pcrypt.h>
 
-static struct padata_instance *pcrypt_enc_padata;
-static struct padata_instance *pcrypt_dec_padata;
-static struct workqueue_struct *encwq;
-static struct workqueue_struct *decwq;
+struct pcrypt_instance {
+	const char *name;
+	struct padata_instance *pinst;
+	struct workqueue_struct *wq;
+
+	/*
+	 * Cpumask for callback CPUs. It should be
+	 * equal to serial cpumask of corresponding padata instance,
+	 * so it is updated when padata notifies us about serial
+	 * cpumask change.
+	 *
+	 * cb_cpumask is protected by RCU. This fact prevents us from
+	 * using cpumask_var_t directly because the actual type of
+	 * cpumsak_var_t depends on kernel configuration(particularly on
+	 * CONFIG_CPUMASK_OFFSTACK macro). Depending on the configuration
+	 * cpumask_var_t may be either a pointer to the struct cpumask
+	 * or a variable allocated on the stack. Thus we can not safely use
+	 * cpumask_var_t with RCU operations such as rcu_assign_pointer or
+	 * rcu_dereference. So cpumask_var_t is wrapped with struct
+	 * pcrypt_cpumask which makes possible to use it with RCU.
+	 */
+	struct pcrypt_cpumask {
+		cpumask_var_t mask;
+	} *cb_cpumask;
+	struct notifier_block nblock;
+};
+
+static struct pcrypt_instance pencrypt;
+static struct pcrypt_instance pdecrypt;
+static struct kset           *pcrypt_kset;
 
 struct pcrypt_instance_ctx {
 	struct crypto_spawn spawn;
@@ -42,25 +70,29 @@ struct pcrypt_aead_ctx {
 };
 
 static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
-			      struct padata_instance *pinst)
+			      struct pcrypt_instance *pcrypt)
 {
 	unsigned int cpu_index, cpu, i;
+	struct pcrypt_cpumask *cpumask;
 
 	cpu = *cb_cpu;
 
-	if (cpumask_test_cpu(cpu, cpu_active_mask))
+	rcu_read_lock_bh();
+	cpumask = rcu_dereference(pcrypt->cb_cpumask);
+	if (cpumask_test_cpu(cpu, cpumask->mask))
 			goto out;
 
-	cpu_index = cpu % cpumask_weight(cpu_active_mask);
+	cpu_index = cpu % cpumask_weight(cpumask->mask);
 
-	cpu = cpumask_first(cpu_active_mask);
+	cpu = cpumask_first(cpumask->mask);
 	for (i = 0; i < cpu_index; i++)
-		cpu = cpumask_next(cpu, cpu_active_mask);
+		cpu = cpumask_next(cpu, cpumask->mask);
 
 	*cb_cpu = cpu;
 
 out:
-	return padata_do_parallel(pinst, padata, cpu);
+	rcu_read_unlock_bh();
+	return padata_do_parallel(pcrypt->pinst, padata, cpu);
 }
 
 static int pcrypt_aead_setkey(struct crypto_aead *parent,
@@ -142,11 +174,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_assoc(creq, req->assoc, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
-	if (err)
-		return err;
-	else
-		err = crypto_aead_encrypt(creq);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
+	if (!err)
+		return -EINPROGRESS;
 
 	return err;
 }
@@ -186,11 +216,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_assoc(creq, req->assoc, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_dec_padata);
-	if (err)
-		return err;
-	else
-		err = crypto_aead_decrypt(creq);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
+	if (!err)
+		return -EINPROGRESS;
 
 	return err;
 }
@@ -232,11 +260,9 @@ static int pcrypt_aead_givencrypt(struct aead_givcrypt_request *req)
 	aead_givcrypt_set_assoc(creq, areq->assoc, areq->assoclen);
 	aead_givcrypt_set_giv(creq, req->giv, req->seq);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
-	if (err)
-		return err;
-	else
-		err = crypto_aead_givencrypt(creq);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
+	if (!err)
+		return -EINPROGRESS;
 
 	return err;
 }
@@ -376,6 +402,108 @@ static void pcrypt_free(struct crypto_instance *inst)
 	kfree(inst);
 }
 
+static int pcrypt_cpumask_change_notify(struct notifier_block *self,
+					unsigned long val, void *data)
+{
+	struct pcrypt_instance *pcrypt;
+	struct pcrypt_cpumask *new_mask, *old_mask;
+
+	if (!(val & PADATA_CPU_SERIAL))
+		return 0;
+
+	pcrypt = container_of(self, struct pcrypt_instance, nblock);
+	new_mask = kmalloc(sizeof(*new_mask), GFP_KERNEL);
+	if (!new_mask)
+		return -ENOMEM;
+	if (!alloc_cpumask_var(&new_mask->mask, GFP_KERNEL)) {
+		kfree(new_mask);
+		return -ENOMEM;
+	}
+
+	old_mask = pcrypt->cb_cpumask;
+
+	padata_get_cpumask(pcrypt->pinst, PADATA_CPU_SERIAL, new_mask->mask);
+	rcu_assign_pointer(pcrypt->cb_cpumask, new_mask);
+	synchronize_rcu_bh();
+
+	free_cpumask_var(old_mask->mask);
+	kfree(old_mask);
+	return 0;
+}
+
+static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
+{
+	int ret;
+
+	pinst->kobj.kset = pcrypt_kset;
+	ret = kobject_add(&pinst->kobj, NULL, name);
+	if (!ret)
+		kobject_uevent(&pinst->kobj, KOBJ_ADD);
+
+	return ret;
+}
+
+static int __pcrypt_init_instance(struct pcrypt_instance *pcrypt,
+				  const char *name)
+{
+	int ret = -ENOMEM;
+	struct pcrypt_cpumask *mask;
+
+	pcrypt->name = name;
+	pcrypt->wq = create_workqueue(name);
+	if (!pcrypt->wq)
+		goto err;
+
+	pcrypt->pinst = padata_alloc(pcrypt->wq);
+	if (!pcrypt->pinst)
+		goto err_destroy_workqueue;
+
+	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+	if (!mask)
+		goto err_free_padata;
+	if (!alloc_cpumask_var(&mask->mask, GFP_KERNEL)) {
+		kfree(mask);
+		goto err_free_padata;
+	}
+
+	padata_get_cpumask(pcrypt->pinst, PADATA_CPU_SERIAL, mask->mask);
+	rcu_assign_pointer(pcrypt->cb_cpumask, mask);
+
+	pcrypt->nblock.notifier_call = pcrypt_cpumask_change_notify;
+	ret = padata_register_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+	if (ret)
+		goto err_free_cpumask;
+
+	ret = pcrypt_sysfs_add(pcrypt->pinst, name);
+	if (ret)
+		goto err_unregister_notifier;
+
+	return ret;
+err_unregister_notifier:
+	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+err_free_cpumask:
+	free_cpumask_var(mask->mask);
+	kfree(mask);
+err_free_padata:
+	padata_free(pcrypt->pinst);
+err_destroy_workqueue:
+	destroy_workqueue(pcrypt->wq);
+err:
+	return ret;
+}
+
+static void __pcrypt_deinit_instance(struct pcrypt_instance *pcrypt)
+{
+	kobject_put(&pcrypt->pinst->kobj);
+	free_cpumask_var(pcrypt->cb_cpumask->mask);
+	kfree(pcrypt->cb_cpumask);
+
+	padata_stop(pcrypt->pinst);
+	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+	destroy_workqueue(pcrypt->wq);
+	padata_free(pcrypt->pinst);
+}
+
 static struct crypto_template pcrypt_tmpl = {
 	.name = "pcrypt",
 	.alloc = pcrypt_alloc,
@@ -385,52 +513,39 @@ static struct crypto_template pcrypt_tmpl = {
 
 static int __init pcrypt_init(void)
 {
-	encwq = create_workqueue("pencrypt");
-	if (!encwq)
-		goto err;
-
-	decwq = create_workqueue("pdecrypt");
-	if (!decwq)
-		goto err_destroy_encwq;
+	int err = -ENOMEM;
 
+	pcrypt_kset = kset_create_and_add("pcrypt", NULL, kernel_kobj);
+	if (!pcrypt_kset)
+		goto err;
 
-	pcrypt_enc_padata = padata_alloc(cpu_possible_mask, encwq);
-	if (!pcrypt_enc_padata)
-		goto err_destroy_decwq;
+	err = __pcrypt_init_instance(&pencrypt, "pencrypt");
+	if (err)
+		goto err_unreg_kset;
 
-	pcrypt_dec_padata = padata_alloc(cpu_possible_mask, decwq);
-	if (!pcrypt_dec_padata)
-		goto err_free_padata;
+	err = __pcrypt_init_instance(&pdecrypt, "pdecrypt");
+	if (err)
+		goto err_deinit_pencrypt;
 
-	padata_start(pcrypt_enc_padata);
-	padata_start(pcrypt_dec_padata);
+	padata_start(pencrypt.pinst);
+	padata_start(pdecrypt.pinst);
 
 	return crypto_register_template(&pcrypt_tmpl);
 
-err_free_padata:
-	padata_free(pcrypt_enc_padata);
-
-err_destroy_decwq:
-	destroy_workqueue(decwq);
-
-err_destroy_encwq:
-	destroy_workqueue(encwq);
-
+err_deinit_pencrypt:
+	__pcrypt_deinit_instance(&pencrypt);
+err_unreg_kset:
+	kset_unregister(pcrypt_kset);
 err:
-	return -ENOMEM;
+	return err;
 }
 
 static void __exit pcrypt_exit(void)
 {
-	padata_stop(pcrypt_enc_padata);
-	padata_stop(pcrypt_dec_padata);
-
-	destroy_workqueue(encwq);
-	destroy_workqueue(decwq);
-
-	padata_free(pcrypt_enc_padata);
-	padata_free(pcrypt_dec_padata);
+	__pcrypt_deinit_instance(&pencrypt);
+	__pcrypt_deinit_instance(&pdecrypt);
 
+	kset_unregister(pcrypt_kset);
 	crypto_unregister_template(&pcrypt_tmpl);
 }
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5c8aaa0cb0b9..abd980c729eb 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -22,6 +22,17 @@
 #include <crypto/rng.h>
 
 #include "internal.h"
+
+#ifndef CONFIG_CRYPTO_MANAGER_TESTS
+
+/* a perfect nop */
+int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
+{
+	return 0;
+}
+
+#else
+
 #include "testmgr.h"
 
 /*
@@ -2530,4 +2541,7 @@ notest:
 non_fips_alg:
 	return -EINVAL;
 }
+
+#endif /* CONFIG_CRYPTO_MANAGER_TESTS */
+
 EXPORT_SYMBOL_GPL(alg_test);
diff --git a/crypto/twofish.c b/crypto/twofish_generic.c
index dfcda231f87a..1f07b843e07c 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish_generic.c
@@ -212,3 +212,4 @@ module_exit(twofish_mod_fini);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION ("Twofish Cipher Algorithm");
+MODULE_ALIAS("twofish");
diff --git a/crypto/xts.c b/crypto/xts.c
index d87b0f3102c3..555ecaab1e54 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -224,7 +224,7 @@ static struct crypto_instance *alloc(struct rtattr **tb)
 	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
 				  CRYPTO_ALG_TYPE_MASK);
 	if (IS_ERR(alg))
-		return ERR_PTR(PTR_ERR(alg));
+		return ERR_CAST(alg);
 
 	inst = crypto_alloc_instance("xts", alg);
 	if (IS_ERR(inst))
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 0f9cbf1aaf15..101d5f235547 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -387,7 +387,7 @@ static int n2rng_init_control(struct n2rng *np)
 
 static int n2rng_data_read(struct hwrng *rng, u32 *data)
 {
-	struct n2rng *np = (struct n2rng *) rng->priv;
+	struct n2rng *np = rng->priv;
 	unsigned long ra = __pa(&np->test_data);
 	int len;
 
diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index 09389dd2f96b..219d09cbb0d1 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
@@ -573,7 +573,7 @@ geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 }
 
 static struct pci_device_id geode_aes_tbl[] = {
-	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES, PCI_ANY_ID, PCI_ANY_ID} ,
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), } ,
 	{ 0, }
 };
 
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 16fce3aadf4d..e449ac5627a5 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2018,7 +2018,6 @@ static void hifn_flush(struct hifn_device *dev)
 {
 	unsigned long flags;
 	struct crypto_async_request *async_req;
-	struct hifn_context *ctx;
 	struct ablkcipher_request *req;
 	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
 	int i;
@@ -2035,7 +2034,6 @@ static void hifn_flush(struct hifn_device *dev)
 
 	spin_lock_irqsave(&dev->lock, flags);
 	while ((async_req = crypto_dequeue_request(&dev->queue))) {
-		ctx = crypto_tfm_ctx(async_req->tfm);
 		req = container_of(async_req, struct ablkcipher_request, base);
 		spin_unlock_irqrestore(&dev->lock, flags);
 
@@ -2139,7 +2137,6 @@ static int hifn_setup_crypto_req(struct ablkcipher_request *req, u8 op,
 static int hifn_process_queue(struct hifn_device *dev)
 {
 	struct crypto_async_request *async_req, *backlog;
-	struct hifn_context *ctx;
 	struct ablkcipher_request *req;
 	unsigned long flags;
 	int err = 0;
@@ -2156,7 +2153,6 @@ static int hifn_process_queue(struct hifn_device *dev)
 		if (backlog)
 			backlog->complete(backlog, -EINPROGRESS);
 
-		ctx = crypto_tfm_ctx(async_req->tfm);
 		req = container_of(async_req, struct ablkcipher_request, base);
 
 		err = hifn_handle_req(req);
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index e095422b58dd..7d279e578df5 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -1055,20 +1055,20 @@ static int mv_probe(struct platform_device *pdev)
 	cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto");
 	if (IS_ERR(cp->queue_th)) {
 		ret = PTR_ERR(cp->queue_th);
-		goto err_thread;
+		goto err_unmap_sram;
 	}
 
 	ret = request_irq(irq, crypto_int, IRQF_DISABLED, dev_name(&pdev->dev),
 			cp);
 	if (ret)
-		goto err_unmap_sram;
+		goto err_thread;
 
 	writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
 	writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
 
 	ret = crypto_register_alg(&mv_aes_alg_ecb);
 	if (ret)
-		goto err_reg;
+		goto err_irq;
 
 	ret = crypto_register_alg(&mv_aes_alg_cbc);
 	if (ret)
@@ -1091,9 +1091,9 @@ static int mv_probe(struct platform_device *pdev)
 	return 0;
 err_unreg_ecb:
 	crypto_unregister_alg(&mv_aes_alg_ecb);
-err_thread:
+err_irq:
 	free_irq(irq, cp);
-err_reg:
+err_thread:
 	kthread_stop(cp->queue_th);
 err_unmap_sram:
 	iounmap(cp->sram);
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 23163fda5035..b99c38f23d61 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -239,21 +239,57 @@ static inline bool n2_should_run_async(struct spu_queue *qp, int this_len)
 }
 #endif
 
-struct n2_base_ctx {
-	struct list_head		list;
+struct n2_ahash_alg {
+	struct list_head	entry;
+	const char		*hash_zero;
+	const u32		*hash_init;
+	u8			hw_op_hashsz;
+	u8			digest_size;
+	u8			auth_type;
+	u8			hmac_type;
+	struct ahash_alg	alg;
 };
 
-static void n2_base_ctx_init(struct n2_base_ctx *ctx)
+static inline struct n2_ahash_alg *n2_ahash_alg(struct crypto_tfm *tfm)
 {
-	INIT_LIST_HEAD(&ctx->list);
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct ahash_alg *ahash_alg;
+
+	ahash_alg = container_of(alg, struct ahash_alg, halg.base);
+
+	return container_of(ahash_alg, struct n2_ahash_alg, alg);
 }
 
-struct n2_hash_ctx {
-	struct n2_base_ctx		base;
+struct n2_hmac_alg {
+	const char		*child_alg;
+	struct n2_ahash_alg	derived;
+};
+
+static inline struct n2_hmac_alg *n2_hmac_alg(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct ahash_alg *ahash_alg;
+
+	ahash_alg = container_of(alg, struct ahash_alg, halg.base);
+
+	return container_of(ahash_alg, struct n2_hmac_alg, derived.alg);
+}
 
+struct n2_hash_ctx {
 	struct crypto_ahash		*fallback_tfm;
 };
 
+#define N2_HASH_KEY_MAX			32 /* HW limit for all HMAC requests */
+
+struct n2_hmac_ctx {
+	struct n2_hash_ctx		base;
+
+	struct crypto_shash		*child_shash;
+
+	int				hash_key_len;
+	unsigned char			hash_key[N2_HASH_KEY_MAX];
+};
+
 struct n2_hash_req_ctx {
 	union {
 		struct md5_state	md5;
@@ -261,9 +297,6 @@ struct n2_hash_req_ctx {
 		struct sha256_state	sha256;
 	} u;
 
-	unsigned char			hash_key[64];
-	unsigned char			keyed_zero_hash[32];
-
 	struct ahash_request		fallback_req;
 };
 
@@ -356,6 +389,94 @@ static void n2_hash_cra_exit(struct crypto_tfm *tfm)
 	crypto_free_ahash(ctx->fallback_tfm);
 }
 
+static int n2_hmac_cra_init(struct crypto_tfm *tfm)
+{
+	const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct n2_hmac_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct n2_hmac_alg *n2alg = n2_hmac_alg(tfm);
+	struct crypto_ahash *fallback_tfm;
+	struct crypto_shash *child_shash;
+	int err;
+
+	fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
+					  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
+		pr_warning("Fallback driver '%s' could not be loaded!\n",
+			   fallback_driver_name);
+		err = PTR_ERR(fallback_tfm);
+		goto out;
+	}
+
+	child_shash = crypto_alloc_shash(n2alg->child_alg, 0, 0);
+	if (IS_ERR(child_shash)) {
+		pr_warning("Child shash '%s' could not be loaded!\n",
+			   n2alg->child_alg);
+		err = PTR_ERR(child_shash);
+		goto out_free_fallback;
+	}
+
+	crypto_ahash_set_reqsize(ahash, (sizeof(struct n2_hash_req_ctx) +
+					 crypto_ahash_reqsize(fallback_tfm)));
+
+	ctx->child_shash = child_shash;
+	ctx->base.fallback_tfm = fallback_tfm;
+	return 0;
+
+out_free_fallback:
+	crypto_free_ahash(fallback_tfm);
+
+out:
+	return err;
+}
+
+static void n2_hmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct n2_hmac_ctx *ctx = crypto_ahash_ctx(ahash);
+
+	crypto_free_ahash(ctx->base.fallback_tfm);
+	crypto_free_shash(ctx->child_shash);
+}
+
+static int n2_hmac_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+				unsigned int keylen)
+{
+	struct n2_hmac_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct crypto_shash *child_shash = ctx->child_shash;
+	struct crypto_ahash *fallback_tfm;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(child_shash)];
+	} desc;
+	int err, bs, ds;
+
+	fallback_tfm = ctx->base.fallback_tfm;
+	err = crypto_ahash_setkey(fallback_tfm, key, keylen);
+	if (err)
+		return err;
+
+	desc.shash.tfm = child_shash;
+	desc.shash.flags = crypto_ahash_get_flags(tfm) &
+		CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	bs = crypto_shash_blocksize(child_shash);
+	ds = crypto_shash_digestsize(child_shash);
+	BUG_ON(ds > N2_HASH_KEY_MAX);
+	if (keylen > bs) {
+		err = crypto_shash_digest(&desc.shash, key, keylen,
+					  ctx->hash_key);
+		if (err)
+			return err;
+		keylen = ds;
+	} else if (keylen <= N2_HASH_KEY_MAX)
+		memcpy(ctx->hash_key, key, keylen);
+
+	ctx->hash_key_len = keylen;
+
+	return err;
+}
+
 static unsigned long wait_for_tail(struct spu_queue *qp)
 {
 	unsigned long head, hv_ret;
@@ -385,12 +506,12 @@ static unsigned long submit_and_wait_for_tail(struct spu_queue *qp,
 	return hv_ret;
 }
 
-static int n2_hash_async_digest(struct ahash_request *req,
-				unsigned int auth_type, unsigned int digest_size,
-				unsigned int result_size, void *hash_loc)
+static int n2_do_async_digest(struct ahash_request *req,
+			      unsigned int auth_type, unsigned int digest_size,
+			      unsigned int result_size, void *hash_loc,
+			      unsigned long auth_key, unsigned int auth_key_len)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct cwq_initial_entry *ent;
 	struct crypto_hash_walk walk;
 	struct spu_queue *qp;
@@ -403,6 +524,7 @@ static int n2_hash_async_digest(struct ahash_request *req,
 	 */
 	if (unlikely(req->nbytes > (1 << 16))) {
 		struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+		struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 
 		ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
 		rctx->fallback_req.base.flags =
@@ -414,8 +536,6 @@ static int n2_hash_async_digest(struct ahash_request *req,
 		return crypto_ahash_digest(&rctx->fallback_req);
 	}
 
-	n2_base_ctx_init(&ctx->base);
-
 	nbytes = crypto_hash_walk_first(req, &walk);
 
 	cpu = get_cpu();
@@ -430,13 +550,13 @@ static int n2_hash_async_digest(struct ahash_request *req,
 	 */
 	ent = qp->q + qp->tail;
 
-	ent->control = control_word_base(nbytes, 0, 0,
+	ent->control = control_word_base(nbytes, auth_key_len, 0,
 					 auth_type, digest_size,
 					 false, true, false, false,
 					 OPCODE_INPLACE_BIT |
 					 OPCODE_AUTH_MAC);
 	ent->src_addr = __pa(walk.data);
-	ent->auth_key_addr = 0UL;
+	ent->auth_key_addr = auth_key;
 	ent->auth_iv_addr = __pa(hash_loc);
 	ent->final_auth_state_addr = 0UL;
 	ent->enc_key_addr = 0UL;
@@ -475,114 +595,55 @@ out:
 	return err;
 }
 
-static int n2_md5_async_digest(struct ahash_request *req)
+static int n2_hash_async_digest(struct ahash_request *req)
 {
+	struct n2_ahash_alg *n2alg = n2_ahash_alg(req->base.tfm);
 	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
-	struct md5_state *m = &rctx->u.md5;
+	int ds;
 
+	ds = n2alg->digest_size;
 	if (unlikely(req->nbytes == 0)) {
-		static const char md5_zero[MD5_DIGEST_SIZE] = {
-			0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
-			0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
-		};
-
-		memcpy(req->result, md5_zero, MD5_DIGEST_SIZE);
+		memcpy(req->result, n2alg->hash_zero, ds);
 		return 0;
 	}
-	m->hash[0] = cpu_to_le32(0x67452301);
-	m->hash[1] = cpu_to_le32(0xefcdab89);
-	m->hash[2] = cpu_to_le32(0x98badcfe);
-	m->hash[3] = cpu_to_le32(0x10325476);
+	memcpy(&rctx->u, n2alg->hash_init, n2alg->hw_op_hashsz);
 
-	return n2_hash_async_digest(req, AUTH_TYPE_MD5,
-				    MD5_DIGEST_SIZE, MD5_DIGEST_SIZE,
-				    m->hash);
+	return n2_do_async_digest(req, n2alg->auth_type,
+				  n2alg->hw_op_hashsz, ds,
+				  &rctx->u, 0UL, 0);
 }
 
-static int n2_sha1_async_digest(struct ahash_request *req)
+static int n2_hmac_async_digest(struct ahash_request *req)
 {
+	struct n2_hmac_alg *n2alg = n2_hmac_alg(req->base.tfm);
 	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
-	struct sha1_state *s = &rctx->u.sha1;
-
-	if (unlikely(req->nbytes == 0)) {
-		static const char sha1_zero[SHA1_DIGEST_SIZE] = {
-			0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
-			0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
-			0x07, 0x09
-		};
-
-		memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE);
-		return 0;
-	}
-	s->state[0] = SHA1_H0;
-	s->state[1] = SHA1_H1;
-	s->state[2] = SHA1_H2;
-	s->state[3] = SHA1_H3;
-	s->state[4] = SHA1_H4;
-
-	return n2_hash_async_digest(req, AUTH_TYPE_SHA1,
-				    SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE,
-				    s->state);
-}
-
-static int n2_sha256_async_digest(struct ahash_request *req)
-{
-	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
-	struct sha256_state *s = &rctx->u.sha256;
-
-	if (req->nbytes == 0) {
-		static const char sha256_zero[SHA256_DIGEST_SIZE] = {
-			0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
-			0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
-			0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
-			0x1b, 0x78, 0x52, 0xb8, 0x55
-		};
-
-		memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE);
-		return 0;
-	}
-	s->state[0] = SHA256_H0;
-	s->state[1] = SHA256_H1;
-	s->state[2] = SHA256_H2;
-	s->state[3] = SHA256_H3;
-	s->state[4] = SHA256_H4;
-	s->state[5] = SHA256_H5;
-	s->state[6] = SHA256_H6;
-	s->state[7] = SHA256_H7;
-
-	return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
-				    SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE,
-				    s->state);
-}
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hmac_ctx *ctx = crypto_ahash_ctx(tfm);
+	int ds;
 
-static int n2_sha224_async_digest(struct ahash_request *req)
-{
-	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
-	struct sha256_state *s = &rctx->u.sha256;
+	ds = n2alg->derived.digest_size;
+	if (unlikely(req->nbytes == 0) ||
+	    unlikely(ctx->hash_key_len > N2_HASH_KEY_MAX)) {
+		struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+		struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 
-	if (req->nbytes == 0) {
-		static const char sha224_zero[SHA224_DIGEST_SIZE] = {
-			0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
-			0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
-			0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
-			0x2f
-		};
+		ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+		rctx->fallback_req.base.flags =
+			req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+		rctx->fallback_req.nbytes = req->nbytes;
+		rctx->fallback_req.src = req->src;
+		rctx->fallback_req.result = req->result;
 
-		memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE);
-		return 0;
+		return crypto_ahash_digest(&rctx->fallback_req);
 	}
-	s->state[0] = SHA224_H0;
-	s->state[1] = SHA224_H1;
-	s->state[2] = SHA224_H2;
-	s->state[3] = SHA224_H3;
-	s->state[4] = SHA224_H4;
-	s->state[5] = SHA224_H5;
-	s->state[6] = SHA224_H6;
-	s->state[7] = SHA224_H7;
+	memcpy(&rctx->u, n2alg->derived.hash_init,
+	       n2alg->derived.hw_op_hashsz);
 
-	return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
-				    SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE,
-				    s->state);
+	return n2_do_async_digest(req, n2alg->derived.hmac_type,
+				  n2alg->derived.hw_op_hashsz, ds,
+				  &rctx->u,
+				  __pa(&ctx->hash_key),
+				  ctx->hash_key_len);
 }
 
 struct n2_cipher_context {
@@ -1209,35 +1270,92 @@ static LIST_HEAD(cipher_algs);
 
 struct n2_hash_tmpl {
 	const char	*name;
-	int		(*digest)(struct ahash_request *req);
+	const char	*hash_zero;
+	const u32	*hash_init;
+	u8		hw_op_hashsz;
 	u8		digest_size;
 	u8		block_size;
+	u8		auth_type;
+	u8		hmac_type;
+};
+
+static const char md5_zero[MD5_DIGEST_SIZE] = {
+	0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
+	0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
+};
+static const u32 md5_init[MD5_HASH_WORDS] = {
+	cpu_to_le32(0x67452301),
+	cpu_to_le32(0xefcdab89),
+	cpu_to_le32(0x98badcfe),
+	cpu_to_le32(0x10325476),
+};
+static const char sha1_zero[SHA1_DIGEST_SIZE] = {
+	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
+	0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
+	0x07, 0x09
 };
+static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
+	SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
+};
+static const char sha256_zero[SHA256_DIGEST_SIZE] = {
+	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
+	0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
+	0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
+	0x1b, 0x78, 0x52, 0xb8, 0x55
+};
+static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
+	SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+	SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+};
+static const char sha224_zero[SHA224_DIGEST_SIZE] = {
+	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
+	0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
+	0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
+	0x2f
+};
+static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
+	SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+	SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+};
+
 static const struct n2_hash_tmpl hash_tmpls[] = {
 	{ .name		= "md5",
-	  .digest	= n2_md5_async_digest,
+	  .hash_zero	= md5_zero,
+	  .hash_init	= md5_init,
+	  .auth_type	= AUTH_TYPE_MD5,
+	  .hmac_type	= AUTH_TYPE_HMAC_MD5,
+	  .hw_op_hashsz	= MD5_DIGEST_SIZE,
 	  .digest_size	= MD5_DIGEST_SIZE,
 	  .block_size	= MD5_HMAC_BLOCK_SIZE },
 	{ .name		= "sha1",
-	  .digest	= n2_sha1_async_digest,
+	  .hash_zero	= sha1_zero,
+	  .hash_init	= sha1_init,
+	  .auth_type	= AUTH_TYPE_SHA1,
+	  .hmac_type	= AUTH_TYPE_HMAC_SHA1,
+	  .hw_op_hashsz	= SHA1_DIGEST_SIZE,
 	  .digest_size	= SHA1_DIGEST_SIZE,
 	  .block_size	= SHA1_BLOCK_SIZE },
 	{ .name		= "sha256",
-	  .digest	= n2_sha256_async_digest,
+	  .hash_zero	= sha256_zero,
+	  .hash_init	= sha256_init,
+	  .auth_type	= AUTH_TYPE_SHA256,
+	  .hmac_type	= AUTH_TYPE_HMAC_SHA256,
+	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
 	  .digest_size	= SHA256_DIGEST_SIZE,
 	  .block_size	= SHA256_BLOCK_SIZE },
 	{ .name		= "sha224",
-	  .digest	= n2_sha224_async_digest,
+	  .hash_zero	= sha224_zero,
+	  .hash_init	= sha224_init,
+	  .auth_type	= AUTH_TYPE_SHA256,
+	  .hmac_type	= AUTH_TYPE_RESERVED,
+	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
 	  .digest_size	= SHA224_DIGEST_SIZE,
 	  .block_size	= SHA224_BLOCK_SIZE },
 };
 #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
 
-struct n2_ahash_alg {
-	struct list_head	entry;
-	struct ahash_alg	alg;
-};
 static LIST_HEAD(ahash_algs);
+static LIST_HEAD(hmac_algs);
 
 static int algs_registered;
 
@@ -1245,12 +1363,18 @@ static void __n2_unregister_algs(void)
 {
 	struct n2_cipher_alg *cipher, *cipher_tmp;
 	struct n2_ahash_alg *alg, *alg_tmp;
+	struct n2_hmac_alg *hmac, *hmac_tmp;
 
 	list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) {
 		crypto_unregister_alg(&cipher->alg);
 		list_del(&cipher->entry);
 		kfree(cipher);
 	}
+	list_for_each_entry_safe(hmac, hmac_tmp, &hmac_algs, derived.entry) {
+		crypto_unregister_ahash(&hmac->derived.alg);
+		list_del(&hmac->derived.entry);
+		kfree(hmac);
+	}
 	list_for_each_entry_safe(alg, alg_tmp, &ahash_algs, entry) {
 		crypto_unregister_ahash(&alg->alg);
 		list_del(&alg->entry);
@@ -1290,8 +1414,49 @@ static int __devinit __n2_register_one_cipher(const struct n2_cipher_tmpl *tmpl)
 	list_add(&p->entry, &cipher_algs);
 	err = crypto_register_alg(alg);
 	if (err) {
+		pr_err("%s alg registration failed\n", alg->cra_name);
 		list_del(&p->entry);
 		kfree(p);
+	} else {
+		pr_info("%s alg registered\n", alg->cra_name);
+	}
+	return err;
+}
+
+static int __devinit __n2_register_one_hmac(struct n2_ahash_alg *n2ahash)
+{
+	struct n2_hmac_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	struct ahash_alg *ahash;
+	struct crypto_alg *base;
+	int err;
+
+	if (!p)
+		return -ENOMEM;
+
+	p->child_alg = n2ahash->alg.halg.base.cra_name;
+	memcpy(&p->derived, n2ahash, sizeof(struct n2_ahash_alg));
+	INIT_LIST_HEAD(&p->derived.entry);
+
+	ahash = &p->derived.alg;
+	ahash->digest = n2_hmac_async_digest;
+	ahash->setkey = n2_hmac_async_setkey;
+
+	base = &ahash->halg.base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", p->child_alg);
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s-n2", p->child_alg);
+
+	base->cra_ctxsize = sizeof(struct n2_hmac_ctx);
+	base->cra_init = n2_hmac_cra_init;
+	base->cra_exit = n2_hmac_cra_exit;
+
+	list_add(&p->derived.entry, &hmac_algs);
+	err = crypto_register_ahash(ahash);
+	if (err) {
+		pr_err("%s alg registration failed\n", base->cra_name);
+		list_del(&p->derived.entry);
+		kfree(p);
+	} else {
+		pr_info("%s alg registered\n", base->cra_name);
 	}
 	return err;
 }
@@ -1307,12 +1472,19 @@ static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
 	if (!p)
 		return -ENOMEM;
 
+	p->hash_zero = tmpl->hash_zero;
+	p->hash_init = tmpl->hash_init;
+	p->auth_type = tmpl->auth_type;
+	p->hmac_type = tmpl->hmac_type;
+	p->hw_op_hashsz = tmpl->hw_op_hashsz;
+	p->digest_size = tmpl->digest_size;
+
 	ahash = &p->alg;
 	ahash->init = n2_hash_async_init;
 	ahash->update = n2_hash_async_update;
 	ahash->final = n2_hash_async_final;
 	ahash->finup = n2_hash_async_finup;
-	ahash->digest = tmpl->digest;
+	ahash->digest = n2_hash_async_digest;
 
 	halg = &ahash->halg;
 	halg->digestsize = tmpl->digest_size;
@@ -1331,9 +1503,14 @@ static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
 	list_add(&p->entry, &ahash_algs);
 	err = crypto_register_ahash(ahash);
 	if (err) {
+		pr_err("%s alg registration failed\n", base->cra_name);
 		list_del(&p->entry);
 		kfree(p);
+	} else {
+		pr_info("%s alg registered\n", base->cra_name);
 	}
+	if (!err && p->hmac_type != AUTH_TYPE_RESERVED)
+		err = __n2_register_one_hmac(p);
 	return err;
 }
 
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 8b034337793f..7d1485676886 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -15,7 +15,6 @@
 
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#include <linux/version.h>
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/module.h>
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index bd78acf3c365..97f4af1d8a64 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -720,7 +720,6 @@ struct talitos_ctx {
 #define TALITOS_MDEU_MAX_CONTEXT_SIZE	TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512
 
 struct talitos_ahash_req_ctx {
-	u64 count;
 	u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
 	unsigned int hw_context_size;
 	u8 buf[HASH_MAX_BLOCK_SIZE];
@@ -729,6 +728,7 @@ struct talitos_ahash_req_ctx {
 	unsigned int first;
 	unsigned int last;
 	unsigned int to_hash_later;
+	u64 nbuf;
 	struct scatterlist bufsl[2];
 	struct scatterlist *psrc;
 };
@@ -1613,6 +1613,7 @@ static void ahash_done(struct device *dev,
 	if (!req_ctx->last && req_ctx->to_hash_later) {
 		/* Position any partial block for next update/final/finup */
 		memcpy(req_ctx->buf, req_ctx->bufnext, req_ctx->to_hash_later);
+		req_ctx->nbuf = req_ctx->to_hash_later;
 	}
 	common_nonsnoop_hash_unmap(dev, edesc, areq);
 
@@ -1728,7 +1729,7 @@ static int ahash_init(struct ahash_request *areq)
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
 
 	/* Initialize the context */
-	req_ctx->count = 0;
+	req_ctx->nbuf = 0;
 	req_ctx->first = 1; /* first indicates h/w must init its context */
 	req_ctx->swinit = 0; /* assume h/w init of context */
 	req_ctx->hw_context_size =
@@ -1776,52 +1777,54 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 			crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
 	unsigned int nbytes_to_hash;
 	unsigned int to_hash_later;
-	unsigned int index;
+	unsigned int nsg;
 	int chained;
 
-	index = req_ctx->count & (blocksize - 1);
-	req_ctx->count += nbytes;
-
-	if (!req_ctx->last && (index + nbytes) < blocksize) {
-		/* Buffer the partial block */
+	if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
+		/* Buffer up to one whole block */
 		sg_copy_to_buffer(areq->src,
 				  sg_count(areq->src, nbytes, &chained),
-				  req_ctx->buf + index, nbytes);
+				  req_ctx->buf + req_ctx->nbuf, nbytes);
+		req_ctx->nbuf += nbytes;
 		return 0;
 	}
 
-	if (index) {
-		/* partial block from previous update; chain it in. */
-		sg_init_table(req_ctx->bufsl, (nbytes) ? 2 : 1);
-		sg_set_buf(req_ctx->bufsl, req_ctx->buf, index);
-		if (nbytes)
-			scatterwalk_sg_chain(req_ctx->bufsl, 2,
-					     areq->src);
+	/* At least (blocksize + 1) bytes are available to hash */
+	nbytes_to_hash = nbytes + req_ctx->nbuf;
+	to_hash_later = nbytes_to_hash & (blocksize - 1);
+
+	if (req_ctx->last)
+		to_hash_later = 0;
+	else if (to_hash_later)
+		/* There is a partial block. Hash the full block(s) now */
+		nbytes_to_hash -= to_hash_later;
+	else {
+		/* Keep one block buffered */
+		nbytes_to_hash -= blocksize;
+		to_hash_later = blocksize;
+	}
+
+	/* Chain in any previously buffered data */
+	if (req_ctx->nbuf) {
+		nsg = (req_ctx->nbuf < nbytes_to_hash) ? 2 : 1;
+		sg_init_table(req_ctx->bufsl, nsg);
+		sg_set_buf(req_ctx->bufsl, req_ctx->buf, req_ctx->nbuf);
+		if (nsg > 1)
+			scatterwalk_sg_chain(req_ctx->bufsl, 2, areq->src);
 		req_ctx->psrc = req_ctx->bufsl;
-	} else {
+	} else
 		req_ctx->psrc = areq->src;
+
+	if (to_hash_later) {
+		int nents = sg_count(areq->src, nbytes, &chained);
+		sg_copy_end_to_buffer(areq->src, nents,
+				      req_ctx->bufnext,
+				      to_hash_later,
+				      nbytes - to_hash_later);
 	}
-	nbytes_to_hash =  index + nbytes;
-	if (!req_ctx->last) {
-		to_hash_later = (nbytes_to_hash & (blocksize - 1));
-		if (to_hash_later) {
-			int nents;
-			/* Must copy to_hash_later bytes from the end
-			 * to bufnext (a partial block) for later.
-			 */
-			nents = sg_count(areq->src, nbytes, &chained);
-			sg_copy_end_to_buffer(areq->src, nents,
-					      req_ctx->bufnext,
-					      to_hash_later,
-					      nbytes - to_hash_later);
-
-			/* Adjust count for what will be hashed now */
-			nbytes_to_hash -= to_hash_later;
-		}
-		req_ctx->to_hash_later = to_hash_later;
-	}
+	req_ctx->to_hash_later = to_hash_later;
 
-	/* allocate extended descriptor */
+	/* Allocate extended descriptor */
 	edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8d8406246eef..293ad46ffced 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -25,6 +25,11 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/kobject.h>
+
+#define PADATA_CPU_SERIAL   0x01
+#define PADATA_CPU_PARALLEL 0x02
 
 /**
  * struct padata_priv -  Embedded to the users data structure.
@@ -59,7 +64,20 @@ struct padata_list {
 };
 
 /**
- * struct padata_queue - The percpu padata queues.
+* struct padata_serial_queue - The percpu padata serial queue
+*
+* @serial: List to wait for serialization after reordering.
+* @work: work struct for serialization.
+* @pd: Backpointer to the internal control structure.
+*/
+struct padata_serial_queue {
+       struct padata_list    serial;
+       struct work_struct    work;
+       struct parallel_data *pd;
+};
+
+/**
+ * struct padata_parallel_queue - The percpu padata parallel queue
  *
  * @parallel: List to wait for parallelization.
  * @reorder: List to wait for reordering after parallel processing.
@@ -67,44 +85,52 @@ struct padata_list {
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
  * @pd: Backpointer to the internal control structure.
+ * @work: work struct for parallelization.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
-struct padata_queue {
-	struct padata_list	parallel;
-	struct padata_list	reorder;
-	struct padata_list	serial;
-	struct work_struct	pwork;
-	struct work_struct	swork;
-	struct parallel_data    *pd;
-	atomic_t		num_obj;
-	int			cpu_index;
+struct padata_parallel_queue {
+       struct padata_list    parallel;
+       struct padata_list    reorder;
+       struct parallel_data *pd;
+       struct work_struct    work;
+       atomic_t              num_obj;
+       int                   cpu_index;
 };
 
+
 /**
  * struct parallel_data - Internal control structure, covers everything
  * that depends on the cpumask in use.
  *
  * @pinst: padata instance.
- * @queue: percpu padata queues.
+ * @pqueue: percpu padata queues used for parallelization.
+ * @squeue: percpu padata queues used for serialuzation.
  * @seq_nr: The sequence number that will be attached to the next object.
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
- * @cpumask: cpumask in use.
+ * @cpumask: Contains two cpumasks: pcpu and cbcpu for
+ *           parallel and serial workers respectively.
  * @lock: Reorder lock.
+ * @processed: Number of already processed objects.
  * @timer: Reorder timer.
  */
 struct parallel_data {
-	struct padata_instance	*pinst;
-	struct padata_queue	*queue;
-	atomic_t		seq_nr;
-	atomic_t		reorder_objects;
-	atomic_t                refcnt;
-	unsigned int		max_seq_nr;
-	cpumask_var_t		cpumask;
-	spinlock_t              lock;
-	struct timer_list       timer;
+	struct padata_instance		*pinst;
+	struct padata_parallel_queue	*pqueue;
+	struct padata_serial_queue	*squeue;
+	atomic_t			 seq_nr;
+	atomic_t			 reorder_objects;
+	atomic_t			 refcnt;
+	unsigned int			 max_seq_nr;
+	struct {
+		cpumask_var_t		 pcpu;
+		cpumask_var_t		 cbcpu;
+	} cpumask;
+	spinlock_t                       lock ____cacheline_aligned;
+	unsigned int			 processed;
+	struct timer_list		 timer;
 };
 
 /**
@@ -113,31 +139,53 @@ struct parallel_data {
  * @cpu_notifier: cpu hotplug notifier.
  * @wq: The workqueue in use.
  * @pd: The internal control structure.
- * @cpumask: User supplied cpumask.
+ * @cpumask: User supplied cpumask. Contains two cpumasks: pcpu and
+ *           cbcpu for parallel and serial works respectivly.
+ * @cpumask_change_notifier: Notifiers chain for user-defined notify
+ *            callbacks that will be called when either @pcpu or @cbcpu
+ *            or both cpumasks change.
+ * @kobj: padata instance kernel object.
  * @lock: padata instance lock.
  * @flags: padata flags.
  */
 struct padata_instance {
-	struct notifier_block   cpu_notifier;
-	struct workqueue_struct *wq;
-	struct parallel_data	*pd;
-	cpumask_var_t           cpumask;
-	struct mutex		lock;
-	u8			flags;
-#define	PADATA_INIT		1
-#define	PADATA_RESET		2
+	struct notifier_block		 cpu_notifier;
+	struct workqueue_struct		*wq;
+	struct parallel_data		*pd;
+	struct {
+		cpumask_var_t		 pcpu;
+		cpumask_var_t		 cbcpu;
+	} cpumask;
+	struct blocking_notifier_head	 cpumask_change_notifier;
+	struct kobject                   kobj;
+	struct mutex			 lock;
+	u8				 flags;
+#define	PADATA_INIT	1
+#define	PADATA_RESET	2
+#define	PADATA_INVALID	4
 };
 
-extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
-					    struct workqueue_struct *wq);
+extern struct padata_instance *padata_alloc(struct workqueue_struct *wq);
+extern struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
+					      const struct cpumask *pcpumask,
+					      const struct cpumask *cbcpumask);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
-extern int padata_set_cpumask(struct padata_instance *pinst,
+extern int padata_get_cpumask(struct padata_instance *pinst,
+			      int cpumask_type, struct cpumask *out_mask);
+extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
-extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
-extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
-extern void padata_start(struct padata_instance *pinst);
+extern int __padata_set_cpumasks(struct padata_instance *pinst,
+				 cpumask_var_t pcpumask,
+				 cpumask_var_t cbcpumask);
+extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
+extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
+extern int padata_start(struct padata_instance *pinst);
 extern void padata_stop(struct padata_instance *pinst);
+extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
+					    struct notifier_block *nblock);
+extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+					      struct notifier_block *nblock);
 #endif
diff --git a/kernel/padata.c b/kernel/padata.c
index fdd8ae609ce3..09c973c05273 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -26,18 +26,19 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/sysfs.h>
 #include <linux/rcupdate.h>
 
-#define MAX_SEQ_NR INT_MAX - NR_CPUS
+#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
 #define MAX_OBJ_NUM 1000
 
 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
 	int cpu, target_cpu;
 
-	target_cpu = cpumask_first(pd->cpumask);
+	target_cpu = cpumask_first(pd->cpumask.pcpu);
 	for (cpu = 0; cpu < cpu_index; cpu++)
-		target_cpu = cpumask_next(target_cpu, pd->cpumask);
+		target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
 
 	return target_cpu;
 }
@@ -53,26 +54,27 @@ static int padata_cpu_hash(struct padata_priv *padata)
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-	cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask);
+	cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
 
-static void padata_parallel_worker(struct work_struct *work)
+static void padata_parallel_worker(struct work_struct *parallel_work)
 {
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 	struct padata_instance *pinst;
 	LIST_HEAD(local_list);
 
 	local_bh_disable();
-	queue = container_of(work, struct padata_queue, pwork);
-	pd = queue->pd;
+	pqueue = container_of(parallel_work,
+			      struct padata_parallel_queue, work);
+	pd = pqueue->pd;
 	pinst = pd->pinst;
 
-	spin_lock(&queue->parallel.lock);
-	list_replace_init(&queue->parallel.list, &local_list);
-	spin_unlock(&queue->parallel.lock);
+	spin_lock(&pqueue->parallel.lock);
+	list_replace_init(&pqueue->parallel.list, &local_list);
+	spin_unlock(&pqueue->parallel.lock);
 
 	while (!list_empty(&local_list)) {
 		struct padata_priv *padata;
@@ -94,7 +96,7 @@ static void padata_parallel_worker(struct work_struct *work)
  * @pinst: padata instance
  * @padata: object to be parallelized
  * @cb_cpu: cpu the serialization callback function will run on,
- *          must be in the cpumask of padata.
+ *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
  *
  * The parallelization callback function will run with BHs off.
  * Note: Every object which is parallelized by padata_do_parallel
@@ -104,17 +106,20 @@ int padata_do_parallel(struct padata_instance *pinst,
 		       struct padata_priv *padata, int cb_cpu)
 {
 	int target_cpu, err;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *queue;
 	struct parallel_data *pd;
 
 	rcu_read_lock_bh();
 
 	pd = rcu_dereference(pinst->pd);
 
-	err = 0;
+	err = -EINVAL;
 	if (!(pinst->flags & PADATA_INIT))
 		goto out;
 
+	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
+		goto out;
+
 	err =  -EBUSY;
 	if ((pinst->flags & PADATA_RESET))
 		goto out;
@@ -122,11 +127,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 		goto out;
 
-	err = -EINVAL;
-	if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
-		goto out;
-
-	err = -EINPROGRESS;
+	err = 0;
 	atomic_inc(&pd->refcnt);
 	padata->pd = pd;
 	padata->cb_cpu = cb_cpu;
@@ -137,13 +138,13 @@ int padata_do_parallel(struct padata_instance *pinst,
 	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
 
 	target_cpu = padata_cpu_hash(padata);
-	queue = per_cpu_ptr(pd->queue, target_cpu);
+	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
 	spin_lock(&queue->parallel.lock);
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+	queue_work_on(target_cpu, pinst->wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -171,84 +172,52 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-	int cpu, num_cpus, empty, calc_seq_nr;
-	int seq_nr, next_nr, overrun, next_overrun;
-	struct padata_queue *queue, *next_queue;
+	int cpu, num_cpus;
+	int next_nr, next_index;
+	struct padata_parallel_queue *queue, *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
 
-	empty = 0;
-	next_nr = -1;
-	next_overrun = 0;
-	next_queue = NULL;
-
-	num_cpus = cpumask_weight(pd->cpumask);
-
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		reorder = &queue->reorder;
+	num_cpus = cpumask_weight(pd->cpumask.pcpu);
 
-		/*
-		 * Calculate the seq_nr of the object that should be
-		 * next in this reorder queue.
-		 */
-		overrun = 0;
-		calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
-			       + queue->cpu_index;
-
-		if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
-			calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
-			overrun = 1;
-		}
-
-		if (!list_empty(&reorder->list)) {
-			padata = list_entry(reorder->list.next,
-					    struct padata_priv, list);
-
-			seq_nr  = padata->seq_nr;
-			BUG_ON(calc_seq_nr != seq_nr);
-		} else {
-			seq_nr = calc_seq_nr;
-			empty++;
-		}
-
-		if (next_nr < 0 || seq_nr < next_nr
-		    || (next_overrun && !overrun)) {
-			next_nr = seq_nr;
-			next_overrun = overrun;
-			next_queue = queue;
-		}
+	/*
+	 * Calculate the percpu reorder queue and the sequence
+	 * number of the next object.
+	 */
+	next_nr = pd->processed;
+	next_index = next_nr % num_cpus;
+	cpu = padata_index_to_cpu(pd, next_index);
+	next_queue = per_cpu_ptr(pd->pqueue, cpu);
+
+	if (unlikely(next_nr > pd->max_seq_nr)) {
+		next_nr = next_nr - pd->max_seq_nr - 1;
+		next_index = next_nr % num_cpus;
+		cpu = padata_index_to_cpu(pd, next_index);
+		next_queue = per_cpu_ptr(pd->pqueue, cpu);
+		pd->processed = 0;
 	}
 
 	padata = NULL;
 
-	if (empty == num_cpus)
-		goto out;
-
 	reorder = &next_queue->reorder;
 
 	if (!list_empty(&reorder->list)) {
 		padata = list_entry(reorder->list.next,
 				    struct padata_priv, list);
 
-		if (unlikely(next_overrun)) {
-			for_each_cpu(cpu, pd->cpumask) {
-				queue = per_cpu_ptr(pd->queue, cpu);
-				atomic_set(&queue->num_obj, 0);
-			}
-		}
+		BUG_ON(next_nr != padata->seq_nr);
 
 		spin_lock(&reorder->lock);
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
 		spin_unlock(&reorder->lock);
 
-		atomic_inc(&next_queue->num_obj);
+		pd->processed++;
 
 		goto out;
 	}
 
-	queue = per_cpu_ptr(pd->queue, smp_processor_id());
+	queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
 	if (queue->cpu_index == next_queue->cpu_index) {
 		padata = ERR_PTR(-ENODATA);
 		goto out;
@@ -262,7 +231,7 @@ out:
 static void padata_reorder(struct parallel_data *pd)
 {
 	struct padata_priv *padata;
-	struct padata_queue *queue;
+	struct padata_serial_queue *squeue;
 	struct padata_instance *pinst = pd->pinst;
 
 	/*
@@ -301,13 +270,13 @@ static void padata_reorder(struct parallel_data *pd)
 			return;
 		}
 
-		queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+		squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu);
 
-		spin_lock(&queue->serial.lock);
-		list_add_tail(&padata->list, &queue->serial.list);
-		spin_unlock(&queue->serial.lock);
+		spin_lock(&squeue->serial.lock);
+		list_add_tail(&padata->list, &squeue->serial.list);
+		spin_unlock(&squeue->serial.lock);
 
-		queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+		queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work);
 	}
 
 	spin_unlock_bh(&pd->lock);
@@ -333,19 +302,19 @@ static void padata_reorder_timer(unsigned long arg)
 	padata_reorder(pd);
 }
 
-static void padata_serial_worker(struct work_struct *work)
+static void padata_serial_worker(struct work_struct *serial_work)
 {
-	struct padata_queue *queue;
+	struct padata_serial_queue *squeue;
 	struct parallel_data *pd;
 	LIST_HEAD(local_list);
 
 	local_bh_disable();
-	queue = container_of(work, struct padata_queue, swork);
-	pd = queue->pd;
+	squeue = container_of(serial_work, struct padata_serial_queue, work);
+	pd = squeue->pd;
 
-	spin_lock(&queue->serial.lock);
-	list_replace_init(&queue->serial.list, &local_list);
-	spin_unlock(&queue->serial.lock);
+	spin_lock(&squeue->serial.lock);
+	list_replace_init(&squeue->serial.list, &local_list);
+	spin_unlock(&squeue->serial.lock);
 
 	while (!list_empty(&local_list)) {
 		struct padata_priv *padata;
@@ -372,18 +341,18 @@ static void padata_serial_worker(struct work_struct *work)
 void padata_do_serial(struct padata_priv *padata)
 {
 	int cpu;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 
 	pd = padata->pd;
 
 	cpu = get_cpu();
-	queue = per_cpu_ptr(pd->queue, cpu);
+	pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
-	spin_lock(&queue->reorder.lock);
+	spin_lock(&pqueue->reorder.lock);
 	atomic_inc(&pd->reorder_objects);
-	list_add_tail(&padata->list, &queue->reorder.list);
-	spin_unlock(&queue->reorder.lock);
+	list_add_tail(&padata->list, &pqueue->reorder.list);
+	spin_unlock(&pqueue->reorder.lock);
 
 	put_cpu();
 
@@ -391,52 +360,88 @@ void padata_do_serial(struct padata_priv *padata)
 }
 EXPORT_SYMBOL(padata_do_serial);
 
-/* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
-					     const struct cpumask *cpumask)
+static int padata_setup_cpumasks(struct parallel_data *pd,
+				 const struct cpumask *pcpumask,
+				 const struct cpumask *cbcpumask)
 {
-	int cpu, cpu_index, num_cpus;
-	struct padata_queue *queue;
-	struct parallel_data *pd;
+	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+		return -ENOMEM;
 
-	cpu_index = 0;
+	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+		free_cpumask_var(pd->cpumask.cbcpu);
+		return -ENOMEM;
+	}
 
-	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
-	if (!pd)
-		goto err;
+	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
+	return 0;
+}
 
-	pd->queue = alloc_percpu(struct padata_queue);
-	if (!pd->queue)
-		goto err_free_pd;
+static void __padata_list_init(struct padata_list *pd_list)
+{
+	INIT_LIST_HEAD(&pd_list->list);
+	spin_lock_init(&pd_list->lock);
+}
 
-	if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
-		goto err_free_queue;
+/* Initialize all percpu queues used by serial workers */
+static void padata_init_squeues(struct parallel_data *pd)
+{
+	int cpu;
+	struct padata_serial_queue *squeue;
 
-	cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+	for_each_cpu(cpu, pd->cpumask.cbcpu) {
+		squeue = per_cpu_ptr(pd->squeue, cpu);
+		squeue->pd = pd;
+		__padata_list_init(&squeue->serial);
+		INIT_WORK(&squeue->work, padata_serial_worker);
+	}
+}
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
+/* Initialize all percpu queues used by parallel workers */
+static void padata_init_pqueues(struct parallel_data *pd)
+{
+	int cpu_index, num_cpus, cpu;
+	struct padata_parallel_queue *pqueue;
+
+	cpu_index = 0;
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
+		pqueue = per_cpu_ptr(pd->pqueue, cpu);
+		pqueue->pd = pd;
+		pqueue->cpu_index = cpu_index;
+
+		__padata_list_init(&pqueue->reorder);
+		__padata_list_init(&pqueue->parallel);
+		INIT_WORK(&pqueue->work, padata_parallel_worker);
+		atomic_set(&pqueue->num_obj, 0);
+	}
 
-		queue->pd = pd;
+	num_cpus = cpumask_weight(pd->cpumask.pcpu);
+	pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+}
 
-		queue->cpu_index = cpu_index;
-		cpu_index++;
+/* Allocate and initialize the internal cpumask dependend resources. */
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+					     const struct cpumask *pcpumask,
+					     const struct cpumask *cbcpumask)
+{
+	struct parallel_data *pd;
 
-		INIT_LIST_HEAD(&queue->reorder.list);
-		INIT_LIST_HEAD(&queue->parallel.list);
-		INIT_LIST_HEAD(&queue->serial.list);
-		spin_lock_init(&queue->reorder.lock);
-		spin_lock_init(&queue->parallel.lock);
-		spin_lock_init(&queue->serial.lock);
+	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+	if (!pd)
+		goto err;
 
-		INIT_WORK(&queue->pwork, padata_parallel_worker);
-		INIT_WORK(&queue->swork, padata_serial_worker);
-		atomic_set(&queue->num_obj, 0);
-	}
+	pd->pqueue = alloc_percpu(struct padata_parallel_queue);
+	if (!pd->pqueue)
+		goto err_free_pd;
 
-	num_cpus = cpumask_weight(pd->cpumask);
-	pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+	pd->squeue = alloc_percpu(struct padata_serial_queue);
+	if (!pd->squeue)
+		goto err_free_pqueue;
+	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
+		goto err_free_squeue;
 
+	padata_init_pqueues(pd);
+	padata_init_squeues(pd);
 	setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
@@ -446,8 +451,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
 	return pd;
 
-err_free_queue:
-	free_percpu(pd->queue);
+err_free_squeue:
+	free_percpu(pd->squeue);
+err_free_pqueue:
+	free_percpu(pd->pqueue);
 err_free_pd:
 	kfree(pd);
 err:
@@ -456,8 +463,10 @@ err:
 
 static void padata_free_pd(struct parallel_data *pd)
 {
-	free_cpumask_var(pd->cpumask);
-	free_percpu(pd->queue);
+	free_cpumask_var(pd->cpumask.pcpu);
+	free_cpumask_var(pd->cpumask.cbcpu);
+	free_percpu(pd->pqueue);
+	free_percpu(pd->squeue);
 	kfree(pd);
 }
 
@@ -465,11 +474,12 @@ static void padata_free_pd(struct parallel_data *pd)
 static void padata_flush_queues(struct parallel_data *pd)
 {
 	int cpu;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
+	struct padata_serial_queue *squeue;
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		flush_work(&queue->pwork);
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
+		pqueue = per_cpu_ptr(pd->pqueue, cpu);
+		flush_work(&pqueue->work);
 	}
 
 	del_timer_sync(&pd->timer);
@@ -477,96 +487,278 @@ static void padata_flush_queues(struct parallel_data *pd)
 	if (atomic_read(&pd->reorder_objects))
 		padata_reorder(pd);
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		flush_work(&queue->swork);
+	for_each_cpu(cpu, pd->cpumask.cbcpu) {
+		squeue = per_cpu_ptr(pd->squeue, cpu);
+		flush_work(&squeue->work);
 	}
 
 	BUG_ON(atomic_read(&pd->refcnt) != 0);
 }
 
+static void __padata_start(struct padata_instance *pinst)
+{
+	pinst->flags |= PADATA_INIT;
+}
+
+static void __padata_stop(struct padata_instance *pinst)
+{
+	if (!(pinst->flags & PADATA_INIT))
+		return;
+
+	pinst->flags &= ~PADATA_INIT;
+
+	synchronize_rcu();
+
+	get_online_cpus();
+	padata_flush_queues(pinst->pd);
+	put_online_cpus();
+}
+
 /* Replace the internal control stucture with a new one. */
 static void padata_replace(struct padata_instance *pinst,
 			   struct parallel_data *pd_new)
 {
 	struct parallel_data *pd_old = pinst->pd;
+	int notification_mask = 0;
 
 	pinst->flags |= PADATA_RESET;
 
 	rcu_assign_pointer(pinst->pd, pd_new);
 
 	synchronize_rcu();
+	if (!pd_old)
+		goto out;
 
 	padata_flush_queues(pd_old);
+	if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
+		notification_mask |= PADATA_CPU_PARALLEL;
+	if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
+		notification_mask |= PADATA_CPU_SERIAL;
+
 	padata_free_pd(pd_old);
+	if (notification_mask)
+		blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
+					     notification_mask, pinst);
 
+out:
 	pinst->flags &= ~PADATA_RESET;
 }
 
 /**
- * padata_set_cpumask - set the cpumask that padata should use
+ * padata_register_cpumask_notifier - Registers a notifier that will be called
+ *                             if either pcpu or cbcpu or both cpumasks change.
+ *
+ * @pinst: A poineter to padata instance
+ * @nblock: A pointer to notifier block.
+ */
+int padata_register_cpumask_notifier(struct padata_instance *pinst,
+				     struct notifier_block *nblock)
+{
+	return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
+						nblock);
+}
+EXPORT_SYMBOL(padata_register_cpumask_notifier);
+
+/**
+ * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
+ *        registered earlier  using padata_register_cpumask_notifier
+ *
+ * @pinst: A pointer to data instance.
+ * @nlock: A pointer to notifier block.
+ */
+int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+				       struct notifier_block *nblock)
+{
+	return blocking_notifier_chain_unregister(
+		&pinst->cpumask_change_notifier,
+		nblock);
+}
+EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
+
+
+/* If cpumask contains no active cpu, we mark the instance as invalid. */
+static bool padata_validate_cpumask(struct padata_instance *pinst,
+				    const struct cpumask *cpumask)
+{
+	if (!cpumask_intersects(cpumask, cpu_active_mask)) {
+		pinst->flags |= PADATA_INVALID;
+		return false;
+	}
+
+	pinst->flags &= ~PADATA_INVALID;
+	return true;
+}
+
+/**
+ * padata_get_cpumask: Fetch serial or parallel cpumask from the
+ *                     given padata instance and copy it to @out_mask
+ *
+ * @pinst: A pointer to padata instance
+ * @cpumask_type: Specifies which cpumask will be copied.
+ *                Possible values are PADATA_CPU_SERIAL *or* PADATA_CPU_PARALLEL
+ *                corresponding to serial and parallel cpumask respectively.
+ * @out_mask: A pointer to cpumask structure where selected
+ *            cpumask will be copied.
+ */
+int padata_get_cpumask(struct padata_instance *pinst,
+		       int cpumask_type, struct cpumask *out_mask)
+{
+	struct parallel_data *pd;
+	int ret = 0;
+
+	rcu_read_lock_bh();
+	pd = rcu_dereference(pinst->pd);
+	switch (cpumask_type) {
+	case PADATA_CPU_SERIAL:
+		cpumask_copy(out_mask, pd->cpumask.cbcpu);
+		break;
+	case PADATA_CPU_PARALLEL:
+		cpumask_copy(out_mask, pd->cpumask.pcpu);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	rcu_read_unlock_bh();
+	return ret;
+}
+EXPORT_SYMBOL(padata_get_cpumask);
+
+/**
+ * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
+ *                     equivalent to @cpumask.
  *
  * @pinst: padata instance
+ * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
+ *                to parallel and serial cpumasks respectively.
  * @cpumask: the cpumask to use
  */
-int padata_set_cpumask(struct padata_instance *pinst,
-			cpumask_var_t cpumask)
+int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+		       cpumask_var_t cpumask)
 {
-	struct parallel_data *pd;
+	struct cpumask *serial_mask, *parallel_mask;
+
+	switch (cpumask_type) {
+	case PADATA_CPU_PARALLEL:
+		serial_mask = pinst->cpumask.cbcpu;
+		parallel_mask = cpumask;
+		break;
+	case PADATA_CPU_SERIAL:
+		parallel_mask = pinst->cpumask.pcpu;
+		serial_mask = cpumask;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
+}
+EXPORT_SYMBOL(padata_set_cpumask);
+
+/**
+ * __padata_set_cpumasks - Set both parallel and serial cpumasks. The first
+ *                         one is used by parallel workers and the second one
+ *                         by the wokers doing serialization.
+ *
+ * @pinst: padata instance
+ * @pcpumask: the cpumask to use for parallel workers
+ * @cbcpumask: the cpumsak to use for serial workers
+ */
+int __padata_set_cpumasks(struct padata_instance *pinst,
+			  cpumask_var_t pcpumask, cpumask_var_t cbcpumask)
+{
+	int valid;
 	int err = 0;
+	struct parallel_data *pd = NULL;
 
 	mutex_lock(&pinst->lock);
 
+	valid = padata_validate_cpumask(pinst, pcpumask);
+	if (!valid) {
+		__padata_stop(pinst);
+		goto out_replace;
+	}
+
+	valid = padata_validate_cpumask(pinst, cbcpumask);
+	if (!valid) {
+		__padata_stop(pinst);
+		goto out_replace;
+	}
+
 	get_online_cpus();
 
-	pd = padata_alloc_pd(pinst, cpumask);
+	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 	if (!pd) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	cpumask_copy(pinst->cpumask, cpumask);
+out_replace:
+	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
 	padata_replace(pinst, pd);
 
+	if (valid)
+		__padata_start(pinst);
+
 out:
 	put_online_cpus();
 
 	mutex_unlock(&pinst->lock);
 
 	return err;
+
 }
-EXPORT_SYMBOL(padata_set_cpumask);
+EXPORT_SYMBOL(__padata_set_cpumasks);
 
 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 {
 	struct parallel_data *pd;
 
 	if (cpumask_test_cpu(cpu, cpu_active_mask)) {
-		pd = padata_alloc_pd(pinst, pinst->cpumask);
+		pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+				     pinst->cpumask.cbcpu);
 		if (!pd)
 			return -ENOMEM;
 
 		padata_replace(pinst, pd);
+
+		if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
+		    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
+			__padata_start(pinst);
 	}
 
 	return 0;
 }
 
-/**
- * padata_add_cpu - add a cpu to the padata cpumask
+ /**
+ * padata_add_cpu - add a cpu to one or both(parallel and serial)
+ *                  padata cpumasks.
  *
  * @pinst: padata instance
  * @cpu: cpu to add
+ * @mask: bitmask of flags specifying to which cpumask @cpu shuld be added.
+ *        The @mask may be any combination of the following flags:
+ *          PADATA_CPU_SERIAL   - serial cpumask
+ *          PADATA_CPU_PARALLEL - parallel cpumask
  */
-int padata_add_cpu(struct padata_instance *pinst, int cpu)
+
+int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
 {
 	int err;
 
+	if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+		return -EINVAL;
+
 	mutex_lock(&pinst->lock);
 
 	get_online_cpus();
-	cpumask_set_cpu(cpu, pinst->cpumask);
+	if (mask & PADATA_CPU_SERIAL)
+		cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
+	if (mask & PADATA_CPU_PARALLEL)
+		cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
+
 	err = __padata_add_cpu(pinst, cpu);
 	put_online_cpus();
 
@@ -578,33 +770,55 @@ EXPORT_SYMBOL(padata_add_cpu);
 
 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 {
-	struct parallel_data *pd;
+	struct parallel_data *pd = NULL;
 
 	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
-		pd = padata_alloc_pd(pinst, pinst->cpumask);
+
+		if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
+		    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) {
+			__padata_stop(pinst);
+			padata_replace(pinst, pd);
+			goto out;
+		}
+
+		pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+				     pinst->cpumask.cbcpu);
 		if (!pd)
 			return -ENOMEM;
 
 		padata_replace(pinst, pd);
 	}
 
+out:
 	return 0;
 }
 
-/**
- * padata_remove_cpu - remove a cpu from the padata cpumask
+ /**
+ * padata_remove_cpu - remove a cpu from the one or both(serial and paralell)
+ *                     padata cpumasks.
  *
  * @pinst: padata instance
  * @cpu: cpu to remove
+ * @mask: bitmask specifying from which cpumask @cpu should be removed
+ *        The @mask may be any combination of the following flags:
+ *          PADATA_CPU_SERIAL   - serial cpumask
+ *          PADATA_CPU_PARALLEL - parallel cpumask
  */
-int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
 {
 	int err;
 
+	if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+		return -EINVAL;
+
 	mutex_lock(&pinst->lock);
 
 	get_online_cpus();
-	cpumask_clear_cpu(cpu, pinst->cpumask);
+	if (mask & PADATA_CPU_SERIAL)
+		cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
+	if (mask & PADATA_CPU_PARALLEL)
+		cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
+
 	err = __padata_remove_cpu(pinst, cpu);
 	put_online_cpus();
 
@@ -619,11 +833,20 @@ EXPORT_SYMBOL(padata_remove_cpu);
  *
  * @pinst: padata instance to start
  */
-void padata_start(struct padata_instance *pinst)
+int padata_start(struct padata_instance *pinst)
 {
+	int err = 0;
+
 	mutex_lock(&pinst->lock);
-	pinst->flags |= PADATA_INIT;
+
+	if (pinst->flags & PADATA_INVALID)
+		err =-EINVAL;
+
+	 __padata_start(pinst);
+
 	mutex_unlock(&pinst->lock);
+
+	return err;
 }
 EXPORT_SYMBOL(padata_start);
 
@@ -635,12 +858,20 @@ EXPORT_SYMBOL(padata_start);
 void padata_stop(struct padata_instance *pinst)
 {
 	mutex_lock(&pinst->lock);
-	pinst->flags &= ~PADATA_INIT;
+	__padata_stop(pinst);
 	mutex_unlock(&pinst->lock);
 }
 EXPORT_SYMBOL(padata_stop);
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
+{
+	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
+		cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
+}
+
+
 static int padata_cpu_callback(struct notifier_block *nfb,
 			       unsigned long action, void *hcpu)
 {
@@ -653,7 +884,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		err = __padata_add_cpu(pinst, cpu);
@@ -664,7 +895,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		err = __padata_remove_cpu(pinst, cpu);
@@ -675,7 +906,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		__padata_remove_cpu(pinst, cpu);
@@ -683,7 +914,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		__padata_add_cpu(pinst, cpu);
@@ -694,36 +925,202 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 }
 #endif
 
+static void __padata_free(struct padata_instance *pinst)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
+	padata_stop(pinst);
+	padata_free_pd(pinst->pd);
+	free_cpumask_var(pinst->cpumask.pcpu);
+	free_cpumask_var(pinst->cpumask.cbcpu);
+	kfree(pinst);
+}
+
+#define kobj2pinst(_kobj)					\
+	container_of(_kobj, struct padata_instance, kobj)
+#define attr2pentry(_attr)					\
+	container_of(_attr, struct padata_sysfs_entry, attr)
+
+static void padata_sysfs_release(struct kobject *kobj)
+{
+	struct padata_instance *pinst = kobj2pinst(kobj);
+	__padata_free(pinst);
+}
+
+struct padata_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
+	ssize_t (*store)(struct padata_instance *, struct attribute *,
+			 const char *, size_t);
+};
+
+static ssize_t show_cpumask(struct padata_instance *pinst,
+			    struct attribute *attr,  char *buf)
+{
+	struct cpumask *cpumask;
+	ssize_t len;
+
+	mutex_lock(&pinst->lock);
+	if (!strcmp(attr->name, "serial_cpumask"))
+		cpumask = pinst->cpumask.cbcpu;
+	else
+		cpumask = pinst->cpumask.pcpu;
+
+	len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask),
+			       nr_cpu_ids);
+	if (PAGE_SIZE - len < 2)
+		len = -EINVAL;
+	else
+		len += sprintf(buf + len, "\n");
+
+	mutex_unlock(&pinst->lock);
+	return len;
+}
+
+static ssize_t store_cpumask(struct padata_instance *pinst,
+			     struct attribute *attr,
+			     const char *buf, size_t count)
+{
+	cpumask_var_t new_cpumask;
+	ssize_t ret;
+	int mask_type;
+
+	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
+			   nr_cpumask_bits);
+	if (ret < 0)
+		goto out;
+
+	mask_type = !strcmp(attr->name, "serial_cpumask") ?
+		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
+	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
+	if (!ret)
+		ret = count;
+
+out:
+	free_cpumask_var(new_cpumask);
+	return ret;
+}
+
+#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
+	static struct padata_sysfs_entry _name##_attr =		\
+		__ATTR(_name, 0644, _show_name, _store_name)
+#define PADATA_ATTR_RO(_name, _show_name)		\
+	static struct padata_sysfs_entry _name##_attr = \
+		__ATTR(_name, 0400, _show_name, NULL)
+
+PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
+PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
+
+/*
+ * Padata sysfs provides the following objects:
+ * serial_cpumask   [RW] - cpumask for serial workers
+ * parallel_cpumask [RW] - cpumask for parallel workers
+ */
+static struct attribute *padata_default_attrs[] = {
+	&serial_cpumask_attr.attr,
+	&parallel_cpumask_attr.attr,
+	NULL,
+};
+
+static ssize_t padata_sysfs_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct padata_instance *pinst;
+	struct padata_sysfs_entry *pentry;
+	ssize_t ret = -EIO;
+
+	pinst = kobj2pinst(kobj);
+	pentry = attr2pentry(attr);
+	if (pentry->show)
+		ret = pentry->show(pinst, attr, buf);
+
+	return ret;
+}
+
+static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct padata_instance *pinst;
+	struct padata_sysfs_entry *pentry;
+	ssize_t ret = -EIO;
+
+	pinst = kobj2pinst(kobj);
+	pentry = attr2pentry(attr);
+	if (pentry->show)
+		ret = pentry->store(pinst, attr, buf, count);
+
+	return ret;
+}
+
+static const struct sysfs_ops padata_sysfs_ops = {
+	.show = padata_sysfs_show,
+	.store = padata_sysfs_store,
+};
+
+static struct kobj_type padata_attr_type = {
+	.sysfs_ops = &padata_sysfs_ops,
+	.default_attrs = padata_default_attrs,
+	.release = padata_sysfs_release,
+};
+
 /**
- * padata_alloc - allocate and initialize a padata instance
+ * padata_alloc - Allocate and initialize padata instance.
+ *                Use default cpumask(cpu_possible_mask)
+ *                for serial and parallel workes.
  *
- * @cpumask: cpumask that padata uses for parallelization
  * @wq: workqueue to use for the allocated padata instance
  */
-struct padata_instance *padata_alloc(const struct cpumask *cpumask,
-				     struct workqueue_struct *wq)
+struct padata_instance *padata_alloc(struct workqueue_struct *wq)
+{
+	return __padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc);
+
+/**
+ * __padata_alloc - allocate and initialize a padata instance
+ *                  and specify cpumasks for serial and parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ * @pcpumask: cpumask that will be used for padata parallelization
+ * @cbcpumask: cpumask that will be used for padata serialization
+ */
+struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
+				       const struct cpumask *pcpumask,
+				       const struct cpumask *cbcpumask)
 {
 	struct padata_instance *pinst;
-	struct parallel_data *pd;
+	struct parallel_data *pd = NULL;
 
 	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
 	if (!pinst)
 		goto err;
 
 	get_online_cpus();
-
-	pd = padata_alloc_pd(pinst, cpumask);
-	if (!pd)
+	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+		goto err_free_inst;
+	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
+		free_cpumask_var(pinst->cpumask.pcpu);
 		goto err_free_inst;
+	}
+	if (!padata_validate_cpumask(pinst, pcpumask) ||
+	    !padata_validate_cpumask(pinst, cbcpumask))
+		goto err_free_masks;
 
-	if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
-		goto err_free_pd;
+	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
+	if (!pd)
+		goto err_free_masks;
 
 	rcu_assign_pointer(pinst->pd, pd);
 
 	pinst->wq = wq;
 
-	cpumask_copy(pinst->cpumask, cpumask);
+	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
 	pinst->flags = 0;
 
@@ -735,19 +1132,22 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
 
 	put_online_cpus();
 
+	BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
+	kobject_init(&pinst->kobj, &padata_attr_type);
 	mutex_init(&pinst->lock);
 
 	return pinst;
 
-err_free_pd:
-	padata_free_pd(pd);
+err_free_masks:
+	free_cpumask_var(pinst->cpumask.pcpu);
+	free_cpumask_var(pinst->cpumask.cbcpu);
 err_free_inst:
 	kfree(pinst);
 	put_online_cpus();
 err:
 	return NULL;
 }
-EXPORT_SYMBOL(padata_alloc);
+EXPORT_SYMBOL(__padata_alloc);
 
 /**
  * padata_free - free a padata instance
@@ -756,19 +1156,6 @@ EXPORT_SYMBOL(padata_alloc);
  */
 void padata_free(struct padata_instance *pinst)
 {
-	padata_stop(pinst);
-
-	synchronize_rcu();
-
-#ifdef CONFIG_HOTPLUG_CPU
-	unregister_hotcpu_notifier(&pinst->cpu_notifier);
-#endif
-	get_online_cpus();
-	padata_flush_queues(pinst->pd);
-	put_online_cpus();
-
-	padata_free_pd(pinst->pd);
-	free_cpumask_var(pinst->cpumask);
-	kfree(pinst);
+	kobject_put(&pinst->kobj);
 }
 EXPORT_SYMBOL(padata_free);