From 3274819b3c81c18c01e3c0e0ea726870ec237ac0 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 8 Jan 2024 16:53:48 -0600 Subject: crypto: iaa - Remove header table code The header table and related code is currently unused - it was included and used for canned mode, but canned mode has been removed, so this code can be safely removed as well. This indirectly fixes a bug reported by Dan Carpenter. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-crypto/b2e0bd974981291e16882686a2b9b1db3986abe4.camel@linux.intel.com/T/#m4403253d6a4347a925fab4fc1cdb4ef7c095fb86 Signed-off-by: Tom Zanussi Reviewed-by: Dave Jiang Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 108 +---------------------------- 1 file changed, 3 insertions(+), 105 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index dfd3baf0a8d8..39a5fc905c4d 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode) kfree(mode->name); kfree(mode->ll_table); kfree(mode->d_table); - kfree(mode->header_table); kfree(mode); } /* - * IAA Compression modes are defined by an ll_table, a d_table, and an - * optional header_table. These tables are typically generated and - * captured using statistics collected from running actual - * compress/decompress workloads. + * IAA Compression modes are defined by an ll_table and a d_table. + * These tables are typically generated and captured using statistics + * collected from running actual compress/decompress workloads. * * A module or other kernel code can add and remove compression modes * with a given name using the exported @add_iaa_compression_mode() @@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode); * @ll_table_size: The ll table size in bytes * @d_table: The d table * @d_table_size: The d table size in bytes - * @header_table: Optional header table - * @header_table_size: Optional header table size in bytes - * @gen_decomp_table_flags: Otional flags used to generate the decomp table * @init: Optional callback function to init the compression mode data * @free: Optional callback function to free the compression mode data * @@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free) { @@ -370,16 +362,6 @@ int add_iaa_compression_mode(const char *name, mode->d_table_size = d_table_size; } - if (header_table) { - mode->header_table = kzalloc(header_table_size, GFP_KERNEL); - if (!mode->header_table) - goto free; - memcpy(mode->header_table, header_table, header_table_size); - mode->header_table_size = header_table_size; - } - - mode->gen_decomp_table_flags = gen_decomp_table_flags; - mode->init = init; mode->free = free; @@ -420,10 +402,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device, if (device_mode->aecs_comp_table) dma_free_coherent(dev, size, device_mode->aecs_comp_table, device_mode->aecs_comp_table_dma_addr); - if (device_mode->aecs_decomp_table) - dma_free_coherent(dev, size, device_mode->aecs_decomp_table, - device_mode->aecs_decomp_table_dma_addr); - kfree(device_mode); } @@ -440,73 +418,6 @@ static int check_completion(struct device *dev, bool compress, bool only_once); -static int decompress_header(struct iaa_device_compression_mode *device_mode, - struct iaa_compression_mode *mode, - struct idxd_wq *wq) -{ - dma_addr_t src_addr, src2_addr; - struct idxd_desc *idxd_desc; - struct iax_hw_desc *desc; - struct device *dev; - int ret = 0; - - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); - if (IS_ERR(idxd_desc)) - return PTR_ERR(idxd_desc); - - desc = idxd_desc->iax_hw; - - dev = &wq->idxd->pdev->dev; - - src_addr = dma_map_single(dev, (void *)mode->header_table, - mode->header_table_size, DMA_TO_DEVICE); - dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n", - __func__, mode->name, src_addr, dev, - mode->header_table, mode->header_table_size); - if (unlikely(dma_mapping_error(dev, src_addr))) { - dev_dbg(dev, "dma_map_single err, exiting\n"); - ret = -ENOMEM; - return ret; - } - - desc->flags = IAX_AECS_GEN_FLAG; - desc->opcode = IAX_OPCODE_DECOMPRESS; - - desc->src1_addr = (u64)src_addr; - desc->src1_size = mode->header_table_size; - - src2_addr = device_mode->aecs_decomp_table_dma_addr; - desc->src2_addr = (u64)src2_addr; - desc->src2_size = 1088; - dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n", - __func__, mode->name, desc->src2_addr, dev, desc->src2_size); - desc->max_dst_size = 0; // suppressed output - - desc->decompr_flags = mode->gen_decomp_table_flags; - - desc->priv = 0; - - desc->completion_addr = idxd_desc->compl_dma; - - ret = idxd_submit_desc(wq, idxd_desc); - if (ret) { - pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret); - goto out; - } - - ret = check_completion(dev, idxd_desc->iax_completion, false, false); - if (ret) - dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n", - __func__, mode->name, ret); - else - dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__, - mode->name); -out: - dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE); - - return ret; -} - static int init_device_compression_mode(struct iaa_device *iaa_device, struct iaa_compression_mode *mode, int idx, struct idxd_wq *wq) @@ -529,24 +440,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device, if (!device_mode->aecs_comp_table) goto free; - device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size, - &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL); - if (!device_mode->aecs_decomp_table) - goto free; - /* Add Huffman table to aecs */ memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size); memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); - if (mode->header_table) { - ret = decompress_header(device_mode, mode, wq); - if (ret) { - pr_debug("iaa header decompression failed: ret=%d\n", ret); - goto free; - } - } - if (mode->init) { ret = mode->init(device_mode); if (ret) -- cgit v1.2.3 From 262534ddc88dfea7474ed18adfecf856e4fbe054 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 25 Feb 2024 14:11:33 -0600 Subject: crypto: iaa - Fix async_disable descriptor leak The disable_async paths of iaa_compress/decompress() don't free idxd descriptors in the async_disable case. Currently this only happens in the testcases where req->dst is set to null. Add a test to free them in those paths. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 39a5fc905c4d..85ee4c965ccf 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1222,7 +1222,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, *compression_crc = idxd_desc->iax_completion->crc; - if (!ctx->async_mode) + if (!ctx->async_mode || disable_async) idxd_free_desc(wq, idxd_desc); out: return ret; @@ -1468,7 +1468,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, *dlen = req->dlen; - if (!ctx->async_mode) + if (!ctx->async_mode || disable_async) idxd_free_desc(wq, idxd_desc); /* Update stats */ -- cgit v1.2.3 From cdb083e73d632afcc5a931d31bb37445580f4bfb Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 25 Feb 2024 14:11:34 -0600 Subject: crypto: iaa - Fix comp/decomp delay statistics The comp/decomp delay statistics currently have no callers; somehow they were dropped during refactoring. There originally were also two sets, one for the async algorithm, the other for the synchronous version. Because the synchronous algorithm was dropped, one set should be removed. To keep it consistent with the rest of the stats, and since there's no ambiguity, remove the acomp/adecomp versions. Also add back the callers. Reported-by: Rex Zhang Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 9 +++++++++ drivers/crypto/intel/iaa/iaa_crypto_stats.c | 28 ---------------------------- drivers/crypto/intel/iaa/iaa_crypto_stats.h | 8 ++++---- 3 files changed, 13 insertions(+), 32 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 85ee4c965ccf..b54f93c64033 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1494,6 +1494,7 @@ static int iaa_comp_acompress(struct acomp_req *req) u32 compression_crc; struct idxd_wq *wq; struct device *dev; + u64 start_time_ns; int order = -1; compression_ctx = crypto_tfm_ctx(tfm); @@ -1567,8 +1568,10 @@ static int iaa_comp_acompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, &compression_crc, disable_async); + update_max_comp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -1615,6 +1618,7 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) struct iaa_wq *iaa_wq; struct device *dev; struct idxd_wq *wq; + u64 start_time_ns; int order = -1; cpu = get_cpu(); @@ -1671,8 +1675,10 @@ alloc_dest: dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, true); + update_max_decomp_delay_ns(start_time_ns); if (ret == -EOVERFLOW) { dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); req->dlen *= 2; @@ -1703,6 +1709,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; struct device *dev; + u64 start_time_ns; struct idxd_wq *wq; if (!iaa_crypto_enabled) { @@ -1762,8 +1769,10 @@ static int iaa_comp_adecompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); + start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, false); + update_max_decomp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index cbf87d0effe3..c9f83af4b307 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -22,8 +22,6 @@ static u64 total_decomp_calls; static u64 total_sw_decomp_calls; static u64 max_comp_delay_ns; static u64 max_decomp_delay_ns; -static u64 max_acomp_delay_ns; -static u64 max_adecomp_delay_ns; static u64 total_comp_bytes_out; static u64 total_decomp_bytes_in; static u64 total_completion_einval_errors; @@ -92,26 +90,6 @@ void update_max_decomp_delay_ns(u64 start_time_ns) max_decomp_delay_ns = time_diff; } -void update_max_acomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_acomp_delay_ns) - max_acomp_delay_ns = time_diff; -} - -void update_max_adecomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_adecomp_delay_ns) - max_adecomp_delay_ns = time_diff; -} - void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); @@ -151,8 +129,6 @@ static void reset_iaa_crypto_stats(void) total_sw_decomp_calls = 0; max_comp_delay_ns = 0; max_decomp_delay_ns = 0; - max_acomp_delay_ns = 0; - max_adecomp_delay_ns = 0; total_comp_bytes_out = 0; total_decomp_bytes_in = 0; total_completion_einval_errors = 0; @@ -280,10 +256,6 @@ int __init iaa_crypto_debugfs_init(void) iaa_crypto_debugfs_root, &max_comp_delay_ns); debugfs_create_u64("max_decomp_delay_ns", 0644, iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("max_acomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_adecomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); debugfs_create_u64("total_comp_calls", 0644, iaa_crypto_debugfs_root, &total_comp_calls); debugfs_create_u64("total_decomp_calls", 0644, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c10b87b86fa4..c916ca83f070 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -15,8 +15,6 @@ void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); void update_max_comp_delay_ns(u64 start_time_ns); void update_max_decomp_delay_ns(u64 start_time_ns); -void update_max_acomp_delay_ns(u64 start_time_ns); -void update_max_adecomp_delay_ns(u64 start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -26,6 +24,8 @@ void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n); void update_wq_decomp_calls(struct idxd_wq *idxd_wq); void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n); +static inline u64 iaa_get_ts(void) { return ktime_get_ns(); } + #else static inline int iaa_crypto_debugfs_init(void) { return 0; } static inline void iaa_crypto_debugfs_cleanup(void) {} @@ -37,8 +37,6 @@ static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} static inline void update_max_comp_delay_ns(u64 start_time_ns) {} static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_acomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} @@ -48,6 +46,8 @@ static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {} static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {} static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {} +static inline u64 iaa_get_ts(void) { return 0; } + #endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS #endif -- cgit v1.2.3 From 30dd94dba350043a32cfe9cb478ed621aae3c5c9 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 29 Feb 2024 23:14:49 +1300 Subject: crypto: iaa - fix the missing CRYPTO_ALG_ASYNC in cra_flags Add the missing CRYPTO_ALG_ASYNC flag since intel iaa driver works asynchronously. Signed-off-by: Barry Song Acked-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index b54f93c64033..1cd304de5388 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1823,6 +1823,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_ctxsize = sizeof(struct iaa_compression_ctx), .cra_module = THIS_MODULE, .cra_priority = IAA_ALG_PRIORITY, -- cgit v1.2.3 From 5a7e89d3315d1be86aff8a8bf849023cda6547f7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 21 Mar 2024 16:08:45 -0500 Subject: crypto: iaa - Fix nr_cpus < nr_iaa case If nr_cpus < nr_iaa, the calculated cpus_per_iaa will be 0, which causes a divide-by-0 in rebalance_wq_table(). Make sure cpus_per_iaa is 1 in that case, and also in the nr_iaa == 0 case, even though cpus_per_iaa is never used if nr_iaa == 0, for paranoia. Cc: # v6.8+ Reported-by: Jerry Snitselaar Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 1cd304de5388..b2191ade9011 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -806,6 +806,8 @@ static int save_iaa_wq(struct idxd_wq *wq) return -EINVAL; cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; out: return 0; } @@ -821,10 +823,12 @@ static void remove_iaa_wq(struct idxd_wq *wq) } } - if (nr_iaa) + if (nr_iaa) { cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - else - cpus_per_iaa = 0; + if (!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; } static int wq_table_add_wqs(int iaa, int cpu) -- cgit v1.2.3