From bd174fc2ca63f1c833a09f930796d789fb1f5361 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 5 Apr 2017 09:23:51 +0300 Subject: IB/mlx5: Fix function updating xlt emergency path In memory shortage path we fall back to use spare buffer. mlx5_ib_update_xlt() called from ib_uverbs_reg_mr when ibmr.ucontext not initialized yet. Scenario how to test it: 1. trigger memory exhaustion so __get_free_pages(GFP_KERNEL, 4) will fail 2. register MR 3. there should be no kernel oops Fixes: 7d0cc6edcc70 ('IB/mlx5: Add MR cache for large UMR regions') Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b8f9382a8b7d..1f09e11fa694 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1009,7 +1009,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, } if (!xlt) { - uctx = to_mucontext(mr->ibmr.uobject->context); + uctx = to_mucontext(mr->ibmr.pd->uobject->context); mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); size = PAGE_SIZE; xlt = (void *)uctx->upd_xlt_page; -- cgit v1.2.3 From 438b228e03740957d94198417d28a6ebea6672ed Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 5 Apr 2017 09:23:52 +0300 Subject: IB/mlx5: Fix UMR size calculation Translation table updates of large UMR may require multiple post send operations. The last operations can be in various lengths, but current code set them to be the same length. Fixes: 7d0cc6edcc70 ('IB/mlx5: Add MR cache for large UMR regions') Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1f09e11fa694..9a74260e9899 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1045,8 +1045,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, for (pages_mapped = 0; pages_mapped < pages_to_map && !err; pages_mapped += pages_iter, idx += pages_iter) { + npages = min_t(int, pages_iter, pages_to_map - pages_mapped); dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - npages = populate_xlt(mr, idx, pages_iter, xlt, + npages = populate_xlt(mr, idx, npages, xlt, page_shift, size, flags); dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); -- cgit v1.2.3 From db570d7deafb47ee635981f403a6531844c18ba5 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 5 Apr 2017 09:23:59 +0300 Subject: IB/mlx5: Add ODP support to MW Internally MW implemented as KLM MKey and filled by userspace UMR postsends. Handle pagefault trigered by operations on this MKeys. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 1 + drivers/infiniband/hw/mlx5/odp.c | 161 +++++++++++++++++++++++++---------- 3 files changed, 120 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 191b82b038d7..93c646691208 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -513,6 +513,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; + int ndescs; }; struct mlx5_ib_umr_context { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9a74260e9899..93c0e82aa491 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1688,6 +1688,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mw->mmkey.type = MLX5_MKEY_MW; mw->ibmw.rkey = mw->mmkey.key; + mw->ndescs = ndescs; resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 842e1dbb50b8..ae0746754008 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -288,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) return; } -static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, - u32 key) -{ - u32 base_key = mlx5_base_mkey(key); - struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); - struct mlx5_ib_mr *mr; - - if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR) - return NULL; - - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - - if (!mr->live) - return NULL; - - return container_of(mmkey, struct mlx5_ib_mr, mmkey); -} - static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, int error) @@ -625,6 +607,14 @@ out: return ret; } +struct pf_frame { + struct pf_frame *next; + u32 key; + u64 io_virt; + size_t bcnt; + int depth; +}; + /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -641,43 +631,128 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret; + int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + struct pf_frame *head = NULL, *frame; + struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mw *mw; struct mlx5_ib_mr *mr; - size_t size; + struct mlx5_klm *pklm; + u32 *out = NULL; + size_t offset; srcu_key = srcu_read_lock(&dev->mr_srcu); - mr = mlx5_ib_odp_find_mr_lkey(dev, key); - /* - * If we didn't find the MR, it means the MR was closed while we were - * handling the ODP event. In this case we return -EFAULT so that the - * QP will be closed. - */ - if (!mr || !mr->ibmr.pd) { - mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n", - key); + + io_virt += *bytes_committed; + bcnt -= *bytes_committed; + +next_mr: + mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); + if (!mmkey || mmkey->key != key) { + mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; } - if (!mr->umem->odp_data) { - mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); - if (bytes_mapped) - *bytes_mapped += - (bcnt - *bytes_committed); + + switch (mmkey->type) { + case MLX5_MKEY_MR: + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + if (!mr->live || !mr->ibmr.pd) { + mlx5_ib_dbg(dev, "got dead MR\n"); + ret = -EFAULT; + goto srcu_unlock; + } + + ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped); + if (ret < 0) + goto srcu_unlock; + + npages += ret; + ret = 0; + break; + + case MLX5_MKEY_MW: + mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + + if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { + mlx5_ib_dbg(dev, "indirection level exceeded\n"); + ret = -EFAULT; + goto srcu_unlock; + } + + outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + + sizeof(*pklm) * (mw->ndescs - 2); + + if (outlen > cur_outlen) { + kfree(out); + out = kzalloc(outlen, GFP_KERNEL); + if (!out) { + ret = -ENOMEM; + goto srcu_unlock; + } + cur_outlen = outlen; + } + + pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, + bsf0_klm0_pas_mtt0_1); + + ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen); + if (ret) + goto srcu_unlock; + + offset = io_virt - MLX5_GET64(query_mkey_out, out, + memory_key_mkey_entry.start_addr); + + for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) { + if (offset >= be32_to_cpu(pklm->bcount)) { + offset -= be32_to_cpu(pklm->bcount); + continue; + } + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) { + ret = -ENOMEM; + goto srcu_unlock; + } + + frame->key = be32_to_cpu(pklm->key); + frame->io_virt = be64_to_cpu(pklm->va) + offset; + frame->bcnt = min_t(size_t, bcnt, + be32_to_cpu(pklm->bcount) - offset); + frame->depth = depth + 1; + frame->next = head; + head = frame; + + bcnt -= frame->bcnt; + } + break; + + default: + mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); + ret = -EFAULT; goto srcu_unlock; } - /* - * Avoid branches - this code will perform correctly - * in all iterations (in iteration 2 and above, - * bytes_committed == 0). - */ - io_virt += *bytes_committed; - bcnt -= *bytes_committed; + if (head) { + frame = head; + head = frame->next; - npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped); + key = frame->key; + io_virt = frame->io_virt; + bcnt = frame->bcnt; + depth = frame->depth; + kfree(frame); + + goto next_mr; + } srcu_unlock: + while (head) { + frame = head; + head = frame->next; + kfree(frame); + } + kfree(out); + srcu_read_unlock(&dev->mr_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; -- cgit v1.2.3 From 0a49f2c31c3efbeb0de3e4b5598764887f629be2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Apr 2017 14:31:42 +0300 Subject: mlx5: Fix mlx5_ib_map_mr_sg mr length In case we got an initial sg_offset, we need to account for it in the mr length. Cc: stable@vger.kernel.org Fixes: ff2ba9936591 ("IB/core: Add passing an offset into the SG to ib_map_mr_sg") Signed-off-by: Sagi Grimberg Tested-by: Israel Rukshin Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 93c0e82aa491..366433f71b58 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1784,7 +1784,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); klms[i].key = cpu_to_be32(lkey); - mr->ibmr.length += sg_dma_len(sg); + mr->ibmr.length += sg_dma_len(sg) - sg_offset; sg_offset = 0; } -- cgit v1.2.3