diff options
Diffstat (limited to 'net/smc/smc_core.c')
-rw-r--r-- | net/smc/smc_core.c | 246 |
1 files changed, 160 insertions, 86 deletions
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f40f6ed0fbdb..ff49a11f57b8 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -347,6 +347,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type)) goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type)) + goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE, @@ -907,6 +909,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->net = smc_ib_net(lnk->smcibdev); lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; + lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type; atomic_inc(&lgr_cnt); } smc->conn.lgr = lgr; @@ -1086,34 +1089,37 @@ err_out: return NULL; } -static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, +static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link_group *lgr) { + struct mutex *lock; /* lock buffer list */ int rc; - if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { + if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) { /* unregister rmb with peer */ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); if (!rc) { /* protect against smc_llc_cli_rkey_exchange() */ mutex_lock(&lgr->llc_conf_mutex); - smc_llc_do_delete_rkey(lgr, rmb_desc); - rmb_desc->is_conf_rkey = false; + smc_llc_do_delete_rkey(lgr, buf_desc); + buf_desc->is_conf_rkey = false; mutex_unlock(&lgr->llc_conf_mutex); smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); } } - if (rmb_desc->is_reg_err) { + if (buf_desc->is_reg_err) { /* buf registration failed, reuse not possible */ - mutex_lock(&lgr->rmbs_lock); - list_del(&rmb_desc->list); - mutex_unlock(&lgr->rmbs_lock); + lock = is_rmb ? &lgr->rmbs_lock : + &lgr->sndbufs_lock; + mutex_lock(lock); + list_del(&buf_desc->list); + mutex_unlock(lock); - smc_buf_free(lgr, true, rmb_desc); + smc_buf_free(lgr, is_rmb, buf_desc); } else { - rmb_desc->used = 0; - memset(rmb_desc->cpu_addr, 0, rmb_desc->len); + buf_desc->used = 0; + memset(buf_desc->cpu_addr, 0, buf_desc->len); } } @@ -1121,15 +1127,23 @@ static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { if (conn->sndbuf_desc) { - conn->sndbuf_desc->used = 0; - memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len); + if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { + smcr_buf_unuse(conn->sndbuf_desc, false, lgr); + } else { + conn->sndbuf_desc->used = 0; + memset(conn->sndbuf_desc->cpu_addr, 0, + conn->sndbuf_desc->len); + } } - if (conn->rmb_desc && lgr->is_smcd) { - conn->rmb_desc->used = 0; - memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len + - sizeof(struct smcd_cdc_msg)); - } else if (conn->rmb_desc) { - smcr_buf_unuse(conn->rmb_desc, lgr); + if (conn->rmb_desc) { + if (!lgr->is_smcd) { + smcr_buf_unuse(conn->rmb_desc, true, lgr); + } else { + conn->rmb_desc->used = 0; + memset(conn->rmb_desc->cpu_addr, 0, + conn->rmb_desc->len + + sizeof(struct smcd_cdc_msg)); + } } } @@ -1177,20 +1191,21 @@ lgr_put: static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link *lnk) { - if (is_rmb) + if (is_rmb || buf_desc->is_vm) buf_desc->is_reg_mr[lnk->link_idx] = false; if (!buf_desc->is_map_ib[lnk->link_idx]) return; - if (is_rmb) { - if (buf_desc->mr_rx[lnk->link_idx]) { - smc_ib_put_memory_region( - buf_desc->mr_rx[lnk->link_idx]); - buf_desc->mr_rx[lnk->link_idx] = NULL; - } + + if ((is_rmb || buf_desc->is_vm) && + buf_desc->mr[lnk->link_idx]) { + smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]); + buf_desc->mr[lnk->link_idx] = NULL; + } + if (is_rmb) smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); - } else { + else smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); - } + sg_free_table(&buf_desc->sgt[lnk->link_idx]); buf_desc->is_map_ib[lnk->link_idx] = false; } @@ -1279,8 +1294,10 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]); - if (buf_desc->pages) + if (!buf_desc->is_vm && buf_desc->pages) __free_pages(buf_desc->pages, buf_desc->order); + else if (buf_desc->is_vm && buf_desc->cpu_addr) + vfree(buf_desc->cpu_addr); kfree(buf_desc); } @@ -1992,39 +2009,69 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -/* map an rmb buf to a link */ +/* map an buf to a link */ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link *lnk) { - int rc; + int rc, i, nents, offset, buf_size, size, access_flags; + struct scatterlist *sg; + void *buf; if (buf_desc->is_map_ib[lnk->link_idx]) return 0; - rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); + if (buf_desc->is_vm) { + buf = buf_desc->cpu_addr; + buf_size = buf_desc->len; + offset = offset_in_page(buf_desc->cpu_addr); + nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE; + } else { + nents = 1; + } + + rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL); if (rc) return rc; - sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, - buf_desc->cpu_addr, buf_desc->len); + + if (buf_desc->is_vm) { + /* virtually contiguous buffer */ + for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) { + size = min_t(int, PAGE_SIZE - offset, buf_size); + sg_set_page(sg, vmalloc_to_page(buf), size, offset); + buf += size / sizeof(*buf); + buf_size -= size; + offset = 0; + } + } else { + /* physically contiguous buffer */ + sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, + buf_desc->cpu_addr, buf_desc->len); + } /* map sg table to DMA address */ rc = smc_ib_buf_map_sg(lnk, buf_desc, is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); /* SMC protocol depends on mapping to one DMA address only */ - if (rc != 1) { + if (rc != nents) { rc = -EAGAIN; goto free_table; } - /* create a new memory region for the RMB */ - if (is_rmb) { - rc = smc_ib_get_memory_region(lnk->roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, + buf_desc->is_dma_need_sync |= + smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx; + + if (is_rmb || buf_desc->is_vm) { + /* create a new memory region for the RMB or vzalloced sndbuf */ + access_flags = is_rmb ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_LOCAL_WRITE; + + rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags, buf_desc, lnk->link_idx); if (rc) goto buf_unmap; - smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE); + smc_ib_sync_sg_for_device(lnk, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } buf_desc->is_map_ib[lnk->link_idx] = true; return 0; @@ -2037,20 +2084,23 @@ free_table: return rc; } -/* register a new rmb on IB device, +/* register a new buf on IB device, rmb or vzalloced sndbuf * must be called under lgr->llc_conf_mutex lock */ -int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) +int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc) { if (list_empty(&link->lgr->list)) return -ENOLINK; - if (!rmb_desc->is_reg_mr[link->link_idx]) { - /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { - rmb_desc->is_reg_err = true; + if (!buf_desc->is_reg_mr[link->link_idx]) { + /* register memory region for new buf */ + if (buf_desc->is_vm) + buf_desc->mr[link->link_idx]->iova = + (uintptr_t)buf_desc->cpu_addr; + if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) { + buf_desc->is_reg_err = true; return -EFAULT; } - rmb_desc->is_reg_mr[link->link_idx] = true; + buf_desc->is_reg_mr[link->link_idx] = true; } return 0; } @@ -2102,18 +2152,38 @@ int smcr_buf_reg_lgr(struct smc_link *lnk) struct smc_buf_desc *buf_desc, *bf; int i, rc = 0; + /* reg all RMBs for a new link */ mutex_lock(&lgr->rmbs_lock); for (i = 0; i < SMC_RMBE_SIZES; i++) { list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { if (!buf_desc->used) continue; - rc = smcr_link_reg_rmb(lnk, buf_desc); - if (rc) - goto out; + rc = smcr_link_reg_buf(lnk, buf_desc); + if (rc) { + mutex_unlock(&lgr->rmbs_lock); + return rc; + } } } -out: mutex_unlock(&lgr->rmbs_lock); + + if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) + return rc; + + /* reg all vzalloced sndbufs for a new link */ + mutex_lock(&lgr->sndbufs_lock); + for (i = 0; i < SMC_RMBE_SIZES; i++) { + list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) { + if (!buf_desc->used || !buf_desc->is_vm) + continue; + rc = smcr_link_reg_buf(lnk, buf_desc); + if (rc) { + mutex_unlock(&lgr->sndbufs_lock); + return rc; + } + } + } + mutex_unlock(&lgr->sndbufs_lock); return rc; } @@ -2127,18 +2197,39 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, if (!buf_desc) return ERR_PTR(-ENOMEM); - buf_desc->order = get_order(bufsize); - buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | __GFP_COMP | - __GFP_NORETRY | __GFP_ZERO, - buf_desc->order); - if (!buf_desc->pages) { - kfree(buf_desc); - return ERR_PTR(-EAGAIN); - } - buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); - buf_desc->len = bufsize; + switch (lgr->buf_type) { + case SMCR_PHYS_CONT_BUFS: + case SMCR_MIXED_BUFS: + buf_desc->order = get_order(bufsize); + buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_NOMEMALLOC | __GFP_COMP | + __GFP_NORETRY | __GFP_ZERO, + buf_desc->order); + if (buf_desc->pages) { + buf_desc->cpu_addr = + (void *)page_address(buf_desc->pages); + buf_desc->len = bufsize; + buf_desc->is_vm = false; + break; + } + if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) + goto out; + fallthrough; // try virtually continguous buf + case SMCR_VIRT_CONT_BUFS: + buf_desc->order = get_order(bufsize); + buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order); + if (!buf_desc->cpu_addr) + goto out; + buf_desc->pages = NULL; + buf_desc->len = bufsize; + buf_desc->is_vm = true; + break; + } return buf_desc; + +out: + kfree(buf_desc); + return ERR_PTR(-EAGAIN); } /* map buf_desc on all usable links, @@ -2234,6 +2325,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) /* check for reusable slot in the link group */ buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); if (buf_desc) { + buf_desc->is_dma_need_sync = 0; SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); break; /* found reusable slot */ @@ -2268,7 +2360,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (!is_smcd) { if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { - smcr_buf_unuse(buf_desc, lgr); + smcr_buf_unuse(buf_desc, is_rmb, lgr); return -ENOMEM; } } @@ -2290,16 +2382,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) return 0; } -void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) -{ - if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || - !smc_link_active(conn->lnk)) - return; - smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); -} - void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { + if (!conn->sndbuf_desc->is_dma_need_sync) + return; if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; @@ -2310,6 +2396,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { int i; + if (!conn->rmb_desc->is_dma_need_sync) + return; if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { @@ -2320,20 +2408,6 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) } } -void smc_rmb_sync_sg_for_device(struct smc_connection *conn) -{ - int i; - - if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) - return; - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_active(&conn->lgr->lnk[i])) - continue; - smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, - DMA_FROM_DEVICE); - } -} - /* create the send and receive buffer for an SMC socket; * receive buffers are called RMBs; * (even though the SMC protocol allows more than one RMB-element per RMB, |