From 04e10e2164fcfa05e14eff3c2757a5097f11d258 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 14 Jul 2014 21:34:51 +0530 Subject: iw_cxgb4: Detect Ing. Padding Boundary at run-time Updates iw_cxgb4 to determine the Ingress Padding Boundary from cxgb4_lld_info, and take subsequent actions. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/qp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 086f62f5dc9e..6f74e0e9a022 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -258,7 +258,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* * eqsize is the number of 64B entries plus the status page size. */ - eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ @@ -283,7 +284,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* * eqsize is the number of 64B entries plus the status page size. */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ @@ -1570,11 +1572,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, return ERR_PTR(-EINVAL); rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); - if (rqsize > T4_MAX_RQ_SIZE) + if (rqsize > rhp->rdev.hw_queue.t4_max_rq_size) return ERR_PTR(-E2BIG); sqsize = roundup(attrs->cap.max_send_wr + 1, 16); - if (sqsize > T4_MAX_SQ_SIZE) + if (sqsize > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; -- cgit v1.2.3 From 4c2c5763227a14ce111d6f35df708459d2443cc3 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 14 Jul 2014 21:34:52 +0530 Subject: cxgb4/iw_cxgb4: use firmware ord/ird resource limits Advertise a larger max read queue depth for qps, and gather the resource limits from fw and use them to avoid exhaustinq all the resources. Design: cxgb4: Obtain the max_ordird_qp and max_ird_adapter device params from FW at init time and pass them up to the ULDs when they attach. If these parameters are not available, due to older firmware, then hard-code the values based on the known values for older firmware. iw_cxgb4: Fix the c4iw_query_device() to report these correct values based on adapter parameters. ibv_query_device() will always return: max_qp_rd_atom = max_qp_init_rd_atom = min(module_max, max_ordird_qp) max_res_rd_atom = max_ird_adapter Bump up the per qp max module option to 32, allowing it to be increased by the user up to the device max of max_ordird_qp. 32 seems to be sufficient to maximize throughput for streaming read benchmarks. Fail connection setup if the negotiated IRD exhausts the available adapter ird resources. So the driver will track the amount of ird resource in use and not send an RI_WR/INIT to FW that would reduce the available ird resources below zero. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 80 ++++++++++++++++++------- drivers/infiniband/hw/cxgb4/device.c | 2 + drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9 ++- drivers/infiniband/hw/cxgb4/provider.c | 6 +- drivers/infiniband/hw/cxgb4/qp.c | 54 ++++++++++++++--- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 18 ++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 + drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 2 + 9 files changed, 142 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d62a0f9dd11a..df5bd3df08a2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -79,9 +79,10 @@ static int dack_mode = 1; module_param(dack_mode, int, 0644); MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); -int c4iw_max_read_depth = 8; +uint c4iw_max_read_depth = 32; module_param(c4iw_max_read_depth, int, 0644); -MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)"); +MODULE_PARM_DESC(c4iw_max_read_depth, + "Per-connection max ORD/IRD (default=32)"); static int enable_tcp_timestamps; module_param(enable_tcp_timestamps, int, 0644); @@ -813,6 +814,8 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, if (mpa_rev_to_use == 2) { mpa->private_data_size = htons(ntohs(mpa->private_data_size) + sizeof (struct mpa_v2_conn_params)); + PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, + ep->ord); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -1182,8 +1185,8 @@ static int connect_request_upcall(struct c4iw_ep *ep) sizeof(struct mpa_v2_conn_params); } else { /* this means MPA_v1 is used. Send max supported */ - event.ord = c4iw_max_read_depth; - event.ird = c4iw_max_read_depth; + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } @@ -1247,6 +1250,8 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return credits; } +#define RELAXED_IRD_NEGOTIATION 1 + static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; @@ -1358,17 +1363,33 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) MPA_V2_IRD_ORD_MASK; resp_ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + PDBG("%s responder ird %u ord %u ep ird %u ord %u\n", + __func__, resp_ird, resp_ord, ep->ird, ep->ord); /* * This is a double-check. Ideally, below checks are * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr */ - if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { + if (ep->ird < resp_ord) { + if (RELAXED_IRD_NEGOTIATION && resp_ord <= + ep->com.dev->rdev.lldi.max_ordird_qp) + ep->ird = resp_ord; + else + insuff_ird = 1; + } else if (ep->ird > resp_ord) { + ep->ird = resp_ord; + } + if (ep->ord > resp_ird) { + if (RELAXED_IRD_NEGOTIATION) + ep->ord = resp_ird; + else + insuff_ird = 1; + } + if (insuff_ird) { err = -ENOMEM; ep->ird = resp_ord; ep->ord = resp_ird; - insuff_ird = 1; } if (ntohs(mpa_v2_params->ird) & @@ -1571,6 +1592,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) MPA_V2_IRD_ORD_MASK; ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, + ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) if (peer2peer) { if (ntohs(mpa_v2_params->ord) & @@ -2724,8 +2747,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); - if ((conn_param->ord > c4iw_max_read_depth) || - (conn_param->ird > c4iw_max_read_depth)) { + if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || + (conn_param->ird > cur_max_read_depth(ep->com.dev))) { abort_connection(ep, NULL, GFP_KERNEL); err = -EINVAL; goto err; @@ -2733,31 +2756,41 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - send_mpa_reject(ep, conn_param->private_data, - conn_param->private_data_len); - abort_connection(ep, NULL, GFP_KERNEL); - err = -ENOMEM; - goto err; + if (RELAXED_IRD_NEGOTIATION) { + ep->ord = ep->ird; + } else { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } } - if (conn_param->ird > ep->ord) { - if (!ep->ord) - conn_param->ird = 1; - else { + if (conn_param->ird < ep->ord) { + if (RELAXED_IRD_NEGOTIATION && + ep->ord <= h->rdev.lldi.max_ordird_qp) { + conn_param->ird = ep->ord; + } else { abort_connection(ep, NULL, GFP_KERNEL); err = -ENOMEM; goto err; } } - } ep->ird = conn_param->ird; ep->ord = conn_param->ord; - if (ep->mpa_attr.version != 2) + if (ep->mpa_attr.version == 1) { if (peer2peer && ep->ird == 0) ep->ird = 1; + } else { + if (peer2peer && + (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0) + ep->ird = 1; + } PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); @@ -2796,6 +2829,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return 0; err1: ep->com.cm_id = NULL; + abort_connection(ep, NULL, GFP_KERNEL); cm_id->rem_ref(cm_id); err: mutex_unlock(&ep->com.mutex); @@ -2879,8 +2913,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int iptype; int iwpm_err = 0; - if ((conn_param->ord > c4iw_max_read_depth) || - (conn_param->ird > c4iw_max_read_depth)) { + if ((conn_param->ord > cur_max_read_depth(dev)) || + (conn_param->ird > cur_max_read_depth(dev))) { err = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 88291ef82941..e76358efcaa1 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -348,6 +348,7 @@ static int stats_show(struct seq_file *seq, void *v) dev->rdev.stats.act_ofld_conn_fails); seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); return 0; } @@ -839,6 +840,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9b9754c69ea0..75541cb833c6 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -249,6 +249,7 @@ struct c4iw_dev { struct idr atid_idr; struct idr stid_idr; struct list_head db_fc_list; + u32 avail_ird; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -330,6 +331,13 @@ static inline void remove_handle_nolock(struct c4iw_dev *rhp, _remove_handle(rhp, idr, id, 0); } +extern uint c4iw_max_read_depth; + +static inline int cur_max_read_depth(struct c4iw_dev *dev) +{ + return min(dev->rdev.lldi.max_ordird_qp, c4iw_max_read_depth); +} + struct c4iw_pd { struct ib_pd ibpd; u32 pdid; @@ -1003,7 +1011,6 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; -extern int c4iw_max_read_depth; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1d41b92caaf5..67c4a6908021 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -322,8 +322,10 @@ static int c4iw_query_device(struct ib_device *ibdev, props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; props->max_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; - props->max_qp_rd_atom = c4iw_max_read_depth; - props->max_qp_init_rd_atom = c4iw_max_read_depth; + props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; + props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, + c4iw_max_read_depth); + props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->max_cq = T4_MAX_NUM_CQ; props->max_cqe = dev->rdev.hw_queue.t4_max_cq_depth; props->max_mr = c4iw_num_stags(&dev->rdev); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 6f74e0e9a022..0de3cf64eb5e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -58,6 +58,31 @@ static int max_fr_immd = T4_MAX_FR_IMMD; module_param(max_fr_immd, int, 0644); MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); +static int alloc_ird(struct c4iw_dev *dev, u32 ird) +{ + int ret = 0; + + spin_lock_irq(&dev->lock); + if (ird <= dev->avail_ird) + dev->avail_ird -= ird; + else + ret = -ENOMEM; + spin_unlock_irq(&dev->lock); + + if (ret) + dev_warn(&dev->rdev.lldi.pdev->dev, + "device IRD resources exhausted\n"); + + return ret; +} + +static void free_ird(struct c4iw_dev *dev, int ird) +{ + spin_lock_irq(&dev->lock); + dev->avail_ird += ird; + spin_unlock_irq(&dev->lock); +} + static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; @@ -1204,12 +1229,20 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) int ret; struct sk_buff *skb; - PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, - qhp->ep->hwtid); + PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp, + qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord); skb = alloc_skb(sizeof *wqe, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + ret = -ENOMEM; + goto out; + } + ret = alloc_ird(rhp, qhp->attr.max_ird); + if (ret) { + qhp->attr.max_ird = 0; + kfree_skb(skb); + goto out; + } set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1260,10 +1293,14 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) - goto out; + goto err1; ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + if (!ret) + goto out; +err1: + free_ird(rhp, qhp->attr.max_ird); out: PDBG("%s ret %d\n", __func__, ret); return ret; @@ -1308,7 +1345,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, newattr.max_ord = attrs->max_ord; } if (mask & C4IW_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > c4iw_max_read_depth) { + if (attrs->max_ird > cur_max_read_depth(rhp)) { ret = -EINVAL; goto out; } @@ -1531,6 +1568,7 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) if (!list_empty(&qhp->db_fc_entry)) list_del_init(&qhp->db_fc_entry); spin_unlock_irq(&rhp->lock); + free_ird(rhp, qhp->attr.max_ird); ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; @@ -1621,8 +1659,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; + qhp->attr.max_ord = 0; + qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index f338a7fcebf7..46156210df34 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -310,6 +310,9 @@ struct adapter_params { unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + + unsigned int max_ordird_qp; /* Max read depth per RDMA QP */ + unsigned int max_ird_adapter; /* Max read depth per adapter */ }; #include "t4fw_api.h" diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a7ce996630ed..767cbbaa3d1e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4113,6 +4113,8 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.sge_egrstatuspagesize = adap->sge.stat_len; lli.sge_pktshift = adap->sge.pktshift; lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; + lli.max_ordird_qp = adap->params.max_ordird_qp; + lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; handle = ulds[uld].add(&lli); @@ -5877,6 +5879,22 @@ static int adap_init0(struct adapter *adap) adap->vres.cq.size = val[3] - val[2] + 1; adap->vres.ocq.start = val[4]; adap->vres.ocq.size = val[5] - val[4] + 1; + + params[0] = FW_PARAM_DEV(MAXORDIRD_QP); + params[1] = FW_PARAM_DEV(MAXIRD_ADAPTER); + ret = t4_query_params(adap, 0, 0, 0, 2, params, val); + if (ret < 0) { + adap->params.max_ordird_qp = 8; + adap->params.max_ird_adapter = 32 * adap->tids.ntids; + ret = 0; + } else { + adap->params.max_ordird_qp = val[0]; + adap->params.max_ird_adapter = val[1]; + } + dev_info(adap->pdev_dev, + "max_ordird_qp %d max_ird_adapter %d\n", + adap->params.max_ordird_qp, + adap->params.max_ird_adapter); } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 962458f5d5b3..df1d9446768a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -258,6 +258,8 @@ struct cxgb4_lld_info { unsigned int pf; /* Physical Function we're using */ bool enable_fw_ofld_conn; /* Enable connection through fw */ /* WR */ + unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */ + unsigned int max_ird_adapter; /* Max IRD memory per adapter */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 4a6ae4db7397..ff709e3b3e7e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -934,6 +934,8 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_FWREV = 0x0B, FW_PARAMS_PARAM_DEV_TPREV = 0x0C, FW_PARAMS_PARAM_DEV_CF = 0x0D, + FW_PARAMS_PARAM_DEV_MAXORDIRD_QP = 0x13, /* max supported QP IRD/ORD */ + FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, }; -- cgit v1.2.3 From 7730b4c7e32c0ab4d7db746a9c3a84cf715161fa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 14 Jul 2014 21:34:54 +0530 Subject: cxgb4/iw_cxgb4: work request logging feature This commit enhances the iwarp driver to optionally keep a log of rdma work request timining data for kernel mode QPs. If iw_cxgb4 module option c4iw_wr_log is set to non-zero, each work request is tracked and timing data maintained in a rolling log that is 4096 entries deep by default. Module option c4iw_wr_log_size_order allows specifing a log2 size to use instead of the default order of 12 (4096 entries). Both module options are read-only and must be passed in at module load time to set them. IE: modprobe iw_cxgb4 c4iw_wr_log=1 c4iw_wr_log_size_order=10 The timing data is viewable via the iw_cxgb4 debugfs file "wr_log". Writing anything to this file will clear all the timing data. Data tracked includes: - The host time when the work request was posted, just before ringing the doorbell. The host time when the completion was polled by the application. This is also the time the log entry is created. The delta of these two times is the amount of time took processing the work request. - The qid of the EQ used to post the work request. - The work request opcode. - The cqe wr_id field. For sq completions requests this is the swsqe index. For recv completions this is the MSN of the ingress SEND. This value can be used to match log entries from this log with firmware flowc event entries. - The sge timestamp value just before ringing the doorbell when posting, the sge timestamp value just after polling the completion, and CQE.timestamp field from the completion itself. With these three timestamps we can track the latency from post to poll, and the amount of time the completion resided in the CQ before being reaped by the application. With debug firmware, the sge timestamp is also logged by firmware in its flowc history so that we can compute the latency from posting the work request until the firmware sees it. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cq.c | 4 + drivers/infiniband/hw/cxgb4/device.c | 137 ++++++++++++++++++++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 17 +++ drivers/infiniband/hw/cxgb4/qp.c | 12 +++ drivers/infiniband/hw/cxgb4/t4.h | 4 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 14 +++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 + drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 6 ++ 8 files changed, 196 insertions(+) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index f04a838b65c7..de9bcf2e6d30 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -633,11 +633,15 @@ proc_cqe: wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); t4_rq_consume(wq); goto skip_cqe; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8386678f1159..df1f1b52c7ec 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -55,6 +55,15 @@ module_param(allow_db_coalescing_on_t5, int, 0644); MODULE_PARM_DESC(allow_db_coalescing_on_t5, "Allow DB Coalescing on T5 (default = 0)"); +int c4iw_wr_log = 0; +module_param(c4iw_wr_log, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data."); + +int c4iw_wr_log_size_order = 12; +module_param(c4iw_wr_log_size_order, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log_size_order, + "Number of entries (log2) in the work request timing log."); + struct uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; @@ -103,6 +112,117 @@ static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); } +void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe) +{ + struct wr_log_entry le; + int idx; + + if (!wq->rdev->wr_log) + return; + + idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) & + (wq->rdev->wr_log_size - 1); + le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]); + getnstimeofday(&le.poll_host_ts); + le.valid = 1; + le.cqe_sge_ts = CQE_TS(cqe); + if (SQ_TYPE(cqe)) { + le.qid = wq->sq.qid; + le.opcode = CQE_OPCODE(cqe); + le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts; + le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts; + le.wr_id = CQE_WRID_SQ_IDX(cqe); + } else { + le.qid = wq->rq.qid; + le.opcode = FW_RI_RECEIVE; + le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts; + le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts; + le.wr_id = CQE_WRID_MSN(cqe); + } + wq->rdev->wr_log[idx] = le; +} + +static int wr_log_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + struct timespec prev_ts = {0, 0}; + struct wr_log_entry *lep; + int prev_ts_set = 0; + int idx, end; + +#define ts2ns(ts) ((ts) * dev->rdev.lldi.cclk_ps / 1000) + + idx = atomic_read(&dev->rdev.wr_log_idx) & + (dev->rdev.wr_log_size - 1); + end = idx - 1; + if (end < 0) + end = dev->rdev.wr_log_size - 1; + lep = &dev->rdev.wr_log[idx]; + while (idx != end) { + if (lep->valid) { + if (!prev_ts_set) { + prev_ts_set = 1; + prev_ts = lep->poll_host_ts; + } + seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode " + "%u %s 0x%x host_wr_delta sec %lu nsec %lu " + "post_sge_ts 0x%llx cqe_sge_ts 0x%llx " + "poll_sge_ts 0x%llx post_poll_delta_ns %llu " + "cqe_poll_delta_ns %llu\n", + idx, + timespec_sub(lep->poll_host_ts, + prev_ts).tv_sec, + timespec_sub(lep->poll_host_ts, + prev_ts).tv_nsec, + lep->qid, lep->opcode, + lep->opcode == FW_RI_RECEIVE ? + "msn" : "wrid", + lep->wr_id, + timespec_sub(lep->poll_host_ts, + lep->post_host_ts).tv_sec, + timespec_sub(lep->poll_host_ts, + lep->post_host_ts).tv_nsec, + lep->post_sge_ts, lep->cqe_sge_ts, + lep->poll_sge_ts, + ts2ns(lep->poll_sge_ts - lep->post_sge_ts), + ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts)); + prev_ts = lep->poll_host_ts; + } + idx++; + if (idx > (dev->rdev.wr_log_size - 1)) + idx = 0; + lep = &dev->rdev.wr_log[idx]; + } +#undef ts2ns + return 0; +} + +static int wr_log_open(struct inode *inode, struct file *file) +{ + return single_open(file, wr_log_show, inode->i_private); +} + +static ssize_t wr_log_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + int i; + + if (dev->rdev.wr_log) + for (i = 0; i < dev->rdev.wr_log_size; i++) + dev->rdev.wr_log[i].valid = 0; + return count; +} + +static const struct file_operations wr_log_debugfs_fops = { + .owner = THIS_MODULE, + .open = wr_log_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = wr_log_clear, +}; + static int dump_qp(int id, void *p, void *data) { struct c4iw_qp *qp = p; @@ -604,6 +724,12 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + if (c4iw_wr_log) { + de = debugfs_create_file("wr_log", S_IWUSR, devp->debugfs_root, + (void *)devp, &wr_log_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + } return 0; } @@ -717,6 +843,16 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pr_err(MOD "error allocating status page\n"); goto err4; } + if (c4iw_wr_log) { + rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) * + sizeof(*rdev->wr_log), GFP_KERNEL); + if (rdev->wr_log) { + rdev->wr_log_size = 1 << c4iw_wr_log_size_order; + atomic_set(&rdev->wr_log_idx, 0); + } else { + pr_err(MOD "error allocating wr_log. Logging disabled\n"); + } + } return 0; err4: c4iw_rqtpool_destroy(rdev); @@ -730,6 +866,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + kfree(rdev->wr_log); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 75541cb833c6..69f047cdba6a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -150,6 +150,18 @@ struct c4iw_hw_queue { int t4_stat_len; }; +struct wr_log_entry { + struct timespec post_host_ts; + struct timespec poll_host_ts; + u64 post_sge_ts; + u64 cqe_sge_ts; + u64 poll_sge_ts; + u16 qid; + u16 wr_id; + u8 opcode; + u8 valid; +}; + struct c4iw_rdev { struct c4iw_resource resource; unsigned long qpshift; @@ -169,6 +181,9 @@ struct c4iw_rdev { struct c4iw_stats stats; struct c4iw_hw_queue hw_queue; struct t4_dev_status_page *status_page; + atomic_t wr_log_idx; + struct wr_log_entry *wr_log; + int wr_log_size; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -1011,6 +1026,8 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); +extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 0de3cf64eb5e..fd66bd9a9db0 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -823,6 +823,11 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qhp->sq_sig_all; swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = cxgb4_read_sge_timestamp( + qhp->rhp->rdev.lldi.ports[0]); + getnstimeofday(&swsqe->host_ts); + } init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); @@ -886,6 +891,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; + if (c4iw_wr_log) { + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts = + cxgb4_read_sge_timestamp( + qhp->rhp->rdev.lldi.ports[0]); + getnstimeofday( + &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts); + } wqe->recv.opcode = FW_RI_RECV_WR; wqe->recv.r1 = 0; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index dd45186d1c55..c9f7034e6647 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -269,6 +269,8 @@ struct t4_swsqe { int signaled; u16 idx; int flushed; + struct timespec host_ts; + u64 sge_ts; }; static inline pgprot_t t4_pgprot_wc(pgprot_t prot) @@ -306,6 +308,8 @@ struct t4_sq { struct t4_swrqe { u64 wr_id; + struct timespec host_ts; + u64 sge_ts; }; struct t4_rq { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index ba7d13de5e83..9c7e4f0a7683 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3898,6 +3898,19 @@ err: } EXPORT_SYMBOL(cxgb4_read_tpte); +u64 cxgb4_read_sge_timestamp(struct net_device *dev) +{ + u32 hi, lo; + struct adapter *adap; + + adap = netdev2adap(dev); + lo = t4_read_reg(adap, SGE_TIMESTAMP_LO); + hi = GET_TSVAL(t4_read_reg(adap, SGE_TIMESTAMP_HI)); + + return ((u64)hi << 32) | (u64)lo; +} +EXPORT_SYMBOL(cxgb4_read_sge_timestamp); + static struct pci_driver cxgb4_driver; static void check_neigh_update(struct neighbour *neigh) @@ -4161,6 +4174,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.wr_cred = adap->params.ofldq_wr_cred; lli.adapter_type = adap->params.chip; lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2)); + lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; lli.udb_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index c7170d68bf63..79a84de1d204 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -243,6 +243,7 @@ struct cxgb4_lld_info { unsigned char fw_api_ver; /* FW API version */ unsigned int fw_vers; /* FW version */ unsigned int iscsi_iolen; /* iSCSI max I/O length */ + unsigned int cclk_ps; /* Core clock period in psec */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ unsigned short filt_mode; /* filter optional components */ @@ -297,5 +298,6 @@ int cxgb4_flush_eq_cache(struct net_device *dev); void cxgb4_disable_db_coalescing(struct net_device *dev); void cxgb4_enable_db_coalescing(struct net_device *dev); int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte); +u64 cxgb4_read_sge_timestamp(struct net_device *dev); #endif /* !__CXGB4_OFLD_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index ae7776471ceb..3b244abbf907 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -251,6 +251,12 @@ #define V_NOCOALESCE(x) ((x) << S_NOCOALESCE) #define F_NOCOALESCE V_NOCOALESCE(1U) +#define SGE_TIMESTAMP_LO 0x1098 +#define SGE_TIMESTAMP_HI 0x109c +#define S_TSVAL 0 +#define M_TSVAL 0xfffffffU +#define GET_TSVAL(x) (((x) >> S_TSVAL) & M_TSVAL) + #define SGE_TIMER_VALUE_0_AND_1 0x10b8 #define TIMERVALUE0_MASK 0xffff0000U #define TIMERVALUE0_SHIFT 16 -- cgit v1.2.3 From 3e5c02c9ef9a86f39014156ddb8a9676a01f41a9 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 21 Jul 2014 20:55:14 +0530 Subject: iw_cxgb4: Support query_qp() verb Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index fd66bd9a9db0..0e7e0e30fba4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1856,5 +1856,11 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, memset(attr, 0, sizeof *attr); memset(init_attr, 0, sizeof *init_attr); attr->qp_state = to_ib_qp_state(qhp->attr.state); + init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; + init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; + init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; + init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } -- cgit v1.2.3 From 66eb19af0b459426a1f6ba3f78235ffecd1bc5ab Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 21 Jul 2014 20:55:15 +0530 Subject: iw_cxgb4: advertise the correct device max attributes Advertise the actual max limits for things like qp depths, number of qps, cqs, etc. Clean up the queue allocation for qps and cqs. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cq.c | 8 +------- drivers/infiniband/hw/cxgb4/device.c | 16 ++++++++-------- drivers/infiniband/hw/cxgb4/provider.c | 4 ++-- drivers/infiniband/hw/cxgb4/qp.c | 35 ++++++++++++++++++++-------------- drivers/infiniband/hw/cxgb4/t4.h | 2 -- 5 files changed, 32 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index de9bcf2e6d30..0f773e78e080 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -913,14 +913,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * memsize must be a multiple of the page size if its a user cq. */ - if (ucontext) { + if (ucontext) memsize = roundup(memsize, PAGE_SIZE); - hwentries = memsize / sizeof *chp->cq.queue; - while (hwentries > rhp->rdev.hw_queue.t4_max_iq_size) { - memsize -= PAGE_SIZE; - hwentries = memsize / sizeof *chp->cq.queue; - } - } chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 03b6fa1291bf..bda949223637 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -934,17 +934,17 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.hw_queue.t4_eq_status_entries = devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1; - devp->rdev.hw_queue.t4_max_eq_size = - 65520 - devp->rdev.hw_queue.t4_eq_status_entries; - devp->rdev.hw_queue.t4_max_iq_size = 65520 - 1; - devp->rdev.hw_queue.t4_max_rq_size = - 8192 - devp->rdev.hw_queue.t4_eq_status_entries; + devp->rdev.hw_queue.t4_max_eq_size = 65520; + devp->rdev.hw_queue.t4_max_iq_size = 65520; + devp->rdev.hw_queue.t4_max_rq_size = 8192 - + devp->rdev.hw_queue.t4_eq_status_entries - 1; devp->rdev.hw_queue.t4_max_sq_size = - devp->rdev.hw_queue.t4_max_eq_size - 1; + devp->rdev.hw_queue.t4_max_eq_size - + devp->rdev.hw_queue.t4_eq_status_entries - 1; devp->rdev.hw_queue.t4_max_qp_depth = - devp->rdev.hw_queue.t4_max_rq_size - 1; + devp->rdev.hw_queue.t4_max_rq_size; devp->rdev.hw_queue.t4_max_cq_depth = - devp->rdev.hw_queue.t4_max_iq_size - 1; + devp->rdev.hw_queue.t4_max_iq_size - 2; devp->rdev.hw_queue.t4_stat_len = devp->rdev.lldi.sge_egrstatuspagesize; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 67c4a6908021..72e3b69d1b76 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -318,7 +318,7 @@ static int c4iw_query_device(struct ib_device *ibdev, props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; props->max_mr_size = T4_MAX_MR_SIZE; - props->max_qp = T4_MAX_NUM_QP; + props->max_qp = dev->rdev.lldi.vr->qp.size / 2; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; props->max_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; @@ -326,7 +326,7 @@ static int c4iw_query_device(struct ib_device *ibdev, props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, c4iw_max_read_depth); props->max_qp_init_rd_atom = props->max_qp_rd_atom; - props->max_cq = T4_MAX_NUM_CQ; + props->max_cq = dev->rdev.lldi.vr->qp.size; props->max_cqe = dev->rdev.hw_queue.t4_max_cq_depth; props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 0e7e0e30fba4..c158fcc02bca 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -205,9 +205,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, } /* - * RQT must be a power of 2. + * RQT must be a power of 2 and at least 16 deep. */ - wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); + wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) { ret = -ENOMEM; @@ -1621,13 +1621,17 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); - if (rqsize > rhp->rdev.hw_queue.t4_max_rq_size) + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; - sqsize = roundup(attrs->cap.max_send_wr + 1, 16); - if (sqsize > rhp->rdev.hw_queue.t4_max_sq_size) + if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); + sqsize = attrs->cap.max_send_wr + 1; + if (sqsize < 8) + sqsize = 8; ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; @@ -1635,19 +1639,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; - qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; + qhp->wq.sq.memsize = + (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); } - PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n", - __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); - ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) @@ -1766,9 +1771,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); INIT_LIST_HEAD(&qhp->db_fc_entry); - PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", - __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.sq.qid); + PDBG("%s sq id %u size %u memsize %zu num_entries %u " + "rq id %u size %u memsize %zu num_entries %u\n", __func__, + qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, + attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size, + qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err8: kfree(mm5); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index c9f7034e6647..641ab55b1d55 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -36,8 +36,6 @@ #include "t4_msg.h" #include "t4fw_ri_api.h" -#define T4_MAX_NUM_QP 65536 -#define T4_MAX_NUM_CQ 65536 #define T4_MAX_NUM_PD 65536 #define T4_MAX_NUM_STAG (1<<15) #define T4_MAX_MR_SIZE (~0ULL) -- cgit v1.2.3 From 678ea9b5baab6800692b249bdba77c3c07261d61 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 31 Jul 2014 14:35:43 -0500 Subject: RDMA/cxgb4: Only call CQ completion handler if it is armed The function __flush_qp() always calls the ULP's CQ completion handler functions even if the CQ was not armed. This can crash the system if the function pointer is NULL. The iSER ULP behaves this way: no completion handler and never arm the CQ for notification. So now we track whether the CQ is armed at flush time and only call the completion handlers if their CQs were armed. Also, if the RCQ and SCQ are the same CQ, the completion handler is getting called twice. It should only be called once after all SQ and RQ WRs are flushed from the QP. So rearrange the logic to fix this. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/ev.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 37 +++++++++++++++++++++++++------------ drivers/infiniband/hw/cxgb4/t4.h | 11 +++++++++++ 3 files changed, 37 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/qp.c') diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index d61d0a18f784..a98426fed9ee 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -182,6 +182,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) chp = get_chp(dev, qid); if (chp) { + t4_clear_cq_armed(&chp->cq); spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 086f62f5dc9e..60cfc11a66e4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1066,7 +1066,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int flushed; + int rq_flushed, sq_flushed; unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); @@ -1084,27 +1084,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) { - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); - } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); if (schp != rchp) c4iw_flush_hw_cq(schp); - flushed = c4iw_flush_sq(qhp); + sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) { - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + + if (schp == rchp) { + if (t4_clear_cq_armed(&rchp->cq) && + (rq_flushed || sq_flushed)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 68b0a6bf4eb0..d8d7fa3e446d 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -531,6 +531,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq) return !wq->rq.queue[wq->rq.size].status.db_off; } +enum t4_cq_flags { + CQ_ARMED = 1, +}; + struct t4_cq { struct t4_cqe *queue; dma_addr_t dma_addr; @@ -551,12 +555,19 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + unsigned long flags; }; +static inline int t4_clear_cq_armed(struct t4_cq *cq) +{ + return test_and_clear_bit(CQ_ARMED, &cq->flags); +} + static inline int t4_arm_cq(struct t4_cq *cq, int se) { u32 val; + set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_MASK) { val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | INGRESSQID(cq->cqid); -- cgit v1.2.3