diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca')
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_classes.h | 21 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_cq.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_iverbs.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_qes.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_qp.c | 253 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_reqs.c | 231 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_tools.h | 1 |
7 files changed, 473 insertions, 39 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 0b0618edd645..5d7b7855afb9 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -156,6 +156,21 @@ struct ehca_mod_qp_parm { #define EHCA_MOD_QP_PARM_MAX 4 +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u16 reported; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; +}; + struct ehca_qp { union { struct ib_qp ib_qp; @@ -165,7 +180,9 @@ struct ehca_qp { enum ehca_ext_qp_type ext_type; enum ib_qp_state state; struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -195,6 +212,8 @@ struct ehca_qp { atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -224,6 +243,8 @@ struct ehca_cq { /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 5540b276a33c..33647a95eb9a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index a8a2ea585d2f..8f7f282ead65 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, enum ib_rate path_rate, u32 *ipd); +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 818803057ebf..5d28e3e98a20 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -213,6 +213,7 @@ struct ehca_wqe { #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 struct ehca_cqe { u64 work_request_id; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index ea13efddf175..4dbe2870e014 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = 0; + for (i = 0; i < qmap->entries; i++) + qmap->map[i].reported = 1; +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -407,7 +451,7 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); @@ -456,8 +500,7 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); has_srq = 1; parms.ext_type = EQPT_SRQBASE; @@ -715,6 +758,19 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit2; } + + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -724,8 +780,27 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't initialize rqueue " "and pages ret=%i", ret); - goto create_qp_exit3; + goto create_qp_exit4; } + + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } else if (init_attr->srq) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -770,7 +845,7 @@ static struct ehca_qp *internal_create_qp( if (!my_qp->mod_qp_parm) { ehca_err(pd->device, "Could not alloc mod_qp_parm"); - goto create_qp_exit4; + goto create_qp_exit5; } } } @@ -780,7 +855,7 @@ static struct ehca_qp *internal_create_qp( h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - goto create_qp_exit5; + goto create_qp_exit6; } } @@ -789,7 +864,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit5; + goto create_qp_exit7; } } @@ -815,22 +890,30 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit8; } } return my_qp; -create_qp_exit6: +create_qp_exit8: ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); -create_qp_exit5: +create_qp_exit7: kfree(my_qp->mod_qp_parm); -create_qp_exit4: +create_qp_exit6: + if (HAS_RQ(my_qp)) + vfree(my_qp->rq_map.map); + +create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); +create_qp_exit4: + if (HAS_SQ(my_qp)) + vfree(my_qp->sq_map.map); + create_qp_exit3: if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); @@ -1021,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = abs_to_virt(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + wqe_idx = q_ofs / ipz_queue->qe_size; + if (wqe_idx < qmap->tail) + qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx; + else + qmap->left_to_poll = wqe_idx - qmap->tail; + + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%li", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -1525,17 +1703,32 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp)) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) my_qp->qkey = attr->qkey; - my_qp->state = qp_new_state; - modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ spin_unlock_irqrestore(&my_qp->spinlock_s, flags); @@ -1551,6 +1744,8 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + int ret = 0; + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); @@ -1597,12 +1792,18 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, attr->qp_state, my_qp->init_attr.port_num, ibqp->qp_type); spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - return 0; + goto out; } spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); } - return internal_modify_qp(ibqp, attr, attr_mask, 0); + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; } void ehca_recover_sqp(struct ib_qp *sqp) @@ -1938,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp)) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + /* now wait until all pending events have completed */ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); @@ -1963,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1971,10 +2182,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (HAS_SQ(my_qp)) + + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + + vfree(my_qp->sq_map.map); + } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index dd9bc68f1c7b..64928079eafa 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,7 @@ */ -#include <asm-powerpc/system.h> +#include <asm/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -53,9 +53,25 @@ /* in RC traffic, insert an empty RDMA READ every this many packets */ #define ACK_CIRC_THRESHOLD 2000000 +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -139,12 +155,14 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, const struct ib_send_wr *send_wr, + u32 sq_map_idx, int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -157,7 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -381,6 +402,7 @@ static inline int post_one_send(struct ehca_qp *my_qp, { struct ehca_wqe *wqe_p; int ret; + u32 sq_map_idx; u64 start_offset = my_qp->ipz_squeue.current_q_offset; /* get pointer next to free WQE */ @@ -393,8 +415,15 @@ static inline int post_one_send(struct ehca_qp *my_qp, "qp_num=%x", my_qp->ib_qp.qp_num); return -ENOMEM; } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden); + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); /* * if something failed, * reset the free entry pointer to the start value @@ -483,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", @@ -511,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value @@ -527,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); + qmap_entry->reported = 0; + wqe_cnt++; } /* eof for cur_recv_wr */ @@ -583,13 +626,15 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; int cqe_count = 0, is_error; -poll_cq_one_read_cqe: +repoll: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { @@ -617,7 +662,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ - goto poll_cq_one_read_cqe; + goto repoll; } spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; @@ -636,7 +681,7 @@ poll_cq_one_read_cqe: * that caused sqe and turn off purge flag */ qp->sqerr_purgeflag = 0; - goto poll_cq_one_read_cqe; + goto repoll; } } @@ -654,8 +699,59 @@ poll_cq_one_read_cqe: my_cq, my_cq->cq_number); } - /* we got a completion! */ - wc->wr_id = cqe->work_request_id; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + ehca_add_to_err_list(my_qp, 1); + my_qp->sq_map.left_to_poll = 0; + + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + my_qp->rq_map.left_to_poll = 0; + } + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* this is a proper completion, we need to advance the tail pointer */ + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -667,7 +763,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! */ - goto poll_cq_one_exit0; + goto repoll; } /* eval ib_wc_status */ @@ -678,11 +774,6 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - wc->qp = &my_qp->ib_qp; - read_unlock(&ehca_qp_idr_lock); - wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; @@ -699,13 +790,88 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->tail]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + memset(wc, 0, sizeof(*wc)); + + offset = qmap->tail * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#lx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance tail pointer */ + qmap_entry->reported = 1; + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + qmap_entry = &qmap->map[qmap->tail]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -715,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index ec950bf8c479..21f7d06f14ad 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -54,7 +54,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> -#include <linux/version.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/device.h> |