This fix enables ehca device driver to generate flush work completions
even if the application doesn't request completions for all work
requests. The current implementation of ehca will generate flush work
completions for the wrong work requests if an application uses non
signaled work completions.
Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
/* struct for tracking if cqes have been reported to the application */
struct ehca_qmap_entry {
u16 app_wr_id;
/* struct for tracking if cqes have been reported to the application */
struct ehca_qmap_entry {
u16 app_wr_id;
+ u8 reported;
+ u8 cqe_req;
};
struct ehca_queue_map {
};
struct ehca_queue_map {
unsigned int entries;
unsigned int tail;
unsigned int left_to_poll;
unsigned int entries;
unsigned int tail;
unsigned int left_to_poll;
+ unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */
- qmap->tail = 0;
- for (i = 0; i < qmap->entries; i++)
+ qmap->tail = qmap->entries - 1;
+ qmap->left_to_poll = 0;
+ qmap->next_wqe_idx = 0;
+ for (i = 0; i < qmap->entries; i++) {
qmap->map[i].reported = 1;
qmap->map[i].reported = 1;
+ qmap->map[i].cqe_req = 0;
+ }
void *wqe_v;
u64 q_ofs;
u32 wqe_idx;
void *wqe_v;
u64 q_ofs;
u32 wqe_idx;
/* convert real to abs address */
wqe_p = wqe_p & (~(1UL << 63));
/* convert real to abs address */
wqe_p = wqe_p & (~(1UL << 63));
+ tail_idx = (qmap->tail + 1) % qmap->entries;
wqe_idx = q_ofs / ipz_queue->qe_size;
wqe_idx = q_ofs / ipz_queue->qe_size;
- if (wqe_idx < qmap->tail)
- qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
- else
- qmap->left_to_poll = wqe_idx - qmap->tail;
+ /* check all processed wqes, whether a cqe is requested or not */
+ while (tail_idx != wqe_idx) {
+ if (qmap->map[tail_idx].cqe_req)
+ qmap->left_to_poll++;
+ tail_idx = (tail_idx + 1) % qmap->entries;
+ }
+ /* save index in queue, where we have to start flushing */
+ qmap->next_wqe_idx = wqe_idx;
} else {
spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
my_qp->sq_map.left_to_poll = 0;
} else {
spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
my_qp->sq_map.left_to_poll = 0;
+ my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+ my_qp->sq_map.entries;
spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
my_qp->rq_map.left_to_poll = 0;
spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
my_qp->rq_map.left_to_poll = 0;
+ my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+ my_qp->rq_map.entries;
spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
}
spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
}
qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
switch (send_wr->opcode) {
case IB_WR_SEND:
if ((send_wr->send_flags & IB_SEND_SIGNALED ||
qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
if ((send_wr->send_flags & IB_SEND_SIGNALED ||
qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ qmap_entry->cqe_req = 1;
+ }
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 1;
wqe_cnt++;
} /* eof for cur_recv_wr */
wqe_cnt++;
} /* eof for cur_recv_wr */
goto repoll;
wc->qp = &my_qp->ib_qp;
goto repoll;
wc->qp = &my_qp->ib_qp;
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
+ /* We got a receive completion. */
+ qmap = &my_qp->rq_map;
+
+ /* advance the tail pointer */
+ qmap->tail = qmap_tail_idx;
+
if (is_error) {
/*
* set left_to_poll to 0 because in error state, we will not
* get any additional CQEs
*/
if (is_error) {
/*
* set left_to_poll to 0 because in error state, we will not
* get any additional CQEs
*/
- ehca_add_to_err_list(my_qp, 1);
+ my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+ my_qp->sq_map.entries;
my_qp->sq_map.left_to_poll = 0;
my_qp->sq_map.left_to_poll = 0;
+ ehca_add_to_err_list(my_qp, 1);
+ my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+ my_qp->rq_map.entries;
+ my_qp->rq_map.left_to_poll = 0;
if (HAS_RQ(my_qp))
ehca_add_to_err_list(my_qp, 0);
if (HAS_RQ(my_qp))
ehca_add_to_err_list(my_qp, 0);
- my_qp->rq_map.left_to_poll = 0;
- qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
- if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
- /* We got a send completion. */
- qmap = &my_qp->sq_map;
- else
- /* We got a receive completion. */
- qmap = &my_qp->rq_map;
-
qmap_entry = &qmap->map[qmap_tail_idx];
if (qmap_entry->reported) {
ehca_warn(cq->device, "Double cqe on qp_num=%#x",
qmap_entry = &qmap->map[qmap_tail_idx];
if (qmap_entry->reported) {
ehca_warn(cq->device, "Double cqe on qp_num=%#x",
wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
qmap_entry->reported = 1;
wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
qmap_entry->reported = 1;
- /* this is a proper completion, we need to advance the tail pointer */
- if (++qmap->tail == qmap->entries)
- qmap->tail = 0;
-
/* if left_to_poll is decremented to 0, add the QP to the error list */
if (qmap->left_to_poll > 0) {
qmap->left_to_poll--;
/* if left_to_poll is decremented to 0, add the QP to the error list */
if (qmap->left_to_poll > 0) {
qmap->left_to_poll--;
else
qmap = &my_qp->rq_map;
else
qmap = &my_qp->rq_map;
- qmap_entry = &qmap->map[qmap->tail];
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
while ((nr < num_entries) && (qmap_entry->reported == 0)) {
/* generate flush CQE */
while ((nr < num_entries) && (qmap_entry->reported == 0)) {
/* generate flush CQE */
memset(wc, 0, sizeof(*wc));
memset(wc, 0, sizeof(*wc));
- offset = qmap->tail * ipz_queue->qe_size;
+ offset = qmap->next_wqe_idx * ipz_queue->qe_size;
wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
if (!wqe) {
ehca_err(cq->device, "Invalid wqe offset=%#lx on "
wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
if (!wqe) {
ehca_err(cq->device, "Invalid wqe offset=%#lx on "
- /* mark as reported and advance tail pointer */
+ /* mark as reported and advance next_wqe pointer */
qmap_entry->reported = 1;
qmap_entry->reported = 1;
- if (++qmap->tail == qmap->entries)
- qmap->tail = 0;
- qmap_entry = &qmap->map[qmap->tail];
+ qmap->next_wqe_idx++;
+ if (qmap->next_wqe_idx == qmap->entries)
+ qmap->next_wqe_idx = 0;
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];