@@ ... @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                             struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
 {
-       struct mlx4_wqe_ctrl_seg *ctrl;
        int err;
-       int i;
 
        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
@@ ... @@
                if (err)
                        goto err_mtt;
 
-               for (i = 0; i < qp->sq.max; ++i) {
-                       ctrl = get_send_wqe(qp, i);
-                       ctrl->owner_opcode = cpu_to_be32(1 << 31);
-               }
-
                qp->sq.wrid  = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
                qp->rq.wrid  = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
 
@@ ... @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        else
                sqd_event = 0;
 
+       /*
+        * Before passing a kernel QP to the HW, make sure that the
+        * ownership bits of the send queue are set so that the
+        * hardware doesn't start processing stale work requests.
+        */
+       if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+               struct mlx4_wqe_ctrl_seg *ctrl;
+               int i;
+
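+               /*
+                * Bit 31 of owner_opcode is the ownership bit that the
+                * post_send path toggles on each pass through the send
+                * queue; first-pass WQEs are posted with the bit clear,
+                * so presetting it keeps never-posted entries from ever
+                * matching the polarity the hardware expects.
+                */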
+               for (i = 0; i < qp->sq.max; ++i) {
+                       ctrl = get_send_wqe(qp, i);
+                       ctrl->owner_opcode = cpu_to_be32(1 << 31);
+               }
+       }
+
        err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
                             to_mlx4_state(new_state), context, optpar,
                             sqd_event, &qp->mqp);
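
For reference on why presetting bit 31 works: at this point in the
driver's history, the send path stamps each WQE's owner_opcode with
(ind & qp->sq.max) as the ownership polarity, where ind is a
monotonically increasing producer index, so the bit flips on every
wrap of the send queue and stays 0 for the entire first pass.  A toy
userspace sketch of that arithmetic (illustrative values only, not
driver code):

	#include <stdio.h>

	/*
	 * Toy model of the send-queue ownership polarity: with sq_max a
	 * power of two, (ind & sq_max) flips each time the producer
	 * index wraps, while (ind & (sq_max - 1)) selects the WQE slot.
	 * Entries pre-stamped with the bit set never match the first
	 * pass's polarity of 0.
	 */
	int main(void)
	{
		unsigned int sq_max = 4;	/* assumed queue depth */
		unsigned int ind;

		for (ind = 0; ind < 3 * sq_max; ++ind)
			printf("ind=%2u slot=%u owner_bit=%d\n",
			       ind, ind & (sq_max - 1), !!(ind & sq_max));
		return 0;
	}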