Add a new IB_WR_SEND_WITH_INV send opcode that can be used to mark a
"send with invalidate" work request as defined in the iWARP verbs and
the InfiniBand base memory management extensions. Also move "imm_data"
into a new "ex" union in struct ib_send_wr and add a new
"invalidate_rkey" member to that union; invalidate_rkey can be used to
pass in an R_Key/STag to be invalidated. Add the same "ex" union to
struct ib_uverbs_send_wr, and add code to copy the invalidate_rkey
field in ib_uverbs_post_send().
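
For illustration only (not part of this patch), a minimal sketch of how
a kernel consumer might post a send-with-invalidate request once this
change is in; qp, sge and peer_rkey are illustrative placeholders:

    struct ib_send_wr wr = { }, *bad_wr;
    int ret;

    wr.opcode             = IB_WR_SEND_WITH_INV;
    wr.ex.invalidate_rkey = peer_rkey;  /* R_Key/STag to be invalidated */
    wr.sg_list            = &sge;
    wr.num_sge            = 1;
    wr.send_flags         = IB_SEND_SIGNALED;

    ret = ib_post_send(qp, &wr, &bad_wr);
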
Fix up low-level drivers to deal with the change to struct ib_send_wr,
and just remove the imm_data initialization from net/sunrpc/xprtrdma/,
since that code never does any send with immediate operations.
Also, move the existing IB_DEVICE_SEND_W_INV flag to a new bit, since
the iWARP drivers currently in the tree set the old bit and would
otherwise advertise support for semantics they may not honor. The
amso1100 driver, at least, will silently fail to honor the
IB_SEND_INVALIDATE bit if it is passed in as part of a userspace send
request (since it does not implement kernel bypass work request
queueing). Remove the flag from all existing drivers that set it until
we know which ones are OK.
The value chosen for the new flag is not consecutive with the existing
flags, to avoid clashing with flags defined in the XRC patches, which
are not merged yet but are already in use and are likely to be merged
soon.
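
Since existing drivers no longer advertise the capability, a consumer
would check the relocated bit before using the new opcode (again an
illustrative sketch, not part of this patch; "device" is a placeholder
struct ib_device pointer):

    struct ib_device_attr attr;

    if (!ib_query_device(device, &attr) &&
        (attr.device_cap_flags & IB_DEVICE_SEND_W_INV)) {
            /* device claims to honor IB_WR_SEND_WITH_INV */
    }
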
This resurrects a patch sent long ago by Mikkel Hagen <mhagen@iol.unh.edu>.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
15 files changed:
next->num_sge = user_wr->num_sge;
next->opcode = user_wr->opcode;
next->send_flags = user_wr->send_flags;
- next->imm_data = (__be32 __force) user_wr->imm_data;
if (is_ud) {
next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
+ case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
next->wr.rdma.remote_addr =
user_wr->wr.rdma.remote_addr;
next->wr.rdma.rkey =
user_wr->wr.rdma.rkey;
break;
+ case IB_WR_SEND_WITH_IMM:
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ next->ex.invalidate_rkey =
+ user_wr->ex.invalidate_rkey;
+ break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
next->wr.atomic.remote_addr =
IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_ZERO_STAG |
- IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
/* Allocate the qptr_array */
c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
dev->device_cap_flags =
- (IB_DEVICE_ZERO_STAG |
- IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+ (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
wqe->send.reserved[2] = 0;
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
plen = 4;
- wqe->send.sgl[0].stag = wr->imm_data;
+ wqe->send.sgl[0].stag = wr->ex.imm_data;
wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
wqe->send.num_sgle = __constant_cpu_to_be32(0);
*flit_cnt = 5;
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
- wqe->write.sgl[0].stag = wr->imm_data;
+ wqe->write.sgl[0].stag = wr->ex.imm_data;
wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
wqe->write.num_sgle = __constant_cpu_to_be32(0);
*flit_cnt = 6;
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
/* this might not work as long as HW does not support it */
- wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+ wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
}
else {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
- ohdr->u.rc.imm_data = wqe->wr.imm_data;
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
else {
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.imm_data;
+ wc.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
if (!ipath_get_rwqe(qp, 0)) {
goto err;
}
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.imm_data;
+ wc.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
- ohdr->u.rc.imm_data = wqe->wr.imm_data;
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = swqe->wr.imm_data;
+ wc.imm_data = swqe->wr.ex.imm_data;
} else {
wc.wc_flags = 0;
wc.imm_data = 0;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
- ohdr->u.ud.imm_data = wqe->wr.imm_data;
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->imm_data;
+ sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
default:
return -EINVAL;
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ctrl->imm = wr->imm_data;
+ ctrl->imm = wr->ex.imm_data;
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->imm_data;
+ sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
default:
return -EINVAL;
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
nesadapter->base_pd = 1;
nesadapter->device_cap_flags =
- IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
+ IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
__u32 num_sge;
__u32 opcode;
__u32 send_flags;
+ union {
+ __u32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
union {
struct {
__u64 remote_addr;
IB_DEVICE_SRQ_RESIZE = (1<<13),
IB_DEVICE_N_NOTIFY_CQ = (1<<14),
IB_DEVICE_ZERO_STAG = (1<<15),
- IB_DEVICE_SEND_W_INV = (1<<16),
+ IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
IB_DEVICE_MEM_WINDOW = (1<<17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
*/
IB_DEVICE_UD_IP_CSUM = (1<<18),
IB_DEVICE_UD_TSO = (1<<19),
+ IB_DEVICE_SEND_W_INV = (1<<21),
IB_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_LSO,
+ IB_WR_SEND_WITH_INV,
int num_sge;
enum ib_wr_opcode opcode;
int send_flags;
+ union {
+ __be32 imm_data;
+ u32 invalidate_rkey;
+ } ex;
union {
struct {
u64 remote_addr;
send_wr.sg_list = req->rl_send_iov;
send_wr.num_sge = req->rl_niovs;
send_wr.opcode = IB_WR_SEND;
- send_wr.imm_data = 0;
if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
ib_dma_sync_single_for_device(ia->ri_id->device,
req->rl_send_iov[3].addr, req->rl_send_iov[3].length,