[ofa-general] [PATCH/RFC] Add support for "send with invalidate" to libibverbs
Roland Dreier
rdreier at cisco.com
Tue Apr 1 14:24:09 PDT 2008
In kernel commit c80cf84d ("IB/core: Add support for "send with
invalidate" work requests"), which is currently queued for 2.6.26, I
added support for send with invalidate work requests on the kernel side
of things. This patch adds the matching support to libibverbs.
There is one part that's a bit tricky: in ibv_cmd_query_device(), I
added a bit of code to move IBV_DEVICE_SEND_W_INV to the reserved bit
where it used to be. This is to make sure that the userspace low-level
driver for the device in question really supports send with invalidate.
To see why this is necessary, suppose that we didn't do this and a user
had a system with
- a new kernel with a low-level driver that sets the
IB_DEVICE_SEND_W_INV bit
- a new libibverbs with send with invalidate support
- an old userspace driver that has no send with invalidate support
In this case send with invalidate requests would be silently turned into
plain send requests with no way for an application to know this. With
the approach in my patch, the application will not see
IBV_DEVICE_SEND_W_INV set and hence should not use send with invalidate
requests.
This scheme means that low-level drivers that support send with
invalidate should add some autoconf code that checks if
IBV_DEVICE_KERNEL_SEND_W_INV is defined, and if so, compile in code in
the query_device method that sets IBV_DEVICE_SEND_W_INV if
ibv_cmd_query_device() returns IBV_DEVICE_KERNEL_SEND_W_INV set.
This patch also adds enum values for a few more device capability bits
defined in the kernel.
Does this approach make sense to people?
---
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..ee799bb 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -592,6 +592,10 @@ struct ibv_kern_send_wr {
__u32 remote_qkey;
__u32 reserved;
} ud;
+ struct {
+ __u32 rkey;
+ __u32 reserved;
+ } invalidate;
} wr;
};
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index a51bb9d..679386a 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -92,7 +92,18 @@ enum ibv_device_cap_flags {
IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11,
IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12,
IBV_DEVICE_SRQ_RESIZE = 1 << 13,
- IBV_DEVICE_N_NOTIFY_CQ = 1 << 14
+ IBV_DEVICE_N_NOTIFY_CQ = 1 << 14,
+ IBV_DEVICE_ZERO_STAG = 1 << 15,
+ /*
+ * IBV_DEVICE_KERNEL_SEND_W_INV is used by libibverbs to
+ * signal to low-level driver libraries that the kernel set
+ * the "send with invalidate" capability bit. Applications
+ * should only test IBV_DEVICE_SEND_W_INV and never look at
+ * IBV_DEVICE_KERNEL_SEND_W_INV.
+ */
+ IBV_DEVICE_KERNEL_SEND_W_INV = 1 << 16,
+ IBV_DEVICE_MEM_WINDOW = 1 << 17,
+ IBV_DEVICE_SEND_W_INV = 1 << 21
};
enum ibv_atomic_cap {
@@ -492,7 +503,8 @@ enum ibv_send_flags {
IBV_SEND_FENCE = 1 << 0,
IBV_SEND_SIGNALED = 1 << 1,
IBV_SEND_SOLICITED = 1 << 2,
- IBV_SEND_INLINE = 1 << 3
+ IBV_SEND_INLINE = 1 << 3,
+ IBV_SEND_INVALIDATE = 1 << 6
};
struct ibv_sge {
@@ -525,6 +537,9 @@ struct ibv_send_wr {
uint32_t remote_qpn;
uint32_t remote_qkey;
} ud;
+ struct {
+ uint32_t rkey;
+ } invalidate;
} wr;
};
diff --git a/src/cmd.c b/src/cmd.c
index 9db8aa6..3e0ff0a 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -159,6 +159,17 @@ int ibv_cmd_query_device(struct ibv_context *context,
device_attr->local_ca_ack_delay = resp.local_ca_ack_delay;
device_attr->phys_port_cnt = resp.phys_port_cnt;
+ /*
+ * If the kernel driver says that it supports send with
+ * invalidate work requests, then move the flag to
+ * IBV_DEVICE_KERNEL_SEND_W_INV so that the low-level driver
+ * gets a chance to make sure it supports the operation as well.
+ */
+ if (device_attr->device_cap_flags & IBV_DEVICE_SEND_W_INV) {
+ device_attr->device_cap_flags &= ~IBV_DEVICE_SEND_W_INV;
+ device_attr->device_cap_flags |= IBV_DEVICE_KERNEL_SEND_W_INV;
+ }
+
return 0;
}
@@ -859,6 +870,11 @@ int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
i->wr.rdma.remote_addr;
tmp->wr.rdma.rkey = i->wr.rdma.rkey;
break;
+ case IBV_WR_SEND:
+ case IBV_WR_SEND_WITH_IMM:
+ tmp->wr.invalidate.rkey =
+ i->wr.invalidate.rkey;
+ break;
case IBV_WR_ATOMIC_CMP_AND_SWP:
case IBV_WR_ATOMIC_FETCH_AND_ADD:
tmp->wr.atomic.remote_addr =
More information about the general
mailing list