[ofa-general] [PATCH 1 of 5] libibverbs: XRC implementation
Jack Morgenstein
jackm at dev.mellanox.co.il
Tue Sep 18 10:25:01 PDT 2007
Implement eXtended Reliable Connections.
Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 67a3bf8..30ba79f 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -99,6 +99,11 @@ int ibv_cmd_create_srq(struct ibv_pd *pd,
struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
struct ibv_create_srq *cmd, size_t cmd_size,
struct ibv_create_srq_resp *resp, size_t resp_size);
+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd,
+ struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+ uint32_t xrc_domain, uint32_t xrc_cq,
+ struct ibv_create_xrc_srq *cmd, size_t cmd_size,
+ struct ibv_create_srq_resp *resp, size_t resp_size);
int ibv_cmd_modify_srq(struct ibv_srq *srq,
struct ibv_srq_attr *srq_attr,
enum ibv_srq_attr_mask srq_attr_mask,
@@ -134,6 +139,12 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
int ibv_dontfork_range(void *base, size_t size);
int ibv_dofork_range(void *base, size_t size);
+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag,
+ struct ibv_xrc_domain *d,
+ struct ibv_open_xrc_domain_resp *resp,
+ size_t resp_size);
+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d);
+
/*
* sysfs helper functions
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..3845a4c 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -85,7 +85,10 @@ enum {
IB_USER_VERBS_CMD_MODIFY_SRQ,
IB_USER_VERBS_CMD_QUERY_SRQ,
IB_USER_VERBS_CMD_DESTROY_SRQ,
- IB_USER_VERBS_CMD_POST_SRQ_RECV
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
+ IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN,
+ IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN
};
/*
@@ -706,6 +709,21 @@ struct ibv_create_srq {
__u64 driver_data[0];
};
+struct ibv_create_xrc_srq {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srq_limit;
+ __u32 xrcd_handle;
+ __u32 xrc_cq;
+ __u64 driver_data[0];
+};
+
struct ibv_create_srq_resp {
__u32 srq_handle;
__u32 max_wr;
@@ -754,6 +772,29 @@ struct ibv_destroy_srq_resp {
__u32 events_reported;
};
+struct ibv_open_xrc_domain {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u32 fd;
+ __u32 oflags;
+ __u64 driver_data[0];
+};
+
+struct ibv_open_xrc_domain_resp {
+ __u32 xrcd_handle;
+};
+
+struct ibv_close_xrc_domain {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u32 xrcd_handle;
+ __u64 driver_data[0];
+};
+
/*
* Compatibility with older ABI versions
*/
@@ -803,6 +844,9 @@ enum {
* trick opcodes in IBV_INIT_CMD() doesn't break.
*/
IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL_V2 = -1,
+ IB_USER_VERBS_CMD_CREATE_XRC_SRQ_V2 = -1,
+ IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN_V2 = -1,
+ IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN_V2 = -1,
};
struct ibv_destroy_cq_v1 {
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index acc1b82..4c63208 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -92,7 +92,8 @@ enum ibv_device_cap_flags {
IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11,
IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12,
IBV_DEVICE_SRQ_RESIZE = 1 << 13,
- IBV_DEVICE_N_NOTIFY_CQ = 1 << 14
+ IBV_DEVICE_N_NOTIFY_CQ = 1 << 14,
+ IBV_DEVICE_XRC = 1 << 18
};
enum ibv_atomic_cap {
@@ -370,6 +371,11 @@ struct ibv_ah_attr {
uint8_t port_num;
};
+struct ibv_xrc_domain {
+ struct ibv_context *context;
+ uint32_t handle;
+};
+
enum ibv_srq_attr_mask {
IBV_SRQ_MAX_WR = 1 << 0,
IBV_SRQ_LIMIT = 1 << 1
@@ -389,7 +395,8 @@ struct ibv_srq_init_attr {
enum ibv_qp_type {
IBV_QPT_RC = 2,
IBV_QPT_UC,
- IBV_QPT_UD
+ IBV_QPT_UD,
+ IBV_QPT_XRC
};
struct ibv_qp_cap {
@@ -408,6 +415,7 @@ struct ibv_qp_init_attr {
struct ibv_qp_cap cap;
enum ibv_qp_type qp_type;
int sq_sig_all;
+ struct ibv_xrc_domain *xrc_domain;
};
enum ibv_qp_attr_mask {
@@ -526,6 +534,7 @@ struct ibv_send_wr {
uint32_t remote_qkey;
} ud;
} wr;
+ uint32_t xrc_remote_srq_num;
};
struct ibv_recv_wr {
@@ -553,6 +562,10 @@ struct ibv_srq {
pthread_mutex_t mutex;
pthread_cond_t cond;
uint32_t events_completed;
+
+ uint32_t xrc_srq_num;
+ struct ibv_xrc_domain *xrc_domain;
+ struct ibv_cq *xrc_cq;
};
struct ibv_qp {
@@ -570,6 +583,8 @@ struct ibv_qp {
pthread_mutex_t mutex;
pthread_cond_t cond;
uint32_t events_completed;
+
+ struct ibv_xrc_domain *xrc_domain;
};
struct ibv_comp_channel {
@@ -624,6 +639,7 @@ struct ibv_device {
char ibdev_path[IBV_SYSFS_PATH_MAX];
};
+#define HAVE_IBV_CREATE_XRC_SRQ
struct ibv_context_ops {
int (*query_device)(struct ibv_context *context,
struct ibv_device_attr *device_attr);
@@ -680,6 +696,13 @@ struct ibv_context_ops {
int (*detach_mcast)(struct ibv_qp *qp, union ibv_gid *gid,
uint16_t lid);
void (*async_event)(struct ibv_async_event *event);
+ struct ibv_srq * (*create_xrc_srq)(struct ibv_pd *pd,
+ struct ibv_xrc_domain *xrc_domain,
+ struct ibv_cq *xrc_cq,
+ struct ibv_srq_init_attr *srq_init_attr);
+ struct ibv_xrc_domain * (*open_xrc_domain)(struct ibv_context *context,
+ int fd, int oflag);
+ int (*close_xrc_domain)(struct ibv_xrc_domain *d);
};
struct ibv_context {
@@ -912,6 +935,25 @@ struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
struct ibv_srq_init_attr *srq_init_attr);
/**
+ * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection
+ * domain and xrc domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @xrc_domain: The XRC domain associated with the SRQ.
+ * @xrc_cq: CQ to report completions for XRC packets on.
+ *
+ * @srq_init_attr: A list of initial attributes required to create the SRQ.
+ *
+ * srq_init_attr->attr.max_wr and srq_init_attr->attr.max_sge are read to
+ * determine the requested size of the SRQ, and set to the actual values
+ * allocated on return. If ibv_create_xrc_srq() succeeds, then max_wr and
+ * max_sge will always be at least as large as the requested values.
+ */
+struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd,
+ struct ibv_xrc_domain *xrc_domain,
+ struct ibv_cq *xrc_cq,
+ struct ibv_srq_init_attr *srq_init_attr);
+
+/**
* ibv_modify_srq - Modifies the attributes for the specified SRQ.
* @srq: The SRQ to modify.
* @srq_attr: On input, specifies the SRQ attributes to modify. On output,
@@ -1074,6 +1116,42 @@ int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
*/
int ibv_fork_init(void);
+/**
+ * ibv_open_xrc_domain - open an XRC domain
+ * Returns a reference to an XRC domain.
+ *
+ * @context: Device context
+ * @fd: descriptor for inode associated with the domain
+ * If fd == -1, no inode is associated with the domain; in this case,
+ * the only legal value for oflag is O_CREAT
+ *
+ * @oflag: oflag values are constructed by OR-ing flags from the following list
+ *
+ * O_CREAT
+ * If a domain belonging to the device named by context is already associated
+ * with the inode, this flag has no effect, except as noted under O_EXCL
+ * below. Otherwise, a new XRC domain is created and is associated with
+ * the inode specified by fd.
+ *
+ * O_EXCL
+ * If O_EXCL and O_CREAT are set, open will fail if a domain associated with
+ * the inode already exists. The check for the existence of the domain and the
+ * creation of the domain if it does not exist are atomic with respect to other
+ * processes executing open with fd naming the same inode.
+ */
+struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context,
+ int fd, int oflag);
+
+/**
+ * ibv_close_xrc_domain - close an XRC domain
+ * If this is the last reference, destroys the domain.
+ *
+ * @d: reference to XRC domain to close
+ *
+ * close is implicitly performed at process exit.
+ */
+int ibv_close_xrc_domain(struct ibv_xrc_domain *d);
+
END_C_DECLS
# undef __attribute_const
diff --git a/src/cmd.c b/src/cmd.c
index 6d4331f..d6b2a4b 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -482,6 +482,34 @@ int ibv_cmd_create_srq(struct ibv_pd *pd,
return 0;
}
+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd,
+ struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+ uint32_t xrcd_handle, uint32_t xrc_cq,
+ struct ibv_create_xrc_srq *cmd, size_t cmd_size,
+ struct ibv_create_srq_resp *resp, size_t resp_size)
+{
+ IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XRC_SRQ, resp, resp_size);
+ cmd->user_handle = (uintptr_t) srq;
+ cmd->pd_handle = pd->handle;
+ cmd->max_wr = attr->attr.max_wr;
+ cmd->max_sge = attr->attr.max_sge;
+ cmd->srq_limit = attr->attr.srq_limit;
+ cmd->xrcd_handle = xrcd_handle;
+ cmd->xrc_cq = xrc_cq;
+
+ if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
+ return errno;
+
+ VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
+
+ srq->handle = resp->srq_handle;
+ srq->context = pd->context;
+ attr->attr.max_wr = resp->max_wr;
+ attr->attr.max_sge = resp->max_sge;
+
+ return 0;
+}
+
static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
struct ibv_srq_attr *srq_attr,
enum ibv_srq_attr_mask srq_attr_mask,
@@ -596,7 +624,6 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
cmd->pd_handle = pd->handle;
cmd->send_cq_handle = attr->send_cq->handle;
cmd->recv_cq_handle = attr->recv_cq->handle;
- cmd->srq_handle = attr->srq ? attr->srq->handle : 0;
cmd->max_send_wr = attr->cap.max_send_wr;
cmd->max_recv_wr = attr->cap.max_recv_wr;
cmd->max_send_sge = attr->cap.max_send_sge;
@@ -604,7 +631,11 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
cmd->max_inline_data = attr->cap.max_inline_data;
cmd->sq_sig_all = attr->sq_sig_all;
cmd->qp_type = attr->qp_type;
- cmd->is_srq = !!attr->srq;
+ cmd->is_srq = attr->qp_type == IBV_QPT_XRC ?
+ !!attr->xrc_domain : !!attr->srq;
+ cmd->srq_handle = attr->qp_type == IBV_QPT_XRC ?
+ (attr->xrc_domain ? attr->xrc_domain->handle : 0) :
+ (attr->srq ? attr->srq->handle : 0);
cmd->reserved = 0;
if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
@@ -1107,3 +1138,41 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
return 0;
}
+
+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag,
+ struct ibv_xrc_domain *d,
+ struct ibv_open_xrc_domain_resp *resp,
+ size_t resp_size)
+{
+ struct ibv_open_xrc_domain cmd;
+
+ if (abi_ver < 6)
+ return ENOSYS;
+
+ IBV_INIT_CMD_RESP(&cmd, sizeof cmd, OPEN_XRC_DOMAIN, resp, resp_size);
+ cmd.fd = fd;
+ cmd.oflags = oflag;
+
+ if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+ return errno;
+
+ d->handle = resp->xrcd_handle;
+
+ return 0;
+}
+
+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d)
+{
+ struct ibv_close_xrc_domain cmd;
+
+ if (abi_ver < 6)
+ return ENOSYS;
+
+ IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRC_DOMAIN);
+ cmd.xrcd_handle = d->handle;
+
+ if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+ return errno;
+ return 0;
+}
+
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 3a346ed..fea3ff7 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -91,4 +91,10 @@ IBVERBS_1.1 {
ibv_dontfork_range;
ibv_dofork_range;
ibv_register_driver;
+ ibv_create_xrc_srq;
+ ibv_cmd_create_xrc_srq;
+ ibv_open_xrc_domain;
+ ibv_cmd_open_xrc_domain;
+ ibv_close_xrc_domain;
+ ibv_cmd_close_xrc_domain;
} IBVERBS_1.0;
diff --git a/src/verbs.c b/src/verbs.c
index f5cf4d3..4083fcf 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -364,6 +364,9 @@ struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
srq->context = pd->context;
srq->srq_context = srq_init_attr->srq_context;
srq->pd = pd;
+ srq->xrc_domain = NULL;
+ srq->xrc_cq = NULL;
+ srq->xrc_srq_num = 0;
srq->events_completed = 0;
pthread_mutex_init(&srq->mutex, NULL);
pthread_cond_init(&srq->cond, NULL);
@@ -373,6 +376,32 @@ struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
}
default_symver(__ibv_create_srq, ibv_create_srq);
+struct ibv_srq *__ibv_create_xrc_srq(struct ibv_pd *pd,
+ struct ibv_xrc_domain *xrc_domain,
+ struct ibv_cq *xrc_cq,
+ struct ibv_srq_init_attr *srq_init_attr)
+{
+ struct ibv_srq *srq;
+
+ if (!pd->context->ops.create_xrc_srq)
+ return NULL;
+
+ srq = pd->context->ops.create_xrc_srq(pd, xrc_domain, xrc_cq, srq_init_attr);
+ if (srq) {
+ srq->context = pd->context;
+ srq->srq_context = srq_init_attr->srq_context;
+ srq->pd = pd;
+ srq->xrc_domain = xrc_domain;
+ srq->xrc_cq = xrc_cq;
+ srq->events_completed = 0;
+ pthread_mutex_init(&srq->mutex, NULL);
+ pthread_cond_init(&srq->cond, NULL);
+ }
+
+ return srq;
+}
+default_symver(__ibv_create_xrc_srq, ibv_create_xrc_srq);
+
int __ibv_modify_srq(struct ibv_srq *srq,
struct ibv_srq_attr *srq_attr,
enum ibv_srq_attr_mask srq_attr_mask)
@@ -396,8 +425,9 @@ default_symver(__ibv_destroy_srq, ibv_destroy_srq);
struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
struct ibv_qp_init_attr *qp_init_attr)
{
- struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr);
+ struct ibv_qp *qp;
+ qp = pd->context->ops.create_qp(pd, qp_init_attr);
if (qp) {
qp->context = pd->context;
qp->qp_context = qp_init_attr->qp_context;
@@ -408,6 +438,8 @@ struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
qp->qp_type = qp_init_attr->qp_type;
qp->state = IBV_QPS_RESET;
qp->events_completed = 0;
+ qp->xrc_domain = qp_init_attr->qp_type == IBV_QPT_XRC ?
+ qp_init_attr->xrc_domain : NULL;
pthread_mutex_init(&qp->mutex, NULL);
pthread_cond_init(&qp->cond, NULL);
}
@@ -541,3 +573,28 @@ int __ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
return qp->context->ops.detach_mcast(qp, gid, lid);
}
default_symver(__ibv_detach_mcast, ibv_detach_mcast);
+
+struct ibv_xrc_domain *__ibv_open_xrc_domain(struct ibv_context *context,
+ int fd, int oflag)
+{
+ struct ibv_xrc_domain *d;
+
+ if (!context->ops.open_xrc_domain)
+ return NULL;
+
+ d = context->ops.open_xrc_domain(context, fd, oflag);
+ if (d)
+ d->context = context;
+
+ return d;
+}
+default_symver(__ibv_open_xrc_domain, ibv_open_xrc_domain);
+
+int __ibv_close_xrc_domain(struct ibv_xrc_domain *d)
+{
+ if (!d->context->ops.close_xrc_domain)
+ return 0;
+
+ return d->context->ops.close_xrc_domain(d);
+}
+default_symver(__ibv_close_xrc_domain, ibv_close_xrc_domain);
More information about the general
mailing list