[ofa-general] [PATCH 1/8] libibverbs: implement XRC qps

Jack Morgenstein jackm at dev.mellanox.co.il
Wed Jan 23 01:59:41 PST 2008


Implements the full XRC QP interface.

Changes:
Added creation of XRC receive-only QPs for userspace, which
reside in kernel space (user cannot post-to or poll these QPs).

Motivation:  MPI community requires XRC receive QPs which will
not be destroyed when the creating process terminates.

Solution:  Userspace requests that a QP be created in kernel space.
Each userspace process using that QP (i.e. receiving packets on an XRC SRQ
via the qp) registers with that QP (-- the creator is also registered, whether
or not it is a user of the QP). When the last userspace user unregisters with
the QP, it is destroyed.  Unregistration is also part of userspace process
cleanup, so there is no leakage.

This patch implements the following:
ibv_create_xrc_rcv_qp
ibv_modify_xrc_rcv_qp
ibv_query_xrc_rcv_qp
ibv_reg_xrc_rcv_qp
ibv_unreg_xrc_rcv_qp

Creating process (userspace) workflow:
ibv_create_xrc_rcv_qp -- to create (also registers the QP).
ibv_modify_xrc_rcv_qp -- to move QP to INIT
ibv_modify_xrc_rcv_qp -- to move QP to RTR (to RTS is not needed for receive-only QPs)

ibv_unreg_xrc_rcv_qp -- instead of destroy.

Using process workflow:
ibv_create_xrc_srq -- to create an SRQ
ibv_reg_xrc_rcv_qp -- to register with the QP as a user

ibv_destroy_srq
ibv_unreg_xrc_rcv_qp -- to "unregister" with the QP.  If no user processes
remain registered, the QP is destroyed.

NOTES:
1. Since there is no userspace object for the QP, the API uses the XRC
domain object and qp number instead.
  
2. Registration needs to be performed only once per process
(multiple registrations count as a single registration).

3. Async events for the receive QP are delivered to all registered processes.
The event ID is "OR'ed" with 0x80000000, to indicate that this is an
XRC receive-only QP event.  The (new) element-field union value "xrc_qp_num"
is set to the QP number which generated the event.

If the QP goes into the error state for any reason, each registered userspace
process will receive the LAST_WQE_REACHED event for the QP; each process should
then call ibv_unreg_xrc_rcv_qp() so that the QP will be destroyed.

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>

diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 67a3bf8..e871f7d 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -99,6 +99,11 @@ int ibv_cmd_create_srq(struct ibv_pd *pd,
 		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
 		       struct ibv_create_srq *cmd, size_t cmd_size,
 		       struct ibv_create_srq_resp *resp, size_t resp_size);
+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd,
+		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+		       uint32_t xrc_domain, uint32_t xrc_cq,
+		       struct ibv_create_xrc_srq *cmd, size_t cmd_size,
+		       struct ibv_create_srq_resp *resp, size_t resp_size);
 int ibv_cmd_modify_srq(struct ibv_srq *srq,
 		       struct ibv_srq_attr *srq_attr,
 		       enum ibv_srq_attr_mask srq_attr_mask,
@@ -134,6 +139,20 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag,
+			    struct ibv_xrc_domain *d,
+			    struct ibv_open_xrc_domain_resp *resp,
+			    size_t resp_size);
+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d);
+int ibv_cmd_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			      uint32_t *xrc_rcv_qpn);
+int ibv_cmd_modify_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_rcv_qpn,
+			      struct ibv_qp_attr *attr, int attr_mask);
+int ibv_cmd_query_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_rcv_qpn,
+			     struct ibv_qp_attr *attr, int attr_mask,
+			     struct ibv_qp_init_attr *init_attr);
+int ibv_cmd_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num);
+int ibv_cmd_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num);
 
 /*
  * sysfs helper functions
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..97949b9 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -85,7 +85,15 @@ enum {
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
+	IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN,
+	IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN,
+	IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_REG_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP,
 };
 
 /*
@@ -567,6 +575,92 @@ struct ibv_destroy_qp_resp {
 	__u32 events_reported;
 };
 
+struct ibv_create_xrc_rcv_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;
+	__u32 xrc_domain_handle;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
+	__u8  sq_sig_all;
+	__u8  qp_type;
+	__u8  reserved[2];
+	__u64 driver_data[0];
+};
+
+struct ibv_create_xrc_rcv_qp_resp {
+	__u32 qpn;
+	__u32 reserved;
+};
+
+struct ibv_modify_xrc_rcv_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	struct ibv_qp_dest dest;
+	struct ibv_qp_dest alt_dest;
+	__u32 attr_mask;
+	__u32 qkey;
+	__u32 rq_psn;
+	__u32 sq_psn;
+	__u32 dest_qp_num;
+	__u32 qp_access_flags;
+	__u16 pkey_index;
+	__u16 alt_pkey_index;
+	__u8  qp_state;
+	__u8  cur_qp_state;
+	__u8  path_mtu;
+	__u8  path_mig_state;
+	__u8  en_sqd_async_notify;
+	__u8  max_rd_atomic;
+	__u8  max_dest_rd_atomic;
+	__u8  min_rnr_timer;
+	__u8  port_num;
+	__u8  timeout;
+	__u8  retry_cnt;
+	__u8  rnr_retry;
+	__u8  alt_port_num;
+	__u8  alt_timeout;
+	__u8  reserved[2];
+	__u64 driver_data[0];
+};
+
+struct ibv_query_xrc_rcv_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u32 attr_mask;
+	__u64 driver_data[0];
+};
+
+struct ibv_reg_xrc_rcv_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u64 driver_data[0];
+};
+
+struct ibv_unreg_xrc_rcv_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u64 driver_data[0];
+};
+
 struct ibv_kern_send_wr {
 	__u64 wr_id;
 	__u32 num_sge;
@@ -706,6 +800,21 @@ struct ibv_create_srq {
 	__u64 driver_data[0];
 };
 
+struct ibv_create_xrc_srq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 max_wr;
+	__u32 max_sge;
+	__u32 srq_limit;
+	__u32 xrcd_handle;
+	__u32 xrc_cq;
+	__u64 driver_data[0];
+};
+
 struct ibv_create_srq_resp {
 	__u32 srq_handle;
 	__u32 max_wr;
@@ -754,6 +863,29 @@ struct ibv_destroy_srq_resp {
 	__u32 events_reported;
 };
 
+struct ibv_open_xrc_domain {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 fd;
+	__u32 oflags;
+	__u64 driver_data[0];
+};
+
+struct ibv_open_xrc_domain_resp {
+	__u32 xrcd_handle;
+};
+
+struct ibv_close_xrc_domain {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 xrcd_handle;
+	__u64 driver_data[0];
+};
+
 /*
  * Compatibility with older ABI versions
  */
@@ -803,6 +935,14 @@ enum {
 	 * trick opcodes in IBV_INIT_CMD() doesn't break.
 	 */
 	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL_V2 = -1,
+	IB_USER_VERBS_CMD_CREATE_XRC_SRQ_V2 = -1,
+	IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN_V2 = -1,
+	IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN_V2 = -1,
+	IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP_V2 = -1,
+	IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP_V2 = -1,
+	IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP_V2 = -1,
+	IB_USER_VERBS_CMD_REG_XRC_RCV_QP_V2 = -1,
+	IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP_V2 = -1,
 };
 
 struct ibv_destroy_cq_v1 {
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index acc1b82..a032a67 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -92,7 +92,8 @@ enum ibv_device_cap_flags {
 	IBV_DEVICE_SYS_IMAGE_GUID	= 1 << 11,
 	IBV_DEVICE_RC_RNR_NAK_GEN	= 1 << 12,
 	IBV_DEVICE_SRQ_RESIZE		= 1 << 13,
-	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14
+	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14,
+	IBV_DEVICE_XRC		        = 1 << 20
 };
 
 enum ibv_atomic_cap {
@@ -204,12 +205,17 @@ enum ibv_event_type {
 	IBV_EVENT_CLIENT_REREGISTER
 };
 
+enum ibv_event_flags {
+	IBV_XRC_QP_EVENT_FLAG = 0x80000000,
+};
+
 struct ibv_async_event {
 	union {
 		struct ibv_cq  *cq;
 		struct ibv_qp  *qp;
 		struct ibv_srq *srq;
 		int		port_num;
+		uint32_t	xrc_qp_num;
 	} element;
 	enum ibv_event_type	event_type;
 };
@@ -370,6 +376,11 @@ struct ibv_ah_attr {
 	uint8_t			port_num;
 };
 
+struct ibv_xrc_domain {
+	struct ibv_context     *context;
+	uint32_t		handle;
+};
+
 enum ibv_srq_attr_mask {
 	IBV_SRQ_MAX_WR	= 1 << 0,
 	IBV_SRQ_LIMIT	= 1 << 1
@@ -389,7 +400,8 @@ struct ibv_srq_init_attr {
 enum ibv_qp_type {
 	IBV_QPT_RC = 2,
 	IBV_QPT_UC,
-	IBV_QPT_UD
+	IBV_QPT_UD,
+	IBV_QPT_XRC
 };
 
 struct ibv_qp_cap {
@@ -408,6 +420,7 @@ struct ibv_qp_init_attr {
 	struct ibv_qp_cap	cap;
 	enum ibv_qp_type	qp_type;
 	int			sq_sig_all;
+	struct ibv_xrc_domain  *xrc_domain;
 };
 
 enum ibv_qp_attr_mask {
@@ -526,6 +539,7 @@ struct ibv_send_wr {
 			uint32_t	remote_qkey;
 		} ud;
 	} wr;
+	uint32_t		xrc_remote_srq_num;
 };
 
 struct ibv_recv_wr {
@@ -553,6 +567,10 @@ struct ibv_srq {
 	pthread_mutex_t		mutex;
 	pthread_cond_t		cond;
 	uint32_t		events_completed;
+
+	uint32_t		xrc_srq_num;
+	struct ibv_xrc_domain  *xrc_domain;
+	struct ibv_cq	       *xrc_cq;
 };
 
 struct ibv_qp {
@@ -570,6 +588,8 @@ struct ibv_qp {
 	pthread_mutex_t		mutex;
 	pthread_cond_t		cond;
 	uint32_t		events_completed;
+
+	struct ibv_xrc_domain  *xrc_domain;
 };
 
 struct ibv_comp_channel {
@@ -624,6 +644,32 @@ struct ibv_device {
 	char			ibdev_path[IBV_SYSFS_PATH_MAX];
 };
 
+struct ibv_xrc_ops {
+	struct ibv_srq *	(*create_xrc_srq)(struct ibv_pd *pd,
+						  struct ibv_xrc_domain *xrc_domain,
+						  struct ibv_cq *xrc_cq,
+						  struct ibv_srq_init_attr *srq_init_attr);
+	struct ibv_xrc_domain *	(*open_xrc_domain)(struct ibv_context *context,
+						   int fd, int oflag);
+	int			(*close_xrc_domain)(struct ibv_xrc_domain *d);
+	int			(*create_xrc_rcv_qp)(struct ibv_qp_init_attr *init_attr,
+						     uint32_t *xrc_qp_num);
+	int			(*modify_xrc_rcv_qp)(struct ibv_xrc_domain *xrc_domain,
+						     uint32_t xrc_qp_num,
+						     struct ibv_qp_attr *attr,
+						     int attr_mask);
+	int			(*query_xrc_rcv_qp)(struct ibv_xrc_domain *xrc_domain,
+						    uint32_t xrc_qp_num,
+						    struct ibv_qp_attr *attr,
+						    int attr_mask,
+						    struct ibv_qp_init_attr *init_attr);
+	int 			(*reg_xrc_rcv_qp)(struct ibv_xrc_domain *xrc_domain,
+						  uint32_t xrc_qp_num);
+	int 			(*unreg_xrc_rcv_qp)(struct ibv_xrc_domain *xrc_domain,
+						    uint32_t xrc_qp_num);
+
+};
+
 struct ibv_context_ops {
 	int			(*query_device)(struct ibv_context *context,
 					      struct ibv_device_attr *device_attr);
@@ -690,6 +736,7 @@ struct ibv_context {
 	int			num_comp_vectors;
 	pthread_mutex_t		mutex;
 	void		       *abi_compat;
+	struct ibv_xrc_ops     *xrc_ops;
 };
 
 /**
@@ -912,6 +959,25 @@ struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
 			       struct ibv_srq_init_attr *srq_init_attr);
 
 /**
+ * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection
+ *   domain and xrc domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @xrc_domain: The XRC domain associated with the SRQ.
+ * @xrc_cq: CQ to report completions for XRC packets on.
+ *
+ * @srq_init_attr: A list of initial attributes required to create the SRQ.
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return.  If ibv_create_xrc_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd,
+				   struct ibv_xrc_domain *xrc_domain,
+				   struct ibv_cq *xrc_cq,
+			           struct ibv_srq_init_attr *srq_init_attr);
+
+/**
  * ibv_modify_srq - Modifies the attributes for the specified SRQ.
  * @srq: The SRQ to modify.
  * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
@@ -1074,6 +1140,136 @@ int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
  */
 int ibv_fork_init(void);
 
+/**
+ * ibv_open_xrc_domain - open an XRC domain
+ * Returns a reference to an XRC domain.
+ *
+ * @context: Device context
+ * @fd: descriptor for inode associated with the domain
+ *     If fd == -1, no inode is associated with the domain; in this case,
+ *     the only legal value for oflag is O_CREAT
+ *
+ * @oflag: oflag values are constructed by OR-ing flags from the following list
+ *
+ * O_CREAT
+ *     If a domain belonging to device named by context is already associated
+ *     with the inode, this flag has no effect, except as noted under O_EXCL
+ *     below. Otherwise, a new XRC domain is created and is associated with
+ *     inode specified by fd.
+ *
+ * O_EXCL
+ *     If O_EXCL and O_CREAT are set, open will fail if a domain associated with
+ *     the inode exists. The check for the existence of the domain and creation
+ *     of the domain if it does not exist is atomic with respect to other
+ *     processes executing open with fd naming the same inode.
+ */
+struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context,
+					   int fd, int oflag);
+
+/**
+ * ibv_close_xrc_domain - close an XRC domain
+ * If this is the last reference, destroys the domain.
+ *
+ * @d: reference to XRC domain to close
+ *
+ * close is implicitly performed at process exit.
+ */
+int ibv_close_xrc_domain(struct ibv_xrc_domain *d);
+
+/**
+ * ibv_create_xrc_rcv_qp - creates an XRC QP for serving as a receive-side only QP.
+ *
+ * This QP is created in kernel space, and persists until the last process registered
+ * for the QP calls ibv_unreg_xrc_rcv_qp() (at which time the QP is destroyed).
+ *
+ * @init_attr: init attributes to use for QP. xrc domain MUST be included here. All other fields
+ *	       are ignored.
+ * 
+ * @xrc_rcv_qpn: qp_num of created QP (if success). To be passed to the remote node (sender).
+ *		 The remote node will use xrc_rcv_qpn in ibv_post_send when sending to
+ *		 XRC SRQ's on this host in the same xrc domain.
+ *
+ * RETURNS: success (0), or a (negative) error value.
+ *
+ * NOTE: this verb also registers the calling user-process with the QP at its creation time
+ *       (implicit call to ibv_reg_xrc_rcv_qp), to avoid race conditions.
+ *       The creating process will need to call ibv_unreg_xrc_rcv_qp() to release the QP from
+ *       this process.
+ */
+int ibv_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			  uint32_t *xrc_rcv_qpn);
+
+/**
+ * ibv_modify_xrc_rcv_qp - modifies an xrc_rcv qp.
+ *
+ * @xrc_domain: xrc domain the QP belongs to (for verification).
+ * @xrc_qp_num: The (24 bit) number of the XRC QP.
+ * @attr: modify-qp attributes. The following fields must be specified: 
+ * 		for RESET_2_INIT: qp_state, pkey_index, port, qp_access_flags
+ * 		for INIT_2_RTR:   qp_state, path_mtu, dest_qp_num, rq_psn, max_dest_rd_atomic,
+ * 				  min_rnr_timer, ah_attr
+ *		The QP need not be brought to RTS for the QP to operate as a receive-only QP.
+ * @attr_mask:  bitmap indicating which attributes are provided in the attr struct.
+ * 	used for validity checking.  The following bits must be set:
+ *		for RESET_2_INIT: IBV_QP_PKEY_INDEX, IBV_QP_PORT, IBV_QP_ACCESS_FLAGS, IBV_QP_STATE
+ * 		for INIT_2_RTR: IBV_QP_AV, IBV_QP_PATH_MTU, IBV_QP_DEST_QPN, IBV_QP_RQ_PSN, 
+ * 				IBV_QP_MAX_DEST_RD_ATOMIC, IBV_QP_MIN_RNR_TIMER, IBV_QP_STATE
+ * 
+ * RETURNS: success (0), or a (negative) error value.
+ *
+ */
+int ibv_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num,
+			  struct ibv_qp_attr *attr, int attr_mask);
+
+/**
+ * ibv_query_xrc_rcv_qp - queries an xrc_rcv qp.
+ *
+ * @xrc_domain: xrc domain the QP belongs to (for verification).
+ * @xrc_qp_num: The (24 bit) number of the XRC QP.
+ * @attr: for returning qp attributes.
+ * @attr_mask:  bitmap indicating which attributes to return.
+ * @init_attr: for returning the init attributes
+ * 
+ * RETURNS: success (0), or a (negative) error value.
+ *
+ */
+int ibv_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num,
+			 struct ibv_qp_attr *attr, int attr_mask,
+			 struct ibv_qp_init_attr *init_attr);
+
+/**
+ * ibv_reg_xrc_rcv_qp: registers a user process with an XRC QP which serves as
+ *         a receive-side only QP.
+ *
+ * @xrc_domain: xrc domain the QP belongs to (for verification).
+ * @xrc_qp_num: The (24 bit) number of the XRC QP.
+ *
+ * RETURNS: success (0), 
+ *          or error (-EINVAL), if:
+ *            1. There is no such QP_num allocated.
+ *            2. The QP is allocated, but is not an receive XRC QP
+ *            3. The XRC QP does not belong to the given domain.
+ */
+int ibv_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num);
+
+/**
+ * ibv_unreg_xrc_rcv_qp: detaches a user process from an XRC QP serving as
+ *         a receive-side only QP. If as a result, there are no remaining
+ *	   userspace processes registered for this XRC QP, it is destroyed.
+ *
+ * @xrc_domain: xrc domain the QP belongs to (for verification).
+ * @xrc_qp_num: The (24 bit) number of the XRC QP.
+ *
+ * RETURNS: success (0), 
+ *          or error (-EINVAL), if:
+ *            1. There is no such QP_num allocated.
+ *            2. The QP is allocated, but is not an XRC QP
+ *            3. The XRC QP does not belong to the given domain.
+ * NOTE: there is no special return code for the case where the QP is destroyed -- the
+ *       unregister simply succeeds.
+ */
+int ibv_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num);
+
 END_C_DECLS
 
 #  undef __attribute_const
diff --git a/src/cmd.c b/src/cmd.c
index 31b592e..2857a6c 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -248,7 +248,7 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
 		return errno;
 
-	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
+	VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
 
 	mr->handle  = resp->mr_handle;
 	mr->lkey    = resp->lkey;
@@ -291,7 +291,7 @@ static int ibv_cmd_create_cq_v2(struct ibv_context *context, int cqe,
 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
 		return errno;
 
-	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof resp_size);
+	VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
 
 	cq->handle  = resp->cq_handle;
 	cq->cqe     = resp->cqe;
@@ -432,6 +432,7 @@ int ibv_cmd_destroy_cq(struct ibv_cq *cq)
 
 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
 	cmd.cq_handle = cq->handle;
+	cmd.reserved  = 0;
 
 	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
@@ -482,6 +483,34 @@ int ibv_cmd_create_srq(struct ibv_pd *pd,
 	return 0;
 }
 
+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd,
+		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+		       uint32_t xrcd_handle, uint32_t xrc_cq,
+		       struct ibv_create_xrc_srq *cmd, size_t cmd_size,
+		       struct ibv_create_srq_resp *resp, size_t resp_size)
+{
+	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XRC_SRQ, resp, resp_size);
+	cmd->user_handle = (uintptr_t) srq;
+	cmd->pd_handle 	 = pd->handle;
+	cmd->max_wr      = attr->attr.max_wr;
+	cmd->max_sge     = attr->attr.max_sge;
+	cmd->srq_limit   = attr->attr.srq_limit;
+	cmd->xrcd_handle = xrcd_handle;
+	cmd->xrc_cq	 = xrc_cq;
+
+	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
+
+	srq->handle  = resp->srq_handle;
+	srq->context = pd->context;
+	attr->attr.max_wr = resp->max_wr;
+	attr->attr.max_sge = resp->max_sge;
+
+	return 0;
+}
+
 static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
 				 struct ibv_srq_attr *srq_attr,
 				 enum ibv_srq_attr_mask srq_attr_mask,
@@ -539,10 +568,13 @@ int ibv_cmd_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
 
 	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_SRQ, &resp, sizeof resp);
 	cmd->srq_handle = srq->handle;
+	cmd->reserved   = 0;
 
 	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
 		return errno;
 
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
+
 	srq_attr->max_wr    = resp.max_wr;
 	srq_attr->max_sge   = resp.max_sge;
 	srq_attr->srq_limit = resp.srq_limit;
@@ -573,10 +605,13 @@ int ibv_cmd_destroy_srq(struct ibv_srq *srq)
 
 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_SRQ, &resp, sizeof resp);
 	cmd.srq_handle = srq->handle;
+	cmd.reserved   = 0;
 
 	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
 
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
+
 	pthread_mutex_lock(&srq->mutex);
 	while (srq->events_completed != resp.events_reported)
 		pthread_cond_wait(&srq->cond, &srq->mutex);
@@ -596,7 +631,6 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
 	cmd->pd_handle 	     = pd->handle;
 	cmd->send_cq_handle  = attr->send_cq->handle;
 	cmd->recv_cq_handle  = attr->recv_cq->handle;
-	cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
 	cmd->max_send_wr     = attr->cap.max_send_wr;
 	cmd->max_recv_wr     = attr->cap.max_recv_wr;
 	cmd->max_send_sge    = attr->cap.max_send_sge;
@@ -605,6 +639,9 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
 	cmd->sq_sig_all	     = attr->sq_sig_all;
 	cmd->qp_type 	     = attr->qp_type;
 	cmd->is_srq 	     = !!attr->srq;
+	cmd->srq_handle      = attr->qp_type == IBV_QPT_XRC ?
+		(attr->xrc_domain ? attr->xrc_domain->handle : 0) :
+		(attr->srq ? attr->srq->handle : 0);
 	cmd->reserved	     = 0;
 
 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
@@ -657,6 +694,8 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
 		return errno;
 
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
+
 	attr->qkey                          = resp.qkey;
 	attr->rq_psn                        = resp.rq_psn;
 	attr->sq_psn                        = resp.sq_psn;
@@ -713,6 +752,8 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	init_attr->recv_cq                  = qp->recv_cq;
 	init_attr->srq                      = qp->srq;
 	init_attr->qp_type                  = qp->qp_type;
+	if (qp->qp_type == IBV_QPT_XRC)
+		init_attr->xrc_domain = qp->xrc_domain;
 	init_attr->cap.max_send_wr          = resp.max_send_wr;
 	init_attr->cap.max_recv_wr          = resp.max_recv_wr;
 	init_attr->cap.max_send_sge         = resp.max_send_sge;
@@ -787,6 +828,186 @@ int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	return 0;
 }
 
+int ibv_cmd_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			     uint32_t *xrc_rcv_qpn)
+{
+	struct ibv_create_xrc_rcv_qp cmd;
+	struct ibv_create_xrc_rcv_qp_resp resp;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_XRC_RCV_QP, &resp, sizeof resp);
+
+	cmd.xrc_domain_handle = init_attr->xrc_domain->handle;
+	cmd.max_send_wr     = init_attr->cap.max_send_wr;
+	cmd.max_recv_wr     = init_attr->cap.max_recv_wr;
+	cmd.max_send_sge    = init_attr->cap.max_send_sge;
+	cmd.max_recv_sge    = init_attr->cap.max_recv_sge;
+	cmd.max_inline_data = init_attr->cap.max_inline_data;
+	cmd.sq_sig_all	     = init_attr->sq_sig_all;
+	cmd.qp_type 	     = init_attr->qp_type;
+	cmd.reserved[0] = cmd.reserved[1] = 0;
+
+	if (write(init_attr->xrc_domain->context->cmd_fd, &cmd, sizeof cmd) !=
+	    sizeof cmd)
+		return errno;
+
+	*xrc_rcv_qpn = resp.qpn;
+
+	return 0;
+}
+
+int ibv_cmd_modify_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_qp_num,
+			      struct ibv_qp_attr *attr, int attr_mask)
+{
+	struct ibv_modify_xrc_rcv_qp cmd;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, MODIFY_XRC_RCV_QP);
+
+	cmd.xrc_domain_handle	 = d->handle;
+	cmd.qp_num 		 = xrc_qp_num;
+	cmd.attr_mask 		 = attr_mask;
+	cmd.qkey 		 = attr->qkey;
+	cmd.rq_psn 		 = attr->rq_psn;
+	cmd.sq_psn 		 = attr->sq_psn;
+	cmd.dest_qp_num 	 = attr->dest_qp_num;
+	cmd.qp_access_flags 	 = attr->qp_access_flags;
+	cmd.pkey_index		 = attr->pkey_index;
+	cmd.alt_pkey_index 	 = attr->alt_pkey_index;
+	cmd.qp_state 		 = attr->qp_state;
+	cmd.cur_qp_state 	 = attr->cur_qp_state;
+	cmd.path_mtu 		 = attr->path_mtu;
+	cmd.path_mig_state 	 = attr->path_mig_state;
+	cmd.en_sqd_async_notify  = attr->en_sqd_async_notify;
+	cmd.max_rd_atomic 	 = attr->max_rd_atomic;
+	cmd.max_dest_rd_atomic   = attr->max_dest_rd_atomic;
+	cmd.min_rnr_timer 	 = attr->min_rnr_timer;
+	cmd.port_num 		 = attr->port_num;
+	cmd.timeout 		 = attr->timeout;
+	cmd.retry_cnt 		 = attr->retry_cnt;
+	cmd.rnr_retry 		 = attr->rnr_retry;
+	cmd.alt_port_num 	 = attr->alt_port_num;
+	cmd.alt_timeout 	 = attr->alt_timeout;
+
+	memcpy(cmd.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+	cmd.dest.flow_label 	    = attr->ah_attr.grh.flow_label;
+	cmd.dest.dlid 		    = attr->ah_attr.dlid;
+	cmd.dest.reserved	    = 0;
+	cmd.dest.sgid_index 	    = attr->ah_attr.grh.sgid_index;
+	cmd.dest.hop_limit 	    = attr->ah_attr.grh.hop_limit;
+	cmd.dest.traffic_class      = attr->ah_attr.grh.traffic_class;
+	cmd.dest.sl 		    = attr->ah_attr.sl;
+	cmd.dest.src_path_bits      = attr->ah_attr.src_path_bits;
+	cmd.dest.static_rate 	    = attr->ah_attr.static_rate;
+	cmd.dest.is_global 	    = attr->ah_attr.is_global;
+	cmd.dest.port_num 	    = attr->ah_attr.port_num;
+
+	memcpy(cmd.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+	cmd.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
+	cmd.alt_dest.dlid 	    = attr->alt_ah_attr.dlid;
+	cmd.alt_dest.reserved	    = 0;
+	cmd.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
+	cmd.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
+	cmd.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+	cmd.alt_dest.sl 	    = attr->alt_ah_attr.sl;
+	cmd.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+	cmd.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
+	cmd.alt_dest.is_global     = attr->alt_ah_attr.is_global;
+	cmd.alt_dest.port_num 	    = attr->alt_ah_attr.port_num;
+
+	cmd.reserved[0] = cmd.reserved[1] = 0;
+
+	if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	return 0;
+}
+
+int ibv_cmd_query_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_qp_num,
+			     struct ibv_qp_attr *attr, int attr_mask,
+			     struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_xrc_rcv_qp cmd;
+	struct ibv_query_qp_resp resp;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_XRC_RCV_QP, &resp, sizeof resp);
+	cmd.xrc_domain_handle = d->handle;
+	cmd.qp_num = xrc_qp_num;
+	cmd.attr_mask = attr_mask;
+
+	if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
+
+	attr->qkey                          = resp.qkey;
+	attr->rq_psn                        = resp.rq_psn;
+	attr->sq_psn                        = resp.sq_psn;
+	attr->dest_qp_num                   = resp.dest_qp_num;
+	attr->qp_access_flags               = resp.qp_access_flags;
+	attr->pkey_index                    = resp.pkey_index;
+	attr->alt_pkey_index                = resp.alt_pkey_index;
+	attr->qp_state                      = resp.qp_state;
+	attr->cur_qp_state                  = resp.cur_qp_state;
+	attr->path_mtu                      = resp.path_mtu;
+	attr->path_mig_state                = resp.path_mig_state;
+	attr->sq_draining                   = resp.sq_draining;
+	attr->max_rd_atomic                 = resp.max_rd_atomic;
+	attr->max_dest_rd_atomic            = resp.max_dest_rd_atomic;
+	attr->min_rnr_timer                 = resp.min_rnr_timer;
+	attr->port_num                      = resp.port_num;
+	attr->timeout                       = resp.timeout;
+	attr->retry_cnt                     = resp.retry_cnt;
+	attr->rnr_retry                     = resp.rnr_retry;
+	attr->alt_port_num                  = resp.alt_port_num;
+	attr->alt_timeout                   = resp.alt_timeout;
+	attr->cap.max_send_wr               = resp.max_send_wr;
+	attr->cap.max_recv_wr               = resp.max_recv_wr;
+	attr->cap.max_send_sge              = resp.max_send_sge;
+	attr->cap.max_recv_sge              = resp.max_recv_sge;
+	attr->cap.max_inline_data           = resp.max_inline_data;
+
+	memcpy(attr->ah_attr.grh.dgid.raw, resp.dest.dgid, 16);
+	attr->ah_attr.grh.flow_label        = resp.dest.flow_label;
+	attr->ah_attr.dlid                  = resp.dest.dlid;
+	attr->ah_attr.grh.sgid_index        = resp.dest.sgid_index;
+	attr->ah_attr.grh.hop_limit         = resp.dest.hop_limit;
+	attr->ah_attr.grh.traffic_class     = resp.dest.traffic_class;
+	attr->ah_attr.sl                    = resp.dest.sl;
+	attr->ah_attr.src_path_bits         = resp.dest.src_path_bits;
+	attr->ah_attr.static_rate           = resp.dest.static_rate;
+	attr->ah_attr.is_global             = resp.dest.is_global;
+	attr->ah_attr.port_num              = resp.dest.port_num;
+
+	memcpy(attr->alt_ah_attr.grh.dgid.raw, resp.alt_dest.dgid, 16);
+	attr->alt_ah_attr.grh.flow_label    = resp.alt_dest.flow_label;
+	attr->alt_ah_attr.dlid              = resp.alt_dest.dlid;
+	attr->alt_ah_attr.grh.sgid_index    = resp.alt_dest.sgid_index;
+	attr->alt_ah_attr.grh.hop_limit     = resp.alt_dest.hop_limit;
+	attr->alt_ah_attr.grh.traffic_class = resp.alt_dest.traffic_class;
+	attr->alt_ah_attr.sl                = resp.alt_dest.sl;
+	attr->alt_ah_attr.src_path_bits     = resp.alt_dest.src_path_bits;
+	attr->alt_ah_attr.static_rate       = resp.alt_dest.static_rate;
+	attr->alt_ah_attr.is_global         = resp.alt_dest.is_global;
+	attr->alt_ah_attr.port_num          = resp.alt_dest.port_num;
+
+	init_attr->cap.max_send_wr          = resp.max_send_wr;
+	init_attr->cap.max_recv_wr          = resp.max_recv_wr;
+	init_attr->cap.max_send_sge         = resp.max_send_sge;
+	init_attr->cap.max_recv_sge         = resp.max_recv_sge;
+	init_attr->cap.max_inline_data      = resp.max_inline_data;
+	init_attr->sq_sig_all               = resp.sq_sig_all;
+
+	return 0;
+}
+
 static int ibv_cmd_destroy_qp_v1(struct ibv_qp *qp)
 {
 	struct ibv_destroy_qp_v1 cmd;
@@ -1067,6 +1288,7 @@ int ibv_cmd_destroy_qp(struct ibv_qp *qp)
 
 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_QP, &resp, sizeof resp);
 	cmd.qp_handle = qp->handle;
+	cmd.reserved  = 0;
 
 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
@@ -1089,6 +1311,7 @@ int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
 	cmd.qp_handle = qp->handle;
 	cmd.mlid      = lid;
+	cmd.reserved  = 0;
 
 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
@@ -1104,9 +1327,81 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
 	cmd.qp_handle = qp->handle;
 	cmd.mlid      = lid;
+	cmd.reserved  = 0;
 
 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
 
 	return 0;
 }
+
+/*
+ * Marshal the OPEN_XRC_DOMAIN uverbs command to the kernel.
+ *
+ * @context:   device context whose command channel (cmd_fd) is used
+ * @fd:        file descriptor identifying the XRC domain's inode
+ *             (sharing key between processes)
+ * @oflag:     open flags forwarded to the kernel (e.g. O_CREAT)
+ * @d:         caller-allocated domain object; only d->handle is filled
+ *             in here — the caller is responsible for d->context
+ * @resp:      provider-sized response buffer
+ * @resp_size: size of @resp in bytes
+ *
+ * Returns 0 on success, ENOSYS if the kernel ABI predates XRC
+ * (version 6), or errno from the failed write().
+ */
+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag,
+			    struct ibv_xrc_domain *d,
+			    struct ibv_open_xrc_domain_resp *resp,
+			    size_t resp_size)
+{
+	struct ibv_open_xrc_domain cmd;
+
+	/* XRC commands exist only from ABI version 6 onward. */
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, OPEN_XRC_DOMAIN, resp, resp_size);
+	cmd.fd = fd;
+	cmd.oflags = oflag;
+
+	/* uverbs commands are issued as a single write on the command fd;
+	 * a short write is reported via errno. */
+	if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	d->handle = resp->xrcd_handle;
+
+	return 0;
+}
+
+/*
+ * Marshal the CLOSE_XRC_DOMAIN uverbs command to the kernel,
+ * releasing the kernel-side XRC domain object identified by d->handle.
+ *
+ * Returns 0 on success, ENOSYS on a pre-XRC kernel ABI (< 6),
+ * or errno from the failed write().
+ */
+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d)
+{
+	struct ibv_close_xrc_domain cmd;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRC_DOMAIN);
+	cmd.xrcd_handle = d->handle;
+
+	if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+	return 0;
+}
+
+/*
+ * Marshal the REG_XRC_RCV_QP uverbs command: register the calling
+ * process as a user of the kernel-resident XRC receive QP identified
+ * by (domain handle, QP number).  Per the patch design, multiple
+ * registrations by one process count as a single registration.
+ *
+ * Returns 0 on success, ENOSYS on a pre-XRC kernel ABI (< 6),
+ * or errno from the failed write().
+ */
+int ibv_cmd_reg_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_qp_num)
+{
+	struct ibv_reg_xrc_rcv_qp cmd;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, REG_XRC_RCV_QP);
+	cmd.xrc_domain_handle = d->handle;
+	cmd.qp_num = xrc_qp_num;
+
+	if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+	return 0;
+}
+
+/*
+ * Marshal the UNREG_XRC_RCV_QP uverbs command: drop this process's
+ * registration with the XRC receive QP.  The kernel destroys the QP
+ * once the last registered process unregisters.
+ *
+ * Returns 0 on success, ENOSYS on a pre-XRC kernel ABI (< 6),
+ * or errno from the failed write().
+ */
+int ibv_cmd_unreg_xrc_rcv_qp(struct ibv_xrc_domain *d, uint32_t xrc_qp_num)
+{
+	struct ibv_unreg_xrc_rcv_qp cmd;
+
+	if (abi_ver < 6)
+		return ENOSYS;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, UNREG_XRC_RCV_QP);
+	cmd.xrc_domain_handle = d->handle;
+	cmd.qp_num = xrc_qp_num;
+
+	if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+	return 0;
+}
+
+
diff --git a/src/device.c b/src/device.c
index 3abc1eb..8af0eaa 100644
--- a/src/device.c
+++ b/src/device.c
@@ -182,31 +182,33 @@ int __ibv_get_async_event(struct ibv_context *context,
 
 	event->event_type = ev.event_type;
 
-	switch (event->event_type) {
-	case IBV_EVENT_CQ_ERR:
-		event->element.cq = (void *) (uintptr_t) ev.element;
-		break;
-
-	case IBV_EVENT_QP_FATAL:
-	case IBV_EVENT_QP_REQ_ERR:
-	case IBV_EVENT_QP_ACCESS_ERR:
-	case IBV_EVENT_COMM_EST:
-	case IBV_EVENT_SQ_DRAINED:
-	case IBV_EVENT_PATH_MIG:
-	case IBV_EVENT_PATH_MIG_ERR:
-	case IBV_EVENT_QP_LAST_WQE_REACHED:
-		event->element.qp = (void *) (uintptr_t) ev.element;
-		break;
-
-	case IBV_EVENT_SRQ_ERR:
-	case IBV_EVENT_SRQ_LIMIT_REACHED:
-		event->element.srq = (void *) (uintptr_t) ev.element;
-		break;
-
-	default:
-		event->element.port_num = ev.element;
-		break;
-	}
+	if (event->event_type & IBV_XRC_QP_EVENT_FLAG) {
+		event->element.xrc_qp_num = ev.element;
+	} else
+		switch (event->event_type) {
+		case IBV_EVENT_CQ_ERR:
+			event->element.cq = (void *) (uintptr_t) ev.element;
+			break;
+
+		case IBV_EVENT_QP_FATAL:
+		case IBV_EVENT_QP_REQ_ERR:
+		case IBV_EVENT_QP_ACCESS_ERR:
+		case IBV_EVENT_COMM_EST:
+		case IBV_EVENT_SQ_DRAINED:
+		case IBV_EVENT_PATH_MIG:
+		case IBV_EVENT_PATH_MIG_ERR:
+		case IBV_EVENT_QP_LAST_WQE_REACHED:
+			event->element.qp = (void *) (uintptr_t) ev.element;
+			break;
+
+		case IBV_EVENT_SRQ_ERR:
+		case IBV_EVENT_SRQ_LIMIT_REACHED:
+			event->element.srq = (void *) (uintptr_t) ev.element;
+			break;
+		default:
+			event->element.port_num = ev.element;
+			break;
+		}
 
 	if (context->ops.async_event)
 		context->ops.async_event(event);
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 3a346ed..dfa53a4 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -91,4 +91,15 @@ IBVERBS_1.1 {
 		ibv_dontfork_range;
 		ibv_dofork_range;
 		ibv_register_driver;
+		ibv_create_xrc_srq;
+		ibv_cmd_create_xrc_srq;
+		ibv_open_xrc_domain;
+		ibv_cmd_open_xrc_domain;
+		ibv_close_xrc_domain;
+		ibv_cmd_close_xrc_domain;
+		ibv_cmd_create_xrc_rcv_qp;
+		ibv_cmd_modify_xrc_rcv_qp;
+		ibv_cmd_query_xrc_rcv_qp;
+		ibv_cmd_reg_xrc_rcv_qp;
+		ibv_cmd_unreg_xrc_rcv_qp;
 } IBVERBS_1.0;
diff --git a/src/verbs.c b/src/verbs.c
index f5cf4d3..11d3c4c 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -364,6 +364,9 @@ struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
 		srq->context          = pd->context;
 		srq->srq_context      = srq_init_attr->srq_context;
 		srq->pd               = pd;
+		srq->xrc_domain       = NULL;
+		srq->xrc_cq           = NULL;
+		srq->xrc_srq_num      = 0;
 		srq->events_completed = 0;
 		pthread_mutex_init(&srq->mutex, NULL);
 		pthread_cond_init(&srq->cond, NULL);
@@ -373,6 +376,33 @@ struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
 }
 default_symver(__ibv_create_srq, ibv_create_srq);
 
+/*
+ * Create an SRQ bound to an XRC domain and XRC completion queue.
+ *
+ * Mirrors __ibv_create_srq but dispatches through the provider's
+ * xrc_ops table; returns NULL if the device has no XRC support or
+ * the provider call fails.  On success the common ibv_srq fields are
+ * filled in here, including the XRC back-pointers.
+ *
+ * NOTE(review): unlike __ibv_create_srq, srq->xrc_srq_num is not
+ * zeroed here — presumably the provider's create_xrc_srq sets it;
+ * confirm against the driver implementation.
+ */
+struct ibv_srq *__ibv_create_xrc_srq(struct ibv_pd *pd,
+				     struct ibv_xrc_domain *xrc_domain,
+				     struct ibv_cq *xrc_cq,
+				     struct ibv_srq_init_attr *srq_init_attr)
+{
+	struct ibv_srq *srq;
+
+	/* Device without XRC support: no xrc_ops table. */
+	if (!pd->context->xrc_ops)
+		return NULL;
+
+	srq = pd->context->xrc_ops->create_xrc_srq(pd, xrc_domain,
+						   xrc_cq, srq_init_attr);
+	if (srq) {
+		srq->context          = pd->context;
+		srq->srq_context      = srq_init_attr->srq_context;
+		srq->pd               = pd;
+		srq->xrc_domain       = xrc_domain;
+		srq->xrc_cq           = xrc_cq;
+		srq->events_completed = 0;
+		pthread_mutex_init(&srq->mutex, NULL);
+		pthread_cond_init(&srq->cond, NULL);
+	}
+
+	return srq;
+}
+default_symver(__ibv_create_xrc_srq, ibv_create_xrc_srq);
+
 int __ibv_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *srq_attr,
 		     enum ibv_srq_attr_mask srq_attr_mask)
@@ -396,8 +426,9 @@ default_symver(__ibv_destroy_srq, ibv_destroy_srq);
 struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
 			       struct ibv_qp_init_attr *qp_init_attr)
 {
-	struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr);
+	struct ibv_qp *qp;
 
+	qp = pd->context->ops.create_qp(pd, qp_init_attr);
 	if (qp) {
 		qp->context    	     = pd->context;
 		qp->qp_context 	     = qp_init_attr->qp_context;
@@ -408,6 +439,8 @@ struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
 		qp->qp_type          = qp_init_attr->qp_type;
 		qp->state	     = IBV_QPS_RESET;
 		qp->events_completed = 0;
+		qp->xrc_domain       = qp_init_attr->qp_type == IBV_QPT_XRC ?
+			qp_init_attr->xrc_domain : NULL;
 		pthread_mutex_init(&qp->mutex, NULL);
 		pthread_cond_init(&qp->cond, NULL);
 	}
@@ -541,3 +574,92 @@ int __ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 	return qp->context->ops.detach_mcast(qp, gid, lid);
 }
 default_symver(__ibv_detach_mcast, ibv_detach_mcast);
+
+/*
+ * Open (or create, per oflag) an XRC domain on this device.
+ *
+ * Dispatches to the provider's open_xrc_domain; returns NULL if the
+ * device has no XRC support or the provider call fails.  The context
+ * back-pointer is filled in here so later calls can reach xrc_ops.
+ */
+struct ibv_xrc_domain *__ibv_open_xrc_domain(struct ibv_context *context,
+					     int fd, int oflag)
+{
+	struct ibv_xrc_domain *d;
+
+	if (!context->xrc_ops)
+		return NULL;
+
+	d = context->xrc_ops->open_xrc_domain(context, fd, oflag);
+	if (d)
+		d->context = context;
+
+	return d;
+}
+default_symver(__ibv_open_xrc_domain, ibv_open_xrc_domain);
+
+/*
+ * Close an XRC domain via the provider's close_xrc_domain hook.
+ *
+ * Returns 0 if the device has no XRC support (nothing to release),
+ * otherwise the provider's result.
+ */
+int __ibv_close_xrc_domain(struct ibv_xrc_domain *d)
+{
+	if (!d->context->xrc_ops)
+		return 0;
+
+	return d->context->xrc_ops->close_xrc_domain(d);
+}
+default_symver(__ibv_close_xrc_domain, ibv_close_xrc_domain);
+
+/*
+ * Create a kernel-resident XRC receive-only QP.  There is no userspace
+ * QP object: on success the provider returns the QP number through
+ * *xrc_rcv_qpn, and the (domain, qp_num) pair identifies the QP in all
+ * later modify/query/reg/unreg calls.  The creating process is also
+ * registered as a user of the QP.
+ *
+ * Returns EINVAL for a missing init_attr/xrc_domain, ENOSYS if the
+ * device lacks XRC support, otherwise the provider's result.
+ */
+int __ibv_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			    uint32_t *xrc_rcv_qpn)
+{
+	struct ibv_context *c;
+	if (!init_attr || !(init_attr->xrc_domain))
+		return EINVAL;
+
+	/* The device context is reached through the XRC domain, since
+	 * there is no QP object to hang it on. */
+	c = init_attr->xrc_domain->context;
+	if (!c->xrc_ops)
+		return ENOSYS;
+
+	return c->xrc_ops->create_xrc_rcv_qp(init_attr,
+					    xrc_rcv_qpn);
+}
+default_symver(__ibv_create_xrc_rcv_qp, ibv_create_xrc_rcv_qp);
+
+/*
+ * Modify a kernel-resident XRC receive QP identified by (domain,
+ * qp_num) — e.g. RESET->INIT->RTR; RTS is not needed for a
+ * receive-only QP.
+ *
+ * Returns EINVAL on NULL domain/attr, ENOSYS if the device lacks XRC
+ * support, otherwise the provider's result.
+ */
+int __ibv_modify_xrc_rcv_qp(struct ibv_xrc_domain *d,
+			    uint32_t xrc_rcv_qpn,
+			    struct ibv_qp_attr *attr,
+			    int attr_mask)
+{
+	if (!d || !attr)
+		return EINVAL;
+
+	if (!d->context->xrc_ops)
+		return ENOSYS;
+
+	return d->context->xrc_ops->modify_xrc_rcv_qp(d, xrc_rcv_qpn, attr,
+						      attr_mask);
+}
+default_symver(__ibv_modify_xrc_rcv_qp, ibv_modify_xrc_rcv_qp);
+
+/*
+ * Query a kernel-resident XRC receive QP identified by (domain,
+ * qp_num), filling in *attr and *init_attr.
+ *
+ * Returns EINVAL on NULL domain or output pointers, ENOSYS if the
+ * device lacks XRC support, otherwise the provider's result.
+ */
+int __ibv_query_xrc_rcv_qp(struct ibv_xrc_domain *d,
+			   uint32_t xrc_rcv_qpn,
+			   struct ibv_qp_attr *attr,
+			   int attr_mask,
+			   struct ibv_qp_init_attr *init_attr)
+{
+	/* Also reject NULL output buffers, matching the attr check in
+	 * __ibv_modify_xrc_rcv_qp; the provider writes through both. */
+	if (!d || !attr || !init_attr)
+		return EINVAL;
+
+	if (!d->context->xrc_ops)
+		return ENOSYS;
+
+	return d->context->xrc_ops->query_xrc_rcv_qp(d, xrc_rcv_qpn, attr,
+						     attr_mask, init_attr);
+}
+default_symver(__ibv_query_xrc_rcv_qp, ibv_query_xrc_rcv_qp);
+
+/*
+ * Register the calling process as a user of the XRC receive QP
+ * (domain, qp_num).  Registration is per-process; repeated calls
+ * count as one registration.
+ *
+ * Returns EINVAL on NULL domain, ENOSYS if the device lacks XRC
+ * support, otherwise the provider's result.
+ */
+int __ibv_reg_xrc_rcv_qp(struct ibv_xrc_domain *d,
+			 uint32_t xrc_rcv_qpn)
+{
+	/* Guard the dispatch like the other xrc_rcv_qp wrappers do;
+	 * without these checks a device lacking XRC support would
+	 * dereference a NULL xrc_ops pointer. */
+	if (!d)
+		return EINVAL;
+
+	if (!d->context->xrc_ops)
+		return ENOSYS;
+
+	return d->context->xrc_ops->reg_xrc_rcv_qp(d, xrc_rcv_qpn);
+}
+default_symver(__ibv_reg_xrc_rcv_qp, ibv_reg_xrc_rcv_qp);
+
+/*
+ * Unregister the calling process from the XRC receive QP
+ * (domain, qp_num).  The kernel destroys the QP when the last
+ * registered process has unregistered.
+ *
+ * Returns EINVAL on NULL domain, ENOSYS if the device lacks XRC
+ * support, otherwise the provider's result.
+ */
+int __ibv_unreg_xrc_rcv_qp(struct ibv_xrc_domain *d,
+			   uint32_t xrc_rcv_qpn)
+{
+	/* Guard the dispatch like the other xrc_rcv_qp wrappers do;
+	 * without these checks a device lacking XRC support would
+	 * dereference a NULL xrc_ops pointer. */
+	if (!d)
+		return EINVAL;
+
+	if (!d->context->xrc_ops)
+		return ENOSYS;
+
+	return d->context->xrc_ops->unreg_xrc_rcv_qp(d, xrc_rcv_qpn);
+}
+default_symver(__ibv_unreg_xrc_rcv_qp, ibv_unreg_xrc_rcv_qp);
+



More information about the general mailing list