[ofa-general] [PATCH 2/8] libmlx4: implement XRC qps (including xrc receive-only qps)

Jack Morgenstein jackm at dev.mellanox.co.il
Wed Jan 23 01:59:45 PST 2008


Implements XRC QPs, XRC SRQs and XRC domains for the userspace driver,
including the verbs for XRC receive-only QPs.

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>

diff --git a/configure.in b/configure.in
index 25f27f7..9304539 100644
--- a/configure.in
+++ b/configure.in
@@ -42,6 +42,9 @@ AC_CHECK_HEADER(valgrind/memcheck.h,
 dnl Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
 AC_CHECK_SIZEOF(long)
+AC_CHECK_MEMBER(struct ibv_context.xrc_ops,
+    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if xrc_ops is a member of ibv_context])],,
+    [#include <infiniband/verbs.h>])
 
 dnl Checks for library functions
 AC_CHECK_FUNC(ibv_read_sysfs_file, [],
diff --git a/src/cq.c b/src/cq.c
index cae8406..5d67b5f 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -196,10 +196,12 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	struct mlx4_cqe *cqe;
 	struct mlx4_srq *srq;
 	uint32_t qpn;
+	uint32_t srqn;
 	uint32_t g_mlpath_rqpn;
 	uint16_t wqe_index;
 	int is_error;
 	int is_send;
+	int is_src_recv = 0;
 
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
@@ -221,20 +223,30 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (!*cur_qp ||
-	    (ntohl(cqe->my_qpn) & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
-		/*
-		 * We do not have to take the QP table lock here,
-		 * because CQs will be locked while QPs are removed
-		 * from the table.
-		 */
-		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
-				       ntohl(cqe->my_qpn) & 0xffffff);
-		if (!*cur_qp)
+	if (qpn & MLX4_XRC_QPN_BIT && !is_send) {
+		srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
+                /*
+                 * We do not have to take the XRC SRQ table lock here,
+                 * because CQs will be locked while XRC SRQs are removed
+                 * from the table.
+                 */
+		srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn);
+		if (!srq)
 			return CQ_POLL_ERR;
-	}
-
-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
+		is_src_recv = 1;
+        } else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
+                /*
+                 * We do not have to take the QP table lock here,
+                 * because CQs will be locked while QPs are removed
+                 * from the table.
+                 */
+                *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
+                                       qpn & 0xffffff);
+                if (!*cur_qp)
+                        return CQ_POLL_ERR;
+        }
+
+	wc->qp_num = qpn & 0xffffff;
 
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
@@ -242,6 +254,10 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
+	} else if (is_src_recv) {
+		wqe_index = htons(cqe->wqe_index);
+		wc->wr_id = srq->wrid[wqe_index];
+		mlx4_free_srq_wqe(srq, wqe_index);
 	} else if ((*cur_qp)->ibv_qp.srq) {
 		srq = to_msrq((*cur_qp)->ibv_qp.srq);
 		wqe_index = htons(cqe->wqe_index);
@@ -387,6 +403,10 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	uint32_t prod_index;
 	uint8_t owner_bit;
 	int nfreed = 0;
+	int is_xrc_srq = 0;
+
+	if (srq && srq->ibv_srq.xrc_cq)
+		is_xrc_srq = 1;
 
 	pthread_spin_lock(&cq->lock);
 
@@ -407,7 +427,12 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
-		if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
+		if (is_xrc_srq &&	/* SRQ number = low 24 bits of host-order g_mlpath_rqpn */
+		    (ntohl(cqe->g_mlpath_rqpn) & 0xffffff) == srq->srqn &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 20a40c9..1b1253c 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -68,6 +68,14 @@ struct mlx4_resize_cq {
 	__u64				buf_addr;
 };
 
+#ifdef HAVE_IBV_XRC_OPS
+struct mlx4_create_xrc_srq {	/* command buffer built by mlx4_create_xrc_srq() */
+	struct ibv_create_xrc_srq	ibv_cmd;	/* passed as &cmd.ibv_cmd to ibv_cmd_create_xrc_srq() */
+	__u64				buf_addr;	/* filled with the address of srq->buf.buf */
+	__u64				db_addr;	/* filled with the address of srq->db */
+};
+#endif
+
 struct mlx4_create_srq {
 	struct ibv_create_srq		ibv_cmd;
 	__u64				buf_addr;
@@ -90,4 +98,12 @@ struct mlx4_create_qp {
 	__u8				reserved[5];
 };
 
+#ifdef HAVE_IBV_XRC_OPS
+struct mlx4_open_xrc_domain_resp {	/* response buffer used by mlx4_open_xrc_domain() */
+	struct ibv_open_xrc_domain_resp	ibv_resp;	/* passed as &resp.ibv_resp to ibv_cmd_open_xrc_domain() */
+	__u32				xrcdn;	/* copied into mlx4_xrc_domain.xrcdn on success */
+	__u32				reserved;	/* never read in this patch; presumably padding — TODO confirm */
+};
+#endif
+
 #endif /* MLX4_ABI_H */
diff --git a/src/mlx4.c b/src/mlx4.c
index 671e849..12d2255 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -68,6 +68,19 @@ struct {
 	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
 };
 
+#ifdef HAVE_IBV_XRC_OPS
+static struct ibv_xrc_ops mlx4_xrc_ops = {	/* XRC entry points; mlx4_alloc_context() points ibv_ctx.xrc_ops here */
+	.create_xrc_srq   = mlx4_create_xrc_srq,
+	.open_xrc_domain  = mlx4_open_xrc_domain,
+	.close_xrc_domain = mlx4_close_xrc_domain,
+	.create_xrc_rcv_qp = mlx4_create_xrc_rcv_qp,
+	.modify_xrc_rcv_qp = mlx4_modify_xrc_rcv_qp,
+	.query_xrc_rcv_qp = mlx4_query_xrc_rcv_qp,
+	.reg_xrc_rcv_qp   = mlx4_reg_xrc_rcv_qp,
+	.unreg_xrc_rcv_qp = mlx4_unreg_xrc_rcv_qp,
+};
+#endif
+
 static struct ibv_context_ops mlx4_ctx_ops = {
 	.query_device  = mlx4_query_device,
 	.query_port    = mlx4_query_port,
@@ -124,6 +137,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
 		context->qp_table[i].refcnt = 0;
 
+	context->num_xrc_srqs	= resp.qp_tab_size;
+	context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
+				       - MLX4_XRC_SRQ_TABLE_BITS;
+	context->xrc_srq_table_mask  = (1 << context->xrc_srq_table_shift) - 1;
+
+	pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
+	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
+		context->xrc_srq_table[i].refcnt = 0;
+
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
 
@@ -156,6 +178,9 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
 	context->ibv_ctx.ops = mlx4_ctx_ops;
+#ifdef HAVE_IBV_XRC_OPS
+	context->ibv_ctx.xrc_ops = &mlx4_xrc_ops;
+#endif
 
 	if (mlx4_query_device(&context->ibv_ctx, &dev_attrs))
 		goto query_free;
diff --git a/src/mlx4.h b/src/mlx4.h
index 0b47adc..0dd2d9a 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -111,6 +111,16 @@ enum {
 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
 };
 
+enum {	/* geometry of mlx4_context.xrc_srq_table */
+	MLX4_XRC_SRQ_TABLE_BITS		= 8,	/* subtracted from ffs(num_xrc_srqs) - 1 to get xrc_srq_table_shift */
+	MLX4_XRC_SRQ_TABLE_SIZE		= 1 << MLX4_XRC_SRQ_TABLE_BITS,	/* number of first-level slots */
+	MLX4_XRC_SRQ_TABLE_MASK		= MLX4_XRC_SRQ_TABLE_SIZE - 1	/* not referenced by the code in this patch */
+};
+
+enum {
+	MLX4_XRC_QPN_BIT		= (1 << 23)	/* tested on the CQE QPN in mlx4_poll_one(); set + non-send => look up the XRC SRQ by number */
+};
+
 enum mlx4_db_type {
 	MLX4_DB_TYPE_CQ,
 	MLX4_DB_TYPE_RQ,
@@ -174,6 +184,15 @@ struct mlx4_context {
 	int				max_sge;
 	int				max_cqe;
 
+	struct {
+		struct mlx4_srq       **table;
+		int			refcnt;
+	}				xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE];
+	pthread_mutex_t			xrc_srq_table_mutex;
+	int				num_xrc_srqs;
+	int				xrc_srq_table_shift;
+	int				xrc_srq_table_mask;
+
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
 	pthread_mutex_t			db_list_mutex;
 };
@@ -259,6 +278,11 @@ struct mlx4_ah {
 	struct mlx4_av			av;
 };
 
+struct mlx4_xrc_domain {	/* driver-side XRC domain allocated by mlx4_open_xrc_domain() */
+	struct ibv_xrc_domain		ibv_xrcd;	/* the object whose address is returned to the caller */
+	uint32_t			xrcdn;	/* domain number taken from the open response */
+};
+
 static inline unsigned long align(unsigned long val, unsigned long align)
 {
 	return (val + align - 1) & ~(align - 1);
@@ -303,6 +327,13 @@ static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
 	return to_mxxx(ah, ah);
 }
 
+#ifdef HAVE_IBV_XRC_OPS
+static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd)
+{	/* Map ibxrcd to its mlx4_xrc_domain; to_mxxx() is defined outside this patch, presumably a container_of on ibv_xrcd — TODO confirm */
+	return to_mxxx(xrcd, xrc_domain);
+}
+#endif
+
 int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
 void mlx4_free_buf(struct mlx4_buf *buf);
 
@@ -347,6 +378,10 @@ void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr);
+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
+		       struct mlx4_srq *srq);
+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
@@ -377,5 +412,31 @@ int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
 void mlx4_free_av(struct mlx4_ah *ah);
 int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+#ifdef HAVE_IBV_XRC_OPS
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
+				    struct ibv_xrc_domain *xrc_domain,
+				    struct ibv_cq *xrc_cq,
+				    struct ibv_srq_init_attr *attr);
+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
+					    int fd, int oflag);
+
+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d);
+int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			   uint32_t *xrc_qp_num);
+int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			   uint32_t xrc_qp_num,
+			   struct ibv_qp_attr *attr,
+			   int attr_mask);
+int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			  uint32_t xrc_qp_num,
+			  struct ibv_qp_attr *attr,
+			  int attr_mask,
+			  struct ibv_qp_init_attr *init_attr);
+int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			uint32_t xrc_qp_num);
+int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			uint32_t xrc_qp_num);
+#endif
+
 
 #endif /* MLX4_H */
diff --git a/src/qp.c b/src/qp.c
index 9f219b9..4322513 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -210,7 +224,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
 		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
 
-		ctrl->srcrb_flags =
+		ctrl->xrcrb_flags =
 			(wr->send_flags & IBV_SEND_SIGNALED ?
 			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
 			(wr->send_flags & IBV_SEND_SOLICITED ?
@@ -227,6 +241,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		size = sizeof *ctrl / 16;
 
 		switch (ibqp->qp_type) {
+		case IBV_QPT_XRC:
+                        ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
+                        /* fall thru */
 		case IBV_QPT_RC:
 		case IBV_QPT_UC:
 			switch (wr->opcode) {
@@ -526,6 +544,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
+	case IBV_QPT_XRC:
 	case IBV_QPT_RC:
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		/*
@@ -614,6 +633,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 
 	case IBV_QPT_UC:
 	case IBV_QPT_RC:
+	case IBV_QPT_XRC:
 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
diff --git a/src/srq.c b/src/srq.c
index ba2ceb9..b70c18b 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -167,3 +167,53 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
 	return 0;
 }
+
+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
+{	/* Return the SRQ stored under xrc_srqn, or NULL if its sub-table is not allocated. Takes no lock itself. */
+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
+
+	if (ctx->xrc_srq_table[tind].refcnt)	/* zero refcnt: this slot has no sub-table */
+		return ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask];	/* may itself be NULL */
+	else
+		return NULL;
+}
+
+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
+		       struct mlx4_srq *srq)
+{	/* Record srq under xrc_srqn; returns 0, or -1 if the sub-table cannot be allocated. */
+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
+	int ret = 0;
+
+	pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
+
+	if (!ctx->xrc_srq_table[tind].refcnt) {	/* first entry for this slot: allocate its sub-table */
+		ctx->xrc_srq_table[tind].table = calloc(ctx->xrc_srq_table_mask + 1,
+						   sizeof (struct mlx4_srq *));
+		if (!ctx->xrc_srq_table[tind].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	++ctx->xrc_srq_table[tind].refcnt;	/* one count per stored entry; mlx4_clear_xrc_srq() drops it */
+	ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = srq;
+
+out:
+	pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
+	return ret;
+}
+
+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
+{	/* Remove the entry for xrc_srqn; assumes it was stored earlier, the refcnt is not checked — TODO confirm callers */
+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
+
+	pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
+
+	if (!--ctx->xrc_srq_table[tind].refcnt)	/* last entry gone: release the whole sub-table */
+		free(ctx->xrc_srq_table[tind].table);
+	else	/* other entries remain: only empty this one */
+		ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = NULL;
+
+	pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
+}
+
diff --git a/src/verbs.c b/src/verbs.c
index 38b40d2..bb904e2 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -320,18 +320,36 @@ int mlx4_query_srq(struct ibv_srq *srq,
 	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
 }
 
-int mlx4_destroy_srq(struct ibv_srq *srq)
+int mlx4_destroy_srq(struct ibv_srq *ibsrq)
 {
+	struct mlx4_srq *srq = to_msrq(ibsrq);
+	struct mlx4_cq *mcq = NULL;
 	int ret;
 
-	ret = ibv_cmd_destroy_srq(srq);
-	if (ret)
+	if (ibsrq->xrc_cq) {
+		/* is an xrc_srq */
+		mcq = to_mcq(ibsrq->xrc_cq);
+		mlx4_cq_clean(mcq, 0, srq);
+		pthread_spin_lock(&mcq->lock);
+		mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn);
+		pthread_spin_unlock(&mcq->lock);
+	}
+
+	ret = ibv_cmd_destroy_srq(ibsrq);
+	if (ret) {
+		if (ibsrq->xrc_cq) {
+			pthread_spin_lock(&mcq->lock);
+			mlx4_store_xrc_srq(to_mctx(ibsrq->context),
+					   srq->srqn, srq);
+			pthread_spin_unlock(&mcq->lock);
+		}
 		return ret;
+	}
 
-	mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
-	mlx4_free_buf(&to_msrq(srq)->buf);
-	free(to_msrq(srq)->wrid);
-	free(to_msrq(srq));
+	mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db);
+	mlx4_free_buf(&srq->buf);
+	free(srq->wrid);
+	free(srq);
 
 	return 0;
 }
@@ -367,7 +385,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
 	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
 
-	if (attr->srq)
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
 		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
 	else {
 		if (attr->cap.max_recv_sge < 1)
@@ -385,7 +403,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		goto err_free;
 
-	if (!attr->srq) {
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
 		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
 		if (!qp->db)
 			goto err_free;
@@ -394,7 +412,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	}
 
 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
-	if (attr->srq)
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
 		cmd.db_addr = 0;
 	else
 		cmd.db_addr = (uintptr_t) qp->db;
@@ -437,7 +455,7 @@ err_destroy:
 	ibv_cmd_destroy_qp(&qp->ibv_qp);
 
 err_rq_db:
-	if (!attr->srq)
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
 		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
@@ -496,7 +514,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
 		mlx4_init_qp_indices(to_mqp(qp));
-		if (!qp->srq)
+		if (!qp->srq && qp->qp_type != IBV_QPT_XRC)
 			*to_mqp(qp)->db = 0;
 	}
 
@@ -558,7 +576,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 		return ret;
 	}
 
-	if (!ibqp->srq)
+	if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC)
 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
 	free(qp->sq.wrid);
 	if (qp->rq.wqe_cnt)
@@ -616,3 +634,158 @@ int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 {
 	return ibv_cmd_detach_mcast(qp, gid, lid);
 }
+
+#ifdef HAVE_IBV_XRC_OPS
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
+				    struct ibv_xrc_domain *xrc_domain,
+				    struct ibv_cq *xrc_cq,
+				    struct ibv_srq_init_attr *attr)
+{	/* Create an XRC SRQ on pd; returns its ibv_srq, or NULL on any failure. */
+	struct mlx4_create_xrc_srq  cmd;
+	struct mlx4_create_srq_resp resp;
+	struct mlx4_srq		   *srq;
+	int			    ret;
+
+	/* Sanity check SRQ size before proceeding */
+	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
+		return NULL;
+
+	srq = malloc(sizeof *srq);
+	if (!srq)
+		return NULL;
+
+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	srq->max     = align_queue_size(attr->attr.max_wr + 1);
+	srq->max_gs  = attr->attr.max_sge;
+	srq->counter = 0;
+
+	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
+		goto err;
+
+	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+	if (!srq->db)
+		goto err_free;
+
+	*srq->db = 0;	/* clear the newly allocated db before its address is handed out */
+
+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
+	cmd.db_addr  = (uintptr_t) srq->db;
+
+	ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr,
+				     xrc_domain->handle,
+				     xrc_cq->handle,
+				     &cmd.ibv_cmd, sizeof cmd,
+				     &resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_db;
+
+	srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn;	/* number from the response, kept in both structs */
+
+	ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq);	/* make it findable via mlx4_find_xrc_srq() */
+	if (ret)
+		goto err_destroy;
+
+	return &srq->ibv_srq;
+
+err_destroy:	/* unwinding: each label undoes one earlier step and falls through to the next */
+	ibv_cmd_destroy_srq(&srq->ibv_srq);
+
+err_db:
+	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
+
+err_free:
+	free(srq->wrid);
+	mlx4_free_buf(&srq->buf);
+
+err:
+	free(srq);
+
+	return NULL;
+}
+
+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
+					    int fd, int oflag)
+{	/* Open an XRC domain; returns the embedded ibv_xrc_domain, or NULL on failure. */
+	int ret;
+	struct mlx4_open_xrc_domain_resp resp;
+	struct mlx4_xrc_domain *xrcd;
+
+	xrcd = malloc(sizeof *xrcd);
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd,
+				      &resp.ibv_resp, sizeof resp);
+	if (ret) {	/* command failed: nothing else to undo but the allocation */
+		free(xrcd);
+		return NULL;
+	}
+
+	xrcd->xrcdn = resp.xrcdn;	/* keep the domain number from the response */
+	return &xrcd->ibv_xrcd;
+}
+
+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d)
+{	/* Close the XRC domain; returns 0 on success, else the close command's error. */
+	int ret = ibv_cmd_close_xrc_domain(d);
+	if (!ret)	/* on failure d is kept, so the caller still holds a valid object */
+		free(d);
+	return ret;
+}
+
+int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
+			   uint32_t *xrc_qp_num)
+{	/* Thin wrapper: forwards unchanged to ibv_cmd_create_xrc_rcv_qp(). */
+	return ibv_cmd_create_xrc_rcv_qp(init_attr, xrc_qp_num);
+}
+
+int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			   uint32_t xrc_qp_num,
+			   struct ibv_qp_attr *attr,
+			   int attr_mask)
+{	/* Thin wrapper: forwards unchanged to ibv_cmd_modify_xrc_rcv_qp(). */
+	return ibv_cmd_modify_xrc_rcv_qp(xrc_domain, xrc_qp_num,
+					 attr, attr_mask);
+}
+
+int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			  uint32_t xrc_qp_num,
+			  struct ibv_qp_attr *attr,
+			  int attr_mask,
+			  struct ibv_qp_init_attr *init_attr)
+{	/* Query via ibv_cmd_query_xrc_rcv_qp(), then overwrite the fields below with fixed values. */
+	int ret;
+
+	ret = ibv_cmd_query_xrc_rcv_qp(xrc_domain, xrc_qp_num,
+				       attr, attr_mask, init_attr);
+	if (ret)	/* on failure the command's error is returned and nothing is overwritten */
+		return ret;
+
+	init_attr->cap.max_send_wr = init_attr->cap.max_send_sge = 1;
+	init_attr->cap.max_recv_sge = init_attr->cap.max_recv_wr = 0;
+	init_attr->cap.max_inline_data = 0;
+	init_attr->recv_cq = init_attr->send_cq = NULL;
+	init_attr->srq = NULL;
+	init_attr->xrc_domain = xrc_domain;
+	init_attr->qp_type = IBV_QPT_XRC;
+	init_attr->qp_context = NULL;
+	attr->cap = init_attr->cap;	/* attr reports the same capabilities as init_attr */
+
+	return 0;
+}
+
+int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			uint32_t xrc_qp_num)
+{	/* Thin wrapper: forwards unchanged to ibv_cmd_reg_xrc_rcv_qp(). */
+	return ibv_cmd_reg_xrc_rcv_qp(xrc_domain, xrc_qp_num);
+}
+
+int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
+			  uint32_t xrc_qp_num)
+{	/* Thin wrapper: forwards unchanged to ibv_cmd_unreg_xrc_rcv_qp(). */
+	return ibv_cmd_unreg_xrc_rcv_qp(xrc_domain, xrc_qp_num);
+}
+
+#endif
diff --git a/src/wqe.h b/src/wqe.h
index 6f7f309..fa2f8ac 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -65,7 +65,7 @@ struct mlx4_wqe_ctrl_seg {
 	 * [1]   SE (solicited event)
 	 * [0]   FL (force loopback)
 	 */
-	uint32_t		srcrb_flags;
+	uint32_t		xrcrb_flags;
 	/*
 	 * imm is immediate data for send/RDMA write w/ immediate;
 	 * also invalidation key for send with invalidate; input



More information about the general mailing list