[openib-general] Re: [PATCH 2 of 2] libmthca: qp capability calculations
Roland Dreier
rolandd at cisco.com
Tue Nov 8 13:46:13 PST 2005
Similar minor changes here (ChangeLog entries added, better WQE size
computation)... comments?
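
For reference, a minimal standalone sketch of the send WQE sizing approach
taken below in mthca_alloc_qp_buf: take the larger of the data-segment space
needed for max_send_sge and for max_inline_data, add the fixed per-WQE
overhead, and round up to a power of two by choosing a wqe_shift.  The segment
sizes and the align_up helper are illustrative placeholders rather than the
real mthca definitions, and the per-transport segments (UD, remote address,
atomic, bind) are omitted:

/*
 * Sketch of send WQE sizing.  Placeholder sizes only; not the real
 * mthca segment layouts.
 */
#include <stdio.h>

#define NEXT_SEG_SIZE   16	/* stands in for sizeof (struct mthca_next_seg) */
#define INLINE_SEG_SIZE  4	/* stands in for sizeof (struct mthca_inline_seg) */
#define DATA_SEG_SIZE   16	/* stands in for sizeof (struct mthca_data_seg) */

/* Round val up to a multiple of align (align must be a power of two). */
static int align_up(int val, int align)
{
	return (val + align - 1) & ~(align - 1);
}

static int wqe_shift(int max_send_sge, int max_inline_data)
{
	int max_sq_sge, size, shift;

	/*
	 * Enough data segments to cover either the requested SGE count
	 * or the requested inline data, whichever needs more room.
	 */
	max_sq_sge = align_up(max_inline_data + INLINE_SEG_SIZE,
			      DATA_SEG_SIZE) / DATA_SEG_SIZE;
	if (max_sq_sge < max_send_sge)
		max_sq_sge = max_send_sge;

	size = NEXT_SEG_SIZE + max_sq_sge * DATA_SEG_SIZE;

	/* Round up to the next power of two, with a 64-byte minimum. */
	for (shift = 6; (1 << shift) < size; ++shift)
		; /* nothing */

	return shift;
}

int main(void)
{
	printf("wqe_shift(4 SGEs, 128B inline) = %d\n", wqe_shift(4, 128));
	return 0;
}
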
--- libibverbs/include/infiniband/kern-abi.h (revision 3989)
+++ libibverbs/include/infiniband/kern-abi.h (working copy)
@@ -48,7 +48,7 @@
* The minimum and maximum kernel ABI that we can handle.
*/
#define IB_USER_VERBS_MIN_ABI_VERSION 1
-#define IB_USER_VERBS_MAX_ABI_VERSION 3
+#define IB_USER_VERBS_MAX_ABI_VERSION 4
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -382,6 +382,11 @@ struct ibv_create_qp {
struct ibv_create_qp_resp {
__u32 qp_handle;
__u32 qpn;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
};
struct ibv_qp_dest {
@@ -615,9 +620,7 @@ struct ibv_modify_srq {
__u32 srq_handle;
__u32 attr_mask;
__u32 max_wr;
- __u32 max_sge;
__u32 srq_limit;
- __u32 reserved;
__u64 driver_data[0];
};
@@ -726,4 +729,22 @@ struct ibv_create_cq_v2 {
__u64 driver_data[0];
};
+struct ibv_modify_srq_v3 {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 srq_handle;
+ __u32 attr_mask;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srq_limit;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ibv_create_qp_resp_v3 {
+ __u32 qp_handle;
+ __u32 qpn;
+};
+
#endif /* KERN_ABI_H */
--- libibverbs/ChangeLog (revision 3989)
+++ libibverbs/ChangeLog (working copy)
@@ -1,3 +1,11 @@
+2005-11-08 Roland Dreier <roland at cisco.com>
+
+ * src/cmd.c (ibv_cmd_create_qp): Add handling for new create QP
+ interface, which has the kernel return QP capabilities.
+
+ * src/cmd.c (ibv_cmd_modify_srq): Split off handling of modify SRQ
+ for ABI versions 3 and older, which passed max_sge as part of command.
+
2005-10-30 Roland Dreier <roland at cisco.com>
* examples/srq_pingpong.c (pp_init_ctx): Create CQ with rx_depth +
--- libibverbs/src/cmd.c (revision 3989)
+++ libibverbs/src/cmd.c (working copy)
@@ -420,19 +420,49 @@ int ibv_cmd_create_srq(struct ibv_pd *pd
return 0;
}
+static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
+ struct ibv_srq_attr *srq_attr,
+ enum ibv_srq_attr_mask srq_attr_mask,
+ struct ibv_modify_srq *new_cmd,
+ size_t new_cmd_size)
+{
+ struct ibv_modify_srq_v3 *cmd;
+ size_t cmd_size;
+
+ cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
+ cmd = alloca(cmd_size);
+ memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);
+
+ IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);
+
+ cmd->srq_handle = srq->handle;
+ cmd->attr_mask = srq_attr_mask;
+ cmd->max_wr = srq_attr->max_wr;
+ cmd->srq_limit = srq_attr->srq_limit;
+ cmd->max_sge = 0;
+ cmd->reserved = 0;
+
+ if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
+ return errno;
+
+ return 0;
+}
+
int ibv_cmd_modify_srq(struct ibv_srq *srq,
struct ibv_srq_attr *srq_attr,
enum ibv_srq_attr_mask srq_attr_mask,
struct ibv_modify_srq *cmd, size_t cmd_size)
{
+ if (abi_ver == 3)
+ return ibv_cmd_modify_srq_v3(srq, srq_attr, srq_attr_mask,
+ cmd, cmd_size);
+
IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);
cmd->srq_handle = srq->handle;
cmd->attr_mask = srq_attr_mask;
cmd->max_wr = srq_attr->max_wr;
- cmd->max_sge = srq_attr->max_sge;
cmd->srq_limit = srq_attr->srq_limit;
- cmd->reserved = 0;
if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
return errno;
@@ -479,9 +509,15 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
struct ibv_create_qp *cmd, size_t cmd_size)
{
- struct ibv_create_qp_resp resp;
-
- IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &resp, sizeof resp);
+ union {
+ struct ibv_create_qp_resp resp;
+ struct ibv_create_qp_resp_v3 resp_v3;
+ } r;
+
+ if (abi_ver > 3)
+ IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &r.resp, sizeof r.resp);
+ else
+ IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &r.resp_v3, sizeof r.resp_v3);
cmd->user_handle = (uintptr_t) qp;
cmd->pd_handle = pd->handle;
cmd->send_cq_handle = attr->send_cq->handle;
@@ -499,8 +535,18 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
return errno;
- qp->handle = resp.qp_handle;
- qp->qp_num = resp.qpn;
+ if (abi_ver > 3) {
+ qp->handle = r.resp.qp_handle;
+ qp->qp_num = r.resp.qpn;
+ attr->cap.max_recv_sge = r.resp.max_recv_sge;
+ attr->cap.max_send_sge = r.resp.max_send_sge;
+ attr->cap.max_recv_wr = r.resp.max_recv_wr;
+ attr->cap.max_send_wr = r.resp.max_send_wr;
+ attr->cap.max_inline_data = r.resp.max_inline_data;
+ } else {
+ qp->handle = r.resp_v3.qp_handle;
+ qp->qp_num = r.resp_v3.qpn;
+ }
return 0;
}
--- libmthca/src/qp.c (revision 3989)
+++ libmthca/src/qp.c (working copy)
@@ -216,7 +216,6 @@ int mthca_tavor_post_send(struct ibv_qp
if (wr->send_flags & IBV_SEND_INLINE) {
struct mthca_inline_seg *seg = wqe;
- int max_size = (1 << qp->sq.wqe_shift) - sizeof *seg - size * 16;
int s = 0;
wqe += sizeof *seg;
@@ -225,7 +224,7 @@ int mthca_tavor_post_send(struct ibv_qp
s += sge->length;
- if (s > max_size) {
+ if (s > qp->max_inline_data) {
ret = -1;
*bad_wr = wr;
goto out;
@@ -515,7 +514,6 @@ int mthca_arbel_post_send(struct ibv_qp
if (wr->send_flags & IBV_SEND_INLINE) {
struct mthca_inline_seg *seg = wqe;
- int max_size = (1 << qp->sq.wqe_shift) - sizeof *seg - size * 16;
int s = 0;
wqe += sizeof *seg;
@@ -524,7 +522,7 @@ int mthca_arbel_post_send(struct ibv_qp
s += sge->length;
- if (s > max_size) {
+ if (s > qp->max_inline_data) {
ret = -1;
*bad_wr = wr;
goto out;
@@ -683,12 +681,14 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd
enum ibv_qp_type type, struct mthca_qp *qp)
{
int size;
+ int max_sq_sge;
qp->rq.max_gs = cap->max_recv_sge;
- qp->sq.max_gs = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
+ qp->sq.max_gs = cap->max_send_sge;
+ max_sq_sge = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
- if (qp->sq.max_gs < cap->max_send_sge)
- qp->sq.max_gs = cap->max_send_sge;
+ if (max_sq_sge < cap->max_send_sge)
+ max_sq_sge = cap->max_send_sge;
qp->wrid = malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
if (!qp->wrid)
@@ -701,20 +701,42 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd
qp->rq.wqe_shift++)
; /* nothing */
- size = sizeof (struct mthca_next_seg) +
- qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ size = max_sq_sge * sizeof (struct mthca_data_seg);
switch (type) {
case IBV_QPT_UD:
- if (mthca_is_memfree(pd->context))
- size += sizeof (struct mthca_arbel_ud_seg);
- else
- size += sizeof (struct mthca_tavor_ud_seg);
+ size += mthca_is_memfree(pd->context) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ case IBV_QPT_UC:
+ size += sizeof (struct mthca_raddr_seg);
+ break;
+
+ case IBV_QPT_RC:
+ size += sizeof (struct mthca_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ if (size < (sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg)))
+ size = (sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg));
break;
+
default:
- /* bind seg is as big as atomic + raddr segs */
- size += sizeof (struct mthca_bind_seg);
+ break;
}
+ /* Make sure that we have enough space for a bind request */
+ if (size < sizeof (struct mthca_bind_seg))
+ size = sizeof (struct mthca_bind_seg);
+
+ size += sizeof (struct mthca_next_seg);
+
for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
qp->sq.wqe_shift++)
; /* nothing */
@@ -767,36 +789,6 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd
return 0;
}
-void mthca_return_cap(struct ibv_pd *pd, struct mthca_qp *qp,
- enum ibv_qp_type type, struct ibv_qp_cap *cap)
-{
- /*
- * Maximum inline data size is the full WQE size less the size
- * of the next segment, inline segment and other non-data segments.
- */
- cap->max_inline_data = (1 << qp->sq.wqe_shift) -
- sizeof (struct mthca_next_seg) -
- sizeof (struct mthca_inline_seg);
-
- switch (type) {
- case IBV_QPT_UD:
- if (mthca_is_memfree(pd->context))
- cap->max_inline_data -= sizeof (struct mthca_arbel_ud_seg);
- else
- cap->max_inline_data -= sizeof (struct mthca_tavor_ud_seg);
- break;
-
- default:
- cap->max_inline_data -= sizeof (struct mthca_raddr_seg);
- break;
- }
-
- cap->max_send_wr = qp->sq.max;
- cap->max_recv_wr = qp->rq.max;
- cap->max_send_sge = qp->sq.max_gs;
- cap->max_recv_sge = qp->rq.max_gs;
-}
-
struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
{
int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
--- libmthca/src/verbs.c (revision 3989)
+++ libmthca/src/verbs.c (working copy)
@@ -476,7 +476,11 @@ struct ibv_qp *mthca_create_qp(struct ib
if (ret)
goto err_destroy;
- mthca_return_cap(pd, qp, attr->qp_type, &attr->cap);
+ qp->sq.max = attr->cap.max_send_wr;
+ qp->rq.max = attr->cap.max_recv_wr;
+ qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->rq.max_gs = attr->cap.max_recv_sge;
+ qp->max_inline_data = attr->cap.max_inline_data;
return &qp->ibv_qp;
--- libmthca/src/mthca.h (revision 3989)
+++ libmthca/src/mthca.h (working copy)
@@ -177,6 +177,7 @@ struct mthca_qp {
void *buf;
uint64_t *wrid;
int send_wqe_offset;
+ int max_inline_data;
int buf_size;
struct mthca_wq sq;
struct mthca_wq rq;
@@ -319,8 +320,6 @@ extern int mthca_arbel_post_recv(struct
struct ibv_recv_wr **bad_wr);
extern int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
enum ibv_qp_type type, struct mthca_qp *qp);
-extern void mthca_return_cap(struct ibv_pd *pd, struct mthca_qp *qp,
- enum ibv_qp_type type, struct ibv_qp_cap *cap);
extern struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn);
extern int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp);
extern void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn);
--- libmthca/ChangeLog (revision 3989)
+++ libmthca/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2005-11-08 Roland Dreier <roland at cisco.com>
+
+ * src/qp.c, src/verbs.c, src/mthca.h: Delegate setting of QP
+ capabilities (max_sge, max_inline_data, etc) to kernel.
+
2005-11-04 Roland Dreier <roland at cisco.com>
* src/verbs.c (mthca_destroy_qp): Clean CQEs when we destroy a QP.
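
For context on the libmthca ChangeLog entry above, a minimal sketch of what
delegating the capability calculation buys a verbs consumer: after this change
ibv_create_qp() fills attr.cap with the limits the kernel and driver actually
granted, so the caller can size inline sends from max_inline_data instead of
guessing.  The pd and cq below are assumed to have been created earlier, and
error handling is trimmed:

/*
 * Sketch: attr.cap is updated in place with the granted capabilities
 * once ibv_create_qp() returns successfully.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

static struct ibv_qp *create_rc_qp(struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_qp_init_attr attr = {
		.send_cq = cq,
		.recv_cq = cq,
		.qp_type = IBV_QPT_RC,
		.cap     = {
			.max_send_wr     = 64,
			.max_recv_wr     = 64,
			.max_send_sge    = 4,
			.max_recv_sge    = 4,
			.max_inline_data = 128,
		},
	};
	struct ibv_qp *qp = ibv_create_qp(pd, &attr);

	if (qp)
		/* attr.cap now reflects what was actually granted. */
		printf("granted max_inline_data: %u\n",
		       attr.cap.max_inline_data);

	return qp;
}
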