[openib-general] Re: [PATCH 1 of 2] mthca: qp size calculations
Roland Dreier
rolandd at cisco.com
Tue Nov 8 13:44:57 PST 2005
Thanks, this looks pretty good. I updated the computation of the WQE
sizes to be more accurate (I hope). We know that a bind request will
not have any s/g entries, and an atomic request will have only one s/g
entry, so we don't have to add together all of our worst cases.
Since this is bumping the uverbs ABI anyway, I also took the
opportunity to get rid of the max_sge member in the modify SRQ
command. It's not a valid parameter, so there's no reason to pass it
from userspace.
Comments?
--- infiniband/include/rdma/ib_user_verbs.h (revision 3989)
+++ infiniband/include/rdma/ib_user_verbs.h (working copy)
@@ -43,7 +43,7 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define IB_USER_VERBS_ABI_VERSION 3
+#define IB_USER_VERBS_ABI_VERSION 4
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -333,6 +333,11 @@ struct ib_uverbs_create_qp {
struct ib_uverbs_create_qp_resp {
__u32 qp_handle;
__u32 qpn;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
};
/*
@@ -552,9 +557,7 @@ struct ib_uverbs_modify_srq {
__u32 srq_handle;
__u32 attr_mask;
__u32 max_wr;
- __u32 max_sge;
__u32 srq_limit;
- __u32 reserved;
__u64 driver_data[0];
};
--- infiniband/core/uverbs_cmd.c (revision 3989)
+++ infiniband/core/uverbs_cmd.c (working copy)
@@ -908,7 +908,12 @@ retry:
if (ret)
goto err_destroy;
- resp.qp_handle = uobj->uobject.id;
+ resp.qp_handle = uobj->uobject.id;
+ resp.max_recv_sge = attr.cap.max_recv_sge;
+ resp.max_send_sge = attr.cap.max_send_sge;
+ resp.max_recv_wr = attr.cap.max_recv_wr;
+ resp.max_send_wr = attr.cap.max_send_wr;
+ resp.max_inline_data = attr.cap.max_inline_data;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
@@ -1701,7 +1706,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_u
}
attr.max_wr = cmd.max_wr;
- attr.max_sge = cmd.max_sge;
attr.srq_limit = cmd.srq_limit;
ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
--- infiniband/hw/mthca/mthca_provider.c (revision 3989)
+++ infiniband/hw/mthca/mthca_provider.c (working copy)
@@ -604,11 +604,11 @@ static struct ib_qp *mthca_create_qp(str
return ERR_PTR(err);
}
- init_attr->cap.max_inline_data = 0;
init_attr->cap.max_send_wr = qp->sq.max;
init_attr->cap.max_recv_wr = qp->rq.max;
init_attr->cap.max_send_sge = qp->sq.max_gs;
init_attr->cap.max_recv_sge = qp->rq.max_gs;
+ init_attr->cap.max_inline_data = qp->max_inline_data;
return &qp->ibqp;
}
--- infiniband/hw/mthca/mthca_provider.h (revision 3989)
+++ infiniband/hw/mthca/mthca_provider.h (working copy)
@@ -251,6 +251,7 @@ struct mthca_qp {
struct mthca_wq sq;
enum ib_sig_type sq_policy;
int send_wqe_offset;
+ int max_inline_data;
u64 *wrid;
union mthca_buf queue;
--- infiniband/hw/mthca/mthca_cmd.c (revision 3989)
+++ infiniband/hw/mthca/mthca_cmd.c (working copy)
@@ -1060,6 +1060,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev
dev_lim->hca.arbel.resize_srq = field & 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
+ dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
dev_lim->mpt_entry_sz = size;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
--- infiniband/hw/mthca/mthca_dev.h (revision 3989)
+++ infiniband/hw/mthca/mthca_dev.h (working copy)
@@ -131,6 +131,7 @@ struct mthca_limits {
int max_sg;
int num_qps;
int max_wqes;
+ int max_desc_sz;
int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
--- infiniband/hw/mthca/mthca_main.c (revision 3989)
+++ infiniband/hw/mthca/mthca_main.c (working copy)
@@ -168,6 +168,7 @@ static int __devinit mthca_dev_lim(struc
mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
/*
* Subtract 1 from the limit because we need to allocate a
* spare CQE so the HCA HW can tell the difference between an
--- infiniband/hw/mthca/mthca_qp.c (revision 3989)
+++ infiniband/hw/mthca/mthca_qp.c (working copy)
@@ -883,6 +883,48 @@ int mthca_modify_qp(struct ib_qp *ibqp,
return err;
}
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int max_data_size;
+
+ /*
+ * Calculate the maximum size of WQE s/g segments, excluding
+ * the next segment and other non-data segments.
+ */
+ max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
+ sizeof (struct mthca_next_seg);
+
+ switch (qp->transport) {
+ case MLX:
+ max_data_size -= 2 * sizeof (struct mthca_data_seg);
+ break;
+
+ case UD:
+ if (mthca_is_memfree(dev))
+ max_data_size -= sizeof (struct mthca_arbel_ud_seg);
+ else
+ max_data_size -= sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ default:
+ max_data_size -= sizeof (struct mthca_raddr_seg);
+ break;
+ }
+
+ /* We don't support inline data for kernel QPs (yet). */
+ if (!pd->ibpd.uobject)
+ qp->max_inline_data = 0;
+ else
+ qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
+
+ qp->sq.max_gs = max_data_size / sizeof (struct mthca_data_seg);
+ qp->rq.max_gs = (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg);
+}
+
/*
* Allocate and register buffer for WQEs. qp->rq.max, sq.max,
* rq.max_gs and sq.max_gs must all be assigned.
@@ -900,27 +942,53 @@ static int mthca_alloc_wqe_buf(struct mt
size = sizeof (struct mthca_next_seg) +
qp->rq.max_gs * sizeof (struct mthca_data_seg);
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
qp->rq.wqe_shift++)
; /* nothing */
- size = sizeof (struct mthca_next_seg) +
- qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
switch (qp->transport) {
case MLX:
size += 2 * sizeof (struct mthca_data_seg);
break;
+
case UD:
- if (mthca_is_memfree(dev))
- size += sizeof (struct mthca_arbel_ud_seg);
- else
- size += sizeof (struct mthca_tavor_ud_seg);
+ size += mthca_is_memfree(dev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ case UC:
+ size += sizeof (struct mthca_raddr_seg);
+ break;
+
+ case RC:
+ size += sizeof (struct mthca_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ size = max_t(int, size,
+ sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg));
break;
+
default:
- /* bind seg is as big as atomic + raddr segs */
- size += sizeof (struct mthca_bind_seg);
+ break;
}
+ /* Make sure that we have enough space for a bind request */
+ size = max_t(int, size, sizeof (struct mthca_bind_seg));
+
+ size += sizeof (struct mthca_next_seg);
+
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
qp->sq.wqe_shift++)
; /* nothing */
@@ -1064,6 +1132,8 @@ static int mthca_alloc_qp_common(struct
return ret;
}
+ mthca_adjust_qp_caps(dev, pd, qp);
+
/*
* If this is a userspace QP, we're done now. The doorbells
* will be allocated and buffers will be initialized in
More information about the general
mailing list