[ofa-general] [PATCH 11/11] mlx4: XRC receive-only QPs
Jack Morgenstein
jackm at dev.mellanox.co.il
Mon Jun 23 06:02:02 PDT 2008
mlx4: Implement XRC receive-only QP support.

Add support for XRC RCV-only QPs, which are requested by userspace
but reside in kernel space. Multiple user contexts may register with
the same XRC RCV QP: async events are delivered to every registered
context, and the QP is destroyed when the last context unregisters.
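For reviewers, a minimal illustrative sketch (not part of the diff below)
of how the core layer is expected to reach the new entry points. The ib_*
wrapper names here are assumptions about the core-side interface; the
device methods they dispatch to (create_xrc_rcv_qp, reg_xrc_rcv_qp, ...)
are the ones this patch installs in mlx4_ib_add():

int ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr, u32 *qp_num)
{
        /* Assumed core-layer wrapper; dispatches to the low-level driver. */
        struct ib_device *device = init_attr->xrc_domain->device;

        if (!device->create_xrc_rcv_qp)
                return -ENOSYS;

        /* For mlx4 this resolves to mlx4_ib_create_xrc_rcv_qp() below. */
        return device->create_xrc_rcv_qp(init_attr, qp_num);
}

int ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
{
        /*
         * Assumed core-layer wrapper; each registered context receives
         * async events for the QP and keeps the kernel-resident QP alive
         * until it unregisters (see mlx4_ib_unreg_xrc_rcv_qp() below).
         */
        if (!xrcd->device->reg_xrc_rcv_qp)
                return -ENOSYS;

        return xrcd->device->reg_xrc_rcv_qp(xrcd, context, qp_num);
}

The same pattern applies to the modify/query/unreg verbs; the per-context
bookkeeping itself (xrc_reg_list) lives entirely in the mlx4 driver.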
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
Index: infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-06-23 14:07:54.000000000 +0300
+++ infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h 2008-06-23 14:08:03.000000000 +0300
@@ -58,6 +58,8 @@ struct mlx4_ib_pd {
struct mlx4_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
};
struct mlx4_ib_cq_buf {
@@ -106,7 +108,8 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
- MLX4_IB_QP_LSO = 1 << 0
+ MLX4_IB_QP_LSO = 1 << 0,
+ MLX4_IB_XRC_RCV = 1 << 1
};
struct mlx4_ib_qp {
@@ -129,6 +132,7 @@ struct mlx4_ib_qp {
int buf_size;
struct mutex mutex;
u32 flags;
+ struct list_head xrc_reg_list;
u16 xrcdn;
u8 port;
u8 alt_port;
@@ -172,6 +176,7 @@ struct mlx4_ib_dev {
spinlock_t sm_lock;
struct mutex cap_mask_mutex;
+ struct mutex xrc_reg_mutex;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -309,6 +314,16 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+ u32 *qp_num);
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask);
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask,
+ struct ib_qp_init_attr *init_attr);
+int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
Index: infiniband/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/qp.c 2008-06-23 14:07:59.000000000 +0300
+++ infiniband/drivers/infiniband/hw/mlx4/qp.c 2008-06-23 14:08:03.000000000 +0300
@@ -56,6 +56,12 @@ enum {
MLX4_IB_UD_HEADER_SIZE = 72
};
+
+struct mlx4_ib_xrc_reg_entry {
+ struct list_head list;
+ void *context;
+};
+
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
@@ -202,14 +208,15 @@ static inline unsigned pad_wraparound(st
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
struct ib_event event;
- struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct mlx4_ib_qp *mqp = to_mibqp(qp);
+ struct ib_qp *ibqp = &mqp->ibqp;
+ struct mlx4_ib_xrc_reg_entry *ctx_entry;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
if (ibqp->event_handler) {
event.device = ibqp->device;
- event.element.qp = ibqp;
switch (type) {
case MLX4_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
@@ -241,7 +248,16 @@ static void mlx4_ib_qp_event(struct mlx4
return;
}
- ibqp->event_handler(&event, ibqp->qp_context);
+ if (!(ibqp->qp_type == IB_QPT_XRC &&
+ (mqp->flags & MLX4_IB_XRC_RCV))) {
+ event.element.qp = ibqp;
+ ibqp->event_handler(&event, ibqp->qp_context);
+ } else {
+ event.event |= IB_XRC_QP_EVENT_FLAG;
+ event.element.xrc_qp_num = ibqp->qp_num;
+ list_for_each_entry(ctx_entry, &mqp->xrc_reg_list, list)
+ ibqp->event_handler(&event, ctx_entry->context);
+ }
}
}
@@ -469,6 +485,10 @@ static int create_qp_common(struct mlx4_
else
qp->sq_signal_bits = 0;
+ if (init_attr->qp_type == IB_QPT_XRC &&
+ init_attr->create_flags & QP_CREATE_XRC_RCV)
+ qp->flags |= MLX4_IB_XRC_RCV;
+
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
!!init_attr->srq || !!init_attr->xrc_domain , qp);
if (err)
@@ -708,6 +728,8 @@ struct ib_qp *mlx4_ib_create_qp(struct i
if (!qp)
return ERR_PTR(-ENOMEM);
+ memset(qp, 0, sizeof *qp);
+ INIT_LIST_HEAD(&qp->xrc_reg_list);
err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
if (err) {
kfree(qp);
@@ -1872,3 +1894,260 @@ out:
return err;
}
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+ u32 *qp_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(init_attr->xrc_domain->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrc_domain);
+ struct ib_qp_init_attr ia = *init_attr;
+ struct mlx4_ib_qp *qp;
+ struct ib_qp *ibqp;
+ struct mlx4_ib_xrc_reg_entry *ctx_entry;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
+ if (!ctx_entry)
+ return -ENOMEM;
+
+ ia.qp_type = IB_QPT_XRC;
+ ia.create_flags = QP_CREATE_XRC_RCV;
+ ia.recv_cq = ia.send_cq = xrcd->cq;
+
+ ibqp = mlx4_ib_create_qp(xrcd->pd, &ia, NULL);
+ if (IS_ERR(ibqp)) {
+ kfree(ctx_entry);
+ return PTR_ERR(ibqp);
+ }
+
+ /* set the ibqp attributes which will be used by the mlx4 module */
+ ibqp->device = init_attr->xrc_domain->device;
+ ibqp->pd = xrcd->pd;
+ ibqp->send_cq = ibqp->recv_cq = xrcd->cq;
+ ibqp->event_handler = init_attr->event_handler;
+ ibqp->qp_context = init_attr->qp_context;
+ ibqp->qp_type = init_attr->qp_type;
+ ibqp->xrcd = init_attr->xrc_domain;
+
+ qp = to_mqp(ibqp);
+
+ mutex_lock(&qp->mutex);
+ ctx_entry->context = init_attr->qp_context;
+ list_add_tail(&ctx_entry->list, &qp->xrc_reg_list);
+ mutex_unlock(&qp->mutex);
+ *qp_num = qp->mqp.qpn;
+ return 0;
+}
+
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+ struct mlx4_qp *mqp;
+ int err;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_modify_xrc_rcv_qp: unknown QPN %06x\n",
+ qp_num);
+ return -EINVAL;
+ }
+
+ if (xrcd->xrcdn != to_mxrcd(to_mibqp(mqp)->ibqp.xrcd)->xrcdn)
+ return -EINVAL;
+
+ err = mlx4_ib_modify_qp(&(to_mibqp(mqp)->ibqp), attr, attr_mask, NULL);
+ return err;
+}
+
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+ struct mlx4_ib_qp *qp;
+ struct mlx4_qp *mqp;
+ struct mlx4_qp_context context;
+ int mlx4_state;
+ int err;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_query_xrc_rcv_qp: unknown QPN %06x\n",
+ qp_num);
+ return -EINVAL;
+ }
+
+ qp = to_mibqp(mqp);
+ if (xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
+ return -EINVAL;
+
+ if (qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ err = mlx4_qp_query(dev->dev, mqp, &context);
+ if (err)
+ return -EINVAL;
+
+ mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+ qp_attr->qp_state = to_ib_qp_state(mlx4_state);
+ qp_attr->path_mtu = context.mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context.qkey);
+ qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC) {
+ to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+ to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ }
+
+ qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+ if (qp_attr->qp_state == IB_QPS_INIT)
+ qp_attr->port_num = qp->port;
+ else
+ qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context.pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
+ qp_attr->alt_timeout = context.alt_path.ackto >> 3;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = 0;
+ qp_attr->cap.max_recv_sge = 0;
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ qp_attr->cap.max_inline_data = 0;
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+
+ struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+ struct mlx4_qp *mqp;
+ struct mlx4_ib_qp *mibqp;
+ struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
+ int err = -EINVAL;
+
+ mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: unknown QPN %06x\n",
+ qp_num);
+ goto err_out;
+ }
+
+ mibqp = to_mibqp(mqp);
+
+ if (mxrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
+ goto err_out;
+
+ ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
+ if (!ctx_entry) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ mutex_lock(&mibqp->mutex);
+ list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
+ if (tmp->context == context) {
+ mutex_unlock(&mibqp->mutex);
+ kfree(ctx_entry);
+ mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ return 0;
+ }
+
+ ctx_entry->context = context;
+ list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
+ mutex_unlock(&mibqp->mutex);
+ mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ return 0;
+
+err_out:
+ mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ return err;
+}
+
+int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+
+ struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+ struct mlx4_qp *mqp;
+ struct mlx4_ib_qp *mibqp;
+ struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
+ int found = 0;
+ int err = -EINVAL;
+
+ mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_unreg_xrc_rcv_qp: unknown QPN %06x\n",
+ qp_num);
+ goto err_out;
+ }
+
+ mibqp = to_mibqp(mqp);
+
+ if (mxrcd->xrcdn != (mibqp->xrcdn & 0xffff))
+ goto err_out;
+
+ mutex_lock(&mibqp->mutex);
+ list_for_each_entry_safe(ctx_entry, tmp, &mibqp->xrc_reg_list, list)
+ if (ctx_entry->context == context) {
+ found = 1;
+ list_del(&ctx_entry->list);
+ kfree(ctx_entry);
+ break;
+ }
+
+ mutex_unlock(&mibqp->mutex);
+ if (!found)
+ goto err_out;
+
+ /* destroy the QP if the registration list is empty */
+ if (list_empty(&mibqp->xrc_reg_list))
+ mlx4_ib_destroy_qp(&mibqp->ibqp);
+
+ mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ return 0;
+
+err_out:
+ mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+ return err;
+}
+
Index: infiniband/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/main.c 2008-06-23 14:07:54.000000000 +0300
+++ infiniband/drivers/infiniband/hw/mlx4/main.c 2008-06-23 14:08:03.000000000 +0300
@@ -412,6 +412,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(st
if (!pd)
return ERR_PTR(-ENOMEM);
+ memset(pd, 0, sizeof *pd);
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
if (err) {
kfree(pd);
@@ -448,12 +449,18 @@ static int mlx4_ib_mcg_detach(struct ib_
&to_mqp(ibqp)->mqp, gid->raw);
}
+static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+}
+
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ struct ib_pd *pd;
+ struct ib_cq *cq;
int err;
if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -464,23 +471,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrc
return ERR_PTR(-ENOMEM);
err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(err);
+ if (err)
+ goto err_xrcd;
+
+ pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
+ if (IS_ERR(pd)) {
+ err = PTR_ERR(pd);
+ goto err_pd;
}
+ pd->device = ibdev;
+
+ cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
+ if (IS_ERR(cq)) {
+ err = PTR_ERR(cq);
+ goto err_cq;
+ }
+ cq->device = ibdev;
+ cq->comp_handler = mlx4_dummy_comp_handler;
if (context)
if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof (__u32))) {
- mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
- kfree(xrcd);
- return ERR_PTR(-EFAULT);
+ err = -EFAULT;
+ goto err_copy;
}
+ xrcd->cq = cq;
+ xrcd->pd = pd;
return &xrcd->ibxrcd;
+
+err_copy:
+ mlx4_ib_destroy_cq(cq);
+err_cq:
+ mlx4_ib_dealloc_pd(pd);
+err_pd:
+ mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
+err_xrcd:
+ kfree(xrcd);
+ return ERR_PTR(err);
}
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
+ struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+ mlx4_ib_destroy_cq(mxrcd->cq);
+ mlx4_ib_dealloc_pd(mxrcd->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
kfree(xrcd);
@@ -678,18 +713,28 @@ static void *mlx4_ib_add(struct mlx4_dev
ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+ ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
+ ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
+ ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
+ ibdev->ib_dev.reg_xrc_rcv_qp = mlx4_ib_reg_xrc_rcv_qp;
+ ibdev->ib_dev.unreg_xrc_rcv_qp = mlx4_ib_unreg_xrc_rcv_qp;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN);
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP);
}
-
if (init_node_data(ibdev))
goto err_map;
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ mutex_init(&ibdev->xrc_reg_mutex);
if (ib_register_device(&ibdev->ib_dev))
goto err_map;
Index: infiniband/drivers/infiniband/hw/mlx4/cq.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/cq.c 2008-06-23 14:07:59.000000000 +0300
+++ infiniband/drivers/infiniband/hw/mlx4/cq.c 2008-06-23 14:08:03.000000000 +0300
@@ -178,6 +178,7 @@ struct ib_cq *mlx4_ib_create_cq(struct i
if (!cq)
return ERR_PTR(-ENOMEM);
+ memset(cq, 0, sizeof *cq);
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);