[ewg] [PATCH] IB/mlx4: Add support for Receive Affinity
Eli Cohen
eli at mellanox.co.il
Tue Jul 15 09:14:00 PDT 2008
This patch implements, at the mlx4 level, the changes required to
support RCA. It mainly handles the creation of a range of QPs, the
configuration of the special RCA QP, and the required changes to the
inbox parameters.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
drivers/infiniband/hw/mlx4/main.c | 4 +-
drivers/infiniband/hw/mlx4/mlx4_ib.h | 4 +
drivers/infiniband/hw/mlx4/qp.c | 228 +++++++++++++++++++++-------------
include/linux/mlx4/qp.h | 48 +++++++-
4 files changed, 193 insertions(+), 91 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0c453d0..d3c8878 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -91,7 +91,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK |
+ IB_DEVICE_IPOIB_RCA;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -618,6 +619,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
+ ibdev->ib_dev.create_qp_range = mlx4_ib_create_qp_range;
ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index ec9bf28..e26c3d6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -103,6 +103,7 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = 1 << 0,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ MLX4_IB_QP_RCA = 1 << 2,
};
struct mlx4_ib_qp {
@@ -268,6 +269,9 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
+int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata, int nqps,
+ int align, struct ib_qp *list[]);
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index fc61556..72a2d5d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -502,9 +502,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
+ if (init_attr->create_flags & IB_QP_CREATE_IPOIB_RCA)
+ qp->flags |= MLX4_IB_QP_RCA;
err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
if (err)
@@ -541,11 +542,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
}
- if (!sqpn)
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
- if (err)
- goto err_wrid;
-
err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
if (err) {
mlx4_qp_release_range(dev->dev, sqpn, 1);
@@ -659,9 +655,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
- if (!is_sqp(dev, qp))
- mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
-
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
@@ -678,91 +671,138 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, int nqps,
+ int align, struct ib_qp *list[])
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_sqp *sqp;
struct mlx4_ib_qp *qp;
int err;
+ int base_qpn, qpn;
+ int i;
- /*
- * We only support LSO and multicast loopback blocking, and
- * only for kernel UD QPs.
- */
- if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
- return ERR_PTR(-EINVAL);
+ for (i = 0; i < nqps; ++i) {
+ /*
+ * We only support LSO, multicast loopback blocking and RCA, and
+ * only for kernel UD QPs.
+ */
+ if (init_attr[i].create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_IPOIB_RCA))
+ return -EINVAL;
+ if (init_attr[i].create_flags & (IB_QP_CREATE_IPOIB_UD_LSO |
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_IPOIB_RCA) &&
+ (pd->uobject || init_attr[i].qp_type != IB_QPT_UD))
+ return -EINVAL;
- if (init_attr->create_flags &&
- (pd->uobject || init_attr->qp_type != IB_QPT_UD))
- return ERR_PTR(-EINVAL);
+ /* Userspace is not allowed to create special QPs: */
+ if (pd->uobject && (init_attr[i].qp_type == IB_QPT_SMI ||
+ init_attr[i].qp_type == IB_QPT_GSI))
+ return -EINVAL;
- switch (init_attr->qp_type) {
- case IB_QPT_RC:
- case IB_QPT_UC:
- case IB_QPT_UD:
- {
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
- err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ if (nqps > 1 && (init_attr[i].qp_type == IB_QPT_SMI ||
+ init_attr[i].qp_type == IB_QPT_GSI))
+ return -EINVAL;
+ }
- qp->ibqp.qp_num = qp->mqp.qpn;
+ err = mlx4_qp_reserve_range(dev->dev, nqps, align, &base_qpn);
+ if (err)
+ return err;
- break;
- }
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- {
- /* Userspace is not allowed to create special QPs: */
- if (pd->uobject)
- return ERR_PTR(-EINVAL);
-
- sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
- if (!sqp)
- return ERR_PTR(-ENOMEM);
-
- qp = &sqp->qp;
-
- err = create_qp_common(dev, pd, init_attr, udata,
- dev->dev->caps.sqp_start +
- (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
- init_attr->port_num - 1,
- qp);
- if (err) {
- kfree(sqp);
- return ERR_PTR(err);
+ for (i = 0, qpn = base_qpn; i < nqps; ++i, ++qpn) {
+ switch (init_attr[i].qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ {
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp) {
+ err = -ENOMEM;
+ goto exit_fail;
+ }
+
+ err = create_qp_common(dev, pd, init_attr + i, udata, qpn, qp);
+ if (err) {
+ kfree(qp);
+ err = err;
+ goto exit_fail;
+ }
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ break;
}
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ {
+ sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
+ if (!sqp) {
+ err = -ENOMEM;
+ goto exit_fail;
+ }
- qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+ qp = &sqp->qp;
- break;
- }
- default:
- /* Don't support raw QPs */
- return ERR_PTR(-EINVAL);
+ err = create_qp_common(dev, pd, init_attr + i, udata,
+ dev->dev->caps.sqp_start +
+ (init_attr[i].qp_type == IB_QPT_SMI ? 0 : 2) +
+ init_attr[i].port_num - 1,
+ qp);
+ if (err) {
+ kfree(sqp);
+ goto exit_fail;
+ }
+
+ qp->port = init_attr[i].port_num;
+ qp->ibqp.qp_num = init_attr[i].qp_type == IB_QPT_SMI ? 0 : 1;
+
+ break;
+ }
+ default:
+ /* Don't support raw QPs */
+ err = -EINVAL;
+ goto exit_fail;
+ }
+ list[i] = &qp->ibqp;
}
+ return 0;
+
+exit_fail:
+ for (--i; i >= 0; --i)
+ destroy_qp_common(dev, to_mqp(list[i]), !!pd->uobject);
- return &qp->ibqp;
+ mlx4_qp_release_range(dev->dev, base_qpn, nqps);
+ return err;
+}
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_qp *qp;
+ int err;
+
+ err = mlx4_ib_create_qp_range(pd, init_attr, udata, 1, 1, &qp);
+ if (err)
+ return ERR_PTR(err);
+
+ return qp;
}
int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
+ int qpn = qp->qp_num;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+ if (qpn >= dev->dev->caps.sqp_start + 8)
+ mlx4_qp_release_range(dev->dev, qpn, 1);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -884,6 +924,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
+ context->flags |= cpu_to_be32(qp->flags & MLX4_IB_QP_RCA ? 1 << 13 : 0);
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -942,18 +983,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_PORT) {
if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
!(attr_mask & IB_QP_AV)) {
- mlx4_set_sched(&context->pri_path, attr->port_num);
+ mlx4_set_sched(&context->path.pri_path, attr->port_num);
optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
}
}
if (attr_mask & IB_QP_PKEY_INDEX) {
- context->pri_path.pkey_index = attr->pkey_index;
+ context->path.pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+ if (mlx4_set_path(dev, &attr->ah_attr, &context->path.pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
goto out;
@@ -962,7 +1003,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_TIMEOUT) {
- context->pri_path.ackto = attr->timeout << 3;
+ context->path.pri_path.ackto = attr->timeout << 3;
optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
}
@@ -975,12 +1016,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->path.alt_path,
attr->alt_port_num))
goto out;
- context->alt_path.pkey_index = attr->alt_pkey_index;
- context->alt_path.ackto = attr->alt_timeout << 3;
+ context->path.alt_path.pkey_index = attr->alt_pkey_index;
+ context->path.alt_path.ackto = attr->alt_timeout << 3;
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
@@ -1048,11 +1089,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
new_state == IB_QPS_RTR &&
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
ibqp->qp_type == IB_QPT_UD)) {
- context->pri_path.sched_queue = (qp->port - 1) << 6;
+ context->path.pri_path.sched_queue = (qp->port - 1) << 6;
if (is_qp0(dev, qp))
- context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
+ context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
else
- context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1061,6 +1102,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
else
sqd_event = 0;
+ if (attr_mask & IB_QP_RCA) {
+ context->path.pri_path.rss.tbl_sz_base_qpn =
+ cpu_to_be32(attr->rca.base_qpn | ilog2(attr->rca.num_qpn) << 24);
+ context->path.pri_path.rss.default_qpn = cpu_to_be32(attr->rca.default_qpn);
+ context->rca.key.flags_hash_fn = cpu_to_be32(MLX4_RCA_TCP_IPV6 |
+ MLX4_RCA_IPV6 |
+ MLX4_RCA_TCP_IPV4 |
+ MLX4_RCA_IPV4);
+ memset(context->rca.key.rca_key, 0, sizeof context->rca.key.rca_key);
+ }
+
/*
* Before passing a kernel QP to the HW, make sure that the
* ownership bits of the send queue are set and the SQ
@@ -1182,6 +1234,12 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (attr_mask & IB_QP_RCA){
+ if ((cur_state != IB_QPS_RESET || new_state != IB_QPS_INIT) &&
+ (cur_state != IB_QPS_RTS || new_state != IB_QPS_RTS))
+ goto out;
+ }
+
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
@@ -1805,17 +1863,17 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
to_ib_qp_access_flags(be32_to_cpu(context.params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
- qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+ to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.path.pri_path);
+ to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.path.alt_path);
+ qp_attr->alt_pkey_index = context.path.alt_path.pkey_index & 0x7f;
qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
}
- qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+ qp_attr->pkey_index = context.path.pri_path.pkey_index & 0x7f;
if (qp_attr->qp_state == IB_QPS_INIT)
qp_attr->port_num = qp->port;
else
- qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+ qp_attr->port_num = context.path.pri_path.sched_queue & 0x40 ? 2 : 1;
/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
@@ -1826,10 +1884,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
qp_attr->min_rnr_timer =
(be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
- qp_attr->timeout = context.pri_path.ackto >> 3;
+ qp_attr->timeout = context.path.pri_path.ackto >> 3;
qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
- qp_attr->alt_timeout = context.alt_path.ackto >> 3;
+ qp_attr->alt_timeout = context.path.alt_path.ackto >> 3;
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 1bb2ba4..333afce 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -95,11 +95,22 @@ enum {
MLX4_QP_BIT_RIC = 1 << 4,
};
+struct mlx4_net_path {
+ u16 reserved;
+ __be16 dmac_hi;
+ __be32 dmac_lo;
+};
+
+struct mlx4_rss_path {
+ __be32 tbl_sz_base_qpn;
+ __be32 default_qpn;
+};
+
struct mlx4_qp_path {
u8 fl;
u8 reserved1[2];
u8 pkey_index;
- u8 reserved2;
+ u8 counter_index;
u8 grh_mylmc;
__be16 rlid;
u8 ackto;
@@ -111,8 +122,33 @@ struct mlx4_qp_path {
u8 sched_queue;
u8 snooper_flags;
u8 reserved3[2];
- u8 counter_index;
- u8 reserved4[7];
+ union {
+ struct mlx4_net_path net;
+ struct mlx4_rss_path rss;
+ };
+};
+
+struct mlx4_addr_path {
+ struct mlx4_qp_path pri_path;
+ struct mlx4_qp_path alt_path;
+};
+
+enum {
+ MLX4_RCA_TCP_IPV6 = 1 << 2,
+ MLX4_RCA_IPV6 = 1 << 3,
+ MLX4_RCA_TCP_IPV4 = 1 << 4,
+ MLX4_RCA_IPV4 = 1 << 5,
+ MLX4_HASH_FN_OFF = 8
+};
+
+struct mlx4_rca_key {
+ __be32 flags_hash_fn;
+ __be32 rca_key[10];
+};
+
+struct mlx4_rca {
+ struct mlx4_qp_path pri_path;
+ struct mlx4_rca_key key;
};
struct mlx4_qp_context {
@@ -125,8 +161,10 @@ struct mlx4_qp_context {
__be32 usr_page;
__be32 local_qpn;
__be32 remote_qpn;
- struct mlx4_qp_path pri_path;
- struct mlx4_qp_path alt_path;
+ union {
+ struct mlx4_addr_path path;
+ struct mlx4_rca rca;
+ };
__be32 params1;
u32 reserved1;
__be32 next_send_psn;
--
1.5.6
More information about the ewg
mailing list