[ewg] [PATCH] IB/mlx4: Add support for Receive Affinity

Eli Cohen eli at mellanox.co.il
Tue Jul 15 09:14:00 PDT 2008


This patch implements, at the mlx4 level, the changes required to
support receive affinity (RCA). It mainly handles the creation of a
range of QPs, the configuration of the special RCA QP, and the
required changes to the inbox parameters.
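
For illustration, a consumer of this interface would create the child
QPs as one aligned range and attach them to an RCA parent, roughly as
sketched below. This is a sketch only: ib_create_qp_range() and the
rca member of struct ib_qp_attr are assumed to come from the companion
core patches in this series, and the parent/child arrangement shown is
one possibility, not something this patch mandates.

/* Sketch: create an RCA parent plus a power-of-two range of child QPs. */
static int setup_rca_qps(struct ib_pd *pd,
			 struct ib_qp_init_attr *parent_attr,
			 struct ib_qp_init_attr *child_attr, /* nqps entries */
			 int nqps, struct ib_qp **parent,
			 struct ib_qp *children[])
{
	struct ib_qp_attr rca_attr = { };
	struct ib_qp *qp;
	int i, err;

	parent_attr->create_flags |= IB_QP_CREATE_IPOIB_RCA;
	qp = ib_create_qp(pd, parent_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);
	*parent = qp;

	/*
	 * nqps consecutive QPNs, aligned to nqps; nqps must be a power
	 * of two, as implied by the ilog2() encoding in
	 * __mlx4_ib_modify_qp() below.
	 */
	err = ib_create_qp_range(pd, child_attr, NULL, nqps, nqps, children);
	if (err)
		goto err_parent;

	rca_attr.rca.base_qpn	 = children[0]->qp_num;
	rca_attr.rca.num_qpn	 = nqps;
	rca_attr.rca.default_qpn = children[0]->qp_num;

	/* The driver accepts IB_QP_RCA on the RESET->INIT and RTS->RTS
	 * transitions; a real caller would combine it with IB_QP_STATE
	 * and friends as usual. */
	err = ib_modify_qp(*parent, &rca_attr, IB_QP_RCA);
	if (err)
		goto err_children;

	return 0;

err_children:
	for (i = 0; i < nqps; ++i)
		ib_destroy_qp(children[i]);
err_parent:
	ib_destroy_qp(*parent);
	return err;
}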

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
 drivers/infiniband/hw/mlx4/main.c    |    4 +-
 drivers/infiniband/hw/mlx4/mlx4_ib.h |    4 +
 drivers/infiniband/hw/mlx4/qp.c      |  227 +++++++++++++++++++++-------------
 include/linux/mlx4/qp.h              |   48 +++++++-
 4 files changed, 192 insertions(+), 91 deletions(-)
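
Note on the context layout: the IB_QP_RCA branch of
__mlx4_ib_modify_qp() below programs the hardware receive hash through
the rss part of the new path union. A minimal sketch of the
tbl_sz_base_qpn encoding, assuming num_qpn is a power of two (as the
use of ilog2() implies):

/*
 * tbl_sz_base_qpn (see __mlx4_ib_modify_qp() below):
 *   bits  0..23  base QPN of the child range
 *   bits 24+     log2 of the number of child QPs
 *
 * Example: base_qpn = 0x48, num_qpn = 8.  ilog2(8) = 3, so the field
 * becomes cpu_to_be32(0x48 | 3 << 24) == cpu_to_be32(0x03000048), and
 * the HW presumably spreads hashed flows over QPNs 0x48..0x4f.
 */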

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0c453d0..d3c8878 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -91,7 +91,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		IB_DEVICE_PORT_ACTIVE_EVENT		|
 		IB_DEVICE_SYS_IMAGE_GUID		|
 		IB_DEVICE_RC_RNR_NAK_GEN		|
-		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK	|
+		IB_DEVICE_IPOIB_RCA;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -618,6 +619,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.query_srq		= mlx4_ib_query_srq;
 	ibdev->ib_dev.destroy_srq	= mlx4_ib_destroy_srq;
 	ibdev->ib_dev.post_srq_recv	= mlx4_ib_post_srq_recv;
+	ibdev->ib_dev.create_qp_range	= mlx4_ib_create_qp_range;
 	ibdev->ib_dev.create_qp		= mlx4_ib_create_qp;
 	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
 	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index ec9bf28..e26c3d6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -103,6 +103,7 @@ struct mlx4_ib_wq {
 enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO				= 1 << 0,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
+	MLX4_IB_QP_RCA				= 1 << 2,
 };
 
 struct mlx4_ib_qp {
@@ -268,6 +269,9 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
 int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			  struct ib_recv_wr **bad_wr);
 
+int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr,
+			    struct ib_udata *udata, int nqps,
+			    int align, struct ib_qp *list[]);
 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index fc61556..72a2d5d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -502,9 +502,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 		if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
 			qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
 		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
 			qp->flags |= MLX4_IB_QP_LSO;
+		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_RCA)
+			qp->flags |= MLX4_IB_QP_RCA;
 
 		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
 		if (err)
@@ -541,11 +542,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		}
 	}
 
-	if (!sqpn)
-		err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
-	if (err)
-		goto err_wrid;
-
 	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
 	if (err) {
 		mlx4_qp_release_range(dev->dev, sqpn, 1);
@@ -659,9 +655,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 
 	mlx4_qp_free(dev->dev, &qp->mqp);
 
-	if (!is_sqp(dev, qp))
-		mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
-
 	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 
 	if (is_user) {
@@ -678,91 +671,137 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 	}
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-				struct ib_qp_init_attr *init_attr,
-				struct ib_udata *udata)
+int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata, int nqps,
+			    int align, struct ib_qp *list[])
 {
 	struct mlx4_ib_dev *dev = to_mdev(pd->device);
 	struct mlx4_ib_sqp *sqp;
 	struct mlx4_ib_qp *qp;
 	int err;
+	int base_qpn, qpn;
+	int i;
 
-	/*
-	 * We only support LSO and multicast loopback blocking, and
-	 * only for kernel UD QPs.
-	 */
-	if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
-					IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
-		return ERR_PTR(-EINVAL);
+	for (i = 0; i < nqps; ++i) {
+		/*
+		 * We only support LSO, multicast loopback blocking and RCA, and
+		 * only for kernel UD QPs.
+		 */
+		if (init_attr[i].create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
+						  IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+						  IB_QP_CREATE_IPOIB_RCA))
+			return -EINVAL;
+		if (init_attr[i].create_flags & (IB_QP_CREATE_IPOIB_UD_LSO |
+						 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+						 IB_QP_CREATE_IPOIB_RCA) &&
+		    (pd->uobject || init_attr[i].qp_type != IB_QPT_UD))
+			return -EINVAL;
 
-	if (init_attr->create_flags &&
-	    (pd->uobject || init_attr->qp_type != IB_QPT_UD))
-		return ERR_PTR(-EINVAL);
+		/* Userspace is not allowed to create special QPs: */
+		if (pd->uobject && (init_attr[i].qp_type == IB_QPT_SMI ||
+				    init_attr[i].qp_type == IB_QPT_GSI))
+			return -EINVAL;
 
-	switch (init_attr->qp_type) {
-	case IB_QPT_RC:
-	case IB_QPT_UC:
-	case IB_QPT_UD:
-	{
-		qp = kzalloc(sizeof *qp, GFP_KERNEL);
-		if (!qp)
-			return ERR_PTR(-ENOMEM);
-
-		err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
-		if (err) {
-			kfree(qp);
-			return ERR_PTR(err);
-		}
+		if (nqps > 1 && (init_attr[i].qp_type == IB_QPT_SMI ||
+				 init_attr[i].qp_type == IB_QPT_GSI))
+			return -EINVAL;
+	}
 
-		qp->ibqp.qp_num = qp->mqp.qpn;
+	err = mlx4_qp_reserve_range(dev->dev, nqps, align, &base_qpn);
+	if (err)
+		return err;
 
-		break;
-	}
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-	{
-		/* Userspace is not allowed to create special QPs: */
-		if (pd->uobject)
-			return ERR_PTR(-EINVAL);
-
-		sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
-		if (!sqp)
-			return ERR_PTR(-ENOMEM);
-
-		qp = &sqp->qp;
-
-		err = create_qp_common(dev, pd, init_attr, udata,
-				       dev->dev->caps.sqp_start +
-				       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
-				       init_attr->port_num - 1,
-				       qp);
-		if (err) {
-			kfree(sqp);
-			return ERR_PTR(err);
+	for (i = 0, qpn = base_qpn; i < nqps; ++i, ++qpn) {
+		switch (init_attr[i].qp_type) {
+		case IB_QPT_RC:
+		case IB_QPT_UC:
+		case IB_QPT_UD:
+		{
+			qp = kzalloc(sizeof *qp, GFP_KERNEL);
+			if (!qp) {
+				err = -ENOMEM;
+				goto exit_fail;
+			}
+
+			err = create_qp_common(dev, pd, init_attr + i, udata, qpn, qp);
+			if (err) {
+				kfree(qp);
+				goto exit_fail;
+			}
+
+			qp->ibqp.qp_num = qp->mqp.qpn;
+
+			break;
 		}
+		case IB_QPT_SMI:
+		case IB_QPT_GSI:
+		{
+			sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
+			if (!sqp) {
+				err = -ENOMEM;
+				goto exit_fail;
+			}
 
-		qp->port	= init_attr->port_num;
-		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+			qp = &sqp->qp;
 
-		break;
-	}
-	default:
-		/* Don't support raw QPs */
-		return ERR_PTR(-EINVAL);
+			err = create_qp_common(dev, pd, init_attr + i, udata,
+					       dev->dev->caps.sqp_start +
+					       (init_attr[i].qp_type == IB_QPT_SMI ? 0 : 2) +
+					       init_attr[i].port_num - 1,
+					       qp);
+			if (err) {
+				kfree(sqp);
+				goto exit_fail;
+			}
+
+			qp->port	= init_attr[i].port_num;
+			qp->ibqp.qp_num = init_attr[i].qp_type == IB_QPT_SMI ? 0 : 1;
+
+			break;
+		}
+		default:
+			/* Don't support raw QPs */
+			err = -EINVAL;
+			goto exit_fail;
+		}
+		list[i] = &qp->ibqp;
 	}
+	return 0;
+
+exit_fail:
+	for (--i; i >= 0; --i)
+		destroy_qp_common(dev, to_mqp(list[i]), !!pd->uobject);
 
-	return &qp->ibqp;
+	mlx4_qp_release_range(dev->dev, base_qpn, nqps);
+	return err;
+}
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+				struct ib_qp_init_attr *init_attr,
+				struct ib_udata *udata)
+{
+	struct ib_qp *qp;
+	int err;
+
+	err = mlx4_ib_create_qp_range(pd, init_attr, udata, 1, 1, &qp);
+	if (err)
+		return ERR_PTR(err);
+
+	return qp;
 }
 
 int mlx4_ib_destroy_qp(struct ib_qp *qp)
 {
 	struct mlx4_ib_dev *dev = to_mdev(qp->device);
 	struct mlx4_ib_qp *mqp = to_mqp(qp);
+	int qpn = qp->qp_num;
 
 	if (is_qp0(dev, mqp))
 		mlx4_CLOSE_PORT(dev->dev, mqp->port);
 
 	destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+	if (qpn >= dev->dev->caps.sqp_start + 8)
+		mlx4_qp_release_range(dev->dev, qpn, 1);
 
 	if (is_sqp(dev, mqp))
 		kfree(to_msqp(mqp));
@@ -884,6 +924,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
 				     (to_mlx4_st(ibqp->qp_type) << 16));
 	context->flags     |= cpu_to_be32(1 << 8); /* DE? */
+	context->flags |= cpu_to_be32(qp->flags & MLX4_IB_QP_RCA ? 1 << 13 : 0);
 
 	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
 		context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -942,18 +983,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_PORT) {
 		if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
 		    !(attr_mask & IB_QP_AV)) {
-			mlx4_set_sched(&context->pri_path, attr->port_num);
+			mlx4_set_sched(&context->path.pri_path, attr->port_num);
 			optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
 		}
 	}
 
 	if (attr_mask & IB_QP_PKEY_INDEX) {
-		context->pri_path.pkey_index = attr->pkey_index;
+		context->path.pri_path.pkey_index = attr->pkey_index;
 		optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
 	}
 
 	if (attr_mask & IB_QP_AV) {
-		if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+		if (mlx4_set_path(dev, &attr->ah_attr, &context->path.pri_path,
 				  attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
 			goto out;
 
@@ -962,7 +1003,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	}
 
 	if (attr_mask & IB_QP_TIMEOUT) {
-		context->pri_path.ackto = attr->timeout << 3;
+		context->path.pri_path.ackto = attr->timeout << 3;
 		optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
 	}
 
@@ -975,12 +1016,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
 			goto out;
 
-		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->path.alt_path,
 				  attr->alt_port_num))
 			goto out;
 
-		context->alt_path.pkey_index = attr->alt_pkey_index;
-		context->alt_path.ackto = attr->alt_timeout << 3;
+		context->path.alt_path.pkey_index = attr->alt_pkey_index;
+		context->path.alt_path.ackto = attr->alt_timeout << 3;
 		optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
 	}
 
@@ -1048,11 +1089,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	    new_state == IB_QPS_RTR  &&
 	    (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
 	     ibqp->qp_type == IB_QPT_UD)) {
-		context->pri_path.sched_queue = (qp->port - 1) << 6;
+		context->path.pri_path.sched_queue = (qp->port - 1) << 6;
 		if (is_qp0(dev, qp))
-			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
+			context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
 		else
-			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+			context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
 	}
 
 	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
@@ -1061,6 +1102,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	else
 		sqd_event = 0;
 
+	if (attr_mask & IB_QP_RCA) {
+		context->path.pri_path.rss.tbl_sz_base_qpn =
+			cpu_to_be32(attr->rca.base_qpn | ilog2(attr->rca.num_qpn) << 24);
+		context->path.pri_path.rss.default_qpn = cpu_to_be32(attr->rca.default_qpn);
+		context->rca.key.flags_hash_fn = cpu_to_be32(MLX4_RCA_TCP_IPV6	|
+							     MLX4_RCA_IPV6	|
+							     MLX4_RCA_TCP_IPV4	|
+							     MLX4_RCA_IPV4);
+		memset(context->rca.key.rca_key, 0, sizeof context->rca.key.rca_key);
+	}
+
 	/*
 	 * Before passing a kernel QP to the HW, make sure that the
 	 * ownership bits of the send queue are set and the SQ
@@ -1182,6 +1234,12 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		goto out;
 	}
 
+	if (attr_mask & IB_QP_RCA) {
+		if ((cur_state != IB_QPS_RESET || new_state != IB_QPS_INIT) &&
+		    (cur_state != IB_QPS_RTS || new_state != IB_QPS_RTS))
+			goto out;
+	}
+
 	err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
 
 out:
@@ -1805,17 +1863,17 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 		to_ib_qp_access_flags(be32_to_cpu(context.params2));
 
 	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
-		to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
-		to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
-		qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+		to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.path.pri_path);
+		to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.path.alt_path);
+		qp_attr->alt_pkey_index = context.path.alt_path.pkey_index & 0x7f;
 		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+	qp_attr->pkey_index = context.path.pri_path.pkey_index & 0x7f;
 	if (qp_attr->qp_state == IB_QPS_INIT)
 		qp_attr->port_num = qp->port;
 	else
-		qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+		qp_attr->port_num = context.path.pri_path.sched_queue & 0x40 ? 2 : 1;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 	qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
@@ -1826,10 +1884,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 		1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
 	qp_attr->min_rnr_timer	    =
 		(be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
-	qp_attr->timeout	    = context.pri_path.ackto >> 3;
+	qp_attr->timeout	    = context.path.pri_path.ackto >> 3;
 	qp_attr->retry_cnt	    = (be32_to_cpu(context.params1) >> 16) & 0x7;
 	qp_attr->rnr_retry	    = (be32_to_cpu(context.params1) >> 13) & 0x7;
-	qp_attr->alt_timeout	    = context.alt_path.ackto >> 3;
+	qp_attr->alt_timeout	    = context.path.alt_path.ackto >> 3;
 
 done:
 	qp_attr->cur_qp_state	     = qp_attr->qp_state;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 1bb2ba4..333afce 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -95,11 +95,22 @@ enum {
 	MLX4_QP_BIT_RIC				= 1 <<	4,
 };
 
+struct mlx4_net_path {
+	u16		reserved;
+	__be16		dmac_hi;
+	__be32		dmac_lo;
+};
+
+struct mlx4_rss_path {
+	__be32		tbl_sz_base_qpn;
+	__be32		default_qpn;
+};
+
 struct mlx4_qp_path {
 	u8			fl;
 	u8			reserved1[2];
 	u8			pkey_index;
-	u8			reserved2;
+	u8			counter_index;
 	u8			grh_mylmc;
 	__be16			rlid;
 	u8			ackto;
@@ -111,8 +122,33 @@ struct mlx4_qp_path {
 	u8			sched_queue;
 	u8			snooper_flags;
 	u8			reserved3[2];
-	u8			counter_index;
-	u8			reserved4[7];
+	union {
+		struct mlx4_net_path	net;
+		struct mlx4_rss_path	rss;
+	};
+};
+
+struct mlx4_addr_path {
+	struct mlx4_qp_path	pri_path;
+	struct mlx4_qp_path	alt_path;
+};
+
+enum {
+	MLX4_RCA_TCP_IPV6	= 1 << 2,
+	MLX4_RCA_IPV6		= 1 << 3,
+	MLX4_RCA_TCP_IPV4	= 1 << 4,
+	MLX4_RCA_IPV4		= 1 << 5,
+	MLX4_HASH_FN_OFF	= 8
+};
+
+struct mlx4_rca_key {
+	__be32		flags_hash_fn;
+	__be32		rca_key[10];
+};
+
+struct mlx4_rca {
+	struct mlx4_qp_path	pri_path;
+	struct mlx4_rca_key	key;
 };
 
 struct mlx4_qp_context {
@@ -125,8 +161,10 @@ struct mlx4_qp_context {
 	__be32			usr_page;
 	__be32			local_qpn;
 	__be32			remote_qpn;
-	struct			mlx4_qp_path pri_path;
-	struct			mlx4_qp_path alt_path;
+	union {
+		struct mlx4_addr_path	path;
+		struct mlx4_rca		rca;
+	};
 	__be32			params1;
 	u32			reserved1;
 	__be32			next_send_psn;
-- 
1.5.6