[ofa-general] [PATCH] infiniband/hw/mthca: Add optional router mode initialization

swelch at systemfabricworks.com swelch at systemfabricworks.com
Fri Aug 31 07:53:06 PDT 2007



   This patch allows the kernel mthca driver to optionally initialize the
   mthca devices in router mode.  Router mode is enabled at module load by
   setting the module parameter "router_mode=1".  This setting acts on the
   device as a whole.  In this mode the mthca device(s) in the system will be
   brought up with hardware transport operations disabled, and all packets
   that match each port's LID/LMC, as well as DR SMPs, will be delivered to
   one of 256 router QPs (1 per TClass) along with their packet headers.
   Without "router_mode=1" specified, the device operation will not be altered.

   When in mthca router mode, the SMI/GSI QPs may still be used to send
   packets, but all ingress packets will be delivered to the consumer that
   creates the first 256 QPs on that device.  Additional QPs may be created
   for sending.  It is the responsibility of this consumer to deliver ingress
   SMP packets and non-forwarded ingress GMP packets to the local management
   stack.

   This patch provides an API-friendly method for interfacing with the driver
   when running in router mode.  Additions to the IB Verbs API are not
   required to create and use the QPs.  A subsequent patch will provide a
   method for forwarding packets via libmthca running on top of these
   changes.

   Roland, these changes certainly are not the most elegant way to
   implement mthca router mode, but they are relatively non-intrusive and
   will provide a mechanism for experimentation with software IB routing.
   I suspect you may find a better solution.

Signed-off-by: Steve Welch <swelch at systemfabricworks.com>
---
 drivers/infiniband/hw/mthca/mthca_cmd.c      |    5 +++
 drivers/infiniband/hw/mthca/mthca_dev.h      |    9 ++++-
 drivers/infiniband/hw/mthca/mthca_main.c     |   47 ++++++++++++++++++++++++++
 drivers/infiniband/hw/mthca/mthca_provider.c |    3 +-
 drivers/infiniband/hw/mthca/mthca_qp.c       |   40 +++++++++++++++++----
 5 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index acc9589..d6039c7 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1272,6 +1272,7 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 
 #define INIT_HCA_IN_SIZE             	 0x200
 #define INIT_HCA_FLAGS1_OFFSET           0x00c
+#define INIT_HCA_ROUTER_OFFSET           0x010
 #define INIT_HCA_FLAGS2_OFFSET           0x014
 #define INIT_HCA_QPC_OFFSET          	 0x020
 #define  INIT_HCA_QPC_BASE_OFFSET    	 (INIT_HCA_QPC_OFFSET + 0x10)
@@ -1318,6 +1319,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
 		MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
 
+	if (dev->mthca_flags & MTHCA_FLAG_ROUTER_OPT)
+		*(inbox + INIT_HCA_ROUTER_OFFSET / 4) =
+		         cpu_to_be32(0x80000000 + dev->rqp_base );
+
 #if defined(__LITTLE_ENDIAN)
 	*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
 #elif defined(__BIG_ENDIAN)
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9bae3cc..5499360 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -66,7 +66,8 @@ enum {
 	MTHCA_FLAG_FMR        = 1 << 6,
 	MTHCA_FLAG_MEMFREE    = 1 << 7,
 	MTHCA_FLAG_PCIE       = 1 << 8,
-	MTHCA_FLAG_SINAI_OPT  = 1 << 9
+	MTHCA_FLAG_SINAI_OPT  = 1 << 9,
+	MTHCA_FLAG_ROUTER_OPT = 1 << 10
 };
 
 enum {
@@ -93,6 +94,11 @@ enum {
 };
 
 enum {
+	MTHCA_RQP_BASE_ALIGNMENT = 256,
+	MTHCA_RQP_NUM_INGRESS    = 256
+};
+
+enum {
 	MTHCA_EQ_CMD,
 	MTHCA_EQ_ASYNC,
 	MTHCA_EQ_COMP,
@@ -360,6 +366,7 @@ struct mthca_dev {
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
 	u8                    rate[MTHCA_MAX_PORTS];
+	int                   rqp_base;
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 76fed75..405f4b4 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,6 +80,10 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+static int router_mode = 0;
+module_param(router_mode, int, 0444);
+MODULE_PARM_DESC(router_mode, "initialize the mthca driver in router mode");
+
 DEFINE_MUTEX(mthca_device_mutex);
 
 #define MTHCA_DEFAULT_NUM_QP            (1 << 16)
@@ -292,6 +296,26 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
 	return 0;
 }
 
+static int mthca_init_rqp_base(struct mthca_dev *mdev)
+{
+	int  err = 0;
+	int  i = mdev->limits.reserved_qps + MTHCA_MAX_PORTS * 2 + 1;
+
+	/*
+	 * Determine the first available router QP number. The
+	 * base must be aligned on a 256 boundary.
+	 */
+	for (mdev->rqp_base=MTHCA_RQP_BASE_ALIGNMENT; mdev->rqp_base < i;
+	     mdev->rqp_base += MTHCA_RQP_BASE_ALIGNMENT)
+		;
+
+	if (mdev->rqp_base + MTHCA_RQP_NUM_INGRESS > mdev->limits.num_qps) {
+		mdev->rqp_base = 0;
+		err = -EINVAL;
+	}
+	return err;
+}
+
 static int mthca_init_tavor(struct mthca_dev *mdev)
 {
 	u8 status;
@@ -350,6 +374,15 @@ static int mthca_init_tavor(struct mthca_dev *mdev)
 	if (err < 0)
 		goto err_disable;
 
+	if (mdev->mthca_flags & MTHCA_FLAG_ROUTER_OPT) {
+		err = mthca_init_rqp_base(mdev);
+		if (err) {
+			mthca_err(mdev, "Insufficient router QP resources, "
+					"aborting.\n");
+			goto err_disable;
+		}
+	}
+
 	err = mthca_INIT_HCA(mdev, &init_hca, &status);
 	if (err) {
 		mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
@@ -684,6 +717,15 @@ static int mthca_init_arbel(struct mthca_dev *mdev)
 	if (err)
 		goto err_stop_fw;
 
+	if (mdev->mthca_flags & MTHCA_FLAG_ROUTER_OPT) {
+		err = mthca_init_rqp_base(mdev);
+		if (err) {
+			mthca_err(mdev, "Insufficient router QP resources, "
+					"aborting.\n");
+			goto err_disable;
+		}
+	}
+
 	err = mthca_INIT_HCA(mdev, &init_hca, &status);
 	if (err) {
 		mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
@@ -1104,6 +1146,11 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
+	if (router_mode) {
+		printk(KERN_INFO PFX "Initializing in MTHCA router mode\n" );
+		mdev->mthca_flags |= MTHCA_FLAG_ROUTER_OPT;
+	}
+
 	/*
 	 * Now reset the HCA before we touch the PCI capabilities or
 	 * attempt a firmware command, since a boot ROM may have left
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6bcde1c..5e81039 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1309,7 +1309,8 @@ int mthca_register_device(struct mthca_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
+	dev->ib_dev.node_type = dev->mthca_flags & MTHCA_FLAG_ROUTER_OPT ?
+					RDMA_NODE_IB_ROUTER : RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.num_comp_vectors     = 1;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index df01b20..185bcff 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1298,12 +1298,19 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 {
 	int err;
 
-	switch (type) {
-	case IB_QPT_RC: qp->transport = RC; break;
-	case IB_QPT_UC: qp->transport = UC; break;
-	case IB_QPT_UD: qp->transport = UD; break;
-	default: return -EINVAL;
-	}
+	/*
+	 * In router mode no hardware transport endpoints are supported,
+	 * always set the QP to actually use the MLX transport.
+	 */
+	if (dev->mthca_flags & MTHCA_FLAG_ROUTER_OPT)
+		qp->transport = MLX;
+	else
+		switch (type) {
+		case IB_QPT_RC: qp->transport = RC; break;
+		case IB_QPT_UC: qp->transport = UC; break;
+		case IB_QPT_UD: qp->transport = UD; break;
+		default: return -EINVAL;
+		}
 
 	err = mthca_set_qp_size(dev, cap, pd, qp);
 	if (err)
@@ -1313,6 +1320,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 	if (qp->qpn == -1)
 		return -ENOMEM;
 
+	/*
+	 * Router mode QPs must use the exact QPN number range dedicated
+	 * to this function. Take out any "top" portion inserted by the
+	 * allocation routine.
+	 */
+	if (dev->mthca_flags & MTHCA_FLAG_ROUTER_OPT)
+		qp->qpn &= dev->qp_table.alloc.max - 1;
+
 	/* initialize port to zero for error-catching. */
 	qp->port = 0;
 
@@ -2291,6 +2306,7 @@ int mthca_init_qp_table(struct mthca_dev *dev)
 	int err;
 	u8 status;
 	int i;
+	int reserve_qp;
 
 	spin_lock_init(&dev->qp_table.lock);
 
@@ -2299,11 +2315,19 @@ int mthca_init_qp_table(struct mthca_dev *dev)
 	 * special QP for port 1 has to be even, so round up.
 	 */
 	dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
+
+	/*
+	 * In router mode we also reserve all of the QPs between the
+	 * special QP and the next multiple of 256 (the router QP base).
+	 */
+	reserve_qp = dev->mthca_flags & MTHCA_FLAG_ROUTER_OPT ?
+			dev->rqp_base :
+			dev->qp_table.sqp_start + MTHCA_MAX_PORTS * 2;
+
 	err = mthca_alloc_init(&dev->qp_table.alloc,
 			       dev->limits.num_qps,
 			       (1 << 24) - 1,
-			       dev->qp_table.sqp_start +
-			       MTHCA_MAX_PORTS * 2);
+			       reserve_qp);
 	if (err)
 		return err;
 



More information about the general mailing list