[ofa-general] [PATCH 4/6 v2] fix pkey change handling and remove the cache

Yosef Etigin yosefe at voltaire.com
Mon May 7 05:58:22 PDT 2007


mthca: cache pkeys and gids

* Use incoming MADs to update the internal cache: PKEY_TABLE MADs update
  the pkey table cache, and GUID_INFO and PORT_INFO MADs update the gid
  table cache (the GUID table and the GID prefix, respectively).  A short
  sketch of the block indexing involved follows this list.
* Modify query_pkey and query_gid to use this cache, which makes them
  non-blocking (see the caller sketch after the "---" separator below).
* In the MLX QP header build and AH read paths, use these functions
  instead of the IB core cache.

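For reference, the cache update keys off the SMP attribute modifier: a
PKEY_TABLE MAD carries a block of IB_SMP_NUM_PKEY_ENTRIES (32) P_Keys and a
GUID_INFO MAD a block of IB_SMP_NUM_GUID_ENTRIES (8) GUIDs, and the low 16
bits of attr_mod select which block of the table the MAD describes.  A
minimal sketch of that indexing, using the constants this patch adds to
<rdma/ib_smi.h> (the helper name is made up for illustration and is not part
of the patch):

	#include <rdma/ib_smi.h>

	/*
	 * Block N of the P_Key table starts at table index
	 * N * IB_SMP_NUM_PKEY_ENTRIES; mthca_cache_update() below does the
	 * equivalent arithmetic (in bytes) before copying the 64-byte SMP
	 * data block into the cached table.
	 */
	static unsigned int pkey_block_start(__be32 attr_mod)
	{
		unsigned int block = be32_to_cpu(attr_mod) & 0xffff;

		return block * IB_SMP_NUM_PKEY_ENTRIES;
	}

The GUID_INFO case is identical, with IB_SMP_NUM_GUID_ENTRIES and u64 entries.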

Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
---
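Note on the non-blocking claim: query_pkey/query_gid now only take a per-port
read lock, so the verbs can be called from contexts that cannot sleep, which
is what the build_mlx_header() change relies on.  A hypothetical caller
(illustration only; "header", "port" and "pkey_index" are placeholders):

	u16 pkey;

	/* safe under a spinlock: the lookup only takes the cache read lock */
	if (!ibdev->query_pkey(ibdev, port, pkey_index, &pkey))
		header.bth.pkey = cpu_to_be16(pkey);
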
 drivers/infiniband/hw/mthca/mthca_av.c       |    3 
 drivers/infiniband/hw/mthca/mthca_dev.h      |   20 +
 drivers/infiniband/hw/mthca/mthca_mad.c      |    3 
 drivers/infiniband/hw/mthca/mthca_provider.c |  284 ++++++++++++++++++++-------
 drivers/infiniband/hw/mthca/mthca_qp.c       |    5 
 include/rdma/ib_smi.h                        |    4 
 6 files changed, 245 insertions(+), 74 deletions(-)

Index: b/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_dev.h	2007-05-07 14:28:47.574320783 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h	2007-05-07 14:28:55.365929626 +0300
@@ -49,6 +49,8 @@
 
 #include <asm/semaphore.h>
 
+#include <rdma/ib_smi.h>
+
 #include "mthca_provider.h"
 #include "mthca_doorbell.h"
 
@@ -287,6 +289,19 @@ struct mthca_catas_err {
 	struct list_head	list;
 };
 
+struct mthca_pkey_cache {
+	rwlock_t lock;
+	int      table_len;
+	u16      table[0];
+};
+
+struct mthca_gid_cache {
+	rwlock_t lock;
+	u64      gid_prefix;
+	int      table_len;
+	u64      guid_table[0];
+};
+
 extern struct mutex mthca_device_mutex;
 
 struct mthca_dev {
@@ -360,6 +375,9 @@ struct mthca_dev {
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
 	u8                    rate[MTHCA_MAX_PORTS];
+
+	struct mthca_pkey_cache *pkey_cache[MTHCA_MAX_PORTS];
+	struct mthca_gid_cache *gid_cache[MTHCA_MAX_PORTS];
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
@@ -585,6 +603,8 @@ int mthca_process_mad(struct ib_device *
 int mthca_create_agents(struct mthca_dev *dev);
 void mthca_free_agents(struct mthca_dev *dev);
 
+int mthca_cache_update(struct mthca_dev *mdev, u8 port_num, struct ib_mad *mad);
+
 static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
 {
 	return container_of(ibdev, struct mthca_dev, ib_dev);
Index: b/drivers/infiniband/hw/mthca/mthca_mad.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_mad.c	2007-05-07 14:28:47.574320783 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c	2007-05-07 14:28:55.366929448 +0300
@@ -139,6 +139,9 @@ static void smp_snoop(struct ib_device *
 			event.element.port_num = port_num;
 			ib_dispatch_event(&event);
 		}
+
+		/* update the cache with the incoming MAD */
+		mthca_cache_update(to_mdev(ibdev), port_num, mad);
 	}
 }
 
Index: b/drivers/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_provider.c	2007-05-07 14:28:47.575320605 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c	2007-05-07 14:28:55.367929269 +0300
@@ -243,87 +243,44 @@ out:
 static int mthca_query_pkey(struct ib_device *ibdev,
 			    u8 port, u16 index, u16 *pkey)
 {
-	struct ib_smp *in_mad  = NULL;
-	struct ib_smp *out_mad = NULL;
-	int err = -ENOMEM;
-	u8 status;
+	struct mthca_dev *mdev;
+	struct mthca_pkey_cache *pkey_cache;
+	unsigned long flags;
 
-	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-	if (!in_mad || !out_mad)
-		goto out;
+	mdev = to_mdev(ibdev);
 
-	init_query_mad(in_mad);
-	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
-	in_mad->attr_mod = cpu_to_be32(index / 32);
-
-	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
-			    port, NULL, NULL, in_mad, out_mad,
-			    &status);
-	if (err)
-		goto out;
-	if (status) {
-		err = -EINVAL;
-		goto out;
+	if (port < 1 || port > mdev->ib_dev.phys_port_cnt ||
+	    index >= mdev->pkey_cache[port - 1]->table_len) {
+		return -EINVAL;
 	}
 
-	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
-
- out:
-	kfree(in_mad);
-	kfree(out_mad);
-	return err;
+	pkey_cache = mdev->pkey_cache[port - 1];
+	read_lock_irqsave(&pkey_cache->lock, flags);
+	*pkey = be16_to_cpu(pkey_cache->table[index]);
+	read_unlock_irqrestore(&pkey_cache->lock, flags);
+	return 0;
 }
 
 static int mthca_query_gid(struct ib_device *ibdev, u8 port,
 			   int index, union ib_gid *gid)
 {
-	struct ib_smp *in_mad  = NULL;
-	struct ib_smp *out_mad = NULL;
-	int err = -ENOMEM;
-	u8 status;
-
-	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-	if (!in_mad || !out_mad)
-		goto out;
+	struct mthca_dev *mdev;
+	unsigned long flags;
+	struct mthca_gid_cache *gid_cache;
 
-	init_query_mad(in_mad);
-	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
-	in_mad->attr_mod = cpu_to_be32(port);
+	mdev = to_mdev(ibdev);
 
-	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
-			    port, NULL, NULL, in_mad, out_mad,
-			    &status);
-	if (err)
-		goto out;
-	if (status) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	memcpy(gid->raw, out_mad->data + 8, 8);
-
-	init_query_mad(in_mad);
-	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
-	in_mad->attr_mod = cpu_to_be32(index / 8);
-
-	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
-			    port, NULL, NULL, in_mad, out_mad,
-			    &status);
-	if (err)
-		goto out;
-	if (status) {
-		err = -EINVAL;
-		goto out;
+	if (port < 1 || port > mdev->ib_dev.phys_port_cnt || index < 0 ||
+	    index >= mdev->gid_cache[port - 1]->table_len) {
+		return -EINVAL;
 	}
 
-	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
-
- out:
-	kfree(in_mad);
-	kfree(out_mad);
-	return err;
+	gid_cache = mdev->gid_cache[port - 1];
+	read_lock_irqsave(&gid_cache->lock, flags);
+	memcpy(gid->raw, &gid_cache->gid_prefix, 8);
+	memcpy(gid->raw + 8, gid_cache->guid_table + index, 8);
+	read_unlock_irqrestore(&gid_cache->lock, flags);
+	return 0;
 }
 
 static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
@@ -1259,6 +1216,189 @@ out:
 	return err;
 }
 
+/* update a cached table */
+static int mthca_cache_update_table(struct mthca_dev *mdev,
+			void *table, int table_size,
+			void *data, int data_size, int table_offset)
+{
+
+	/* make sure the offset is valid */
+	if (table_size < table_offset + data_size) {
+		mthca_warn(mdev, "cache table offset out of range - ignoring\n");
+		return -EINVAL;
+	}
+
+	/* update the cache */
+	memcpy((u8 *) table + table_offset, data, data_size);
+
+	return 0;
+}
+
+/* update the cache with mad */
+int mthca_cache_update(struct mthca_dev *mdev, u8 port_num, struct ib_mad *mad)
+{
+	struct mthca_pkey_cache *pkey_cache;
+	struct mthca_gid_cache *gid_cache;
+	unsigned long flags;
+	struct ib_smp *smp;
+	unsigned int offset;
+	int ret = 0;
+
+	smp = (struct ib_smp *) mad;
+	offset = be32_to_cpu(smp->attr_mod) & 0xffff;
+	/* TODO: validate port_num before indexing the per-port caches */
+
+	switch (mad->mad_hdr.attr_id) {
+	case IB_SMP_ATTR_PKEY_TABLE:
+		mthca_dbg(mdev, "port %d: pkey table change\n", port_num);
+		pkey_cache = mdev->pkey_cache[port_num - 1];
+		write_lock_irqsave(&pkey_cache->lock, flags);
+		mthca_cache_update_table(mdev,
+				pkey_cache->table, pkey_cache->table_len * sizeof (u16),
+				smp->data, IB_SMP_NUM_PKEY_ENTRIES * sizeof (u16),
+				offset * IB_SMP_NUM_PKEY_ENTRIES * sizeof (u16));
+		write_unlock_irqrestore(&pkey_cache->lock, flags);
+		break;
+
+	case IB_SMP_ATTR_GUID_INFO:
+		mthca_dbg(mdev, "port %d: guid table change\n", port_num);
+		gid_cache = mdev->gid_cache[port_num - 1];
+		write_lock_irqsave(&gid_cache->lock, flags);
+		mthca_cache_update_table(mdev,
+				gid_cache->guid_table, gid_cache->table_len * sizeof (u64),
+				smp->data, IB_SMP_NUM_GUID_ENTRIES * sizeof (u64),
+				offset * IB_SMP_NUM_GUID_ENTRIES * sizeof (u64));
+		write_unlock_irqrestore(&gid_cache->lock, flags);
+		break;
+
+	case IB_SMP_ATTR_PORT_INFO:
+		mthca_dbg(mdev, "port %d: port info change\n", port_num);
+		gid_cache = mdev->gid_cache[port_num - 1];
+		write_lock_irqsave(&gid_cache->lock, flags);
+		gid_cache->gid_prefix = *(u64 *) (smp->data + 8);
+		write_unlock_irqrestore(&gid_cache->lock, flags);
+		break;
+	}
+	return ret;
+}
+
+static int mthca_cache_init(struct mthca_dev *mdev)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	struct mthca_pkey_cache *pkey_cache;
+	struct mthca_gid_cache *gid_cache;
+	unsigned int i, offset;
+	u8 status;
+	int err = -ENOMEM;
+
+	memset(mdev->pkey_cache, 0, sizeof mdev->pkey_cache);
+
+	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+
+	if (!in_mad || !out_mad)
+		goto out;
+
+	for (i = 0; i < mdev->ib_dev.phys_port_cnt; ++i) {
+
+		unsigned int port = i + 1;
+
+		/* allocate pkey cache */
+		mdev->pkey_cache[i] = pkey_cache = kmalloc(sizeof *pkey_cache +
+				mdev->limits.pkey_table_len * sizeof(u16), GFP_KERNEL);
+		if (!pkey_cache)
+			goto out;
+
+		rwlock_init(&pkey_cache->lock);
+
+		/* populate pkey table */
+		pkey_cache->table_len = mdev->limits.pkey_table_len;
+		for (offset = 0; offset < pkey_cache->table_len;
+				offset += IB_SMP_NUM_PKEY_ENTRIES) {
+
+			memset(in_mad, 0, sizeof *in_mad);
+			init_query_mad(in_mad);
+			in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
+			in_mad->attr_mod = cpu_to_be32(offset / IB_SMP_NUM_PKEY_ENTRIES);
+
+			err = mthca_MAD_IFC(mdev, 1, 1,
+				    port, NULL, NULL, in_mad, out_mad,
+				    &status);
+
+			if (err || status)
+				break;
+
+			mthca_cache_update_table(mdev,
+					pkey_cache->table, pkey_cache->table_len * sizeof (u16),
+					out_mad->data, IB_SMP_NUM_PKEY_ENTRIES * sizeof (u16),
+					offset * sizeof (u16));
+		}
+
+		/* allocate gid cache */
+		mdev->gid_cache[i] = gid_cache = kmalloc(sizeof *gid_cache +
+				mdev->limits.gid_table_len * sizeof(u64), GFP_KERNEL);
+		if (!gid_cache)
+			goto out;
+
+		rwlock_init(&gid_cache->lock);
+
+		/* populate guid table */
+		gid_cache->table_len = mdev->limits.gid_table_len;
+		for (offset = 0; offset < gid_cache->table_len;
+				offset += IB_SMP_NUM_GUID_ENTRIES) {
+
+			memset(in_mad, 0, sizeof *in_mad);
+			init_query_mad(in_mad);
+			in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
+			in_mad->attr_mod = cpu_to_be32(offset / IB_SMP_NUM_GUID_ENTRIES);
+
+			err = mthca_MAD_IFC(mdev, 1, 1,
+				    port, NULL, NULL, in_mad, out_mad,
+				    &status);
+
+			if (err || status)
+				break;
+
+			mthca_cache_update_table(mdev,
+					gid_cache->guid_table, gid_cache->table_len * sizeof (u64),
+					out_mad->data, IB_SMP_NUM_GUID_ENTRIES * sizeof (u64),
+					offset * sizeof (u64));
+		}
+
+		/* read gid prefix */
+		init_query_mad(in_mad);
+		in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+		in_mad->attr_mod = cpu_to_be32(port);
+
+		err = mthca_MAD_IFC(mdev, 1, 1,
+			    port, NULL, NULL, in_mad, out_mad,
+			    &status);
+
+		if (err || status)
+			continue;
+
+		mdev->gid_cache[i]->gid_prefix = *(u64 *) (out_mad->data + 8);
+	}
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+/*
+ * Destroy the cache
+ */
+static void mthca_cache_destroy(struct mthca_dev *mdev)
+{
+	int i;
+	for (i = 0; i < mdev->ib_dev.phys_port_cnt; ++i) {
+		kfree(mdev->pkey_cache[i]);
+		kfree(mdev->gid_cache[i]);
+	}
+}
+
 int mthca_register_device(struct mthca_dev *dev)
 {
 	int ret;
@@ -1365,6 +1505,12 @@ int mthca_register_device(struct mthca_d
 
 	mutex_init(&dev->cap_mask_mutex);
 
+	ret = mthca_cache_init(dev);
+	if (ret) {
+		mthca_cache_destroy(dev);
+		return ret;
+	}
+
 	ret = ib_register_device(&dev->ib_dev);
 	if (ret)
 		return ret;
@@ -1387,4 +1533,6 @@ void mthca_unregister_device(struct mthc
 {
 	mthca_stop_catas_poll(dev);
 	ib_unregister_device(&dev->ib_dev);
+	mthca_cache_destroy(dev);
 }
+
Index: b/include/rdma/ib_smi.h
===================================================================
--- a/include/rdma/ib_smi.h	2007-05-07 14:28:47.576320426 +0300
+++ b/include/rdma/ib_smi.h	2007-05-07 14:28:55.367929269 +0300
@@ -42,7 +42,9 @@
 #include <rdma/ib_mad.h>
 
 #define IB_SMP_DATA_SIZE			64
-#define IB_SMP_MAX_PATH_HOPS			64
+#define IB_SMP_MAX_PATH_HOPS		64
+#define IB_SMP_NUM_PKEY_ENTRIES		32
+#define IB_SMP_NUM_GUID_ENTRIES		8
 
 struct ib_smp {
 	u8	base_version;
Index: b/drivers/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_qp.c	2007-05-07 14:28:47.575320605 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c	2007-05-07 14:28:55.369928912 +0300
@@ -41,7 +41,6 @@
 #include <asm/io.h>
 
 #include <rdma/ib_verbs.h>
-#include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
 
 #include "mthca_dev.h"
@@ -1485,10 +1484,10 @@ static int build_mlx_header(struct mthca
 		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
 	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
 	if (!sqp->qp.ibqp.qp_num)
-		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+		dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port,
 				   sqp->pkey_index, &pkey);
 	else
-		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+		dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port,
 				   wr->wr.ud.pkey_index, &pkey);
 	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
Index: b/drivers/infiniband/hw/mthca/mthca_av.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_av.c	2007-05-07 14:28:47.575320605 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_av.c	2007-05-07 14:28:55.369928912 +0300
@@ -37,7 +37,6 @@
 #include <linux/slab.h>
 
 #include <rdma/ib_verbs.h>
-#include <rdma/ib_cache.h>
 
 #include "mthca_dev.h"
 
@@ -279,7 +278,7 @@ int mthca_read_ah(struct mthca_dev *dev,
 			(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
 		header->grh.flow_label    =
 			ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
-		ib_get_cached_gid(&dev->ib_dev,
+		dev->ib_dev.query_gid(&dev->ib_dev,
 				  be32_to_cpu(ah->av->port_pd) >> 24,
 				  ah->av->gid_index % dev->limits.gid_table_len,
 				  &header->grh.source_gid);
