[ofa-general] [PATCH 3/3] mthca: provider-level caching of pkeys

Yosef Etigin yosefe at voltaire.com
Wed May 2 08:57:50 PDT 2007


Add provider-level caching of pkeys to mthca

* have the driver intercept SMPs that are pkey table notifications,
  and update its internal cache with the new values.
* modify query_pkey to use this cache instead of doing a blocking HW
  call
* when building the header for an MLX QP, use this cache


Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
---
 drivers/infiniband/hw/mthca/mthca_dev.h      |   12 +
 drivers/infiniband/hw/mthca/mthca_mad.c      |    5 
 drivers/infiniband/hw/mthca/mthca_provider.c |  167 +++++++++++++++++++++++----
 drivers/infiniband/hw/mthca/mthca_qp.c       |    5 
 include/rdma/ib_smi.h                        |    1 
 5 files changed, 163 insertions(+), 27 deletions(-)

Index: b/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_dev.h	2007-05-02 17:47:52.931912600 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h	2007-05-02 17:48:31.525038376 +0300
@@ -49,6 +49,8 @@
 
 #include <asm/semaphore.h>
 
+#include <rdma/ib_smi.h>
+
 #include "mthca_provider.h"
 #include "mthca_doorbell.h"
 
@@ -287,6 +289,11 @@ struct mthca_catas_err {
 	struct list_head	list;
 };
 
+struct mthca_pkey_cache {
+	int		table_len;	/* number of entries in table[] */
+	u16		table[];	/* cached pkeys, in CPU byte order */
+};
+
 extern struct mutex mthca_device_mutex;
 
 struct mthca_dev {
@@ -360,6 +367,9 @@ struct mthca_dev {
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
 	u8                    rate[MTHCA_MAX_PORTS];
+
+	rwlock_t               pkey_cache_lock;
+	struct mthca_pkey_cache *pkey_cache[MTHCA_MAX_PORTS];
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
@@ -585,6 +595,8 @@ int mthca_process_mad(struct ib_device *
 int mthca_create_agents(struct mthca_dev *dev);
 void mthca_free_agents(struct mthca_dev *dev);
 
+int mthca_cache_update(struct mthca_dev *mdev, struct ib_smp *smp, u8 port_num);
+
 static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
 {
 	return container_of(ibdev, struct mthca_dev, ib_dev);
Index: b/drivers/infiniband/hw/mthca/mthca_mad.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_mad.c	2007-05-02 17:47:53.067888380 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c	2007-05-02 17:48:31.525038376 +0300
@@ -134,6 +134,11 @@ static void smp_snoop(struct ib_device *
 		}
 
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+
+			/* update pkey cache from a snnoped MAD */
+			mthca_dbg(to_mdev(ibdev), "pkey change at port %d\n", port_num);
+			mthca_cache_update(to_mdev(ibdev), (struct ib_smp*) mad, port_num);
+
 			event.device           = ibdev;
 			event.event            = IB_EVENT_PKEY_CHANGE;
 			event.element.port_num = port_num;
Index: b/drivers/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_provider.c	2007-05-02 17:47:52.996901024 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c	2007-05-02 17:48:31.526038198 +0300
@@ -243,36 +243,27 @@ out:
 static int mthca_query_pkey(struct ib_device *ibdev,
 			    u8 port, u16 index, u16 *pkey)
 {
-	struct ib_smp *in_mad  = NULL;
-	struct ib_smp *out_mad = NULL;
-	int err = -ENOMEM;
-	u8 status;
+	struct mthca_dev *mdev = to_mdev(ibdev);
+	unsigned long flags;	/* the irqsave lock macros need unsigned long */
 
-	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-	if (!in_mad || !out_mad)
-		goto out;
+	if (port < 1 || port > mdev->ib_dev.phys_port_cnt)
+		return -EINVAL;	/* must not index pkey_cache[] with a bad port */
 
-	init_query_mad(in_mad);
-	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
-	in_mad->attr_mod = cpu_to_be32(index / 32);
+	/* Answer from the per-port cache (filled by mthca_cache_update()) */
+	read_lock_irqsave(&mdev->pkey_cache_lock, flags);
+	if (!mdev->pkey_cache[port - 1] ||	/* port has no cache */
+	    index >= mdev->pkey_cache[port - 1]->table_len) {
+		/* release the lock before logging */
+		read_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
 
-	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
-			    port, NULL, NULL, in_mad, out_mad,
-			    &status);
-	if (err)
-		goto out;
-	if (status) {
-		err = -EINVAL;
-		goto out;
+		mthca_warn(mdev, "pkey request at %d[%d] is out of range\n", port, index);
+		return -EINVAL;
 	}
 
-	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
+	*pkey = mdev->pkey_cache[port - 1]->table[index];
 
- out:
-	kfree(in_mad);
-	kfree(out_mad);
-	return err;
+	read_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
+	return 0;
 }
 
 static int mthca_query_gid(struct ib_device *ibdev, u8 port,
@@ -1259,6 +1250,127 @@ out:
 	return err;
 }
 
+/*
+ * Initialize the pkey cache.
+ *
+ * For each physical port: read pkey_tbl_len via mthca_query_port(), allocate
+ * a zeroed cache of that size and fill it from PKeyTable queries, one block
+ * of IB_SMP_NUM_PKEY_ENTRIES per query.  Returns 0 or a negative errno; after
+ * a failure the caller frees any allocated caches with mthca_cache_destroy().
+ */
+static int mthca_cache_init(struct mthca_dev *mdev)
+{
+	struct ib_port_attr *tprops;
+	struct ib_smp *in_mad, *out_mad;
+	unsigned int tbl_len, blk, i;
+	int err = -ENOMEM;
+	u8 status;
+
+	rwlock_init(&mdev->pkey_cache_lock);
+
+	mthca_dbg(mdev, "setting up PKey cache\n");
+
+	memset(mdev->pkey_cache, 0, sizeof mdev->pkey_cache);
+
+	tprops  = kmalloc(sizeof *tprops, GFP_KERNEL);
+	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+	if (!tprops || !in_mad || !out_mad)
+		goto out;
+
+	err = 0;	/* allocations succeeded; also covers a zero-port loop */
+	for (i = 0; i < mdev->ib_dev.phys_port_cnt; ++i) {
+		/* find out how many pkeys this port holds */
+		err = mthca_query_port(&mdev->ib_dev, i + 1, tprops);
+		if (err)
+			goto out;
+
+		/* zeroed, so entries that no reply fills read as 0 */
+		tbl_len = tprops->pkey_tbl_len;
+		mdev->pkey_cache[i] = kzalloc(sizeof(struct mthca_pkey_cache) +
+					      tbl_len * sizeof(u16), GFP_KERNEL);
+		if (!mdev->pkey_cache[i]) {
+			err = -ENOMEM;	/* err is 0 here after the query */
+			goto out;
+		}
+		mdev->pkey_cache[i]->table_len = tbl_len;
+
+		/* count blocks upwards so the unsigned counter cannot wrap */
+		for (blk = 0; blk * IB_SMP_NUM_PKEY_ENTRIES < tbl_len; ++blk) {
+			memset(in_mad, 0, sizeof *in_mad);
+			init_query_mad(in_mad);
+			in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
+			in_mad->attr_mod = cpu_to_be32(blk);
+
+			err = mthca_MAD_IFC(mdev, 1, 1, i + 1, NULL, NULL,
+					    in_mad, out_mad, &status);
+			if (!err && status)
+				err = -EINVAL;	/* nonzero status is a failure too */
+			if (err)
+				goto out;
+
+			/* best effort: a block it rejects is left zeroed */
+			mthca_cache_update(mdev, out_mad, i + 1);
+		}
+	}
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	kfree(tprops);
+	return err;
+}
+
+/*
+ * Free the pkey cache of every physical port.
+ */
+static void mthca_cache_destroy(struct mthca_dev *mdev)
+{
+	int port = mdev->ib_dev.phys_port_cnt;
+
+	while (port-- > 0)
+		kfree(mdev->pkey_cache[port]);
+}
+
+/*
+ * Store one block of a PKeyTable SMP (a snooped MAD, or the reply to our own
+ * query) in the cache of port_num.  Returns 0, or -EINVAL if nothing is stored.
+ */
+int mthca_cache_update(struct mthca_dev *mdev, struct ib_smp *smp, u8 port_num)
+{
+	struct mthca_pkey_cache *cache;
+	__be16 *pkeys = (__be16 *) smp->data;	/* entries arrive big-endian */
+	unsigned int first, count, i;
+	unsigned long flags;
+
+	/* the low 16 bits of attr_mod select the block of entries */
+	first = (be32_to_cpu(smp->attr_mod) & 0xFFFF) * IB_SMP_NUM_PKEY_ENTRIES;
+	mthca_dbg(mdev, "port %d: new pkey table at offset %u\n",
+		  port_num, first);
+
+	/* never index pkey_cache[] with an unchecked port number */
+	if (port_num < 1 || port_num > mdev->ib_dev.phys_port_cnt)
+		return -EINVAL;
+
+	write_lock_irqsave(&mdev->pkey_cache_lock, flags);
+	cache = mdev->pkey_cache[port_num - 1];
+	if (!cache || first >= (unsigned int) cache->table_len) {
+		write_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
+		mthca_warn(mdev, "pkey table out of range - ignoring\n");
+		return -EINVAL;
+	}
+
+	/* the last block may hold fewer than IB_SMP_NUM_PKEY_ENTRIES entries */
+	count = cache->table_len - first;
+	if (count > IB_SMP_NUM_PKEY_ENTRIES)
+		count = IB_SMP_NUM_PKEY_ENTRIES;
+	for (i = 0; i < count; ++i)
+		cache->table[first + i] = be16_to_cpu(pkeys[i]);
+
+	write_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
+	return 0;
+}
+
 int mthca_register_device(struct mthca_dev *dev)
 {
 	int ret;
@@ -1365,6 +1477,12 @@ int mthca_register_device(struct mthca_d
 
 	mutex_init(&dev->cap_mask_mutex);
 
+	ret = mthca_cache_init(dev);
+	if (ret) {
+		mthca_cache_destroy(dev);
+		return ret;
+	}
+
 	ret = ib_register_device(&dev->ib_dev);
 	if (ret)
 		return ret;
@@ -1387,4 +1505,5 @@ void mthca_unregister_device(struct mthc
 {
 	mthca_stop_catas_poll(dev);
 	ib_unregister_device(&dev->ib_dev);
+	mthca_cache_destroy(dev);
 }
Index: b/include/rdma/ib_smi.h
===================================================================
--- a/include/rdma/ib_smi.h	2007-05-02 17:47:12.741071381 +0300
+++ b/include/rdma/ib_smi.h	2007-05-02 17:48:31.527038020 +0300
@@ -43,6 +43,7 @@
 
 #define IB_SMP_DATA_SIZE			64
 #define IB_SMP_MAX_PATH_HOPS			64
+#define IB_SMP_NUM_PKEY_ENTRIES		32
 
 struct ib_smp {
 	u8	base_version;
Index: b/drivers/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_qp.c	2007-05-02 17:48:30.752176039 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c	2007-05-02 17:48:31.528037842 +0300
@@ -41,7 +41,6 @@
 #include <asm/io.h>
 
 #include <rdma/ib_pack.h>
-#include <rdma/ib_verbs.h>
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
@@ -1484,9 +1483,9 @@ static int build_mlx_header(struct mthca
 		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
 	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
 	if (!sqp->qp.ibqp.qp_num)
-		ib_query_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+		dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
 	else
-		ib_query_pkey(&dev->ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+		dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
 
 	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);



More information about the general mailing list