[openib-general] [GIT PULL] post-2.6.17-rc1 fixes

Roland Dreier rdreier at cisco.com
Sun Apr 9 15:06:39 PDT 2006


Linus, please pull from

    master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus

This tree is also available from kernel.org mirrors at:

    git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus

This includes some changes that I asked you to pull last week right
before you left.  There are a couple of largish changes in here, but I
think they are all needed:
 - the IPoIB ring size tunables fix the horrible performance IBM sees
 - the static rate change fixes big problems on mixed-rate networks
 - the callback refcounting changes fix possible crashes on module
   unload (the owner-pinning pattern is sketched just below this list)

The exact changes and patch are:

Eli Cohen:
      IPoIB: Wait for join to finish before freeing mcast struct
      IPoIB: Close race in ipoib_flush_paths()

Jack Morgenstein:
      IB: simplify static rate encoding

Michael S. Tsirkin:
      IB/mad: fix oops in cancel_mads
      IPoIB: Consolidate private neighbour data handling
      IB/mthca: Disable tuning PCI read burst size
      IB/sa: Don't let a module be unloaded with a callback running

Roland Dreier:
      IPoIB: Always build debugging code unless CONFIG_EMBEDDED=y
      IB/mthca: Always build debugging code unless CONFIG_EMBEDDED=y
      IB/srp: Fix memory leak in options parsing
      IPoIB: Use spin_lock_irq() instead of spin_lock_irqsave()

Sean Hefty:
      IB/cm: Don't let a module be unloaded with a callback running

Shirley Ma:
      IPoIB: Make send and receive queue sizes tunable

 drivers/infiniband/core/cm.c                   |   31 +++-
 drivers/infiniband/core/mad.c                  |    2 
 drivers/infiniband/core/sa_query.c             |   93 +++++++-----
 drivers/infiniband/core/verbs.c                |   34 ++++
 drivers/infiniband/hw/mthca/Kconfig            |   11 +
 drivers/infiniband/hw/mthca/Makefile           |    4 -
 drivers/infiniband/hw/mthca/mthca_av.c         |   96 ++++++++++++
 drivers/infiniband/hw/mthca/mthca_cmd.c        |    4 +
 drivers/infiniband/hw/mthca/mthca_cmd.h        |    1 
 drivers/infiniband/hw/mthca/mthca_dev.h        |   21 ++-
 drivers/infiniband/hw/mthca/mthca_mad.c        |   42 +++++
 drivers/infiniband/hw/mthca/mthca_main.c       |   27 ++++
 drivers/infiniband/hw/mthca/mthca_provider.h   |    3 
 drivers/infiniband/hw/mthca/mthca_qp.c         |   46 ++++--
 drivers/infiniband/ulp/ipoib/Kconfig           |    3 
 drivers/infiniband/ulp/ipoib/ipoib.h           |    7 +
 drivers/infiniband/ulp/ipoib/ipoib_fs.c        |    2 
 drivers/infiniband/ulp/ipoib/ipoib_ib.c        |   22 +--
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |   88 +++++++----
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   58 +++-----
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c     |    6 -
 drivers/infiniband/ulp/srp/ib_srp.c            |    1 
 include/rdma/ib_cm.h                           |   13 +-
 include/rdma/ib_sa.h                           |  185 ++++++++++++++++--------
 include/rdma/ib_verbs.h                        |   28 ++++
 25 files changed, 603 insertions(+), 225 deletions(-)


diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 7cfedb8..66d1cb3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -118,6 +118,7 @@ struct cm_timewait_info {
 
 struct cm_id_private {
 	struct ib_cm_id	id;
+	struct module *owner;
 
 	struct rb_node service_node;
 	struct rb_node sidr_id_node;
@@ -538,9 +539,9 @@ static void cm_reject_sidr_req(struct cm
 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
 }
 
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
-				 ib_cm_handler cm_handler,
-				 void *context)
+struct ib_cm_id *__ib_create_cm_id(struct ib_device *device,
+				   ib_cm_handler cm_handler,
+				   void *context, struct module *owner)
 {
 	struct cm_id_private *cm_id_priv;
 	int ret;
@@ -549,6 +550,7 @@ struct ib_cm_id *ib_create_cm_id(struct 
 	if (!cm_id_priv)
 		return ERR_PTR(-ENOMEM);
 
+	cm_id_priv->owner = owner;
 	cm_id_priv->id.state = IB_CM_IDLE;
 	cm_id_priv->id.device = device;
 	cm_id_priv->id.cm_handler = cm_handler;
@@ -569,7 +571,7 @@ error:
 	kfree(cm_id_priv);
 	return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(ib_create_cm_id);
+EXPORT_SYMBOL(__ib_create_cm_id);
 
 static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
 {
@@ -1086,6 +1088,18 @@ static void cm_format_req_event(struct c
 	work->cm_event.private_data = &req_msg->private_data;
 }
 
+static int invoke_cm_handler(struct cm_id_private *cm_id_priv,
+			     struct ib_cm_event *event)
+{
+	int ret;
+
+	BUG_ON(!try_module_get(cm_id_priv->owner));
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, event);
+	module_put(cm_id_priv->owner);
+
+	return ret;
+}
+
 static void cm_process_work(struct cm_id_private *cm_id_priv,
 			    struct cm_work *work)
 {
@@ -1093,7 +1107,7 @@ static void cm_process_work(struct cm_id
 	int ret;
 
 	/* We will typically only have the current event to report. */
-	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+	ret = invoke_cm_handler(cm_id_priv, &work->cm_event);
 	cm_free_work(work);
 
 	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
@@ -1101,8 +1115,7 @@ static void cm_process_work(struct cm_id
 		work = cm_dequeue_work(cm_id_priv);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		BUG_ON(!work);
-		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
-						&work->cm_event);
+		ret = invoke_cm_handler(cm_id_priv, &work->cm_event);
 		cm_free_work(work);
 	}
 	cm_deref_id(cm_id_priv);
@@ -1291,6 +1304,7 @@ static int cm_req_handler(struct cm_work
 		goto error2;
 	}
 
+	cm_id_priv->owner = listen_cm_id_priv->owner;
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
 	cm_id_priv->id.context = listen_cm_id_priv->id.context;
 	cm_id_priv->id.service_id = req_msg->service_id;
@@ -2662,6 +2676,7 @@ static int cm_sidr_req_handler(struct cm
 	atomic_inc(&cur_cm_id_priv->refcount);
 	spin_unlock_irqrestore(&cm.lock, flags);
 
+	cm_id_priv->owner = cur_cm_id_priv->owner;
 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
 	cm_id_priv->id.service_id = sidr_req_msg->service_id;
@@ -2830,7 +2845,7 @@ static void cm_process_send_error(struct
 	cm_event.param.send_status = wc_status;
 
 	/* No other events can occur on the cm_id at this point. */
-	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
+	ret = invoke_cm_handler(cm_id_priv, &cm_event);
 	cm_free_msg(msg);
 	if (ret)
 		ib_destroy_cm_id(&cm_id_priv->id);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ba54c85..3a702da 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2311,6 +2311,7 @@ static void local_completions(void *data
 		local = list_entry(mad_agent_priv->local_list.next,
 				   struct ib_mad_local_private,
 				   completion_list);
+		list_del(&local->completion_list);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		if (local->mad_priv) {
 			recv_mad_agent = local->recv_mad_agent;
@@ -2362,7 +2363,6 @@ local_send_completion:
 						   &mad_send_wc);
 
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		list_del(&local->completion_list);
 		atomic_dec(&mad_agent_priv->refcount);
 		if (!recv)
 			kmem_cache_free(ib_mad_cache, local->mad_priv);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 501cc05..c43ed75 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -74,6 +74,7 @@ struct ib_sa_device {
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct module	       *owner;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -547,15 +548,16 @@ static void ib_sa_path_rec_release(struc
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
-		       struct ib_sa_path_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct ib_sa_path_rec *resp,
-					void *context),
-		       void *context,
-		       struct ib_sa_query **sa_query)
+int __ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec,
+			 ib_sa_comp_mask comp_mask,
+			 int timeout_ms, gfp_t gfp_mask,
+			 void (*callback)(int status,
+					  struct ib_sa_path_rec *resp,
+					  void *context),
+			 void *context,
+			 struct module *owner,
+			 struct ib_sa_query **sa_query)
 {
 	struct ib_sa_path_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -590,6 +592,7 @@ int ib_sa_path_rec_get(struct ib_device 
 
 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_path_rec_release;
+	query->sa_query.owner    = owner;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
@@ -613,7 +616,7 @@ err1:
 	kfree(query);
 	return ret;
 }
-EXPORT_SYMBOL(ib_sa_path_rec_get);
+EXPORT_SYMBOL(__ib_sa_path_rec_get);
 
 static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
 				    int status,
@@ -663,15 +666,16 @@ static void ib_sa_service_rec_release(st
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
-			    struct ib_sa_service_rec *rec,
-			    ib_sa_comp_mask comp_mask,
-			    int timeout_ms, gfp_t gfp_mask,
-			    void (*callback)(int status,
-					     struct ib_sa_service_rec *resp,
-					     void *context),
-			    void *context,
-			    struct ib_sa_query **sa_query)
+int __ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+			      struct ib_sa_service_rec *rec,
+			      ib_sa_comp_mask comp_mask,
+			      int timeout_ms, gfp_t gfp_mask,
+			      void (*callback)(int status,
+					       struct ib_sa_service_rec *resp,
+					       void *context),
+			      void *context,
+			      struct module *owner,
+			      struct ib_sa_query **sa_query)
 {
 	struct ib_sa_service_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -711,6 +715,7 @@ int ib_sa_service_rec_query(struct ib_de
 
 	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_service_rec_release;
+	query->sa_query.owner    = owner;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
@@ -735,7 +740,7 @@ err1:
 	kfree(query);
 	return ret;
 }
-EXPORT_SYMBOL(ib_sa_service_rec_query);
+EXPORT_SYMBOL(__ib_sa_service_rec_query);
 
 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
 					int status,
@@ -759,16 +764,17 @@ static void ib_sa_mcmember_rec_release(s
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
-			     u8 method,
-			     struct ib_sa_mcmember_rec *rec,
-			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
-			     void (*callback)(int status,
-					      struct ib_sa_mcmember_rec *resp,
-					      void *context),
-			     void *context,
-			     struct ib_sa_query **sa_query)
+int __ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+			       u8 method,
+			       struct ib_sa_mcmember_rec *rec,
+			       ib_sa_comp_mask comp_mask,
+			       int timeout_ms, gfp_t gfp_mask,
+			       void (*callback)(int status,
+						struct ib_sa_mcmember_rec *resp,
+						void *context),
+			       void *context,
+			       struct module *owner,
+			       struct ib_sa_query **sa_query)
 {
 	struct ib_sa_mcmember_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -803,6 +809,7 @@ int ib_sa_mcmember_rec_query(struct ib_d
 
 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_mcmember_rec_release;
+	query->sa_query.owner    = owner;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
@@ -827,7 +834,15 @@ err1:
 	kfree(query);
 	return ret;
 }
-EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
+EXPORT_SYMBOL(__ib_sa_mcmember_rec_query);
+
+static void call_sa_callback(struct ib_sa_query *query, int status,
+			     struct ib_sa_mad *mad)
+{
+	BUG_ON(!try_module_get(query->owner));
+	query->callback(query, status, mad);
+	module_put(query->owner);
+}
 
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
@@ -841,13 +856,13 @@ static void send_handler(struct ib_mad_a
 			/* No callback -- already got recv */
 			break;
 		case IB_WC_RESP_TIMEOUT_ERR:
-			query->callback(query, -ETIMEDOUT, NULL);
+			call_sa_callback(query, -ETIMEDOUT, NULL);
 			break;
 		case IB_WC_WR_FLUSH_ERR:
-			query->callback(query, -EINTR, NULL);
+			call_sa_callback(query, -EINTR, NULL);
 			break;
 		default:
-			query->callback(query, -EIO, NULL);
+			call_sa_callback(query, -EIO, NULL);
 			break;
 		}
 
@@ -871,12 +886,12 @@ static void recv_handler(struct ib_mad_a
 
 	if (query->callback) {
 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
-			query->callback(query,
-					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
-					-EINVAL : 0,
-					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+			call_sa_callback(query,
+					 mad_recv_wc->recv_buf.mad->mad_hdr.status ?
+					 -EINVAL : 0,
+					 (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
 		else
-			query->callback(query, -EIO, NULL);
+			call_sa_callback(query, -EIO, NULL);
 	}
 
 	ib_free_recv_mad(mad_recv_wc);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index cae0845..b78e7dc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -45,6 +45,40 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
+int ib_rate_to_mult(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return  1;
+	case IB_RATE_5_GBPS:   return  2;
+	case IB_RATE_10_GBPS:  return  4;
+	case IB_RATE_20_GBPS:  return  8;
+	case IB_RATE_30_GBPS:  return 12;
+	case IB_RATE_40_GBPS:  return 16;
+	case IB_RATE_60_GBPS:  return 24;
+	case IB_RATE_80_GBPS:  return 32;
+	case IB_RATE_120_GBPS: return 48;
+	default:	       return -1;
+	}
+}
+EXPORT_SYMBOL(ib_rate_to_mult);
+
+enum ib_rate mult_to_ib_rate(int mult)
+{
+	switch (mult) {
+	case 1:  return IB_RATE_2_5_GBPS;
+	case 2:  return IB_RATE_5_GBPS;
+	case 4:  return IB_RATE_10_GBPS;
+	case 8:  return IB_RATE_20_GBPS;
+	case 12: return IB_RATE_30_GBPS;
+	case 16: return IB_RATE_40_GBPS;
+	case 24: return IB_RATE_60_GBPS;
+	case 32: return IB_RATE_80_GBPS;
+	case 48: return IB_RATE_120_GBPS;
+	default: return IB_RATE_PORT_CURRENT;
+	}
+}
+EXPORT_SYMBOL(mult_to_ib_rate);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
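
As a usage note, ib_rate_to_mult() returns the rate as a multiple of
2.5 Gb/s, so multiplying by 25 gives tenths of Gb/s; that is the
conversion ipoib_fs.c switches to further down.  Illustratively (with a
hypothetical pathrec_rate of type enum ib_rate):

	int tenths = ib_rate_to_mult(pathrec_rate) * 25; /* 10 Gb/s -> 100 */

	printk(KERN_INFO "path rate: %d.%d Gb/s\n", tenths / 10, tenths % 10);

mult_to_ib_rate() is its inverse for the multiples listed above.
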
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index e88be85..9aa5a44 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -7,10 +7,11 @@ config INFINIBAND_MTHCA
 	  ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
 
 config INFINIBAND_MTHCA_DEBUG
-	bool "Verbose debugging output"
+	bool "Verbose debugging output" if EMBEDDED
 	depends on INFINIBAND_MTHCA
-	default n
+	default y
 	---help---
-	  This option causes the mthca driver produce a bunch of debug
-	  messages.  Select this is you are developing the driver or
-	  trying to diagnose a problem.
+	  This option causes debugging code to be compiled into the
+	  mthca driver.  The output can be turned on via the
+	  debug_level module parameter (which can also be set after
+	  the driver is loaded through sysfs).
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index 47ec5a7..e388d95 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,7 +1,3 @@
-ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
-
 obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
 
 ib_mthca-y :=	mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bc5bdcb..87e7c63 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -42,6 +42,20 @@
 
 #include "mthca_dev.h"
 
+enum {
+      MTHCA_RATE_TAVOR_FULL   = 0,
+      MTHCA_RATE_TAVOR_1X     = 1,
+      MTHCA_RATE_TAVOR_4X     = 2,
+      MTHCA_RATE_TAVOR_1X_DDR = 3
+};
+
+enum {
+      MTHCA_RATE_MEMFREE_FULL    = 0,
+      MTHCA_RATE_MEMFREE_QUARTER = 1,
+      MTHCA_RATE_MEMFREE_EIGHTH  = 2,
+      MTHCA_RATE_MEMFREE_HALF    = 3
+};
+
 struct mthca_av {
 	__be32 port_pd;
 	u8     reserved1;
@@ -55,6 +69,86 @@ struct mthca_av {
 	__be32 dgid[4];
 };
 
+static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+	switch (mthca_rate) {
+	case MTHCA_RATE_MEMFREE_EIGHTH:  return port_rate / 8;
+	case MTHCA_RATE_MEMFREE_QUARTER: return port_rate / 4;
+	case MTHCA_RATE_MEMFREE_HALF:    return port_rate / 2;
+	case MTHCA_RATE_MEMFREE_FULL:    return port_rate;
+	default:                         return port_rate;
+	}
+}
+
+static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+	switch (mthca_rate) {
+	case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
+	case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
+	case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
+	default:		      return port_rate;
+	}
+}
+
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+{
+	if (mthca_is_memfree(dev)) {
+		/* Handle old Arbel FW */
+		if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
+			return IB_RATE_2_5_GBPS;
+
+		return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+	} else
+		return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+}
+
+static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
+{
+	if (cur_rate <= req_rate)
+		return 0;
+
+	/*
+	 * Inter-packet delay (IPD) to get from rate X down to a rate
+	 * no more than Y is (X - 1) / Y.
+	 */
+	switch ((cur_rate - 1) / req_rate) {
+	case 0:	 return MTHCA_RATE_MEMFREE_FULL;
+	case 1:	 return MTHCA_RATE_MEMFREE_HALF;
+	case 2:	 /* fall through */
+	case 3:	 return MTHCA_RATE_MEMFREE_QUARTER;
+	default: return MTHCA_RATE_MEMFREE_EIGHTH;
+	}
+}
+
+static u8 ib_rate_to_tavor(u8 static_rate)
+{
+	switch (static_rate) {
+	case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
+	case IB_RATE_5_GBPS:   return MTHCA_RATE_TAVOR_1X_DDR;
+	case IB_RATE_10_GBPS:  return MTHCA_RATE_TAVOR_4X;
+	default:	       return MTHCA_RATE_TAVOR_FULL;
+	}
+}
+
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+{
+	u8 rate;
+
+	if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
+		return 0;
+
+	if (mthca_is_memfree(dev))
+		rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
+					  dev->rate[port - 1]);
+	else
+		rate = ib_rate_to_tavor(static_rate);
+
+	if (!(dev->limits.stat_rate_support & (1 << rate)))
+		rate = 1;
+
+	return rate;
+}
+
 int mthca_create_ah(struct mthca_dev *dev,
 		    struct mthca_pd *pd,
 		    struct ib_ah_attr *ah_attr,
@@ -107,7 +201,7 @@ on_hca_fail:
 	av->g_slid  = ah_attr->src_path_bits;
 	av->dlid    = cpu_to_be16(ah_attr->dlid);
 	av->msg_sr  = (3 << 4) | /* 2K message */
-		ah_attr->static_rate;
+		mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
 	av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		av->g_slid |= 0x80;
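
A quick sanity check of the IPD formula in ib_rate_to_memfree() above:
throttling a port running at multiplier 12 down to a requested 8 gives
(12 - 1) / 8 = 1, i.e. MTHCA_RATE_MEMFREE_HALF and an actual rate of 6,
the largest available fraction that does not exceed the request; 4 down
to 1 gives (4 - 1) / 1 = 3, which maps to MTHCA_RATE_MEMFREE_QUARTER,
i.e. exactly the requested 4 / 4 = 1.
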
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 343eca5..1985b5d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -965,6 +965,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev
 	u32 *outbox;
 	u8 field;
 	u16 size;
+	u16 stat_rate;
 	int err;
 
 #define QUERY_DEV_LIM_OUT_SIZE             0x100
@@ -995,6 +996,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev
 #define QUERY_DEV_LIM_MTU_WIDTH_OFFSET      0x36
 #define QUERY_DEV_LIM_VL_PORT_OFFSET        0x37
 #define QUERY_DEV_LIM_MAX_GID_OFFSET        0x3b
+#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET   0x3c
 #define QUERY_DEV_LIM_MAX_PKEY_OFFSET       0x3f
 #define QUERY_DEV_LIM_FLAGS_OFFSET          0x44
 #define QUERY_DEV_LIM_RSVD_UAR_OFFSET       0x48
@@ -1086,6 +1088,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev
 	dev_lim->num_ports = field & 0xf;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
 	dev_lim->max_gids = 1 << (field & 0xf);
+	MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
+	dev_lim->stat_rate_support = stat_rate;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
 	dev_lim->max_pkeys = 1 << (field & 0xf);
 	MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index e4ec35c..2f976f2 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -146,6 +146,7 @@ struct mthca_dev_lim {
 	int max_vl;
 	int num_ports;
 	int max_gids;
+	u16 stat_rate_support;
 	int max_pkeys;
 	u32 flags;
 	int reserved_uars;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ad52edb..49d0eae 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -172,6 +172,7 @@ struct mthca_limits {
 	int      reserved_pds;
 	u32      page_size_cap;
 	u32      flags;
+	u16      stat_rate_support;
 	u8       port_width_cap;
 };
 
@@ -353,10 +354,24 @@ struct mthca_dev {
 	struct ib_mad_agent  *send_agent[MTHCA_MAX_PORTS][2];
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
+	u8                    rate[MTHCA_MAX_PORTS];
 };
 
-#define mthca_dbg(mdev, format, arg...) \
-	dev_dbg(&mdev->pdev->dev, format, ## arg)
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+extern int mthca_debug_level;
+
+#define mthca_dbg(mdev, format, arg...)					\
+	do {								\
+		if (mthca_debug_level)					\
+			dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
+	} while (0)
+
+#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
 #define mthca_err(mdev, format, arg...) \
 	dev_err(&mdev->pdev->dev, format, ## arg)
 #define mthca_info(mdev, format, arg...) \
@@ -542,6 +557,8 @@ int mthca_read_ah(struct mthca_dev *dev,
 		  struct ib_ud_header *header);
 int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
 int mthca_ah_grh_present(struct mthca_ah *ah);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
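
With this change, a compiled-in debug message costs a single branch on
mthca_debug_level, and since debug_level is declared below as a 0644
module parameter it can be toggled at runtime by writing to
/sys/module/ib_mthca/parameters/debug_level (assuming the module is
loaded as ib_mthca, per the Makefile above).
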
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index dfb482e..f235c7e 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -49,6 +49,30 @@ enum {
 	MTHCA_VENDOR_CLASS2 = 0xa
 };
 
+int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
+{
+	struct ib_port_attr *tprops = NULL;
+	int                  ret;
+
+	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+	if (!tprops)
+		return -ENOMEM;
+
+	ret = ib_query_port(&dev->ib_dev, port_num, tprops);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
+		       ret, dev->ib_dev.name, port_num);
+		goto out;
+	}
+
+	dev->rate[port_num - 1] = tprops->active_speed *
+				  ib_width_enum_to_int(tprops->active_width);
+
+out:
+	kfree(tprops);
+	return ret;
+}
+
 static void update_sm_ah(struct mthca_dev *dev,
 			 u8 port_num, u16 lid, u8 sl)
 {
@@ -90,6 +114,7 @@ static void smp_snoop(struct ib_device *
 	     mad->mad_hdr.mgmt_class  == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
 	    mad->mad_hdr.method     == IB_MGMT_METHOD_SET) {
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
 				     be16_to_cpup((__be16 *) (mad->data + 58)),
 				     (*(u8 *) (mad->data + 76)) & 0xf);
@@ -246,6 +271,7 @@ int mthca_create_agents(struct mthca_dev
 {
 	struct ib_mad_agent *agent;
 	int p, q;
+	int ret;
 
 	spin_lock_init(&dev->sm_lock);
 
@@ -255,11 +281,23 @@ int mthca_create_agents(struct mthca_dev
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
 						      NULL, 0, send_handler,
 						      NULL, NULL);
-			if (IS_ERR(agent))
+			if (IS_ERR(agent)) {
+				ret = PTR_ERR(agent);
 				goto err;
+			}
 			dev->send_agent[p][q] = agent;
 		}
 
+
+	for (p = 1; p <= dev->limits.num_ports; ++p) {
+		ret = mthca_update_rate(dev, p);
+		if (ret) {
+			mthca_err(dev, "Failed to obtain port %d rate."
+				  " aborting.\n", p);
+			goto err;
+		}
+	}
+
 	return 0;
 
 err:
@@ -268,7 +306,7 @@ err:
 			if (dev->send_agent[p][q])
 				ib_unregister_mad_agent(dev->send_agent[p][q]);
 
-	return PTR_ERR(agent);
+	return ret;
 }
 
 void __devexit mthca_free_agents(struct mthca_dev *dev)
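
A note on the arithmetic in mthca_update_rate(): active_speed uses the
PortInfo encoding (1 unit per 2.5 Gb/s per lane), so dev->rate[] ends
up holding the port rate as a multiple of 2.5 Gb/s.  A 4X SDR link, for
example, gives 1 * 4 = 4, agreeing with ib_rate_to_mult(IB_RATE_10_GBPS).
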
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 266f347..7e9c97b 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox InfiniBand 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+
+int mthca_debug_level = 0;
+module_param_named(debug_level, mthca_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
 #ifdef CONFIG_PCI_MSI
 
 static int msi_x = 0;
@@ -69,6 +77,10 @@ MODULE_PARM_DESC(msi, "attempt to use MS
 
 #endif /* CONFIG_PCI_MSI */
 
+static int tune_pci = 0;
+module_param(tune_pci, int, 0444);
+MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -90,6 +102,9 @@ static int __devinit mthca_tune_pci(stru
 	int cap;
 	u16 val;
 
+	if (!tune_pci)
+		return 0;
+
 	/* First try to max out Read Byte Count */
 	cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
 	if (cap) {
@@ -191,6 +206,18 @@ static int __devinit mthca_dev_lim(struc
 	mdev->limits.port_width_cap     = dev_lim->max_port_width;
 	mdev->limits.page_size_cap      = ~(u32) (dev_lim->min_page_sz - 1);
 	mdev->limits.flags              = dev_lim->flags;
+	/*
+	 * For old FW that doesn't return static rate support, use a
+	 * value of 0x3 (only static rate values of 0 or 1 are handled),
+	 * except on Sinai, where even old FW can handle static rate
+	 * values of 2 and 3.
+	 */
+	if (dev_lim->stat_rate_support)
+		mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
+	else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		mdev->limits.stat_rate_support = 0xf;
+	else
+		mdev->limits.stat_rate_support = 0x3;
 
 	/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
 	   May be doable since hardware supports it for SRQ.
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 2e7f521..6676a78 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -257,6 +257,8 @@ struct mthca_qp {
 	atomic_t               refcount;
 	u32                    qpn;
 	int                    is_direct;
+	u8                     port; /* for SQP and memfree use only */
+	u8                     alt_port; /* for memfree use only */
 	u8                     transport;
 	u8                     state;
 	u8                     atomic_rd_en;
@@ -278,7 +280,6 @@ struct mthca_qp {
 
 struct mthca_sqp {
 	struct mthca_qp qp;
-	int             port;
 	int             pkey_index;
 	u32             qkey;
 	u32             send_psn;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 057c8e6..f37b0e3 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -248,6 +248,9 @@ void mthca_qp_event(struct mthca_dev *de
 		return;
 	}
 
+	if (event_type == IB_EVENT_PATH_MIG)
+		qp->port = qp->alt_port;
+
 	event.device      = &dev->ib_dev;
 	event.event       = event_type;
 	event.element.qp  = &qp->ibqp;
@@ -392,10 +395,16 @@ static void to_ib_ah_attr(struct mthca_d
 {
 	memset(ib_ah_attr, 0, sizeof *path);
 	ib_ah_attr->port_num 	  = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+
+	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
+		return;
+
 	ib_ah_attr->dlid     	  = be16_to_cpu(path->rlid);
 	ib_ah_attr->sl       	  = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
 	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
-	ib_ah_attr->static_rate   = path->static_rate & 0x7;
+	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
+						     path->static_rate & 0x7,
+						     ib_ah_attr->port_num);
 	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 	if (ib_ah_attr->ah_flags) {
 		ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
@@ -455,8 +464,10 @@ int mthca_query_qp(struct ib_qp *ibqp, s
 	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
 	qp_attr->cap.max_inline_data = qp->max_inline_data;
 
-	to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
-	to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+	if (qp->transport == RC || qp->transport == UC) {
+		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+	}
 
 	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
 	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
@@ -484,11 +495,11 @@ out:
 }
 
 static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
-			  struct mthca_qp_path *path)
+			  struct mthca_qp_path *path, u8 port)
 {
 	path->g_mylmc     = ah->src_path_bits & 0x7f;
 	path->rlid        = cpu_to_be16(ah->dlid);
-	path->static_rate = !!ah->static_rate;
+	path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
 
 	if (ah->ah_flags & IB_AH_GRH) {
 		if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
@@ -634,7 +645,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 
 	if (qp->transport == MLX)
 		qp_context->pri_path.port_pkey |=
-			cpu_to_be32(to_msqp(qp)->port << 24);
+			cpu_to_be32(qp->port << 24);
 	else {
 		if (attr_mask & IB_QP_PORT) {
 			qp_context->pri_path.port_pkey |=
@@ -657,7 +668,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 	}
 
 	if (attr_mask & IB_QP_AV) {
-		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path))
+		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
+				   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
 			return -EINVAL;
 
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
@@ -681,7 +693,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 			return -EINVAL;
 		}
 
-		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path))
+		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
+				   attr->alt_ah_attr.port_num))
 			return -EINVAL;
 
 		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
@@ -791,6 +804,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 		qp->atomic_rd_en = attr->qp_access_flags;
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 		qp->resp_depth = attr->max_dest_rd_atomic;
+	if (attr_mask & IB_QP_PORT)
+		qp->port = attr->port_num;
+	if (attr_mask & IB_QP_ALT_PATH)
+		qp->alt_port = attr->alt_port_num;
 
 	if (is_sqp(dev, qp))
 		store_attrs(to_msqp(qp), attr, attr_mask);
@@ -802,13 +819,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 	if (is_qp0(dev, qp)) {
 		if (cur_state != IB_QPS_RTR &&
 		    new_state == IB_QPS_RTR)
-			init_port(dev, to_msqp(qp)->port);
+			init_port(dev, qp->port);
 
 		if (cur_state != IB_QPS_RESET &&
 		    cur_state != IB_QPS_ERR &&
 		    (new_state == IB_QPS_RESET ||
 		     new_state == IB_QPS_ERR))
-			mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
+			mthca_CLOSE_IB(dev, qp->port, &status);
 	}
 
 	/*
@@ -1212,6 +1229,9 @@ int mthca_alloc_qp(struct mthca_dev *dev
 	if (qp->qpn == -1)
 		return -ENOMEM;
 
+	/* initialize port to zero for error-catching. */
+	qp->port = 0;
+
 	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
 				    send_policy, qp);
 	if (err) {
@@ -1261,7 +1281,7 @@ int mthca_alloc_sqp(struct mthca_dev *de
 	if (err)
 		goto err_out;
 
-	sqp->port = port;
+	sqp->qp.port      = port;
 	sqp->qp.qpn       = mqpn;
 	sqp->qp.transport = MLX;
 
@@ -1404,10 +1424,10 @@ static int build_mlx_header(struct mthca
 		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
 	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
 	if (!sqp->qp.ibqp.qp_num)
-		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
 				   sqp->pkey_index, &pkey);
 	else
-		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
 				   wr->wr.ud.pkey_index, &pkey);
 	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 8d2e04c..13d6d01 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -10,8 +10,9 @@ config INFINIBAND_IPOIB
 	  group: <http://www.ietf.org/html.charters/ipoib-charter.html>.
 
 config INFINIBAND_IPOIB_DEBUG
-	bool "IP-over-InfiniBand debugging"
+	bool "IP-over-InfiniBand debugging" if EMBEDDED
 	depends on INFINIBAND_IPOIB
+	default y
 	---help---
 	  This option causes debugging code to be compiled into the
 	  IPoIB driver.  The output can be turned on via the
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b640107..12a1e05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
 
 	IPOIB_RX_RING_SIZE 	  = 128,
 	IPOIB_TX_RING_SIZE 	  = 64,
+	IPOIB_MAX_QUEUE_SIZE	  = 8192,
+	IPOIB_MIN_QUEUE_SIZE	  = 2,
 
 	IPOIB_NUM_WC 		  = 4,
 
@@ -230,6 +232,9 @@ static inline struct ipoib_neigh **to_ip
 				     INFINIBAND_ALEN, sizeof(void *));
 }
 
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
+void ipoib_neigh_free(struct ipoib_neigh *neigh);
+
 extern struct workqueue_struct *ipoib_workqueue;
 
 /* functions */
@@ -329,6 +334,8 @@ static inline void ipoib_unregister_debu
 #define ipoib_warn(priv, format, arg...)		\
 	ipoib_printk(KERN_WARNING, priv, format , ## arg)
 
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 685258e..5dde380 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -213,7 +213,7 @@ static int ipoib_path_seq_show(struct se
 		   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
 	if (path.pathrec.dlid) {
-		rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25;
+		rate = ib_rate_to_mult(path.pathrec.rate) * 25;
 
 		seq_printf(file,
 			   "  DLID:     0x%04x\n"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202..a54da42 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+	for (i = 0; i < ipoib_recvq_size; ++i) {
 		if (ipoib_alloc_rx_skb(dev, i)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct ne
 	if (wr_id & IPOIB_OP_RECV) {
 		wr_id &= ~IPOIB_OP_RECV;
 
-		if (wr_id < IPOIB_RX_RING_SIZE) {
+		if (wr_id < ipoib_recvq_size) {
 			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
 			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
 
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct ne
 		struct ipoib_tx_buf *tx_req;
 		unsigned long flags;
 
-		if (wr_id >= IPOIB_TX_RING_SIZE) {
+		if (wr_id >= ipoib_sendq_size) {
 			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, IPOIB_TX_RING_SIZE);
+				   wr_id, ipoib_sendq_size);
 			return;
 		}
 
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct ne
 		spin_lock_irqsave(&priv->tx_lock, flags);
 		++priv->tx_tail;
 		if (netif_queue_stopped(dev) &&
-		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
 			netif_wake_queue(dev);
 		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, 
 	 * means we have to make sure everything is properly recorded and
 	 * our state is consistent before we call post_send().
 	 */
-	tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
 	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
 			      DMA_TO_DEVICE);
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
-	if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, addr, skb->len))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_devi
 	int pending = 0;
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb)
 			++pending;
 
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device 
 			 */
 			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 				tx_req = &priv->tx_ring[priv->tx_tail &
-							(IPOIB_TX_RING_SIZE - 1)];
+							(ipoib_sendq_size - 1)];
 				dma_unmap_single(priv->ca->dma_device,
 						 pci_unmap_addr(tx_req, mapping),
 						 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device 
 				++priv->tx_tail;
 			}
 
-			for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+			for (i = 0; i < ipoib_recvq_size; ++i)
 				if (priv->rx_ring[i].skb) {
 					dma_unmap_single(priv->ca->dma_device,
 							 pci_unmap_addr(&priv->rx_ring[i],
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9b0bd7c..cb078a7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
 
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -252,8 +261,8 @@ static void path_free(struct net_device 
 		 */
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
-		*to_ipoib_neigh(neigh->neighbour) = NULL;
-		kfree(neigh);
+
+		ipoib_neigh_free(neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -327,9 +336,8 @@ void ipoib_flush_paths(struct net_device
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
-	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 
 	list_splice(&priv->path_list, &remove_list);
 	INIT_LIST_HEAD(&priv->path_list);
@@ -337,14 +345,15 @@ void ipoib_flush_paths(struct net_device
 	list_for_each_entry(path, &remove_list, list)
 		rb_erase(&path->rb_node, &priv->path_tree);
 
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
+		spin_unlock_irq(&priv->lock);
 		wait_for_completion(&path->done);
 		path_free(dev, path);
+		spin_lock_irq(&priv->lock);
 	}
+	spin_unlock_irq(&priv->lock);
 }
 
 static void path_rec_completion(int status,
@@ -373,16 +382,9 @@ static void path_rec_completion(int stat
 		struct ib_ah_attr av = {
 			.dlid 	       = be16_to_cpu(pathrec->dlid),
 			.sl 	       = pathrec->sl,
-			.port_num      = priv->port
+			.port_num      = priv->port,
+			.static_rate   = pathrec->rate
 		};
-		int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);
-
-		if (path_rate > 0 && priv->local_rate > path_rate)
-			av.static_rate = (priv->local_rate - 1) / path_rate;
-
-		ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
-			  av.static_rate, priv->local_rate,
-			  ib_sa_rate_enum_to_int(pathrec->rate));
 
 		ah = ipoib_create_ah(dev, priv->pd, &av);
 	}
@@ -481,7 +483,7 @@ static void neigh_add_path(struct sk_buf
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 
-	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+	neigh = ipoib_neigh_alloc(skb->dst->neighbour);
 	if (!neigh) {
 		++priv->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
@@ -489,8 +491,6 @@ static void neigh_add_path(struct sk_buf
 	}
 
 	skb_queue_head_init(&neigh->queue);
-	neigh->neighbour = skb->dst->neighbour;
-	*to_ipoib_neigh(skb->dst->neighbour) = neigh;
 
 	/*
 	 * We can only be called from ipoib_start_xmit, so we're
@@ -503,7 +503,7 @@ static void neigh_add_path(struct sk_buf
 		path = path_rec_create(dev,
 				       (union ib_gid *) (skb->dst->neighbour->ha + 4));
 		if (!path)
-			goto err;
+			goto err_path;
 
 		__path_add(dev, path);
 	}
@@ -521,17 +521,17 @@ static void neigh_add_path(struct sk_buf
 		__skb_queue_tail(&neigh->queue, skb);
 
 		if (!path->query && path_rec_start(dev, path))
-			goto err;
+			goto err_list;
 	}
 
 	spin_unlock(&priv->lock);
 	return;
 
-err:
-	*to_ipoib_neigh(skb->dst->neighbour) = NULL;
+err_list:
 	list_del(&neigh->list);
-	kfree(neigh);
 
+err_path:
+	ipoib_neigh_free(neigh);
 	++priv->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
@@ -763,8 +763,7 @@ static void ipoib_neigh_destructor(struc
 		if (neigh->ah)
 			ah = neigh->ah;
 		list_del(&neigh->list);
-		*to_ipoib_neigh(n) = NULL;
-		kfree(neigh);
+		ipoib_neigh_free(neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -773,6 +772,26 @@ static void ipoib_neigh_destructor(struc
 		ipoib_put_ah(ah);
 }
 
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+{
+	struct ipoib_neigh *neigh;
+
+	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+	if (!neigh)
+		return NULL;
+
+	neigh->neighbour = neighbour;
+	*to_ipoib_neigh(neighbour) = neigh;
+
+	return neigh;
+}
+
+void ipoib_neigh_free(struct ipoib_neigh *neigh)
+{
+	*to_ipoib_neigh(neigh->neighbour) = NULL;
+	kfree(neigh);
+}
+
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
 {
 	parms->neigh_destructor = ipoib_neigh_destructor;
@@ -785,20 +804,19 @@ int ipoib_dev_init(struct net_device *de
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
-
-	priv->rx_ring =	kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-		       ca->name, IPOIB_RX_RING_SIZE);
+		       ca->name, ipoib_recvq_size);
 		goto out;
 	}
 
-	priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
 				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-		       ca->name, IPOIB_TX_RING_SIZE);
+		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 
@@ -866,7 +884,7 @@ static void ipoib_setup(struct net_devic
 	dev->hard_header_len 	 = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
-	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
+	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
 	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
 	/* MTU will be reset when mcast join happens */
@@ -1118,6 +1136,14 @@ static int __init ipoib_init_module(void
 {
 	int ret;
 
+	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
 	ret = ipoib_register_debugfs();
 	if (ret)
 		return ret;
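
The ring indexing in ipoib_ib.c masks with (size - 1), so both queue
sizes must be powers of two; the clamping above guarantees that for any
value given on the module command line.  A sketch of the effective
normalization (hypothetical input value):

	/* modprobe ib_ipoib send_queue_size=100 ends up as: */
	size = roundup_pow_of_two(100);		/* -> 128              */
	size = min(size, IPOIB_MAX_QUEUE_SIZE);	/* still 128 (<= 8192) */
	size = max(size, IPOIB_MIN_QUEUE_SIZE);	/* still 128 (>= 2)    */
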
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93c462e..1dae4b2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,8 +114,7 @@ static void ipoib_mcast_free(struct ipoi
 		 */
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
-		*to_ipoib_neigh(neigh->neighbour) = NULL;
-		kfree(neigh);
+		ipoib_neigh_free(neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -251,6 +250,7 @@ static int ipoib_mcast_join_finish(struc
 			.port_num      = priv->port,
 			.sl	       = mcast->mcmember.sl,
 			.ah_flags      = IB_AH_GRH,
+			.static_rate   = mcast->mcmember.rate,
 			.grh	       = {
 				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
 				.hop_limit     = mcast->mcmember.hop_limit,
@@ -258,17 +258,8 @@ static int ipoib_mcast_join_finish(struc
 				.traffic_class = mcast->mcmember.traffic_class
 			}
 		};
-		int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate);
-
 		av.grh.dgid = mcast->mcmember.mgid;
 
-		if (path_rate > 0 && priv->local_rate > path_rate)
-			av.static_rate = (priv->local_rate - 1) / path_rate;
-
-		ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
-				av.static_rate, priv->local_rate,
-				ib_sa_rate_enum_to_int(mcast->mcmember.rate));
-
 		ah = ipoib_create_ah(dev, priv->pd, &av);
 		if (!ah) {
 			ipoib_warn(priv, "ib_address_create failed\n");
@@ -618,6 +609,22 @@ int ipoib_mcast_start_thread(struct net_
 	return 0;
 }
 
+static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
+				struct ipoib_mcast *mcast)
+{
+	spin_lock_irq(&priv->lock);
+	if (mcast && mcast->query) {
+		ib_sa_cancel_query(mcast->query_id, mcast->query);
+		mcast->query = NULL;
+		spin_unlock_irq(&priv->lock);
+		ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
+				IPOIB_GID_ARG(mcast->mcmember.mgid));
+		wait_for_completion(&mcast->done);
+	}
+	else
+		spin_unlock_irq(&priv->lock);
+}
+
 int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -637,28 +644,10 @@ int ipoib_mcast_stop_thread(struct net_d
 	if (flush)
 		flush_workqueue(ipoib_workqueue);
 
-	spin_lock_irq(&priv->lock);
-	if (priv->broadcast && priv->broadcast->query) {
-		ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
-		priv->broadcast->query = NULL;
-		spin_unlock_irq(&priv->lock);
-		ipoib_dbg_mcast(priv, "waiting for bcast\n");
-		wait_for_completion(&priv->broadcast->done);
-	} else
-		spin_unlock_irq(&priv->lock);
+	wait_for_mcast_join(priv, priv->broadcast);
 
-	list_for_each_entry(mcast, &priv->multicast_list, list) {
-		spin_lock_irq(&priv->lock);
-		if (mcast->query) {
-			ib_sa_cancel_query(mcast->query_id, mcast->query);
-			mcast->query = NULL;
-			spin_unlock_irq(&priv->lock);
-			ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
-					IPOIB_GID_ARG(mcast->mcmember.mgid));
-			wait_for_completion(&mcast->done);
-		} else
-			spin_unlock_irq(&priv->lock);
-	}
+	list_for_each_entry(mcast, &priv->multicast_list, list)
+		wait_for_mcast_join(priv, mcast);
 
 	return 0;
 }
@@ -772,13 +761,11 @@ out:
 		if (skb->dst            &&
 		    skb->dst->neighbour &&
 		    !*to_ipoib_neigh(skb->dst->neighbour)) {
-			struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
 
 			if (neigh) {
 				kref_get(&mcast->ah->ref);
 				neigh->ah  	= mcast->ah;
-				neigh->neighbour = skb->dst->neighbour;
-				*to_ipoib_neigh(skb->dst->neighbour) = neigh;
 				list_add_tail(&neigh->list, &mcast->neigh_list);
 			}
 		}
@@ -913,6 +900,7 @@ void ipoib_mcast_restart_task(void *dev_
 
 	/* We have to cancel outside of the spinlock */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+		wait_for_mcast_join(priv, mcast);
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f03880..1d49d16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr init_attr = {
 		.cap = {
-			.max_send_wr  = IPOIB_TX_RING_SIZE,
-			.max_recv_wr  = IPOIB_RX_RING_SIZE,
+			.max_send_wr  = ipoib_sendq_size,
+			.max_recv_wr  = ipoib_recvq_size,
 			.max_send_sge = 1,
 			.max_recv_sge = 1
 		},
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_
 	}
 
 	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-				IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
+				ipoib_sendq_size + ipoib_recvq_size + 1);
 	if (IS_ERR(priv->cq)) {
 		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
 		goto out_free_pd;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd8a95a..5f2b3f6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1434,6 +1434,7 @@ static int srp_parse_options(const char 
 			p = match_strdup(args);
 			if (strlen(p) != 32) {
 				printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
+				kfree(p);
 				goto out;
 			}
 
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 0a9fcd5..c552ee7 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -292,6 +292,10 @@ struct ib_cm_id {
 	u32			remote_cm_qpn;  /* 1 unless redirected */
 };
 
+struct ib_cm_id *__ib_create_cm_id(struct ib_device *device,
+				   ib_cm_handler cm_handler,
+				   void *context, struct module *owner);
+
 /**
  * ib_create_cm_id - Allocate a communication identifier.
  * @device: Device associated with the cm_id.  All related communication will
@@ -303,9 +307,12 @@ struct ib_cm_id {
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
-				 ib_cm_handler cm_handler,
-				 void *context);
+static inline struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+					       ib_cm_handler cm_handler,
+					       void *context)
+{
+	return __ib_create_cm_id(device, cm_handler, context, THIS_MODULE);
+}
 
 /**
  * ib_destroy_cm_id - Destroy a connection identifier.
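
Because ib_create_cm_id() is now a static inline, THIS_MODULE expands
in each caller's translation unit, so every consumer is recorded as the
owner with no change to the callers.  Illustratively, a ULP calling

	cm_id = ib_create_cm_id(device, handler, context);

compiles as if it had written

	cm_id = __ib_create_cm_id(device, handler, context, THIS_MODULE);

with THIS_MODULE referring to the ULP's own struct module.  The ib_sa.h
wrappers further down use the same trick.
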
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index f404fe2..6769d1b 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -91,34 +91,6 @@ enum ib_sa_selector {
 	IB_SA_BEST = 3
 };
 
-enum ib_sa_rate {
-	IB_SA_RATE_2_5_GBPS = 2,
-	IB_SA_RATE_5_GBPS   = 5,
-	IB_SA_RATE_10_GBPS  = 3,
-	IB_SA_RATE_20_GBPS  = 6,
-	IB_SA_RATE_30_GBPS  = 4,
-	IB_SA_RATE_40_GBPS  = 7,
-	IB_SA_RATE_60_GBPS  = 8,
-	IB_SA_RATE_80_GBPS  = 9,
-	IB_SA_RATE_120_GBPS = 10
-};
-
-static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
-{
-	switch (rate) {
-	case IB_SA_RATE_2_5_GBPS: return  1;
-	case IB_SA_RATE_5_GBPS:   return  2;
-	case IB_SA_RATE_10_GBPS:  return  4;
-	case IB_SA_RATE_20_GBPS:  return  8;
-	case IB_SA_RATE_30_GBPS:  return 12;
-	case IB_SA_RATE_40_GBPS:  return 16;
-	case IB_SA_RATE_60_GBPS:  return 24;
-	case IB_SA_RATE_80_GBPS:  return 32;
-	case IB_SA_RATE_120_GBPS: return 48;
-	default: 	          return -1;
-	}
-}
-
 /*
  * Structures for SA records are named "struct ib_sa_xxx_rec."  No
  * attempt is made to pack structures to match the physical layout of
@@ -282,37 +254,80 @@ struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
-		       struct ib_sa_path_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct ib_sa_path_rec *resp,
-					void *context),
-		       void *context,
-		       struct ib_sa_query **query);
-
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
-			     u8 method,
-			     struct ib_sa_mcmember_rec *rec,
-			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
-			     void (*callback)(int status,
-					      struct ib_sa_mcmember_rec *resp,
-					      void *context),
-			     void *context,
-			     struct ib_sa_query **query);
-
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
-			 u8 method,
-			 struct ib_sa_service_rec *rec,
+int __ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec,
 			 ib_sa_comp_mask comp_mask,
 			 int timeout_ms, gfp_t gfp_mask,
 			 void (*callback)(int status,
-					  struct ib_sa_service_rec *resp,
+					  struct ib_sa_path_rec *resp,
 					  void *context),
 			 void *context,
-			 struct ib_sa_query **sa_query);
+			 struct module *owner,
+			 struct ib_sa_query **query);
+
+int __ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+			       u8 method,
+			       struct ib_sa_mcmember_rec *rec,
+			       ib_sa_comp_mask comp_mask,
+			       int timeout_ms, gfp_t gfp_mask,
+			       void (*callback)(int status,
+						struct ib_sa_mcmember_rec *resp,
+						void *context),
+			       void *context,
+			       struct module *owner,
+			       struct ib_sa_query **query);
+
+int __ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
+			      u8 method,
+			      struct ib_sa_service_rec *rec,
+			      ib_sa_comp_mask comp_mask,
+			      int timeout_ms, gfp_t gfp_mask,
+			      void (*callback)(int status,
+					       struct ib_sa_service_rec *resp,
+					       void *context),
+			      void *context,
+			      struct module *owner,
+			      struct ib_sa_query **sa_query);
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path.  The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code.  Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+static inline int
+ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+		   struct ib_sa_path_rec *rec,
+		   ib_sa_comp_mask comp_mask,
+		   int timeout_ms, gfp_t gfp_mask,
+		   void (*callback)(int status,
+				    struct ib_sa_path_rec *resp,
+				    void *context),
+		   void *context,
+		   struct ib_sa_query **sa_query)
+{
+	return __ib_sa_path_rec_get(device, port_num, rec, comp_mask,
+				    timeout_ms, gfp_mask, callback,
+				    context, THIS_MODULE, sa_query);
+}
 
 /**
  * ib_sa_mcmember_rec_set - Start an MCMember set query
@@ -349,11 +364,11 @@ ib_sa_mcmember_rec_set(struct ib_device 
 		       void *context,
 		       struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
-					IB_MGMT_METHOD_SET,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
+	return __ib_sa_mcmember_rec_query(device, port_num,
+					  IB_MGMT_METHOD_SET,
+					  rec, comp_mask,
+					  timeout_ms, gfp_mask, callback,
+					  context, THIS_MODULE, query);
 }
 
 /**
@@ -391,12 +406,54 @@ ib_sa_mcmember_rec_delete(struct ib_devi
 			  void *context,
 			  struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
-					IB_SA_METHOD_DELETE,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
+	return __ib_sa_mcmember_rec_query(device, port_num,
+					  IB_SA_METHOD_DELETE,
+					  rec, comp_mask,
+					  timeout_ms, gfp_mask, callback,
+					  context, THIS_MODULE, query);
 }
 
+/**
+ * ib_sa_service_rec_query - Start Service Record operation
+ * @device:device to send request on
+ * @port_num: port number to send request on
+ * @method:SA method - should be get, set, or delete
+ * @rec:Service Record to send in request
+ * @comp_mask:component mask to send in request
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when request completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:request context, used to cancel request
+ *
+ * Send a Service Record set/get/delete to the SA to register,
+ * unregister or query a service record.
+ * The callback function will be called when the request completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_service_rec_query() is negative, it is an
+ * error code.  Otherwise it is a request ID that can be used to cancel
+ * the query.
+ */
+static inline int
+ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+			struct ib_sa_service_rec *rec,
+			ib_sa_comp_mask comp_mask,
+			int timeout_ms, gfp_t gfp_mask,
+			void (*callback)(int status,
+					 struct ib_sa_service_rec *resp,
+					 void *context),
+			void *context,
+			struct ib_sa_query **sa_query)
+{
+	return __ib_sa_service_rec_query(device, port_num, method, rec,
+					 comp_mask, timeout_ms, gfp_mask,
+					 callback, context, THIS_MODULE,
+					 sa_query);
+}
 
 #endif /* IB_SA_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c1ad627..6bbf1b3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -314,6 +314,34 @@ enum ib_ah_flags {
 	IB_AH_GRH	= 1
 };
 
+enum ib_rate {
+	IB_RATE_PORT_CURRENT = 0,
+	IB_RATE_2_5_GBPS = 2,
+	IB_RATE_5_GBPS   = 5,
+	IB_RATE_10_GBPS  = 3,
+	IB_RATE_20_GBPS  = 6,
+	IB_RATE_30_GBPS  = 4,
+	IB_RATE_40_GBPS  = 7,
+	IB_RATE_60_GBPS  = 8,
+	IB_RATE_80_GBPS  = 9,
+	IB_RATE_120_GBPS = 10
+};
+
+/**
+ * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
+ * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
+ * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
+
+/**
+ * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
+ * enum.
+ * @mult: multiple to convert.
+ */
+enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
+
 struct ib_ah_attr {
 	struct ib_global_route	grh;
 	u16			dlid;


