[ewg] [PATCH 4/9] ib_core: Add RDMAoE SA support

Eli Cohen eli at mellanox.co.il
Mon Jun 15 00:34:50 PDT 2009


Add support for resolving paths and joining multicast groups for RDMAoE ports.
The Ethernet-specific code will complete immediately but will call the callback
from a workqueue context to avoid deadlocks.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
 drivers/infiniband/core/multicast.c |  153 ++++++++++++++++++++++++++++----
 drivers/infiniband/core/sa_query.c  |  167 +++++++++++++++++++++++++++-------
 2 files changed, 269 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 107f170..2417f6b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -488,6 +488,36 @@ retest:
 	}
 }
 
+struct eth_work {
+	struct work_struct	 work;
+	struct mcast_member	*member;
+	struct ib_device	*device;
+	u8			 port_num;
+};
+
+static void eth_mcast_work_handler(struct work_struct *work)
+{
+	struct eth_work *w = container_of(work, struct eth_work, work);
+	int err;
+	struct ib_port_attr port_attr;
+	int status = 0;
+
+	err = ib_query_port(w->device, w->port_num, &port_attr);
+	if (err)
+		status = err;
+	else if (port_attr.state != IB_PORT_ACTIVE)
+		status = -EAGAIN;
+
+	w->member->multicast.rec.qkey = cpu_to_be32(0xc2c);
+	atomic_inc(&w->member->refcount);
+	err = w->member->multicast.callback(status, &w->member->multicast);
+	deref_member(w->member);
+	if (err)
+		ib_sa_free_multicast(&w->member->multicast);
+
+	kfree(w);
+}
+
 /*
  * Fail a join request if it is still active - at the head of the pending queue.
  */
@@ -586,21 +616,14 @@ found:
 	return group;
 }
 
-/*
- * We serialize all join requests to a single group to make our lives much
- * easier.  Otherwise, two users could try to join the same group
- * simultaneously, with different configurations, one could leave while the
- * join is in progress, etc., which makes locking around error recovery
- * difficult.
- */
-struct ib_sa_multicast *
-ib_sa_join_multicast(struct ib_sa_client *client,
-		     struct ib_device *device, u8 port_num,
-		     struct ib_sa_mcmember_rec *rec,
-		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
-		     int (*callback)(int status,
-				     struct ib_sa_multicast *multicast),
-		     void *context)
+static struct ib_sa_multicast *
+ib_join_multicast(struct ib_sa_client *client,
+		  struct ib_device *device, u8 port_num,
+		  struct ib_sa_mcmember_rec *rec,
+		  ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+		  int (*callback)(int status,
+				  struct ib_sa_multicast *multicast),
+		  void *context)
 {
 	struct mcast_device *dev;
 	struct mcast_member *member;
@@ -647,9 +670,81 @@ err:
 	kfree(member);
 	return ERR_PTR(ret);
 }
+
+static struct ib_sa_multicast *
+eth_join_multicast(struct ib_sa_client *client,
+		   struct ib_device *device, u8 port_num,
+		   struct ib_sa_mcmember_rec *rec,
+		   ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+		   int (*callback)(int status,
+				   struct ib_sa_multicast *multicast),
+		   void *context)
+{
+	struct mcast_device *dev;
+	struct eth_work *w;
+	struct mcast_member *member;
+	int err;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	member = kzalloc(sizeof *member, gfp_mask);
+	if (!member)
+		return ERR_PTR(-ENOMEM);
+
+	w = kzalloc(sizeof *w, gfp_mask);
+	if (!w) {
+		err = -ENOMEM;
+		goto out1;
+	}
+	w->member = member;
+	w->device = device;
+	w->port_num = port_num;
+
+	member->multicast.context = context;
+	member->multicast.callback = callback;
+	member->client = client;
+	member->multicast.rec.mgid = rec->mgid;
+	init_completion(&member->comp);
+	atomic_set(&member->refcount, 1);
+
+	ib_sa_client_get(client);
+	INIT_WORK(&w->work, eth_mcast_work_handler);
+	queue_work(mcast_wq, &w->work);
+
+	return &member->multicast;
+
+out1:
+	kfree(member);
+	return ERR_PTR(err);
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier.  Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+		     struct ib_device *device, u8 port_num,
+		     struct ib_sa_mcmember_rec *rec,
+		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+		     int (*callback)(int status,
+				     struct ib_sa_multicast *multicast),
+		     void *context)
+{
+	return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
+		ib_join_multicast(client, device, port_num, rec, comp_mask,
+				  gfp_mask, callback, context) :
+		eth_join_multicast(client, device, port_num, rec, comp_mask,
+				  gfp_mask, callback, context);
+}
 EXPORT_SYMBOL(ib_sa_join_multicast);
 
-void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+static void ib_free_multicast(struct ib_sa_multicast *multicast)
 {
 	struct mcast_member *member;
 	struct mcast_group *group;
@@ -678,6 +773,32 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
 	ib_sa_client_put(member->client);
 	kfree(member);
 }
+
+static void eth_free_multicast(struct ib_sa_multicast *multicast)
+{
+	struct mcast_member *member;
+
+	member = container_of(multicast, struct mcast_member, multicast);
+
+	deref_member(member);
+	wait_for_completion(&member->comp);
+	ib_sa_client_put(member->client);
+	kfree(member);
+}
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+	struct mcast_member *member;
+	struct mcast_group *group;
+
+	member = container_of(multicast, struct mcast_member, multicast);
+	group = member->group;
+
+	if (!group)
+		eth_free_multicast(multicast);
+	else
+		ib_free_multicast(multicast);
+}
 EXPORT_SYMBOL(ib_sa_free_multicast);
 
 int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1865049..7bf9b5c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -45,6 +45,7 @@
 
 #include <rdma/ib_pack.h>
 #include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
 #include "sa.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -92,6 +93,8 @@ struct ib_sa_path_query {
 	void (*callback)(int, struct ib_sa_path_rec *, void *);
 	void *context;
 	struct ib_sa_query sa_query;
+	union ib_gid dgid;
+	union ib_gid sgid;
 };
 
 struct ib_sa_mcmember_query {
@@ -304,6 +307,9 @@ static const struct ib_field mcmember_rec_table[] = {
 	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
 	.field_name          = "sa_service_rec:" #field
 
+
+static struct rdma_addr_client addr_client;
+
 static const struct ib_field service_rec_table[] = {
 	{ SERVICE_REC_FIELD(id),
 	  .offset_words = 0,
@@ -363,6 +369,11 @@ static void update_sm_ah(struct work_struct *work)
 	struct ib_port_attr port_attr;
 	struct ib_ah_attr   ah_attr;
 
+
+	if (ib_get_port_link_type(port->agent->device, port->port_num) !=
+	    PORT_LINK_IB)
+		return;
+
 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
 		printk(KERN_WARNING "Couldn't query port\n");
 		return;
@@ -626,41 +637,16 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
 }
 
-/**
- * ib_sa_path_rec_get - Start a Path get query
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:Path Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send a Path Record Get query to the SA to look up a path.  The
- * callback function will be called when the query completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query.  The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_path_rec_get() is negative, it is an
- * error code.  Otherwise it is a query ID that can be used to cancel
- * the query.
- */
-int ib_sa_path_rec_get(struct ib_sa_client *client,
-		       struct ib_device *device, u8 port_num,
-		       struct ib_sa_path_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct ib_sa_path_rec *resp,
-					void *context),
-		       void *context,
-		       struct ib_sa_query **sa_query)
+static int ib_path_rec_get(struct ib_sa_client *client,
+			   struct ib_device *device, u8 port_num,
+			   struct ib_sa_path_rec *rec,
+			   ib_sa_comp_mask comp_mask,
+			   int timeout_ms, gfp_t gfp_mask,
+			   void (*callback)(int status,
+					    struct ib_sa_path_rec *resp,
+					    void *context),
+			   void *context,
+			   struct ib_sa_query **sa_query)
 {
 	struct ib_sa_path_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -717,6 +703,114 @@ err1:
 	kfree(query);
 	return ret;
 }
+
+struct eth_work_container {
+	struct work_struct work;
+	struct ib_sa_path_query *query;
+};
+
+static void resolve_callback(struct work_struct *work)
+{
+	struct eth_work_container *eth =
+		container_of(work, struct eth_work_container, work);
+	struct ib_sa_path_query *query = eth->query;
+	struct ib_sa_path_rec res = {};
+
+	res.dgid = query->dgid;
+	res.sgid = query->sgid;
+	res.hop_limit = 2; /* TBD fix this */
+	res.mtu = IB_MTU_1024; /* TBD fix me */
+	query->callback(0, &res, query->context);
+
+	ib_sa_client_put(query->sa_query.client);
+}
+
+static int eth_path_rec_get(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num,
+			    struct ib_sa_path_rec *rec,
+			    ib_sa_comp_mask comp_mask,
+			    int timeout_ms, gfp_t gfp_mask,
+			    void (*callback)(int status,
+					     struct ib_sa_path_rec *resp,
+					     void *context),
+			    void *context,
+			    struct ib_sa_query **sa_query)
+{
+	struct ib_sa_path_query *query;
+	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+	struct eth_work_container *eth;
+
+	if (!sa_dev)
+		return -ENODEV;
+
+	query = kzalloc(sizeof *query, gfp_mask);
+	if (!query)
+		return -ENOMEM;
+
+	eth = kzalloc(sizeof *eth, gfp_mask);
+	if (!eth) {
+		kfree(query);
+		return -ENOMEM;
+	}
+
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
+	query->dgid	       = rec->dgid;
+	query->sgid	       = rec->sgid;
+
+	*sa_query = &query->sa_query;
+
+	eth->query = query;
+	INIT_WORK(&eth->work, resolve_callback);
+	schedule_work(&eth->work);
+
+	return 0;
+}
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path.  The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code.  Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
+		       struct ib_sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask,
+		       int timeout_ms, gfp_t gfp_mask,
+		       void (*callback)(int status,
+					struct ib_sa_path_rec *resp,
+					void *context),
+		       void *context,
+		       struct ib_sa_query **sa_query)
+{
+	return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
+		ib_path_rec_get(client, device, port_num, rec, comp_mask,
+				timeout_ms, gfp_mask, callback, context, sa_query) :
+		eth_path_rec_get(client, device, port_num, rec, comp_mask,
+				 timeout_ms, gfp_mask, callback, context, sa_query);
+}
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
 static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
@@ -1094,6 +1188,8 @@ static int __init ib_sa_init(void)
 		goto err2;
 	}
 
+	rdma_addr_register_client(&addr_client);
+
 	return 0;
 err2:
 	ib_unregister_client(&sa_client);
@@ -1103,6 +1199,7 @@ err1:
 
 static void __exit ib_sa_cleanup(void)
 {
+	rdma_addr_unregister_client(&addr_client);
 	mcast_cleanup();
 	ib_unregister_client(&sa_client);
 	idr_destroy(&query_idr);
-- 
1.6.3.1




More information about the ewg mailing list