[ewg] [PATCH 4/9] ib_core: Add RDMAoE SA support
Eli Cohen
eli at mellanox.co.il
Mon Jun 15 00:34:50 PDT 2009
Add support for resolving paths and joining multicast group for RDMAoE ports.
The Ethernet-specific code will complete immediately but will call the callback
from a workqueue context to avoid deadlocks.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
drivers/infiniband/core/multicast.c | 153 ++++++++++++++++++++++++++++----
drivers/infiniband/core/sa_query.c | 167 +++++++++++++++++++++++++++-------
2 files changed, 269 insertions(+), 51 deletions(-)
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 107f170..2417f6b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -488,6 +488,36 @@ retest:
}
}
+struct eth_work {
+ struct work_struct work;
+ struct mcast_member *member;
+ struct ib_device *device;
+ u8 port_num;
+};
+
+static void eth_mcast_work_handler(struct work_struct *work)
+{
+ struct eth_work *w = container_of(work, struct eth_work, work);
+ int err;
+ struct ib_port_attr port_attr;
+ int status = 0;
+
+ err = ib_query_port(w->device, w->port_num, &port_attr);
+ if (err)
+ status = err;
+ else if (port_attr.state != IB_PORT_ACTIVE)
+ status = -EAGAIN;
+
+ w->member->multicast.rec.qkey = cpu_to_be32(0xc2c);
+ atomic_inc(&w->member->refcount);
+ err = w->member->multicast.callback(status, &w->member->multicast);
+ deref_member(w->member);
+ if (err)
+ ib_sa_free_multicast(&w->member->multicast);
+
+ kfree(w);
+}
+
/*
* Fail a join request if it is still active - at the head of the pending queue.
*/
@@ -586,21 +616,14 @@ found:
return group;
}
-/*
- * We serialize all join requests to a single group to make our lives much
- * easier. Otherwise, two users could try to join the same group
- * simultaneously, with different configurations, one could leave while the
- * join is in progress, etc., which makes locking around error recovery
- * difficult.
- */
-struct ib_sa_multicast *
-ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_mcmember_rec *rec,
- ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
- int (*callback)(int status,
- struct ib_sa_multicast *multicast),
- void *context)
+static struct ib_sa_multicast *
+ib_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
{
struct mcast_device *dev;
struct mcast_member *member;
@@ -647,9 +670,81 @@ err:
kfree(member);
return ERR_PTR(ret);
}
+
+static struct ib_sa_multicast *
+eth_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
+{
+ struct mcast_device *dev;
+ struct eth_work *w;
+ struct mcast_member *member;
+ int err;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kzalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ w = kzalloc(sizeof *w, gfp_mask);
+ if (!w) {
+ err = -ENOMEM;
+ goto out1;
+ }
+ w->member = member;
+ w->device = device;
+ w->port_num = port_num;
+
+ member->multicast.context = context;
+ member->multicast.callback = callback;
+ member->client = client;
+ member->multicast.rec.mgid = rec->mgid;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+
+ ib_sa_client_get(client);
+ INIT_WORK(&w->work, eth_mcast_work_handler);
+ queue_work(mcast_wq, &w->work);
+
+ return &member->multicast;
+
+out1:
+ kfree(member);
+ return ERR_PTR(err);
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
+{
+ return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
+ ib_join_multicast(client, device, port_num, rec, comp_mask,
+ gfp_mask, callback, context) :
+ eth_join_multicast(client, device, port_num, rec, comp_mask,
+ gfp_mask, callback, context);
+}
EXPORT_SYMBOL(ib_sa_join_multicast);
-void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+static void ib_free_multicast(struct ib_sa_multicast *multicast)
{
struct mcast_member *member;
struct mcast_group *group;
@@ -678,6 +773,32 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
ib_sa_client_put(member->client);
kfree(member);
}
+
+static void eth_free_multicast(struct ib_sa_multicast *multicast)
+{
+ struct mcast_member *member;
+
+ member = container_of(multicast, struct mcast_member, multicast);
+
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+ struct mcast_member *member;
+ struct mcast_group *group;
+
+ member = container_of(multicast, struct mcast_member, multicast);
+ group = member->group;
+
+ if (!group)
+ eth_free_multicast(multicast);
+ else
+ ib_free_multicast(multicast);
+}
EXPORT_SYMBOL(ib_sa_free_multicast);
int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1865049..7bf9b5c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -45,6 +45,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
#include "sa.h"
MODULE_AUTHOR("Roland Dreier");
@@ -92,6 +93,8 @@ struct ib_sa_path_query {
void (*callback)(int, struct ib_sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
+ union ib_gid dgid;
+ union ib_gid sgid;
};
struct ib_sa_mcmember_query {
@@ -304,6 +307,9 @@ static const struct ib_field mcmember_rec_table[] = {
.struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
.field_name = "sa_service_rec:" #field
+
+static struct rdma_addr_client addr_client;
+
static const struct ib_field service_rec_table[] = {
{ SERVICE_REC_FIELD(id),
.offset_words = 0,
@@ -363,6 +369,11 @@ static void update_sm_ah(struct work_struct *work)
struct ib_port_attr port_attr;
struct ib_ah_attr ah_attr;
+
+ if (ib_get_port_link_type(port->agent->device, port->port_num) !=
+ PORT_LINK_IB)
+ return;
+
if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
printk(KERN_WARNING "Couldn't query port\n");
return;
@@ -626,41 +637,16 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
-/**
- * ib_sa_path_rec_get - Start a Path get query
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:Path Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send a Path Record Get query to the SA to look up a path. The
- * callback function will be called when the query completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query. The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_path_rec_get() is negative, it is an
- * error code. Otherwise it is a query ID that can be used to cancel
- * the query.
- */
-int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_path_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_path_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
struct ib_sa_path_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -717,6 +703,114 @@ err1:
kfree(query);
return ret;
}
+
+struct eth_work_container {
+ struct work_struct work;
+ struct ib_sa_path_query *query;
+};
+
+static void resolve_callback(struct work_struct *work)
+{
+ struct eth_work_container *eth =
+ container_of(work, struct eth_work_container, work);
+ struct ib_sa_path_query *query = eth->query;
+ struct ib_sa_path_rec res = {};
+
+ res.dgid = query->dgid;
+ res.sgid = query->sgid;
+ res.hop_limit = 2; /* TBD fix this */
+ res.mtu = IB_MTU_1024; /* TBD fix me */
+ query->callback(0, &res, query->context);
+
+ ib_sa_client_put(query->sa_query.client);
+}
+
+static int eth_path_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_path_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct eth_work_container *eth;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ query = kzalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ eth = kzalloc(sizeof *eth, gfp_mask);
+ if (!eth) {
+ kfree(query);
+ return -ENOMEM;
+ }
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+ query->dgid = rec->dgid;
+ query->sgid = rec->sgid;
+
+ *sa_query = &query->sa_query;
+
+ eth->query = query;
+ INIT_WORK(&eth->work, resolve_callback);
+ schedule_work(&eth->work);
+
+ return 0;
+}
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path. The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
+ ib_path_rec_get(client, device, port_num, rec, comp_mask,
+ timeout_ms, gfp_mask, callback, context, sa_query) :
+ eth_path_rec_get(client, device, port_num, rec, comp_mask,
+ timeout_ms, gfp_mask, callback, context, sa_query);
+}
EXPORT_SYMBOL(ib_sa_path_rec_get);
static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
@@ -1094,6 +1188,8 @@ static int __init ib_sa_init(void)
goto err2;
}
+ rdma_addr_register_client(&addr_client);
+
return 0;
err2:
ib_unregister_client(&sa_client);
@@ -1103,6 +1199,7 @@ err1:
static void __exit ib_sa_cleanup(void)
{
+ rdma_addr_unregister_client(&addr_client);
mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
--
1.6.3.1
More information about the ewg
mailing list