[ewg] RE: [ofa-general] [PATCH 4/9] ib_core: Add RDMAoE SA support
Sean Hefty
sean.hefty at intel.com
Tue Jun 16 10:27:33 PDT 2009
>diff --git a/drivers/infiniband/core/multicast.c
>b/drivers/infiniband/core/multicast.c
>index 107f170..2417f6b 100644
>--- a/drivers/infiniband/core/multicast.c
>+++ b/drivers/infiniband/core/multicast.c
>@@ -488,6 +488,36 @@ retest:
> }
> }
>
>+struct eth_work {
>+ struct work_struct work;
>+ struct mcast_member *member;
>+ struct ib_device *device;
>+ u8 port_num;
>+};
>+
>+static void eth_mcast_work_handler(struct work_struct *work)
>+{
>+ struct eth_work *w = container_of(work, struct eth_work, work);
>+ int err;
>+ struct ib_port_attr port_attr;
>+ int status = 0;
>+
>+ err = ib_query_port(w->device, w->port_num, &port_attr);
>+ if (err)
>+ status = err;
>+ else if (port_attr.state != IB_PORT_ACTIVE)
>+ status = -EAGAIN;
>+
>+ w->member->multicast.rec.qkey = cpu_to_be32(0xc2c);
How can a user control this? An app needs the same qkey for unicast traffic.
>+ atomic_inc(&w->member->refcount);
This needs to be moved below...
>+ err = w->member->multicast.callback(status, &w->member->multicast);
>+ deref_member(w->member);
>+ if (err)
>+ ib_sa_free_multicast(&w->member->multicast);
>+
>+ kfree(w);
>+}
>+
> /*
> * Fail a join request if it is still active - at the head of the pending
>queue.
> */
>@@ -586,21 +616,14 @@ found:
> return group;
> }
>
>-/*
>- * We serialize all join requests to a single group to make our lives much
>- * easier. Otherwise, two users could try to join the same group
>- * simultaneously, with different configurations, one could leave while the
>- * join is in progress, etc., which makes locking around error recovery
>- * difficult.
>- */
>-struct ib_sa_multicast *
>-ib_sa_join_multicast(struct ib_sa_client *client,
>- struct ib_device *device, u8 port_num,
>- struct ib_sa_mcmember_rec *rec,
>- ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>- int (*callback)(int status,
>- struct ib_sa_multicast *multicast),
>- void *context)
>+static struct ib_sa_multicast *
>+ib_join_multicast(struct ib_sa_client *client,
>+ struct ib_device *device, u8 port_num,
>+ struct ib_sa_mcmember_rec *rec,
>+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+ int (*callback)(int status,
>+ struct ib_sa_multicast *multicast),
>+ void *context)
> {
> struct mcast_device *dev;
> struct mcast_member *member;
>@@ -647,9 +670,81 @@ err:
> kfree(member);
> return ERR_PTR(ret);
> }
>+
>+static struct ib_sa_multicast *
>+eth_join_multicast(struct ib_sa_client *client,
>+ struct ib_device *device, u8 port_num,
>+ struct ib_sa_mcmember_rec *rec,
>+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+ int (*callback)(int status,
>+ struct ib_sa_multicast *multicast),
>+ void *context)
>+{
>+ struct mcast_device *dev;
>+ struct eth_work *w;
>+ struct mcast_member *member;
>+ int err;
>+
>+ dev = ib_get_client_data(device, &mcast_client);
>+ if (!dev)
>+ return ERR_PTR(-ENODEV);
>+
>+ member = kzalloc(sizeof *member, gfp_mask);
>+ if (!member)
>+ return ERR_PTR(-ENOMEM);
>+
>+ w = kzalloc(sizeof *w, gfp_mask);
>+ if (!w) {
>+ err = -ENOMEM;
>+ goto out1;
>+ }
>+ w->member = member;
>+ w->device = device;
>+ w->port_num = port_num;
>+
>+ member->multicast.context = context;
>+ member->multicast.callback = callback;
>+ member->client = client;
>+ member->multicast.rec.mgid = rec->mgid;
>+ init_completion(&member->comp);
>+ atomic_set(&member->refcount, 1);
>+
>+ ib_sa_client_get(client);
>+ INIT_WORK(&w->work, eth_mcast_work_handler);
>+ queue_work(mcast_wq, &w->work);
>+
>+ return &member->multicast;
The user could leave/destroy the multicast join request before the queued work
item runs. We need to hold an additional reference on the member until the work
item completes.
>+
>+out1:
>+ kfree(member);
>+ return ERR_PTR(err);
>+}
>+
>+/*
>+ * We serialize all join requests to a single group to make our lives much
>+ * easier. Otherwise, two users could try to join the same group
>+ * simultaneously, with different configurations, one could leave while the
>+ * join is in progress, etc., which makes locking around error recovery
>+ * difficult.
>+ */
>+struct ib_sa_multicast *
>+ib_sa_join_multicast(struct ib_sa_client *client,
>+ struct ib_device *device, u8 port_num,
>+ struct ib_sa_mcmember_rec *rec,
>+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+ int (*callback)(int status,
>+ struct ib_sa_multicast *multicast),
>+ void *context)
>+{
>+ return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
>+ ib_join_multicast(client, device, port_num, rec, comp_mask,
>+ gfp_mask, callback, context) :
>+ eth_join_multicast(client, device, port_num, rec, comp_mask,
>+ gfp_mask, callback, context);
>+}
> EXPORT_SYMBOL(ib_sa_join_multicast);
There's substantial differences in functionality between an IB multicast group
and the Ethernet group. I would rather see the differences hidden by the
rdma_cm, than the IB SA module.
>diff --git a/drivers/infiniband/core/sa_query.c
>b/drivers/infiniband/core/sa_query.c
>index 1865049..7bf9b5c 100644
>--- a/drivers/infiniband/core/sa_query.c
>+++ b/drivers/infiniband/core/sa_query.c
>@@ -45,6 +45,7 @@
There's not a userspace interface into the sa_query module. How will those apps
work, or apps that send MADs on QPs other than QP1?
- Sean
More information about the ewg
mailing list