[ewg] RE: [ofa-general] [PATCH 4/9] ib_core: Add RDMAoE SA support

Sean Hefty sean.hefty at intel.com
Tue Jun 16 10:27:33 PDT 2009


>diff --git a/drivers/infiniband/core/multicast.c
>b/drivers/infiniband/core/multicast.c
>index 107f170..2417f6b 100644
>--- a/drivers/infiniband/core/multicast.c
>+++ b/drivers/infiniband/core/multicast.c
>@@ -488,6 +488,36 @@ retest:
> 	}
> }
>
>+struct eth_work {
>+	struct work_struct	 work;
>+	struct mcast_member	*member;
>+	struct ib_device	*device;
>+	u8			 port_num;
>+};
>+
>+static void eth_mcast_work_handler(struct work_struct *work)
>+{
>+	struct eth_work *w = container_of(work, struct eth_work, work);
>+	int err;
>+	struct ib_port_attr port_attr;
>+	int status = 0;
>+
>+	err = ib_query_port(w->device, w->port_num, &port_attr);
>+	if (err)
>+		status = err;
>+	else if (port_attr.state != IB_PORT_ACTIVE)
>+		status = -EAGAIN;
>+
>+	w->member->multicast.rec.qkey = cpu_to_be32(0xc2c);

How can a user control this?  An app needs the same qkey for unicast traffic.

>+	atomic_inc(&w->member->refcount);

This needs to be moved below...

>+	err = w->member->multicast.callback(status, &w->member->multicast);
>+	deref_member(w->member);
>+	if (err)
>+		ib_sa_free_multicast(&w->member->multicast);
>+
>+	kfree(w);
>+}
>+
> /*
>  * Fail a join request if it is still active - at the head of the pending
>queue.
>  */
>@@ -586,21 +616,14 @@ found:
> 	return group;
> }
>
>-/*
>- * We serialize all join requests to a single group to make our lives much
>- * easier.  Otherwise, two users could try to join the same group
>- * simultaneously, with different configurations, one could leave while the
>- * join is in progress, etc., which makes locking around error recovery
>- * difficult.
>- */
>-struct ib_sa_multicast *
>-ib_sa_join_multicast(struct ib_sa_client *client,
>-		     struct ib_device *device, u8 port_num,
>-		     struct ib_sa_mcmember_rec *rec,
>-		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>-		     int (*callback)(int status,
>-				     struct ib_sa_multicast *multicast),
>-		     void *context)
>+static struct ib_sa_multicast *
>+ib_join_multicast(struct ib_sa_client *client,
>+		  struct ib_device *device, u8 port_num,
>+		  struct ib_sa_mcmember_rec *rec,
>+		  ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+		  int (*callback)(int status,
>+				  struct ib_sa_multicast *multicast),
>+		  void *context)
> {
> 	struct mcast_device *dev;
> 	struct mcast_member *member;
>@@ -647,9 +670,81 @@ err:
> 	kfree(member);
> 	return ERR_PTR(ret);
> }
>+
>+static struct ib_sa_multicast *
>+eth_join_multicast(struct ib_sa_client *client,
>+		   struct ib_device *device, u8 port_num,
>+		   struct ib_sa_mcmember_rec *rec,
>+		   ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+		   int (*callback)(int status,
>+				   struct ib_sa_multicast *multicast),
>+		   void *context)
>+{
>+	struct mcast_device *dev;
>+	struct eth_work *w;
>+	struct mcast_member *member;
>+	int err;
>+
>+	dev = ib_get_client_data(device, &mcast_client);
>+	if (!dev)
>+		return ERR_PTR(-ENODEV);
>+
>+	member = kzalloc(sizeof *member, gfp_mask);
>+	if (!member)
>+		return ERR_PTR(-ENOMEM);
>+
>+	w = kzalloc(sizeof *w, gfp_mask);
>+	if (!w) {
>+		err = -ENOMEM;
>+		goto out1;
>+	}
>+	w->member = member;
>+	w->device = device;
>+	w->port_num = port_num;
>+
>+	member->multicast.context = context;
>+	member->multicast.callback = callback;
>+	member->client = client;
>+	member->multicast.rec.mgid = rec->mgid;
>+	init_completion(&member->comp);
>+	atomic_set(&member->refcount, 1);
>+
>+	ib_sa_client_get(client);
>+	INIT_WORK(&w->work, eth_mcast_work_handler);
>+	queue_work(mcast_wq, &w->work);
>+
>+	return &member->multicast;

The user could leave/destroy the multicast join request before the queued work
item runs.  We need to hold an additional reference on the member until the work
item completes.

>+
>+out1:
>+	kfree(member);
>+	return ERR_PTR(err);
>+}
>+
>+/*
>+ * We serialize all join requests to a single group to make our lives much
>+ * easier.  Otherwise, two users could try to join the same group
>+ * simultaneously, with different configurations, one could leave while the
>+ * join is in progress, etc., which makes locking around error recovery
>+ * difficult.
>+ */
>+struct ib_sa_multicast *
>+ib_sa_join_multicast(struct ib_sa_client *client,
>+		     struct ib_device *device, u8 port_num,
>+		     struct ib_sa_mcmember_rec *rec,
>+		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
>+		     int (*callback)(int status,
>+				     struct ib_sa_multicast *multicast),
>+		     void *context)
>+{
>+	return ib_get_port_link_type(device, port_num) == PORT_LINK_IB ?
>+		ib_join_multicast(client, device, port_num, rec, comp_mask,
>+				  gfp_mask, callback, context) :
>+		eth_join_multicast(client, device, port_num, rec, comp_mask,
>+				  gfp_mask, callback, context);
>+}
> EXPORT_SYMBOL(ib_sa_join_multicast);

There's substantial differences in functionality between an IB multicast group
and the Ethernet group.  I would rather see the differences hidden by the
rdma_cm, than the IB SA module.

>diff --git a/drivers/infiniband/core/sa_query.c
>b/drivers/infiniband/core/sa_query.c
>index 1865049..7bf9b5c 100644
>--- a/drivers/infiniband/core/sa_query.c
>+++ b/drivers/infiniband/core/sa_query.c
>@@ -45,6 +45,7 @@

There's not a userspace interface into the sa_query module.  How will those apps
work, or apps that send MADs on QPs other than QP1?

- Sean




More information about the ewg mailing list