[openib-general] [RFC] [PATCH 7/7-but i can't count] rdma_cm 2.6.20: add multicast support

Sean Hefty sean.hefty at intel.com
Tue Oct 10 16:39:45 PDT 2006


Add multicast QP support to the rdma_cm.

- Users identify multicast groups by using a multicast IP address.
- IB multicast group parameters are based on the ipoib broadcast group.
  The MGID is derived using a method similar to ipoib, but with a
  different signature.
- QPs are automatically attached to and detached from groups.
- A QP may join multiple groups.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
This patch differs from the svn version because join data is reported with
the multicast event, rather than through a separate call.

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 19d91c8..4726292 100755
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -45,6 +45,7 @@ #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
+#include <rdma/ib_multicast.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -114,6 +115,7 @@ struct rdma_id_private {
 	struct list_head	list;
 	struct list_head	listen_list;
 	struct cma_device	*cma_dev;
+	struct list_head	mc_list;
 
 	enum cma_state		state;
 	spinlock_t		lock;
@@ -136,6 +138,18 @@ struct rdma_id_private {
 	u8			srq;
 };
 
+struct cma_multicast {	/* state for one multicast join, linked on rdma_id_private.mc_list */
+	struct rdma_id_private *id_priv;	/* id that issued the join */
+	union {
+		struct ib_multicast *ib;	/* value returned by ib_join_multicast() */
+	} multicast;
+	struct list_head	list;		/* entry in id_priv->mc_list */
+	void			*context;	/* caller's cookie, handed back as private_data in events */
+	struct sockaddr		addr;		/* copy of the group address given to rdma_join_multicast() */
+	u8			pad[sizeof(struct sockaddr_in6) -
+				    sizeof(struct sockaddr)];	/* extends addr storage to sizeof(struct sockaddr_in6) */
+};
+
 struct cma_work {
 	struct work_struct	work;
 	struct rdma_id_private	*id;
@@ -323,6 +337,7 @@ struct rdma_cm_id *rdma_create_id(rdma_c
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
 	INIT_LIST_HEAD(&id_priv->listen_list);
+	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 
 	return &id_priv->id;
@@ -696,6 +711,19 @@ static void cma_release_port(struct rdma
 	mutex_unlock(&lock);
 }
 
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)	/* free every join still on id_priv->mc_list */
+{
+	struct cma_multicast *mc;
+
+	while (!list_empty(&id_priv->mc_list)) {	/* pop the head entry until the list is empty */
+		mc = container_of(id_priv->mc_list.next,
+				  struct cma_multicast, list);
+		list_del(&mc->list);	/* NOTE(review): id_priv->lock is not taken and no ib_detach_mcast() is done here, unlike rdma_leave_multicast() */
+		ib_free_multicast(mc->multicast.ib);	/* NOTE(review): presumably leaves the group and stops further callbacks - confirm */
+		kfree(mc);
+	}
+}
+
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
@@ -720,6 +748,7 @@ void rdma_destroy_id(struct rdma_cm_id *
 		default:
 			break;
 		}
+		cma_leave_mc_groups(id_priv);
 		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
 	}
@@ -2333,6 +2362,159 @@ out:
 }
 EXPORT_SYMBOL(rdma_disconnect);
 
+static int cma_ib_mc_handler(int status, struct ib_multicast *multicast)	/* join-completion callback handed to ib_join_multicast() */
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc = multicast->context;	/* the cma_multicast passed as context at join time */
+	struct rdma_cm_event event;
+	int ret;
+
+	id_priv = mc->id_priv;
+	atomic_inc(&id_priv->dev_remove);	/* every exit path below calls cma_release_remove() */
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		goto out;	/* id is in some other state: report nothing to the user */
+
+	if (!status && id_priv->id.qp)	/* join succeeded and the id has a QP: attach it */
+		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+					 multicast->rec.mlid);	/* an attach failure becomes the event status */
+
+	memset(&event, 0, sizeof event);
+	event.status = status;
+	event.param.ud.private_data = mc->context;	/* caller's context from rdma_join_multicast() */
+	if (!status) {
+		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+		ib_init_ah_from_mcmember(id_priv->id.device,	/* NOTE(review): return value is ignored */
+					 id_priv->id.port_num, &multicast->rec,
+					 &event.param.ud.ah_attr);
+		event.param.ud.qp_num = 0xFFFFFF;	/* NOTE(review): presumably the IB multicast QPN - confirm */
+		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+	} else
+		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	if (ret) {	/* non-zero from the user's handler: tear the id down */
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return 0;
+	}
+out:
+	cma_release_remove(id_priv);
+	return 0;	/* always 0, whatever the join status was */
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,	/* start an IB join for mc->addr; 0 or a negative errno */
+				 struct cma_multicast *mc)
+{
+	struct ib_sa_mcmember_rec rec;
+	unsigned char mc_map[MAX_ADDR_LEN];
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr;	/* NOTE(review): address family is not checked before this cast */
+	ib_sa_comp_mask comp_mask;
+	int ret;
+
+	ib_addr_get_mgid(dev_addr, &rec.mgid);	/* MGID taken from the device's broadcast address */
+	ret = ib_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+				  &rec.mgid, &rec);	/* fills rec from that group's record; fields set below override it */
+	if (ret)
+		return ret;
+
+	ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);	/* map the IPv4 group address into mc_map */
+	mc_map[7] = 0x01;			/* Use RDMA CM signature */
+	mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;	/* pkey high byte */
+	mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);	/* pkey low byte */
+
+	rec.mgid = *(union ib_gid *) (mc_map + 4);	/* the MGID starts 4 bytes into mc_map */
+	ib_addr_get_sgid(dev_addr, &rec.port_gid);
+	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+	rec.join_state = 1;	/* NOTE(review): presumably full-member join state - confirm */
+	rec.qkey = sin->sin_addr.s_addr;	/* qkey is the raw s_addr value of the group address */
+
+	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |	/* record components passed to the join */
+		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
+		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+	mc->multicast.ib = ib_join_multicast(id_priv->id.device,
+					     id_priv->id.port_num, &rec,
+					     comp_mask, GFP_KERNEL,
+					     cma_ib_mc_handler, mc);	/* mc comes back as multicast->context in the handler */
+	if (IS_ERR(mc->multicast.ib))
+		return PTR_ERR(mc->multicast.ib);	/* join could not be started */
+
+	return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,	/* start joining the group named by addr */
+			void *context)	/* context is handed back as private_data in multicast events */
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+	int ret;	/* 0 once the join has been started, otherwise a negative errno */
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&	/* joins are only accepted on a bound or resolved id */
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		return -EINVAL;
+
+	mc = kmalloc(sizeof *mc, GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	memcpy(&mc->addr, addr, ip_addr_size(addr));	/* keep a private copy; rdma_leave_multicast() matches on it */
+	mc->context = context;
+	mc->id_priv = id_priv;
+
+	spin_lock_irq(&id_priv->lock);	/* same _irq locking as the error path below and rdma_leave_multicast() */
+	list_add(&mc->list, &id_priv->mc_list);	/* on the list before the join is issued */
+	spin_unlock_irq(&id_priv->lock);
+
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_join_ib_multicast(id_priv, mc);
+		break;
+	default:
+		ret = -ENOSYS;	/* only the IB transport supports multicast here */
+		break;
+	}
+
+	if (ret) {	/* join was not started: unlink and free the entry */
+		spin_lock_irq(&id_priv->lock);
+		list_del(&mc->list);
+		spin_unlock_irq(&id_priv->lock);
+		kfree(mc);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)	/* leave the first joined group whose address equals addr */
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	spin_lock_irq(&id_priv->lock);	/* mc_list is searched and edited under the id lock */
+	list_for_each_entry(mc, &id_priv->mc_list, list) {
+		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {	/* byte-wise match on the stored address */
+			list_del(&mc->list);
+			spin_unlock_irq(&id_priv->lock);	/* lock dropped before the detach and free calls */
+
+			if (id->qp)	/* only detach when the id has a QP */
+				ib_detach_mcast(id->qp,
+						&mc->multicast.ib->rec.mgid,
+						mc->multicast.ib->rec.mlid);
+			ib_free_multicast(mc->multicast.ib);
+			kfree(mc);
+			return;	/* at most one entry is removed per call */
+		}
+	}
+	spin_unlock_irq(&id_priv->lock);	/* no matching entry: nothing to do */
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 82d4736..c4c204d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -500,6 +500,36 @@ int ib_init_ah_from_path(struct ib_devic
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
 
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,	/* fill *ah_attr from an mcmember record */
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr)	/* returns 0, or the ib_find_cached_gid() error */
+{
+	int ret;
+	u16 gid_index;
+	u8 p;	/* port reported by the GID lookup; never read afterwards */
+
+	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);	/* look up the GID index of rec->port_gid */
+	if (ret)
+		return ret;	/* *ah_attr is left untouched on failure */
+
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = be16_to_cpu(rec->mlid);	/* destination LID is the group's MLID */
+	ah_attr->sl = rec->sl;
+	ah_attr->port_num = port_num;	/* caller's port_num is used, not the looked-up p */
+	ah_attr->static_rate = rec->rate;
+
+	ah_attr->ah_flags = IB_AH_GRH;	/* the GRH fields below are filled in */
+	ah_attr->grh.dgid = rec->mgid;
+
+	ah_attr->grh.sgid_index = (u8) gid_index;	/* u16 index narrowed to u8 */
+	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+	ah_attr->grh.hop_limit = rec->hop_limit;
+	ah_attr->grh.traffic_class = rec->traffic_class;
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 {
 	unsigned long flags;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 81b6230..5bc318c 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -92,6 +92,12 @@ static inline void ib_addr_set_pkey(stru
 	dev_addr->broadcast[9] = (unsigned char) pkey;
 }
 
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,	/* copy the MGID out of the device broadcast address */
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);	/* sizeof *gid bytes, starting 4 bytes into broadcast[] */
+}
+
 static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 				    union ib_gid *gid)
 {
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index e94656a..1c2ccc2 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -399,6 +399,14 @@ ib_sa_mcmember_rec_delete(struct ib_sa_c
 					context, query);
 }
 
+ /**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on an
+ *   SA mcmember record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr);
+
 /**
  * ib_init_ah_from_path - Initialize address handle attributes based on an SA
  *   path record.
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 595f1a7..9efbbdc 100755
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -52,6 +52,8 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_ESTABLISHED,
 	RDMA_CM_EVENT_DISCONNECTED,
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
+	RDMA_CM_EVENT_MULTICAST_JOIN,
+	RDMA_CM_EVENT_MULTICAST_ERROR
 };
 
 enum rdma_port_space {
@@ -289,5 +291,21 @@ int rdma_reject(struct rdma_cm_id *id, c
  */
 int rdma_disconnect(struct rdma_cm_id *id);
 
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ *   address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ * to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			void *context);
 
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ *   address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */





More information about the general mailing list