[openib-general] [PATCH 6/7 v2] for 2.6.20 rdma/cma: add multicast support through the rdma_cm
Sean Hefty
sean.hefty at intel.com
Tue Oct 24 15:54:32 PDT 2006
Add multicast QP support to the rdma_cm.
- Users identify multicast groups by using a multicast IP address.
Normal IP address translation services are used to map the address to
a local RDMA port.
- IB multicast group parameters are based on the ipoib broadcast group.
The MGID is derived using a method similar to ipoib, but with a
different signature.
- QPs are automatically attached and detached from groups.
- A QP may join multiple groups.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9bfd427..b83ea5d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -114,6 +114,7 @@ struct rdma_id_private {
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
+ struct list_head mc_list;
enum cma_state state;
spinlock_t lock;
@@ -136,6 +137,18 @@ struct rdma_id_private {
u8 srq;
};
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_sa_multicast *ib;
+ } multicast;
+ struct list_head list;
+ void *context;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
+};
+
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
@@ -323,6 +336,7 @@ struct rdma_cm_id *rdma_create_id(rdma_c
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
@@ -696,6 +710,19 @@ static void cma_release_port(struct rdma
mutex_unlock(&lock);
}
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ }
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -720,6 +747,7 @@ void rdma_destroy_id(struct rdma_cm_id *
default:
break;
}
+ cma_leave_mc_groups(id_priv);
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
}
@@ -2340,6 +2368,159 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_cm_event event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ atomic_inc(&id_priv->dev_remove);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ goto out;
+
+ if (!status && id_priv->id.qp)
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+
+ memset(&event, 0, sizeof event);
+ event.status = status;
+ event.param.ud.private_data = mc->context;
+ if (!status) {
+ event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num, &multicast->rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = 0xFFFFFF;
+ event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ } else
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ cma_release_remove(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+out:
+ cma_release_remove(id_priv);
+ return 0;
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
+
+ ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+ mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+
+ rec.mgid = *(union ib_gid *) (mc_map + 4);
+ ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+ rec.qkey = sin->sin_addr.s_addr;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_mc_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->addr, addr, ip_addr_size(addr));
+ mc->context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 81b6230..5bc318c 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -92,6 +92,12 @@ static inline void ib_addr_set_pkey(stru
dev_addr->broadcast[9] = (unsigned char) pkey;
}
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+ union ib_gid *gid)
+{
+ memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
{
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 595f1a7..9efbbdc 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -52,6 +52,8 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_ESTABLISHED,
RDMA_CM_EVENT_DISCONNECTED,
RDMA_CM_EVENT_DEVICE_REMOVAL,
+ RDMA_CM_EVENT_MULTICAST_JOIN,
+ RDMA_CM_EVENT_MULTICAST_ERROR
};
enum rdma_port_space {
@@ -289,5 +291,21 @@ int rdma_reject(struct rdma_cm_id *id, c
*/
int rdma_disconnect(struct rdma_cm_id *id);
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ * address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ * to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context);
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ * address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */
More information about the general
mailing list