[openib-general] [RFC] [PATCH 7/7-but i can't count] rdma_cm 2.6.20: add multicast support
Sean Hefty
sean.hefty at intel.com
Tue Oct 10 16:39:45 PDT 2006
Add multicast QP support to the rdma_cm.
- Users identify multicast groups by using a multicast IP address.
- IB multicast group parameters are based on the ipoib broadcast group.
The MGID is derived using a method similar to ipoib, but with a
different signature.
- QPs are automatically attached and detached from groups.
- A QP may join multiple groups.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
This patch differs from svn as a result of reporting data with the
multicast event, rather than through a separate call.
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 19d91c8..4726292 100755
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -45,6 +45,7 @@ #include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
+#include <rdma/ib_multicast.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -114,6 +115,7 @@ struct rdma_id_private {
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
+ struct list_head mc_list;
enum cma_state state;
spinlock_t lock;
@@ -136,6 +138,18 @@ struct rdma_id_private {
u8 srq;
};
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_multicast *ib;
+ } multicast;
+ struct list_head list;
+ void *context;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
+};
+
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
@@ -323,6 +337,7 @@ struct rdma_cm_id *rdma_create_id(rdma_c
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
@@ -696,6 +711,19 @@ static void cma_release_port(struct rdma
mutex_unlock(&lock);
}
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ ib_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ }
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -720,6 +748,7 @@ void rdma_destroy_id(struct rdma_cm_id *
default:
break;
}
+ cma_leave_mc_groups(id_priv);
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
}
@@ -2333,6 +2362,159 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static int cma_ib_mc_handler(int status, struct ib_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_cm_event event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ atomic_inc(&id_priv->dev_remove);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ goto out;
+
+ if (!status && id_priv->id.qp)
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+
+ memset(&event, 0, sizeof event);
+ event.status = status;
+ event.param.ud.private_data = mc->context;
+ if (!status) {
+ event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num, &multicast->rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = 0xFFFFFF;
+ event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ } else
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ cma_release_remove(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+out:
+ cma_release_remove(id_priv);
+ return 0;
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *) &mc->addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
+
+ ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+ mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+
+ rec.mgid = *(union ib_gid *) (mc_map + 4);
+ ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+ rec.qkey = sin->sin_addr.s_addr;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ mc->multicast.ib = ib_join_multicast(id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_mc_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->addr, addr, ip_addr_size(addr));
+ mc->context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ ib_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 82d4736..c4c204d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -500,6 +500,36 @@ int ib_init_ah_from_path(struct ib_devic
}
EXPORT_SYMBOL(ib_init_ah_from_path);
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ u16 gid_index;
+ u8 p;
+
+ ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ if (ret)
+ return ret;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(rec->mlid);
+ ah_attr->sl = rec->sl;
+ ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
+
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = rec->mgid;
+
+ ah_attr->grh.sgid_index = (u8) gid_index;
+ ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+ ah_attr->grh.hop_limit = rec->hop_limit;
+ ah_attr->grh.traffic_class = rec->traffic_class;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
unsigned long flags;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 81b6230..5bc318c 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -92,6 +92,12 @@ static inline void ib_addr_set_pkey(stru
dev_addr->broadcast[9] = (unsigned char) pkey;
}
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+ union ib_gid *gid)
+{
+ memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
{
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index e94656a..1c2ccc2 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -399,6 +399,14 @@ ib_sa_mcmember_rec_delete(struct ib_sa_c
context, query);
}
+ /**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on an
+ * SA mcmember record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr);
+
/**
* ib_init_ah_from_path - Initialize address handle attributes based on an SA
* path record.
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 595f1a7..9efbbdc 100755
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -52,6 +52,8 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_ESTABLISHED,
RDMA_CM_EVENT_DISCONNECTED,
RDMA_CM_EVENT_DEVICE_REMOVAL,
+ RDMA_CM_EVENT_MULTICAST_JOIN,
+ RDMA_CM_EVENT_MULTICAST_ERROR
};
enum rdma_port_space {
@@ -289,5 +291,21 @@ int rdma_reject(struct rdma_cm_id *id, c
*/
int rdma_disconnect(struct rdma_cm_id *id);
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ * address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ * to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context);
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ * address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */
More information about the general
mailing list