[openib-general] [PATCH 4/5] rdma cm: add support to join / leave multicast groups
Sean Hefty
sean.hefty at intel.com
Fri Jun 9 15:15:18 PDT 2006
Add an IB multicast abstraction to the CMA: rdma_join_multicast() and rdma_leave_multicast() calls, with RDMA_CM_EVENT_MULTICAST_JOIN / RDMA_CM_EVENT_MULTICAST_ERROR events reporting the outcome of a join.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
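For reference, a rough sketch of how a consumer could use the new calls with this patch applied. The handler and context names (mc_event_handler, my_ctx) and the 224.1.1.1 group address are illustrative only; the cm_id must have its address bound or resolved before joining, and the join result is reported back through the event's private data as a struct rdma_multicast_data:

static int mc_event_handler(struct rdma_cm_id *id,
			    struct rdma_cm_event *event)
{
	struct rdma_multicast_data *mc_data;

	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		mc_data = event->private_data;	/* group addr + user context */
		/* the group is joined; UD sends may now target it */
		break;
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		/* join failed or membership was lost; see event->status */
		break;
	default:
		break;
	}
	return 0;
}

	struct sockaddr_in mc_addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(0xe0010101),	/* 224.1.1.1 */
	};

	ret = rdma_join_multicast(id, (struct sockaddr *) &mc_addr, my_ctx);
	if (ret)
		goto out;
	/* ... */
	rdma_leave_multicast(id, (struct sockaddr *) &mc_addr);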
--- svn3/gen2/trunk/src/linux-kernel/infiniband/include/rdma/rdma_cm.h 2006-06-06 16:53:56.000000000 -0700
+++ svn/gen2/trunk/src/linux-kernel/infiniband/include/rdma/rdma_cm.h 2006-06-02 10:22:29.000000000 -0700
@@ -52,6 +52,8 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_ESTABLISHED,
RDMA_CM_EVENT_DISCONNECTED,
RDMA_CM_EVENT_DEVICE_REMOVAL,
+ RDMA_CM_EVENT_MULTICAST_JOIN,
+ RDMA_CM_EVENT_MULTICAST_ERROR
};
enum rdma_port_space {
@@ -77,6 +79,13 @@ struct rdma_route {
int num_paths;
};
+struct rdma_multicast_data {
+ void *context;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
+};
+
struct rdma_cm_event {
enum rdma_cm_event_type event;
int status;
@@ -258,5 +267,20 @@ int rdma_reject(struct rdma_cm_id *id, c
*/
int rdma_disconnect(struct rdma_cm_id *id);
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ * address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context);
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ * address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */
--- svn3/gen2/trunk/src/linux-kernel/infiniband/core/cma.c 2006-06-06 19:30:12.000000000 -0700
+++ svn/gen2/trunk/src/linux-kernel/infiniband/core/cma.c 2006-06-06 16:12:42.000000000 -0700
@@ -43,6 +43,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_local_sa.h>
+#include <rdma/ib_multicast.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -111,6 +112,7 @@ struct rdma_id_private {
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
+ struct list_head mc_list;
enum cma_state state;
spinlock_t lock;
@@ -137,6 +139,15 @@ struct rdma_id_private {
u8 srq;
};
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_multicast *ib;
+ } multicast;
+ struct list_head list;
+ struct rdma_multicast_data data;
+};
+
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
@@ -328,6 +339,7 @@ struct rdma_cm_id* rdma_create_id(rdma_c
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
@@ -474,6 +486,32 @@ int rdma_init_qp_attr(struct rdma_cm_id
}
EXPORT_SYMBOL(rdma_init_qp_attr);
+static int cma_get_ib_mc_attr(struct rdma_id_private *id_priv,
+ struct sockaddr *addr,
+ struct ib_ah_attr *ah_attr, uint32_t *remote_qpn,
+ uint32_t *remote_qkey)
+{
+ struct cma_multicast *mc;
+ unsigned long flags;
+ int ret = -EADDRNOTAVAIL;
+
+ spin_lock_irqsave(&id_priv->lock, flags);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->data.addr, addr, ip_addr_size(addr))) {
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num,
+ &mc->multicast.ib->rec,
+ ah_attr);
+ *remote_qpn = 0xFFFFFF;
+ *remote_qkey = be32_to_cpu(mc->multicast.ib->rec.qkey);
+ ret = 0;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+
int rdma_get_dst_attr(struct rdma_cm_id *id, struct sockaddr *addr,
struct ib_ah_attr *ah_attr, u32 *remote_qpn,
u32 *remote_qkey)
@@ -484,7 +522,10 @@ int rdma_get_dst_attr(struct rdma_cm_id
id_priv = container_of(id, struct rdma_id_private, id);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (!memcmp(&id->route.addr.dst_addr, addr, ip_addr_size(addr)))
+ ret = cma_get_ib_mc_attr(id_priv, addr, ah_attr,
+ remote_qpn, remote_qkey);
+ if (ret && id_priv->cm_id.ib &&
+ !memcmp(&id->route.addr.dst_addr, addr, ip_addr_size(addr)))
ret = ib_cm_get_dst_attr(id_priv->cm_id.ib, ah_attr,
remote_qpn, remote_qkey);
break;
@@ -718,6 +759,19 @@ static void cma_release_port(struct rdma
mutex_unlock(&lock);
}
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ ib_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ }
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -736,6 +790,7 @@ void rdma_destroy_id(struct rdma_cm_id *
default:
break;
}
+ cma_leave_mc_groups(id_priv);
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@@ -2053,6 +2108,150 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static int cma_ib_join_handler(int status, struct ib_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ enum rdma_cm_event_type event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ atomic_inc(&id_priv->dev_remove);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ goto out;
+
+ if (!status && id_priv->id.qp) {
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+ }
+
+ event = status ? RDMA_CM_EVENT_MULTICAST_ERROR :
+ RDMA_CM_EVENT_MULTICAST_JOIN;
+
+ ret = cma_notify_user(id_priv, event, status, &mc->data,
+ sizeof mc->data);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ cma_release_remove(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+out:
+ cma_release_remove(id_priv);
+ return 0;
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *) &mc->data.addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ret = ib_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ ib_addr_get_mgid(dev_addr), &rec);
+ if (ret)
+ return ret;
+
+ ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+ mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+
+ rec.mgid = *(union ib_gid *) (mc_map + 4);
+ rec.port_gid = *ib_addr_get_sgid(dev_addr);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+ rec.qkey = sin->sin_addr.s_addr;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ mc->multicast.ib = ib_join_multicast(id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_join_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->data.addr, addr, ip_addr_size(addr));
+ mc->data.context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->data.addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ ib_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
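
One more usage note on the rdma_get_dst_attr() change above: once the join event has arrived, a UD consumer can resolve the send attributes for the group address (the remote QPN comes back as 0xFFFFFF and the qkey from the mcmember record). A minimal sketch, reusing mc_addr from the earlier example and assuming a UD QP is attached to the cm_id; error handling omitted:

	struct ib_ah_attr ah_attr;
	struct ib_ah *ah;
	u32 remote_qpn, remote_qkey;

	ret = rdma_get_dst_attr(id, (struct sockaddr *) &mc_addr,
				&ah_attr, &remote_qpn, &remote_qkey);
	if (!ret) {
		ah = ib_create_ah(id->qp->pd, &ah_attr);
		/* post UD sends to the group using ah, remote_qpn, remote_qkey */
	}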