[ofa-general] RE: impossibility to bind a device/port with the rdma-cm when the port is down

Sean Hefty sean.hefty at intel.com
Thu Feb 5 11:49:24 PST 2009


From: Yossi Etigin <yosefe at Voltaire.COM>

  When rdma_resolve_addr() is called while the relevant port is down, the
function fails and the rdma_cm id is not bound to the device. As a result, the
application has no device handle and cannot wait for the port to become active.
The function fails because IPoIB has not joined the multicast group, so the SA
has no multicast record from which to take a qkey.
  The proposed patch makes qkey resolution lazy: cma_set_qkey() sets
id_priv->qkey only if it has not been set yet, and is called just before the
qkey is actually needed.

Signed-off-by: Yossi Etigin <yosefe at voltaire.com>

Acked-by: Sean Hefty <sean.hefty at intel.com>
---
Roland, any objection to queuing this for 2.6.30?

> drivers/infiniband/core/cma.c |   41 +++++++++++++++++++++++++++--------------
> 1 file changed, 27 insertions(+), 14 deletions(-)
>
>Index: b/drivers/infiniband/core/cma.c
>===================================================================
>--- a/drivers/infiniband/core/cma.c	2009-02-04 20:40:20.000000000 +0200
>+++ b/drivers/infiniband/core/cma.c	2009-02-04 20:57:59.000000000 +0200
>@@ -296,21 +296,25 @@ static void cma_detach_from_dev(struct r
> 	id_priv->cma_dev = NULL;
> }
>
>-static int cma_set_qkey(struct ib_device *device, u8 port_num,
>-			enum rdma_port_space ps,
>-			struct rdma_dev_addr *dev_addr, u32 *qkey)
>+static int cma_set_qkey(struct rdma_id_private *id_priv)
> {
> 	struct ib_sa_mcmember_rec rec;
> 	int ret = 0;
>
>-	switch (ps) {
>+	if (id_priv->qkey)
>+		return;
>+
>+	switch (id_priv->id.ps) {
> 	case RDMA_PS_UDP:
>-		*qkey = RDMA_UDP_QKEY;
>+		id_priv->qkey = RDMA_UDP_QKEY;
> 		break;
> 	case RDMA_PS_IPOIB:
>-		ib_addr_get_mgid(dev_addr, &rec.mgid);
>-		ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
>-		*qkey = be32_to_cpu(rec.qkey);
>+		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
>+		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
>+		                             id_priv->id.port_num, &rec.mgid,
>+		                             &rec);
>+		if (!ret)
>+			id_priv->qkey = be32_to_cpu(rec.qkey);
> 		break;
> 	default:
> 		break;
>@@ -340,12 +344,7 @@ static int cma_acquire_dev(struct rdma_i
> 		ret = ib_find_cached_gid(cma_dev->device, &gid,
> 					 &id_priv->id.port_num, NULL);
> 		if (!ret) {
>-			ret = cma_set_qkey(cma_dev->device,
>-					   id_priv->id.port_num,
>-					   id_priv->id.ps, dev_addr,
>-					   &id_priv->qkey);
>-			if (!ret)
>-				cma_attach_to_dev(id_priv, cma_dev);
>+			cma_attach_to_dev(id_priv, cma_dev);
> 			break;
> 		}
> 	}
>@@ -577,6 +576,10 @@ static int cma_ib_init_qp_attr(struct rd
> 	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
>
> 	if (cma_is_ud_ps(id_priv->id.ps)) {
>+		ret = cma_set_qkey(id_priv);
>+		if (ret)
>+			return ret;
>+
> 		qp_attr->qkey = id_priv->qkey;
> 		*qp_attr_mask |= IB_QP_QKEY;
> 	} else {
>@@ -2167,6 +2170,12 @@ static int cma_sidr_rep_handler(struct i
> 			event.status = ib_event->param.sidr_rep_rcvd.status;
> 			break;
> 		}
>+		ret = cma_set_qkey(id_priv);
>+		if (ret) {
>+			event.event = RDMA_CM_EVENT_ADDR_ERROR;
>+			event.status = -EINVAL;
>+			break;
>+		}
> 		if (id_priv->qkey != rep->qkey) {
> 			event.event = RDMA_CM_EVENT_UNREACHABLE;
> 			event.status = -EINVAL;
>@@ -2446,10 +2455,14 @@ static int cma_send_sidr_rep(struct rdma
> 			     const void *private_data, int private_data_len)
> {
> 	struct ib_cm_sidr_rep_param rep;
>+	int ret;
>
> 	memset(&rep, 0, sizeof rep);
> 	rep.status = status;
> 	if (status == IB_SIDR_SUCCESS) {
>+		ret = cma_set_qkey(id_priv);
>+		if (ret)
>+			return ret;
> 		rep.qp_num = id_priv->qp_num;
> 		rep.qkey = id_priv->qkey;
> 	}




More information about the general mailing list