[ewg] [PATCH] libibmad: Handle MAD redirection

Joachim Fenkes fenkes at de.ibm.com
Mon Jun 29 05:10:30 PDT 2009


Previously, libibmad reacted to GSI MAD responses with a "redirect" status
by throwing an error. IBM eHCA adapters use redirection, so most
infiniband_diags tools didn't work against eHCA.

Fix: Modify mad_rpc() so that it resends the request to the redirection
target if a "redirect" GS response is received. This is repeated until no
"redirect" response is received, allowing for multiple levels of
indirection.

The dport argument is updated with the redirection target, so subsequent
MADs will not go through the redirection process again but reach the target
directly.

Tested using perfquery between ehca, mlx4 and mthca in all possible
combinations.

Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
---

Vlad, please queue this patch for OFED 1.5 if Hal doesn't object -- thanks!

Regards,
  Joachim

 libibmad/include/infiniband/mad.h |    9 +++++++
 libibmad/src/gs.c                 |    6 +++-
 libibmad/src/rpc.c                |   47 +++++++++++++++++++++++++-----------
 3 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
index aa27eb5..bdf5158 100644
--- a/libibmad/include/infiniband/mad.h
+++ b/libibmad/include/infiniband/mad.h
@@ -115,6 +115,8 @@ enum MAD_ATTR_ID {
 
 enum MAD_STATUS {
 	IB_MAD_STS_OK                        = (0 << 2),
+	IB_MAD_STS_BUSY                      = (1 << 0),
+	IB_MAD_STS_REDIRECT                  = (1 << 1),
 	IB_MAD_STS_BAD_BASE_VER_OR_CLASS     = (1 << 2),
 	IB_MAD_STS_METHOD_NOT_SUPPORTED      = (2 << 2),
 	IB_MAD_STS_METHOD_ATTR_NOT_SUPPORTED = (3 << 2),
@@ -783,8 +785,15 @@ MAD_EXPORT int madrpc_set_timeout(int timeout);
 MAD_EXPORT struct ibmad_port *mad_rpc_open_port(char *dev_name, int dev_port,
 			int *mgmt_classes, int num_classes);
 MAD_EXPORT void mad_rpc_close_port(struct ibmad_port *srcport);
+
+/*
+ * On redirection, the dport argument is updated with the redirection target,
+ * so subsequent MADs will not go through the redirection process again but
+ * reach the target directly.
+ */
 MAD_EXPORT void *mad_rpc(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			ib_portid_t * dport, void *payload, void *rcvdata);
+
 MAD_EXPORT void *mad_rpc_rmpp(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			      ib_portid_t * dport, ib_rmpp_hdr_t * rmpp,
 			      void *data);
diff --git a/libibmad/src/gs.c b/libibmad/src/gs.c
index f3d245e..c7e4ff6 100644
--- a/libibmad/src/gs.c
+++ b/libibmad/src/gs.c
@@ -70,7 +70,8 @@ uint8_t *pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
 
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
@@ -109,7 +110,8 @@ uint8_t *performance_reset_via(void *rcvbuf, ib_portid_t * dest,
 	rpc.timeout = timeout;
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
index 07b623d..c809060 100644
--- a/libibmad/src/rpc.c
+++ b/libibmad/src/rpc.c
@@ -189,27 +189,44 @@ void *mad_rpc(const struct ibmad_port *port, ib_rpc_t * rpc,
 	int status, len;
 	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
 	int timeout, retries;
+	int redirect = 1;
 
-	len = 0;
-	memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
+	while (redirect) {
+		len = 0;
+		memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
 
-	if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
-		return 0;
+		if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
+			return 0;
 
-	timeout = rpc->timeout ? rpc->timeout :
-	    port->timeout ? port->timeout : madrpc_timeout;
-	retries = port->retries ? port->retries : madrpc_retries;
+		timeout = rpc->timeout ? rpc->timeout :
+			port->timeout ? port->timeout : madrpc_timeout;
+		retries = port->retries ? port->retries : madrpc_retries;
 
-	if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
-			      port->class_agents[rpc->mgtclass],
-			      len, timeout, retries)) < 0) {
-		IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
-		return 0;
-	}
+		if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
+				      port->class_agents[rpc->mgtclass],
+				      len, timeout, retries)) < 0) {
+			IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
+			return 0;
+		}
 
-	mad = umad_get_mad(rcvbuf);
+		mad = umad_get_mad(rcvbuf);
+		status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+
+		/* check for exact match instead of only the redirect bit;
+		 * that way, weird statuses cause an error, too */
+		if (status == IB_MAD_STS_REDIRECT) {
+			/* update dport for next request and retry */
+			dport->lid = mad_get_field(mad, 64, IB_CPI_REDIRECT_LID_F);
+			dport->qp = mad_get_field(mad, 64, IB_CPI_REDIRECT_QP_F);
+			dport->qkey = mad_get_field(mad, 64, IB_CPI_REDIRECT_QKEY_F);
+			if (ibdebug)
+				IBWARN("redirected to lid 0x%x, qp 0x%x, qkey 0x%x, pkey 0x%x",
+				       dport->lid, dport->qp, dport->qkey, dport->pkey_idx);
+		} else
+			redirect = 0;
+	}
 
-	if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F)) != 0) {
+	if (status != 0) {
 		ERRS("MAD completed with error status 0x%x; dport (%s)",
 		     status, portid2str(dport));
 		return 0;
-- 
1.5.5






More information about the ewg mailing list