[ewg] [PATCH v4] libibmad: Handle MAD redirection

Joachim Fenkes fenkes at de.ibm.com
Tue Jul 7 07:20:19 PDT 2009


Previously, libibmad reacted to GSI MAD responses with a "redirect" status
by throwing an error. IBM eHCA adapters use redirection, so most
infiniband_diags tools didn't work against eHCA.

Fix: Modify mad_rpc() so that it resends the request to the redirection
target if a "redirect" GS response is received. This is repeated until no
"redirect" response is received, allowing for multiple levels of
indirection.

The dport argument is updated with the redirection target, so subsequent
MADs will not go through the redirection process again but reach the target
directly.

Tested using perfquery between ehca, mlx4 and mthca in all possible
combinations.

Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
---

After all has been said and done, here's the hopefully last iteration of the
patch, with the hex display of the redirect LID replaced by decimal.

Any objections against this patch?

Regards,
  Joachim

 libibmad/include/infiniband/mad.h |    9 +++++
 libibmad/src/gs.c                 |    6 ++-
 libibmad/src/rpc.c                |   65 ++++++++++++++++++++++++++++--------
 3 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
index aa27eb5..bdf5158 100644
--- a/libibmad/include/infiniband/mad.h
+++ b/libibmad/include/infiniband/mad.h
@@ -115,6 +115,8 @@ enum MAD_ATTR_ID {
 
 enum MAD_STATUS {
 	IB_MAD_STS_OK                        = (0 << 2),
+	IB_MAD_STS_BUSY                      = (1 << 0),
+	IB_MAD_STS_REDIRECT                  = (1 << 1),
 	IB_MAD_STS_BAD_BASE_VER_OR_CLASS     = (1 << 2),
 	IB_MAD_STS_METHOD_NOT_SUPPORTED      = (2 << 2),
 	IB_MAD_STS_METHOD_ATTR_NOT_SUPPORTED = (3 << 2),
@@ -783,8 +785,15 @@ MAD_EXPORT int madrpc_set_timeout(int timeout);
 MAD_EXPORT struct ibmad_port *mad_rpc_open_port(char *dev_name, int dev_port,
 			int *mgmt_classes, int num_classes);
 MAD_EXPORT void mad_rpc_close_port(struct ibmad_port *srcport);
+
+/*
+ * On redirection, the dport argument is updated with the redirection target,
+ * so subsequent MADs will not go through the redirection process again but
+ * reach the target directly.
+ */
 MAD_EXPORT void *mad_rpc(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			ib_portid_t * dport, void *payload, void *rcvdata);
+
 MAD_EXPORT void *mad_rpc_rmpp(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			      ib_portid_t * dport, ib_rmpp_hdr_t * rmpp,
 			      void *data);
diff --git a/libibmad/src/gs.c b/libibmad/src/gs.c
index f3d245e..c7e4ff6 100644
--- a/libibmad/src/gs.c
+++ b/libibmad/src/gs.c
@@ -70,7 +70,8 @@ uint8_t *pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
 
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
@@ -109,7 +110,8 @@ uint8_t *performance_reset_via(void *rcvbuf, ib_portid_t * dest,
 	rpc.timeout = timeout;
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
index 07b623d..efea1d3 100644
--- a/libibmad/src/rpc.c
+++ b/libibmad/src/rpc.c
@@ -183,33 +183,68 @@ _do_madrpc(int port_id, void *sndbuf, void *rcvbuf, int agentid, int len,
 	return -1;
 }
 
+static int redirect_port(ib_portid_t *port, uint8_t *mad)
+{
+	port->lid = mad_get_field(mad, 64, IB_CPI_REDIRECT_LID_F);
+	if (!port->lid) {
+		IBWARN("GID-based redirection is not supported");
+		return -1;
+	}
+
+	port->qp = mad_get_field(mad, 64, IB_CPI_REDIRECT_QP_F);
+	port->qkey = mad_get_field(mad, 64, IB_CPI_REDIRECT_QKEY_F);
+	port->sl = mad_get_field(mad, 64, IB_CPI_REDIRECT_SL_F);
+
+	/* TODO: Reverse map redirection P_Key to P_Key index */
+
+	if (ibdebug)
+		IBWARN("redirected to lid %d, qp 0x%x, qkey 0x%x, sl 0x%x",
+		       port->lid, port->qp, port->qkey, port->sl);
+
+	return 0;
+}
+
 void *mad_rpc(const struct ibmad_port *port, ib_rpc_t * rpc,
 	      ib_portid_t * dport, void *payload, void *rcvdata)
 {
 	int status, len;
 	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
 	int timeout, retries;
+	int redirect = 1;
 
-	len = 0;
-	memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
+	while (redirect) {
+		len = 0;
+		memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
 
-	if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
-		return 0;
+		if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
+			return 0;
 
-	timeout = rpc->timeout ? rpc->timeout :
-	    port->timeout ? port->timeout : madrpc_timeout;
-	retries = port->retries ? port->retries : madrpc_retries;
+		timeout = rpc->timeout ? rpc->timeout :
+			port->timeout ? port->timeout : madrpc_timeout;
+		retries = port->retries ? port->retries : madrpc_retries;
 
-	if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
-			      port->class_agents[rpc->mgtclass],
-			      len, timeout, retries)) < 0) {
-		IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
-		return 0;
-	}
+		if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
+				      port->class_agents[rpc->mgtclass],
+				      len, timeout, retries)) < 0) {
+			IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
+			return 0;
+		}
 
-	mad = umad_get_mad(rcvbuf);
+		mad = umad_get_mad(rcvbuf);
+		status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+
+		/* check for exact match instead of only the redirect bit;
+		 * that way, weird statuses cause an error, too */
+		if (status == IB_MAD_STS_REDIRECT) {
+			/* update dport for next request and retry */
+			/* bail if redirection fails */
+			if (redirect_port(dport, mad))
+				redirect = 0;
+		} else
+			redirect = 0;
+	}
 
-	if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F)) != 0) {
+	if (status != 0) {
 		ERRS("MAD completed with error status 0x%x; dport (%s)",
 		     status, portid2str(dport));
 		return 0;
-- 
1.5.5






More information about the ewg mailing list