[ewg] [PATCH v3] libibmad: Handle MAD redirection

Joachim Fenkes fenkes at de.ibm.com
Wed Jul 1 06:34:56 PDT 2009


Previously, libibmad reacted to GSI MAD responses with a "redirect" status
by throwing an error. IBM eHCA adapters use redirection, so most
infiniband_diags tools didn't work against eHCA.

Fix: Modify mad_rpc() so that it resends the request to the redirection
target if a "redirect" GS response is received. This is repeated until no
"redirect" response is received, allowing for multiple levels of
indirection.

The dport argument is updated with the redirection target, so subsequent
MADs will not go through the redirection process again but reach the target
directly.

Tested using perfquery between ehca, mlx4 and mthca in all possible
combinations.

Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
---

Hi, Hal and Jason,

here's an updated patch that bails out on GID-routed redirection. I also
moved the redirection logic into its own function so that it can easily be
reused for RMPP as well.

Of course, I tested this again using ehca, mthca and mlx4.

If you have nothing to add to this patch, please queue it for OFED 1.5.

Thanks and regards,
  Joachim


 libibmad/include/infiniband/mad.h |    9 +++++
 libibmad/src/gs.c                 |    6 ++-
 libibmad/src/rpc.c                |   65 ++++++++++++++++++++++++++++--------
 3 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
index aa27eb5..bdf5158 100644
--- a/libibmad/include/infiniband/mad.h
+++ b/libibmad/include/infiniband/mad.h
@@ -115,6 +115,8 @@ enum MAD_ATTR_ID {
 
 enum MAD_STATUS {
 	IB_MAD_STS_OK                        = (0 << 2),
+	IB_MAD_STS_BUSY                      = (1 << 0),
+	IB_MAD_STS_REDIRECT                  = (1 << 1),
 	IB_MAD_STS_BAD_BASE_VER_OR_CLASS     = (1 << 2),
 	IB_MAD_STS_METHOD_NOT_SUPPORTED      = (2 << 2),
 	IB_MAD_STS_METHOD_ATTR_NOT_SUPPORTED = (3 << 2),
@@ -783,8 +785,15 @@ MAD_EXPORT int madrpc_set_timeout(int timeout);
 MAD_EXPORT struct ibmad_port *mad_rpc_open_port(char *dev_name, int dev_port,
 			int *mgmt_classes, int num_classes);
 MAD_EXPORT void mad_rpc_close_port(struct ibmad_port *srcport);
+
+/*
+ * On redirection, the dport argument is updated with the redirection target,
+ * so subsequent MADs will not go through the redirection process again but
+ * reach the target directly.
+ */
 MAD_EXPORT void *mad_rpc(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			ib_portid_t * dport, void *payload, void *rcvdata);
+
 MAD_EXPORT void *mad_rpc_rmpp(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			      ib_portid_t * dport, ib_rmpp_hdr_t * rmpp,
 			      void *data);
diff --git a/libibmad/src/gs.c b/libibmad/src/gs.c
index f3d245e..c7e4ff6 100644
--- a/libibmad/src/gs.c
+++ b/libibmad/src/gs.c
@@ -70,7 +70,8 @@ uint8_t *pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
 
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
@@ -109,7 +110,8 @@ uint8_t *performance_reset_via(void *rcvbuf, ib_portid_t * dest,
 	rpc.timeout = timeout;
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
index 07b623d..7364940 100644
--- a/libibmad/src/rpc.c
+++ b/libibmad/src/rpc.c
@@ -183,33 +183,68 @@ _do_madrpc(int port_id, void *sndbuf, void *rcvbuf, int agentid, int len,
 	return -1;
 }
 
+/* Retarget port at the LID-routed redirect address carried in mad.
+ * Returns 0, or -1 with port left unmodified if the redirect LID is 0. */
+static int redirect_port(ib_portid_t *port, uint8_t *mad)
+{
+	uint32_t lid = mad_get_field(mad, 64, IB_CPI_REDIRECT_LID_F);
+
+	if (!lid) {
+		IBWARN("GID-based redirection is not supported");
+		return -1;
+	}
+	port->lid = lid;	/* commit only after validation */
+	port->qp = mad_get_field(mad, 64, IB_CPI_REDIRECT_QP_F);
+	port->qkey = mad_get_field(mad, 64, IB_CPI_REDIRECT_QKEY_F);
+	port->sl = mad_get_field(mad, 64, IB_CPI_REDIRECT_SL_F);
+	/* TODO: Reverse map redirection P_Key to P_Key index */
+	if (ibdebug)
+		IBWARN("redirected to lid 0x%x, qp 0x%x, qkey 0x%x, sl 0x%x",
+		       port->lid, port->qp, port->qkey, port->sl);
+	return 0;
+}
+
 void *mad_rpc(const struct ibmad_port *port, ib_rpc_t * rpc,
 	      ib_portid_t * dport, void *payload, void *rcvdata)
 {
 	int status, len;
 	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
 	int timeout, retries;
+	int redirect = 1;
 
-	len = 0;
-	memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
+	while (redirect) {
+		len = 0;
+		memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
 
-	if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
-		return 0;
+		if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
+			return 0;
 
-	timeout = rpc->timeout ? rpc->timeout :
-	    port->timeout ? port->timeout : madrpc_timeout;
-	retries = port->retries ? port->retries : madrpc_retries;
+		timeout = rpc->timeout ? rpc->timeout :
+			port->timeout ? port->timeout : madrpc_timeout;
+		retries = port->retries ? port->retries : madrpc_retries;
 
-	if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
-			      port->class_agents[rpc->mgtclass],
-			      len, timeout, retries)) < 0) {
-		IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
-		return 0;
-	}
+		if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
+				      port->class_agents[rpc->mgtclass],
+				      len, timeout, retries)) < 0) {
+			IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
+			return 0;
+		}
 
-	mad = umad_get_mad(rcvbuf);
+		mad = umad_get_mad(rcvbuf);
+		status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+
+		/* check for exact match instead of only the redirect bit;
+		 * that way, weird statuses cause an error, too */
+		if (status == IB_MAD_STS_REDIRECT) {
+			/* update dport for next request and retry */
+			/* bail if redirection fails */
+			if (redirect_port(dport, mad))
+				redirect = 0;
+		} else
+			redirect = 0;
+	}
 
-	if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F)) != 0) {
+	if (status != 0) {
 		ERRS("MAD completed with error status 0x%x; dport (%s)",
 		     status, portid2str(dport));
 		return 0;
-- 
1.5.5






More information about the ewg mailing list