[ewg] [PATCH v2] libibmad: Handle MAD redirection

Joachim Fenkes fenkes at de.ibm.com
Tue Jun 30 09:53:41 PDT 2009


Previously, libibmad reacted to GSI MAD responses with a "redirect" status
by throwing an error. IBM eHCA adapters use redirection, so most
infiniband_diags tools didn't work against eHCA.

Fix: Modify mad_rpc() so that it resends the request to the redirection
target if a "redirect" GS response is received. This is repeated until no
"redirect" response is received, allowing for multiple levels of
indirection.

The dport argument is updated with the redirection target, so subsequent
MADs will not go through the redirection process again but reach the target
directly.

Tested using perfquery between ehca, mlx4 and mthca in all possible
combinations.

Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
---

On Tuesday 30 June 2009 16:14, Hal Rosenstock wrote:
> On Tue, Jun 30, 2009 at 8:04 AM, Joachim Fenkes<fenkes at de.ibm.com> wrote:
> > On Tuesday 30 June 2009 00:01, Hal Rosenstock wrote:
> >> On Mon, Jun 29, 2009 at 8:10 AM, Joachim Fenkes<fenkes at de.ibm.com> wrote:

> >> Are there GS classes other than PerfMgt which would be redirected by eHCA ?
> >
> > Not right now, no. If you're interested in the details of how and when the
> > eHCA driver redirects, please have a look at drivers/infiniband/hw/ehca/ehca_sqp.c.
> 
> SL is always set to 0 and RespTimeValue is hardcoded.

The hypervisor only gives us a QP#, and we hardcode the rest because it
never changes.
 
> > so I left it at zero. Incidentally, there isn't a single
> > code line in management.git that actually changes the pkey_index from its
> > init value of 0, so I figured that omission couldn't be too bad.
> 
> Agreed. I think you're referring to infiniband-diags and not opensm.

Yes, sorry -- none of {infiniband_diags,libibumad,libibmad} make any effort
of setting up the pkey_index or even a default value for it, so I thought
"why bother" =)
 
Here's an updated patch with the SL set and a comment stating that we should
look at the P_Key one day. What do you think of it?

Regards,
  Joachim


 libibmad/include/infiniband/mad.h |    9 +++++++
 libibmad/src/gs.c                 |    6 +++-
 libibmad/src/rpc.c                |   49 +++++++++++++++++++++++++-----------
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
index aa27eb5..bdf5158 100644
--- a/libibmad/include/infiniband/mad.h
+++ b/libibmad/include/infiniband/mad.h
@@ -115,6 +115,8 @@ enum MAD_ATTR_ID {
 
 enum MAD_STATUS {
 	IB_MAD_STS_OK                        = (0 << 2),
+	IB_MAD_STS_BUSY                      = (1 << 0),
+	IB_MAD_STS_REDIRECT                  = (1 << 1),
 	IB_MAD_STS_BAD_BASE_VER_OR_CLASS     = (1 << 2),
 	IB_MAD_STS_METHOD_NOT_SUPPORTED      = (2 << 2),
 	IB_MAD_STS_METHOD_ATTR_NOT_SUPPORTED = (3 << 2),
@@ -783,8 +785,15 @@ MAD_EXPORT int madrpc_set_timeout(int timeout);
 MAD_EXPORT struct ibmad_port *mad_rpc_open_port(char *dev_name, int dev_port,
 			int *mgmt_classes, int num_classes);
 MAD_EXPORT void mad_rpc_close_port(struct ibmad_port *srcport);
+
+/*
+ * On redirection, the dport argument is updated with the redirection target,
+ * so subsequent MADs will not go through the redirection process again but
+ * reach the target directly.
+ */
 MAD_EXPORT void *mad_rpc(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			ib_portid_t * dport, void *payload, void *rcvdata);
+
 MAD_EXPORT void *mad_rpc_rmpp(const struct ibmad_port *srcport, ib_rpc_t * rpc,
 			      ib_portid_t * dport, ib_rmpp_hdr_t * rmpp,
 			      void *data);
diff --git a/libibmad/src/gs.c b/libibmad/src/gs.c
index f3d245e..c7e4ff6 100644
--- a/libibmad/src/gs.c
+++ b/libibmad/src/gs.c
@@ -70,7 +70,8 @@ uint8_t *pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
 
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
@@ -109,7 +110,8 @@ uint8_t *performance_reset_via(void *rcvbuf, ib_portid_t * dest,
 	rpc.timeout = timeout;
 	rpc.datasz = IB_PC_DATA_SZ;
 	rpc.dataoffs = IB_PC_DATA_OFFS;
-	dest->qp = 1;
+	if (!dest->qp)
+		dest->qp = 1;
 	if (!dest->qkey)
 		dest->qkey = IB_DEFAULT_QP1_QKEY;
 
diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
index 07b623d..f2e6d5f 100644
--- a/libibmad/src/rpc.c
+++ b/libibmad/src/rpc.c
@@ -189,27 +189,46 @@ void *mad_rpc(const struct ibmad_port *port, ib_rpc_t * rpc,
 	int status, len;
 	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
 	int timeout, retries;
+	int redirect = 1;
 
-	len = 0;
-	memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
+	while (redirect) {
+		len = 0;
+		memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
 
-	if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
-		return 0;
+		if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
+			return 0;
 
-	timeout = rpc->timeout ? rpc->timeout :
-	    port->timeout ? port->timeout : madrpc_timeout;
-	retries = port->retries ? port->retries : madrpc_retries;
+		timeout = rpc->timeout ? rpc->timeout :
+			port->timeout ? port->timeout : madrpc_timeout;
+		retries = port->retries ? port->retries : madrpc_retries;
 
-	if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
-			      port->class_agents[rpc->mgtclass],
-			      len, timeout, retries)) < 0) {
-		IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
-		return 0;
-	}
+		if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
+				      port->class_agents[rpc->mgtclass],
+				      len, timeout, retries)) < 0) {
+			IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
+			return 0;
+		}
 
-	mad = umad_get_mad(rcvbuf);
+		mad = umad_get_mad(rcvbuf);
+		status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+
+		/* check for exact match instead of only the redirect bit;
+		 * that way, weird statuses cause an error, too */
+		if (status == IB_MAD_STS_REDIRECT) {
+			/* update dport for next request and retry */
+			dport->lid = mad_get_field(mad, 64, IB_CPI_REDIRECT_LID_F);
+			dport->qp = mad_get_field(mad, 64, IB_CPI_REDIRECT_QP_F);
+			dport->qkey = mad_get_field(mad, 64, IB_CPI_REDIRECT_QKEY_F);
+			dport->sl = mad_get_field(mad, 64, IB_CPI_REDIRECT_SL_F);
+			/* TODO: Reverse map redirection P_Key to P_Key index */
+			if (ibdebug)
+				IBWARN("redirected to lid 0x%x, qp 0x%x, qkey 0x%x, pkey 0x%x",
+				       dport->lid, dport->qp, dport->qkey, dport->pkey_idx);
+		} else
+			redirect = 0;
+	}
 
-	if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F)) != 0) {
+	if (status != 0) {
 		ERRS("MAD completed with error status 0x%x; dport (%s)",
 		     status, portid2str(dport));
 		return 0;
-- 
1.5.5






More information about the ewg mailing list