[ewg] Re: [PATCH v2] libibmad: Handle MAD redirection

Hal Rosenstock hal.rosenstock at gmail.com
Tue Jun 30 11:44:47 PDT 2009


On Tue, Jun 30, 2009 at 12:53 PM, Joachim Fenkes<fenkes at de.ibm.com> wrote:
> Previously, libibmad reacted to GSI MAD responses with a "redirect" status
> by throwing an error. IBM eHCA adapters use redirection, so most
> infiniband_diags tools didn't work against eHCA.
>
> Fix: Modify mad_rpc() so that it resends the request to the redirection
> target if a "redirect" GS response is received. This is repeated until no
> "redirect" response is received, allowing for multiple levels of
> indirection.
>
> The dport argument is updated with the redirection target, so subsequent
> MADs will not go through the redirection process again but reach the target
> directly.
>
> Tested using perfquery between ehca, mlx4 and mthca in all possible
> combinations.
>
> Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
> ---
>
> On Tuesday 30 June 2009 16:14, Hal Rosenstock wrote:
>> On Tue, Jun 30, 2009 at 8:04 AM, Joachim Fenkes<fenkes at de.ibm.com> wrote:
>> > On Tuesday 30 June 2009 00:01, Hal Rosenstock wrote:
>> >> On Mon, Jun 29, 2009 at 8:10 AM, Joachim Fenkes<fenkes at de.ibm.com> wrote:
>
>> >> Are there GS classes other than PerfMgt which would be redirected by eHCA ?
>> >
>> > Not right now, no. If you're interested in the details of how and when the
>> > eHCA driver redirects, please have a look at drivers/infiniband/hw/ehca/ehca_sqp.c.
>>
>> SL is always set to 0 and RespTimeValue is hardcoded.
>
> The hypervisor only gives us a QP#, and we hardcode the rest because it
> never changes.

I don't think that SL nor RespTimeValue will work in all cases. I
think there are better ways to allow for more flexibility here.

>> > so I left it at zero. Incidentally, there isn't a single
>> > code line in management.git that actually changes the pkey_index from its
>> > init value of 0, so I figured that omission couldn't be too bad.
>>
>> Agreed. I think you're referring to infiniband-diags and not opensm.
>
> Yes, sorry -- none of {infiniband_diags,libibumad,libibmad} make any effort
> of setting up the pkey_index or even a default value for it, so I thought
> "why bother" =)
>
> Here's an updated patch with the SL set and a comment stating that we should
> look at the P_Key one day. What do you think of it?

Looks OK to me with one comment below.

I think the eHCA redirector side can be improved too.

-- Hal

> Regards,
>  Joachim
>
>
>  libibmad/include/infiniband/mad.h |    9 +++++++
>  libibmad/src/gs.c                 |    6 +++-
>  libibmad/src/rpc.c                |   49 +++++++++++++++++++++++++-----------
>  3 files changed, 47 insertions(+), 17 deletions(-)
>
> diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
> index aa27eb5..bdf5158 100644
> --- a/libibmad/include/infiniband/mad.h
> +++ b/libibmad/include/infiniband/mad.h
> @@ -115,6 +115,8 @@ enum MAD_ATTR_ID {
>
>  enum MAD_STATUS {
>        IB_MAD_STS_OK                        = (0 << 2),
> +       IB_MAD_STS_BUSY                      = (1 << 0),
> +       IB_MAD_STS_REDIRECT                  = (1 << 1),
>        IB_MAD_STS_BAD_BASE_VER_OR_CLASS     = (1 << 2),
>        IB_MAD_STS_METHOD_NOT_SUPPORTED      = (2 << 2),
>        IB_MAD_STS_METHOD_ATTR_NOT_SUPPORTED = (3 << 2),
> @@ -783,8 +785,15 @@ MAD_EXPORT int madrpc_set_timeout(int timeout);
>  MAD_EXPORT struct ibmad_port *mad_rpc_open_port(char *dev_name, int dev_port,
>                        int *mgmt_classes, int num_classes);
>  MAD_EXPORT void mad_rpc_close_port(struct ibmad_port *srcport);
> +
> +/*
> + * On redirection, the dport argument is updated with the redirection target,
> + * so subsequent MADs will not go through the redirection process again but
> + * reach the target directly.
> + */
>  MAD_EXPORT void *mad_rpc(const struct ibmad_port *srcport, ib_rpc_t * rpc,
>                        ib_portid_t * dport, void *payload, void *rcvdata);
> +
>  MAD_EXPORT void *mad_rpc_rmpp(const struct ibmad_port *srcport, ib_rpc_t * rpc,
>                              ib_portid_t * dport, ib_rmpp_hdr_t * rmpp,
>                              void *data);
> diff --git a/libibmad/src/gs.c b/libibmad/src/gs.c
> index f3d245e..c7e4ff6 100644
> --- a/libibmad/src/gs.c
> +++ b/libibmad/src/gs.c
> @@ -70,7 +70,8 @@ uint8_t *pma_query_via(void *rcvbuf, ib_portid_t * dest, int port,
>        rpc.datasz = IB_PC_DATA_SZ;
>        rpc.dataoffs = IB_PC_DATA_OFFS;
>
> -       dest->qp = 1;
> +       if (!dest->qp)
> +               dest->qp = 1;
>        if (!dest->qkey)
>                dest->qkey = IB_DEFAULT_QP1_QKEY;
>
> @@ -109,7 +110,8 @@ uint8_t *performance_reset_via(void *rcvbuf, ib_portid_t * dest,
>        rpc.timeout = timeout;
>        rpc.datasz = IB_PC_DATA_SZ;
>        rpc.dataoffs = IB_PC_DATA_OFFS;
> -       dest->qp = 1;
> +       if (!dest->qp)
> +               dest->qp = 1;
>        if (!dest->qkey)
>                dest->qkey = IB_DEFAULT_QP1_QKEY;
>
> diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
> index 07b623d..f2e6d5f 100644
> --- a/libibmad/src/rpc.c
> +++ b/libibmad/src/rpc.c
> @@ -189,27 +189,46 @@ void *mad_rpc(const struct ibmad_port *port, ib_rpc_t * rpc,
>        int status, len;
>        uint8_t sndbuf[1024], rcvbuf[1024], *mad;
>        int timeout, retries;
> +       int redirect = 1;
>
> -       len = 0;
> -       memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
> +       while (redirect) {
> +               len = 0;
> +               memset(sndbuf, 0, umad_size() + IB_MAD_SIZE);
>
> -       if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
> -               return 0;
> +               if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
> +                       return 0;
>
> -       timeout = rpc->timeout ? rpc->timeout :
> -           port->timeout ? port->timeout : madrpc_timeout;
> -       retries = port->retries ? port->retries : madrpc_retries;
> +               timeout = rpc->timeout ? rpc->timeout :
> +                       port->timeout ? port->timeout : madrpc_timeout;
> +               retries = port->retries ? port->retries : madrpc_retries;
>
> -       if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
> -                             port->class_agents[rpc->mgtclass],
> -                             len, timeout, retries)) < 0) {
> -               IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
> -               return 0;
> -       }
> +               if ((len = _do_madrpc(port->port_id, sndbuf, rcvbuf,
> +                                     port->class_agents[rpc->mgtclass],
> +                                     len, timeout, retries)) < 0) {
> +                       IBWARN("_do_madrpc failed; dport (%s)", portid2str(dport));
> +                       return 0;
> +               }
>
> -       mad = umad_get_mad(rcvbuf);
> +               mad = umad_get_mad(rcvbuf);
> +               status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
> +
> +               /* check for exact match instead of only the redirect bit;
> +                * that way, weird statuses cause an error, too */
> +               if (status == IB_MAD_STS_REDIRECT) {

Note here that only LID based redirection supported currently (and
perhaps GID based redirection is a TODO).

> +                       /* update dport for next request and retry */
> +                       dport->lid = mad_get_field(mad, 64, IB_CPI_REDIRECT_LID_F);
> +                       dport->qp = mad_get_field(mad, 64, IB_CPI_REDIRECT_QP_F);
> +                       dport->qkey = mad_get_field(mad, 64, IB_CPI_REDIRECT_QKEY_F);
> +                       dport->sl = mad_get_field(mad, 64, IB_CPI_REDIRECT_SL_F);
> +                       /* TODO: Reverse map redirection P_Key to P_Key index */
> +                       if (ibdebug)
> +                               IBWARN("redirected to lid 0x%x, qp 0x%x, qkey 0x%x, pkey 0x%x",
> +                                      dport->lid, dport->qp, dport->qkey, dport->pkey_idx);
> +               } else
> +                       redirect = 0;
> +       }
>
> -       if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F)) != 0) {
> +       if (status != 0) {
>                ERRS("MAD completed with error status 0x%x; dport (%s)",
>                     status, portid2str(dport));
>                return 0;
> --
> 1.5.5
>
>
>
>



More information about the ewg mailing list