***SPAM*** Re: [ofa-general] ***SPAM*** Re: [PATCHv2] opensm/PerfMgr: Better redirection support

Hal Rosenstock hal.rosenstock at gmail.com
Wed Apr 15 13:29:31 PDT 2009


Hi Sasha,

On Wed, Apr 15, 2009 at 8:59 AM, Sasha Khapyorsky <sashak at voltaire.com> wrote:
> Hi Hal,
>
> On 15:21 Thu 12 Mar     , Hal Rosenstock wrote:
>>
>> Handle PKey and QPN redirection information
>> GID redirection handling remains
>>
>> Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>
>>
>> ---
>> Changes since v1:
>> Added include of osm_helper.h to osm_perfmgr.c
>>
>> Notes: osm_console redirection patch is unchanged
>> Also, relies on ib_gid_is_notzero patch
>>
>> diff --git a/opensm/include/opensm/osm_perfmgr.h b/opensm/include/opensm/osm_perfmgr.h
>> index 45dec54..651ea80 100644
>> --- a/opensm/include/opensm/osm_perfmgr.h
>> +++ b/opensm/include/opensm/osm_perfmgr.h
>> @@ -92,8 +92,14 @@ typedef enum {
>>
>>  /* Redirection information */
>>  typedef struct redir {
>> +     boolean_t redirection;
>> +     boolean_t invalid;
>
> Why using lid value != 0 is/was bad for redirection invalidation?

b/c there are other fields supplied in the redirection which also
could be invalid so this one flag summarizes that instead of
overloading one of the fields. Previously the only thing looked at was
lid (and qp) so using lid 0 as invalid was used.

>> +     ib_gid_t redir_gid;
>>       ib_net16_t redir_lid;
>> +     ib_net16_t redir_pkey;
>>       ib_net32_t redir_qp;
>> +     uint16_t redir_pkey_ix;
>
> Don't need to repeat structure name (redir) in field names - it just
> makes lines longer - 'redir->pkey_idx' looks pretty clear.

I was following the naming already present (redir_lid/qp). I'll change
this in the next version.

>> +     ib_net16_t orig_lid;
>>  } redir_t;
>>
>>  /* Node to store information about which nodes we are monitoring */
>> @@ -134,6 +140,7 @@ typedef struct osm_perfmgr {
>>       uint32_t max_outstanding_queries;
>>       cl_qmap_t monitored_map;        /* map the nodes we are tracking */
>>       __monitored_node_t *remove_list;
>> +     int16_t local_port;
>>  } osm_perfmgr_t;
>>  /*
>>  * FIELDS
>> diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
>> index 4a6f65c..fc1b7cb 100644
>> --- a/opensm/opensm/osm_perfmgr.c
>> +++ b/opensm/opensm/osm_perfmgr.c
>> @@ -47,7 +47,6 @@
>>  #endif                               /* HAVE_CONFIG_H */
>>
>>  #ifdef ENABLE_OSM_PERF_MGR
>> -
>>  #include <stdlib.h>
>>  #include <stdint.h>
>>  #include <string.h>
>> @@ -65,8 +64,11 @@
>>  #include <opensm/osm_log.h>
>>  #include <opensm/osm_node.h>
>>  #include <opensm/osm_opensm.h>
>> +#include <opensm/osm_helper.h>
>>
>>  #define OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe
>> +#define MAX_LOCAL_IBPORTS 64
>> +#define MAX_LOCAL_PKEYS   256
>>
>>  #if ENABLE_OSM_PERF_MGR_PROFILE
>>  struct {
>> @@ -118,8 +120,6 @@ static inline void diff_time(struct timeval *before,
>>  }
>>  #endif
>>
>> -extern int wait_for_pending_transactions(osm_stats_t * stats);
>> -
>>  /**********************************************************************
>>   * Internal helper functions.
>>   **********************************************************************/
>> @@ -203,6 +203,7 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
>>       uint8_t port = context->perfmgr_context.port;
>>       cl_map_item_t *p_node;
>>       __monitored_node_t *p_mon_node;
>> +     ib_net16_t orig_lid;
>>
>>       OSM_LOG_ENTER(pm->log);
>>
>> @@ -233,9 +234,10 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
>>                               p_mon_node->guid, p_mon_node->redir_tbl_size);
>>                       goto Exit;
>>               }
>> -             /* Clear redirection info */
>> -             p_mon_node->redir_port[port].redir_lid = 0;
>> -             p_mon_node->redir_port[port].redir_qp = 0;
>> +             /* Clear redirection info for this port except orig_lid */
>> +             orig_lid = p_mon_node->redir_port[port].orig_lid;
>> +             memset(&p_mon_node->redir_port[port], 0, sizeof(redir_t));
>> +             p_mon_node->redir_port[port].orig_lid = orig_lid;
>
> Hmm, why should 'orig_lid' be part of redirection structure and not
> placed on original node/port (below I see that it is used in
> non-redirected paths)?

What are you referring to here ?

> I think it would be better to use structures like:
>
> struct node {
>        ....
>        uint16_t lid;
>        uint16_t pkey_ix;

Why would lid and pkey_ix be part of node ?

>        unsigned num_ports;
>        struct port {
>                ....
>                struct redir_info { /* or even same 'struct port' */
>                        ...
>                } *ri;
>        } ports[0];
> }

It's like this with redir_tbl_size r.t. num_ports and redir_t
redir_port[1] instead of your struct port {} ports[0]. Why would what
you propose be better ?


>>               cl_plock_release(pm->lock);
>>       }
>>
>> @@ -292,6 +294,40 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid)
>>               goto Exit;
>>       }
>>
>> +     /* if redirection enabled, determine local port from port GUID */
>> +     if (pm->subn->opt.perfmgr_redir) {
>> +             ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
>> +             uint32_t num_ports = MAX_LOCAL_IBPORTS;
>> +             int i;
>> +
>> +             for (i = 0; i < num_ports; i++) {
>> +                     attr_array[i].num_pkeys = 0;
>> +                     attr_array[i].p_pkey_table = NULL;
>> +             }
>> +
>> +             /* call transport layer for a list of local port GUIDs */
>> +             status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
>> +                                                   attr_array, &num_ports);
>> +             if (status != IB_SUCCESS) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C1C: osm_vendor_get_all_port_attr status 0x%x\n",
>> +                             status);
>> +                     goto Exit;
>> +             }
>> +             if (num_ports == 0) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C1D: No local ports detected!\n");
>> +                     goto Exit;
>> +             }
>> +
>> +             for (i = 0; i < num_ports; i++) {
>> +                     if (port_guid == attr_array[i].port_guid) {
>> +                             pm->local_port = attr_array[i].port_num;
>> +                             break;
>> +                     }
>> +             }
>> +     }
>> +
>
> PerfMgr is always running over discovered fabric so maybe local port
> number should be detected later at start of PerfMgr process cycle just
> using OpenSM DB.

Why is that better than doing this at bind time of PerfMgr ?

> Also see comment below about pkey validation per redirection request.
>
>>  Exit:
>>       OSM_LOG_EXIT(pm->log);
>>       return (status);
>> @@ -321,25 +357,16 @@ static ib_net32_t get_qp(__monitored_node_t * mon_node, uint8_t port)
>>
>>       if (mon_node && mon_node->redir_tbl_size &&
>>           port < mon_node->redir_tbl_size &&
>> -         mon_node->redir_port[port].redir_lid &&
>> +         mon_node->redir_port[port].redirection &&
>>           mon_node->redir_port[port].redir_qp)
>>               qp = mon_node->redir_port[port].redir_qp;
>>
>>       return qp;
>>  }
>>
>> -/**********************************************************************
>> - * Given a node, a port, and an optional monitored node,
>> - * return the appropriate lid to query that port
>> - **********************************************************************/
>>  static ib_net16_t
>> -get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
>> +get_base_lid(osm_node_t * p_node, uint8_t port)
>>  {
>> -     if (mon_node && mon_node->redir_tbl_size &&
>> -         port < mon_node->redir_tbl_size &&
>> -         mon_node->redir_port[port].redir_lid)
>> -             return mon_node->redir_port[port].redir_lid;
>> -
>>       switch (p_node->node_info.node_type) {
>>       case IB_NODE_TYPE_CA:
>>       case IB_NODE_TYPE_ROUTER:
>> @@ -352,12 +379,27 @@ get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
>>  }
>>
>>  /**********************************************************************
>> + * Given a node, a port, and an optional monitored node,
>> + * return the lid appropriate to query that port
>> + **********************************************************************/
>> +static ib_net16_t
>> +get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
>> +{
>> +     if (mon_node && mon_node->redir_tbl_size &&
>> +         port < mon_node->redir_tbl_size &&
>> +         mon_node->redir_port[port].redir_lid)
>> +             return mon_node->redir_port[port].redir_lid;
>> +
>> +     return get_base_lid(p_node, port);
>> +}
>> +
>> +/**********************************************************************
>>   * Form and send the Port Counters MAD for a single port.
>>   **********************************************************************/
>>  static ib_api_status_t
>>  osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
>> -                     ib_net32_t dest_qp, uint8_t port, uint8_t mad_method,
>> -                     osm_madw_context_t * const p_context)
>> +                     ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port,
>> +                     uint8_t mad_method, osm_madw_context_t * const p_context)
>>  {
>>       ib_api_status_t status = IB_SUCCESS;
>>       ib_port_counters_t *port_counter = NULL;
>> @@ -396,8 +438,7 @@ osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
>>       p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
>>       p_madw->mad_addr.addr_type.gsi.remote_qkey =
>>           cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
>> -     /* FIXME what about other partitions */
>> -     p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
>> +     p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
>>       p_madw->mad_addr.addr_type.gsi.service_level = 0;
>>       p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
>>       p_madw->resp_expected = TRUE;
>> @@ -432,28 +473,32 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context)
>>       uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
>>       osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
>>       __monitored_node_t *mon_node = NULL;
>> -     uint32_t size;
>> +     uint32_t num_ports;
>> +     int port;
>>
>>       OSM_LOG_ENTER(pm->log);
>>
>>       if (cl_qmap_get(&pm->monitored_map, node_guid)
>>           == cl_qmap_end(&pm->monitored_map)) {
>>               /* if not already in our map add it */
>> -             size = osm_node_get_num_physp(node);
>> -             mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size);
>> +             num_ports = osm_node_get_num_physp(node);
>> +             mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * num_ports);
>>               if (!mon_node) {
>>                       OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
>>                               "malloc failed: not handling node %s"
>>                               "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid);
>>                       goto Exit;
>>               }
>> -             memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size);
>> +             memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * num_ports);
>>               mon_node->guid = node_guid;
>>               mon_node->name = strdup(node->print_desc);
>> -             mon_node->redir_tbl_size = size;
>> +             mon_node->redir_tbl_size = num_ports;
>>               /* check for enhanced switch port 0 */
>>               mon_node->esp0 = (node->sw &&
>>                   ib_switch_info_is_enhanced_port0(&node->sw->switch_info));
>> +             for (port = (mon_node->esp0) ? 0 : 1; port < num_ports; port++)
>> +                     mon_node->redir_port[port].orig_lid = get_base_lid(node, port);
>> +
>>               cl_qmap_insert(&(pm->monitored_map), node_guid,
>>                              (cl_map_item_t *) mon_node);
>>       }
>> @@ -511,6 +556,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
>>               if (!osm_node_get_physp_ptr(node, port))
>>                       continue;
>>
>> +             if (mon_node->redir_port[port].redirection &&
>> +                 mon_node->redir_port[port].invalid)
>> +                     continue;
>> +
>
> Are two flags really needed? Couldn't this be stripped down?

Just invalid should be sufficient. I'll change this in the next version.

> Also what about letting "chance" for port to refresh redirection info?

What do you mean ?

>>               lid = get_lid(node, port, mon_node);
>>               if (lid == 0) {
>>                       OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
>> @@ -532,8 +581,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
>>                       PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
>>                       cl_ntoh16(lid), node->print_desc);
>>               status =
>> -                 osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
>> -                                         IB_MAD_METHOD_GET, &mad_context);
>> +                 osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
>> +                                         mon_node->redir_port[port].redir_pkey_ix,
>> +                                         port, IB_MAD_METHOD_GET,
>> +                                         &mad_context);
>>               if (status != IB_SUCCESS)
>>                       OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
>>                               "Failed to issue port counter query for node 0x%"
>> @@ -550,6 +601,7 @@ Exit:
>>   * Discovery stuff.
>>   * Basically this code should not be here, but merged with main OpenSM
>>   **********************************************************************/
>> +extern int wait_for_pending_transactions(osm_stats_t * stats);
>>  extern void osm_drop_mgr_process(IN osm_sm_t *sm);
>>
>>  static int sweep_hop_1(osm_sm_t * sm)
>> @@ -980,6 +1032,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
>>               osm_node_t *p_node = NULL;
>>               ib_net16_t lid = 0;
>>
>> +             if (mon_node->redir_port[port].redirection &&
>> +                 mon_node->redir_port[port].invalid)
>> +                     goto Exit;
>> +
>>               osm_log(pm->log, OSM_LOG_VERBOSE,
>>                       "PerfMgr: Counter overflow: %s (0x%" PRIx64
>>                       ") port %d; clearing counters\n",
>> @@ -1004,8 +1060,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
>>               mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
>>               /* clear port counters */
>>               status =
>> -                 osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
>> -                                         IB_MAD_METHOD_SET, &mad_context);
>> +                 osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
>> +                                         mon_node->redir_port[port].redir_pkey_ix,
>> +                                         port, IB_MAD_METHOD_SET,
>> +                                         &mad_context);
>>               if (status != IB_SUCCESS)
>>                       OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
>>                               "Failed to send clear counters MAD for %s (0x%"
>> @@ -1063,6 +1121,73 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t
>>                       time_diff, mon_node->name, mon_node->guid, port);
>>  }
>>
>> +static boolean_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey,
>> +                                  uint16_t *pkey_ix)
>> +{
>
> This function can just return pkey index value (or negative in case of
> failure).

Architecturally that's not true but practically it is as noone
implements many pkeys. I'll change in next version.

>> +     ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
>> +     uint32_t num_ports = MAX_LOCAL_IBPORTS;
>> +     ib_api_status_t status;
>> +     boolean_t sts = FALSE;
>> +     uint16_t i = 0;
>> +
>> +     OSM_LOG_ENTER(pm->log);
>> +
>> +     for (i = 0; i < num_ports; i++) {
>> +             attr_array[i].num_pkeys = 0;
>> +             attr_array[i].p_pkey_table = NULL;
>> +     }
>> +
>> +     /* If local port couldn't be determined previously */
>> +     if (pm->local_port == -1)
>> +             goto not_found;
>> +
>> +     attr_array[pm->local_port].num_pkeys = MAX_LOCAL_PKEYS;
>> +     attr_array[pm->local_port].p_pkey_table =
>> +                             malloc(MAX_LOCAL_PKEYS * sizeof(ib_net16_t));
>> +     if (!attr_array[pm->local_port].p_pkey_table) {
>> +             OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                     "ERR 4C20: No memory for port %d pkey table\n",
>> +                     pm->local_port);
>> +             goto not_found;
>> +     }
>> +
>> +     /* call the transport layer for a list of local port pkeys */
>> +     status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
>> +                                           attr_array, &num_ports);
>
> This heavy stuff is performed per redirection request, but it actually
> uses same data (local port's pkey table). This looks very inefficient.
> Instead of doing this you can get local port's pkey table only once at
> PerfMgr process cycle start and do all checks against this already
> initialized table. Of course only in case when redirection is enabled at
> all.

Redirection does not occur frequently. Also, the pkey table could
change in between and there's no local event support in OpenSM so I
don't see a way around this other than polling.

>> +     if (status != IB_SUCCESS) {
>> +             OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                     "ERR 4C1E: osm_vendor_get_all_port_attr status 0x%x\n",
>> +                     status);
>> +             goto not_found;
>> +     }
>> +     if (num_ports == 0 || pm->local_port > num_ports) {
>> +             OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                     "ERR 4C1F: No local ports detected or local port out of range!\n");
>> +             goto not_found;
>> +     }
>> +     ib_net16_t *pkey_table = attr_array[pm->local_port].p_pkey_table;
>> +     for (i = 0; i < attr_array[pm->local_port].num_pkeys; i++)
>> +             if (pkey_table[i] == pkey)
>> +                     break;
>> +     if (i == attr_array[pm->local_port].num_pkeys) {
>> +             i = 0;
>> +             goto not_found;
>> +     }
>> +     free(attr_array[pm->local_port].p_pkey_table);
>> +     sts = TRUE;
>> +     goto Exit;
>> +
>> +not_found:
>> +     if (attr_array[pm->local_port].p_pkey_table)
>> +             free(attr_array[pm->local_port].p_pkey_table);
>> +     sts = FALSE;
>> +Exit:
>> +     if (pkey_ix)
>> +             *pkey_ix = i;
>> +     OSM_LOG_EXIT(pm->log);
>> +     return sts;
>> +}
>> +
>>  /**********************************************************************
>>   * The dispatcher uses a thread pool which will call this function when
>>   * we have a thread available to process our mad received from the wire.
>> @@ -1082,6 +1207,8 @@ static void osm_pc_rcv_process(void *context, void *data)
>>       perfmgr_db_data_cnt_reading_t data_reading;
>>       cl_map_item_t *p_node;
>>       __monitored_node_t *p_mon_node;
>> +     uint16_t pkey_ix;
>> +     boolean_t invalid = FALSE;
>>
>>       OSM_LOG_ENTER(pm->log);
>>
>> @@ -1105,7 +1232,8 @@ static void osm_pc_rcv_process(void *context, void *data)
>>                 p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
>>
>>       /* Response could also be redirection (IBM eHCA PMA does this) */
>> -     if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
>> +     if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
>> +         p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
>>               char gid_str[INET6_ADDRSTRLEN];
>>               ib_class_port_info_t *cpi =
>>                   (ib_class_port_info_t *) &
>> @@ -1119,17 +1247,48 @@ static void osm_pc_rcv_process(void *context, void *data)
>>                                 sizeof gid_str),
>>                       cl_ntoh32(cpi->redir_qp));
>>
>> -             /* LID or GID redirection ? */
>> -             /* For GID redirection, need to get PathRecord from SA */
>> +             /* valid redirection ? */
>>               if (cpi->redir_lid == 0) {
>> -                     OSM_LOG(pm->log, OSM_LOG_VERBOSE,
>> -                             "GID redirection not currently implemented!\n");
>> -                     goto Exit;
>> +                     if (!ib_gid_is_notzero(&cpi->redir_gid)) {
>> +                             OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                                     "ERR 4C17: Invalid redirection "
>> +                                     "(both redirect LID and GID are zero)\n");
>> +                             invalid = TRUE;
>> +                     }
>> +             }
>> +             if (cpi->redir_qp == 0) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C18: Invalid RedirectQP\n");
>> +                     invalid = TRUE;
>> +             }
>> +             if (cpi->redir_pkey == 0) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C19: Invalid RedirectP_Key\n");
>> +                     invalid = TRUE;
>> +             }
>> +             if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C1A: Invalid RedirectQ_Key\n");
>> +                     invalid = TRUE;
>> +             }
>> +
>> +             if (!validate_redir_pkey(pm, cpi->redir_pkey, &pkey_ix)) {
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR,
>> +                             "ERR 4C1B: Index for Pkey 0x%x not found\n",
>> +                             cl_ntoh16(cpi->redir_pkey));
>> +                     invalid = TRUE;
>>               }
>
> All above are not OpenSM errors, but wrong external data. I think it
> should be logged as VERBOSE messages.

I agree it's wrong external data but it seems serious enough to me to
treat as an error. If not, at least INFO rather than VERBOSE so
nothing special needs to be done to see these.

>>
>>               if (!pm->subn->opt.perfmgr_redir) {
>> -                             OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
>> -                                    "redirection requested but disabled\n");
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
>> +                             "redirection requested but disabled\n");
>> +                     invalid = TRUE;
>> +             }
>
> This is not an error.

Seems like some sort of configuration error to me if this is disabled
at the manager but the PMA wants to use it. Other local configuration
errors are treated as errors.

> BTW, why to bother with verifying redirection info when redirection
> support is disabled anyway?

I thought it was useful to know the redirection info was invalid
rather than getting the disabled notification and then enabling and
finding out. It can easily be moved up earlier in the flow if that's
better.

>> +
>> +             if (cpi->redir_lid == 0) {
>> +                     /* GID redirection: get PathRecord information */
>> +                     OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C21: "
>> +                             "GID redirection not currently supported\n");
>>                       goto Exit;
>>               }
>>
>> @@ -1144,14 +1303,23 @@ static void osm_pc_rcv_process(void *context, void *data)
>>                               p_mon_node->redir_tbl_size);
>>                       goto Exit;
>>               }
>> +             p_mon_node->redir_port[port].redirection = TRUE;
>> +             p_mon_node->redir_port[port].invalid = invalid;
>> +             memcpy(&p_mon_node->redir_port[port].redir_gid,
>> +                    &cpi->redir_gid, sizeof(ib_gid_t));
>>               p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
>>               p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
>> +             p_mon_node->redir_port[port].redir_pkey = cpi->redir_pkey;
>> +             p_mon_node->redir_port[port].redir_pkey_ix = pkey_ix;
>>               cl_plock_release(pm->lock);
>>
>> +             if (invalid)
>> +                     goto Exit;
>> +
>>               /* Finally, reissue the query to the redirected location */
>>               status =
>>                   osm_perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
>> -                                         port,
>> +                                         pkey_ix, port,
>>                                           mad_context->perfmgr_context.
>>                                           mad_method, mad_context);
>>               if (status != IB_SUCCESS)
>> @@ -1234,6 +1402,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, osm_opensm_t *osm,
>>       pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
>>       pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
>>       pm->osm = osm;
>> +     pm->local_port = -1;
>>
>>       status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
>>       if (status != IB_SUCCESS)
>
> In general I would suggest to not mix redirection case with main flow
> (by using better data structures). The Redirection is not something
> PerfMgr specific and ideally we could have separate redirection handling
> module.  I'm not requesting to do this now (in this implementation), but
> at least flow separation is very desirable.

I've done some more of this in subsequent patches not yet submitted.
However, there is no other GS manager (and if it did exist would it
use redirection ? there are known cases for PerfMgr currently).

In any case, this is "poor man's" redirection support given what is
currently available in the OpenFabrics IB stack.

-- Hal

> Sasha
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>



More information about the general mailing list