[openib-general] [PATCH 10/10] osm: QoS in OpenSM

Hal Rosenstock halr at voltaire.com
Wed Jan 31 09:29:42 PST 2007


Hi Yevgeny,

On Tue, 2007-01-30 at 10:33, Yevgeny Kliteynik wrote:
> Checking PathRecord query for QoS constraints
> 
> The QoS-aware path selection logic is implemented in a
> separate function that is called only when QoS in OpenSM
> is on. It causes some code duplication, but the idea is
> to minimize the changes in the existing logic in OSM.
> Eventually, these two function (the old path selection
> and the new QoS-aware path selection) will be merged
> into a single function.

Yes, this would be nice to do in the future as there is much overlap.
Whether qos is carried in the request could be handled internal to this
combined routine rather than outside to determine which routine to call.
This will make for a lot less code.

Some comments embedded below.

> Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il>
> ---
>  osm/opensm/osm_sa_path_record.c |  822 ++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 816 insertions(+), 6 deletions(-)
> 
> diff --git a/osm/opensm/osm_sa_path_record.c b/osm/opensm/osm_sa_path_record.c
> index a0dbb07..2ff7a42 100644
> --- a/osm/opensm/osm_sa_path_record.c
> +++ b/osm/opensm/osm_sa_path_record.c
> @@ -70,6 +70,7 @@
>  #include <opensm/osm_router.h>
>  #include <opensm/osm_sa_mcmember_record.h>
>  #endif
> +#include <opensm/osm_qos_parser.h>
>  
>  #define OSM_PR_RCV_POOL_MIN_SIZE    64
>  #define OSM_PR_RCV_POOL_GROW_SIZE   64
> @@ -87,6 +88,7 @@ typedef struct _osm_path_parms
>    uint8_t            rate;
>    uint8_t            sl;
>    uint8_t            pkt_life;
> +  uint16_t           class;
>    boolean_t          reversible;
>  } osm_path_parms_t;
>  
> @@ -716,6 +718,799 @@ __osm_pr_rcv_get_path_parms(
>  
>  /**********************************************************************
>   **********************************************************************/
> +
> +static ib_api_status_t
> +__osm_pr_rcv_get_path_parms_qos(

This is the similar function to the non QoS one:
__osm_pr_rcv_get_path_parms

> +  IN osm_pr_rcv_t*         const p_rcv,
> +  IN const ib_path_rec_t*  const p_pr,
> +  IN const osm_port_t*     const p_src_port,
> +  IN const osm_port_t*     const p_dest_port,
> +  IN const uint16_t        dest_lid_ho,
> +  IN const ib_net64_t      comp_mask,
> +  OUT osm_path_parms_t*    const p_parms )
> +{
> +   const osm_node_t*        p_node;
> +   const osm_physp_t*       p_physp;
> +   const osm_physp_t*       p_src_physp;
> +   const osm_physp_t*       p_dest_physp;
> +   const osm_prtn_t*        p_prtn;
> +   const ib_port_info_t*    p_pi;
> +   ib_api_status_t          status = IB_SUCCESS;
> +   ib_net16_t               pkey = 0;
> +   ib_net16_t               shared_pkey = 0;
> +   uint8_t                  mtu = 0;
> +   uint8_t                  rate = 0;
> +   uint8_t                  pkt_life = 0;
> +   uint8_t                  sl = 0;
> +   uint16_t                 class = 0;
> +   uint8_t                  required_mtu;
> +   uint8_t                  required_rate;
> +   uint8_t                  required_pkt_life;
> +   uint8_t                  in_port_num;
> +   uint8_t                  out_port_num;
> +   ib_net16_t               dest_lid;
> +   uint8_t                  i;
> +   uint8_t                  vl;
> +   ib_slvl_table_t *        p_slvl_tbl = NULL;
> +   boolean_t                valid_sls[IB_MAX_NUM_VLS];
> +   boolean_t                sl2vl_valid_path = FALSE;
> +   uint8_t                  first_valid_sl;
> +   osm_qos_level_t *        p_qos_level = NULL;
> +
> +   OSM_LOG_ENTER( p_rcv->p_log, __osm_pr_rcv_get_path_parms_qos );
> +
> +   memset(valid_sls,TRUE,sizeof(valid_sls));
> +   dest_lid = cl_hton16( dest_lid_ho );
> +
> +   p_dest_physp = osm_port_get_default_phys_ptr( p_dest_port );
> +   p_physp = osm_port_get_default_phys_ptr( p_src_port );
> +   p_src_physp = p_physp;
> +   p_pi = &p_physp->port_info;
> +
> +   mtu = ib_port_info_get_mtu_cap( p_pi );
> +   rate = ib_port_info_compute_rate( p_pi );
> +
> +   /*
> +    * Mellanox Tavor device performance is better using 1K MTU.
> +    * If required MTU and MTU selector are such that 1K is OK 
> +    * and at least one end of the path is Tavor we override the
> +    * port MTU with 1K.
> +    */
> +   if ( p_rcv->p_subn->opt.enable_quirks &&
> +          __osm_sa_path_rec_apply_tavor_mtu_limit(
> +             p_pr, p_src_port, p_dest_port, comp_mask) )
> +   {
> +      if (mtu > IB_MTU_LEN_1024) 
> +      {
> +         mtu = IB_MTU_LEN_1024;
> +         osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                  "__osm_pr_rcv_get_path_parms_qos: "
> +                  "Optimized Path MTU to 1K for Mellanox Tavor device\n");
> +      }
> +   }
> +
> +   /*
> +    * Walk the subnet object from source to destination,
> +    * tracking the most restrictive rate and mtu values along the way...
> +    *
> +    * If source port node is a switch, then p_physp should
> +    * point to the port that routes the destination lid
> +    */
> +
> +   p_node = osm_physp_get_node_ptr( p_physp );
> +
> +   if( p_node->sw )
> +   {
> +      /* source node is a switch */
> +      in_port_num = osm_physp_get_port_num(p_physp);
> +      /*
> +       * If the dest_lid_ho is equal to the lid of the switch pointed by
> +       * p_sw then p_physp will be the physical port of the switch port zero,
> +       * and out_port_num will be 0.
> +       */
> +      p_physp = osm_switch_get_route_by_lid(p_node->sw, cl_hton16( dest_lid_ho ) );
> +      if ( p_physp == 0 )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F02: "
> +                  "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n",
> +                  dest_lid_ho,
> +                  cl_ntoh64( osm_node_get_node_guid( p_node ) ) );

Nit: Error codes should be made unique and not overlap existing ones.

> +         status = IB_ERROR;
> +         goto Exit;
> +      }
> +      p_src_physp = p_physp;
> +      out_port_num = osm_physp_get_port_num(p_physp);
> +      p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
> +   }
> +   else
> +   {
> +      /* source node is CA or Router */
> +      p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
> +   }
> +
> +   for (i = 0; i < IB_MAX_NUM_VLS; i++)
> +   {
> +      if (valid_sls[i])
> +      {
> +         vl = ib_slvl_table_get(p_slvl_tbl,i);
> +         if (vl == IB_DROP_VL)

Does vl > Operational VLs need checking here or is it never set this way
?

> +            valid_sls[i] = FALSE;
> +      }
> +   }
> +
> +   /*
> +    * now get pointer to the destination port (same as above)
> +    */
> +   p_node = osm_physp_get_node_ptr( p_dest_physp );
> +
> +   if( p_node->sw )
> +   {
> +      p_dest_physp = osm_switch_get_route_by_lid( p_node->sw, cl_ntoh16( dest_lid_ho ) );
> +      if ( p_dest_physp == 0 )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F03: "
> +                  "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n",
> +                  dest_lid_ho,
> +                  cl_ntoh64( osm_node_get_node_guid( p_node ) ) );
> +         status = IB_ERROR;
> +         goto Exit;
> +      }
> +   }
> +
> +   /*
> +    * Now go through the path step by step
> +    */
> +
> +   while( p_physp != p_dest_physp )
> +   {
> +      p_physp = osm_physp_get_remote( p_physp );
> +      if ( p_physp == 0 )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F04: "
> +                  "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n",
> +                  dest_lid_ho,
> +                  cl_ntoh64( osm_node_get_node_guid( p_node ) ) );
> +         status = IB_ERROR;
> +         goto Exit;
> +      }
> +      
> +      in_port_num = osm_physp_get_port_num(p_physp);
> +
> +      /* this is point to point case (no switch in between) */
> +      if( p_physp == p_dest_physp )
> +         break;


Ordering of check for switch and point to point case are different here
and original routine. Should they be the same ? If so, which should
change ? (Any reason why this was moved in this routine ?)

> +      /* Check parameters for the ingress port in this switch */
> +      p_pi = &p_physp->port_info;
> +
> +      if( mtu > ib_port_info_get_mtu_cap( p_pi ) )
> +      {
> +         mtu = ib_port_info_get_mtu_cap( p_pi );
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "New smallest MTU = %u at intervening port 0x%016" PRIx64
> +                     " port num 0x%X\n",
> +                     mtu,
> +                     cl_ntoh64( osm_physp_get_port_guid( p_physp ) ),
> +                     osm_physp_get_port_num( p_physp ) );
> +         }
> +      }
> +
> +      if( rate > ib_port_info_compute_rate( p_pi ) )
> +      {
> +          rate = ib_port_info_compute_rate( p_pi );
> +          if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +          {
> +             osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                      "__osm_pr_rcv_get_path_parms_qos: "
> +                      "New smallest rate = %u at intervening port 0x%016" PRIx64
> +                      " port num 0x%X\n",
> +                      rate,
> +                      cl_ntoh64( osm_physp_get_port_guid( p_physp ) ),
> +                      osm_physp_get_port_num( p_physp ) );
> +          }
> +      }
> +
> +      p_node = osm_physp_get_node_ptr( p_physp );
> +      if( !p_node->sw )
> +      {
> +         /*
> +           There is some sort of problem in the subnet object!
> +           If this isn't a switch, we should have reached
> +           the destination by now!
> +         */
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F05: "
> +                  "Internal error, bad path\n" );
> +         status = IB_ERROR;
> +         goto Exit;
> +      }
> +      
> +      /* Continue with the egress port on this switch */
> +      p_physp = osm_switch_get_route_by_lid( p_node->sw, dest_lid );
> +
> +      if ( p_physp == 0 )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F06: "
> +                  "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n",
> +                  dest_lid_ho,
> +                  cl_ntoh64( osm_node_get_node_guid( p_node ) ) );
> +         status = IB_ERROR;
> +         goto Exit;
> +      }
> +
> +      CL_ASSERT( p_physp );
> +      CL_ASSERT( osm_physp_is_valid( p_physp ) );
> +
> +      p_pi = &p_physp->port_info;
> +
> +      if( mtu > ib_port_info_get_mtu_cap( p_pi ) )
> +      {
> +         mtu = ib_port_info_get_mtu_cap( p_pi );
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "New smallest MTU = %u at intervening port 0x%016" PRIx64
> +                     " port num 0x%X\n",
> +                     mtu,
> +                     cl_ntoh64( osm_physp_get_port_guid( p_physp ) ),
> +                     osm_physp_get_port_num( p_physp ) );
> +         }
> +      }
> +
> +      if( rate > ib_port_info_compute_rate( p_pi ) )
> +      {
> +         rate = ib_port_info_compute_rate( p_pi );
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "New smallest rate = %u at intervening port 0x%016" PRIx64
> +                     " port num 0x%X\n",
> +                     rate,
> +                     cl_ntoh64( osm_physp_get_port_guid( p_physp ) ),
> +                     osm_physp_get_port_num( p_physp ) );
> +         }
> +      }
> +
> +      out_port_num = osm_physp_get_port_num(p_physp);
> +
> +      /* 
> +       * Check SL2VL table of the switch
> +       */
> +      p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
> +      for ( i = 0; i < IB_MAX_NUM_VLS; i++ )
> +      {
> +         if (valid_sls[i])
> +         {
> +            vl = ib_slvl_table_get(p_slvl_tbl,i);
> +            if (vl == IB_DROP_VL)
> +               valid_sls[i] = FALSE;
> +         }
> +      }
> +      
> +      /* go to the next step in the path */
> +   }
> +
> +   /* p_physp now points to the destination */
> +   
> +   p_pi = &p_physp->port_info;
> +
> +   if( mtu > ib_port_info_get_mtu_cap( p_pi ) )
> +   {
> +      mtu = ib_port_info_get_mtu_cap( p_pi );
> +      if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                  "__osm_pr_rcv_get_path_parms_qos: "
> +                  "New smallest MTU = %u at destination port 0x%016" PRIx64 "\n",
> +                  mtu,
> +                  cl_ntoh64(osm_physp_get_port_guid( p_physp )) );
> +      }
> +   }
> +
> +   if( rate > ib_port_info_compute_rate( p_pi ) )
> +   {
> +      rate = ib_port_info_compute_rate( p_pi );
> +      if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                  "__osm_pr_rcv_get_path_parms_qos: "
> +                  "New smallest rate = %u at destination port 0x%016" PRIx64 "\n",
> +                  rate,
> +                  cl_ntoh64(osm_physp_get_port_guid( p_physp )) );
> +      }
> +   }
> +
> +   if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +   {
> +      osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +               "__osm_pr_rcv_get_path_parms_qos: "
> +               "Path min MTU = %u, min rate = %u\n", mtu, rate );
> +   }
> +
> +   /* check whether there is some SL that won't lead to VL15 eventually */
> +   for (i = 0; i < IB_MAX_NUM_VLS; i++)
> +   {
> +      if (valid_sls[i])
> +      {
> +         sl2vl_valid_path = TRUE;
> +         first_valid_sl = i;
> +         break;
> +      }
> +   }
> +
> +   if (!sl2vl_valid_path)
> +   {
> +      /* all the SLs will eventually lead to VL15 on this path */
> +      status = IB_NOT_FOUND;
> +      goto Exit;
> +   }
> +
> +   /*
> +    * According to spec definition IBA 1.2 Table 205 PacketLifeTime
> +    * description, for loopback paths, packetLifeTime shall be zero. 
> +    */
> +    if ( p_src_port == p_dest_port )
> +       pkt_life = 0; /* loopback */
> +    else
> +       pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT;
> +
> +   /*
> +    * Get QoS Level object according to the path request
> +    */
> +   osm_qos_parser_get_qos_level_by_pr(p_rcv,
> +                                      p_pr,
> +                                      p_src_port,
> +                                      p_dest_port,
> +                                      dest_lid_ho,
> +                                      comp_mask,
> +                                      &p_qos_level);
> +
> +   if (p_qos_level)
> +   {
> +      /* 
> +       * Found QoS level that should be applied to this path record request
> +       */
> +
> +      if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                  "__osm_pr_rcv_get_path_parms_qos: "
> +                  "PathRecord request matches QoS Level %u (%s)\n",
> +                  p_qos_level->sn,
> +                  (p_qos_level->use)? p_qos_level->use : "no description" ) ;
> +      }
> +
> +      /* adjust MTU limit according to QoS constraints */
> +      if (p_qos_level->mtu_limit_set && (mtu > p_qos_level->mtu_limit) )
> +      {
> +         mtu = p_qos_level->mtu_limit;
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS constaraints: new smallest MTU = %u\n",
> +                     mtu);
> +         }
> +      }
> +
> +      /* adjust Rate limit according to QoS constraints */
> +      if (p_qos_level->rate_limit_set && (rate > p_qos_level->rate_limit) )
> +      {
> +         rate = p_qos_level->rate_limit;
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS constaraints: new smallest Rate = %u\n",
> +                     rate);
> +         }
> +      }
> +
> +      /* adjust Packet Lifetime according to QoS constraints */
> +      if (p_qos_level->pkt_life_set && (pkt_life > p_qos_level->pkt_life) )
> +      {
> +         pkt_life = p_qos_level->pkt_life;
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS constaraints: new smallest Packet Lifetime = %u\n",
> +                     pkt_life);
> +         }
> +      }
> +
> +      /* adjust SL according to QoS constraints */
> +      if (p_qos_level->sl_set)
> +      { 
> +         if (!valid_sls[p_qos_level->sl])
> +         {
> +            status = IB_NOT_FOUND;
> +            goto Exit;
> +         }
> +         else
> +         {
> +            sl = p_qos_level->sl;
> +            if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +            {
> +               osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                        "__osm_pr_rcv_get_path_parms_qos: "
> +                        "QoS constaraints: new SL = %u\n",
> +                        sl);
> +            }
> +         }
> +      }
> +
> +      /* adjust PKey according to QoS constraints */
> +      if (p_qos_level->pkey_set)
> +      {
> +         pkey = p_qos_level->pkey;
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS constaraints: new PKey = %u\n",
> +                     pkey);
> +         }
> +      }
> +
> +      /* adjust Class according to QoS constraints */
> +      if (p_qos_level->class_set)
> +      {
> +         class = p_qos_level->class;
> +         if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS constaraints: new Class = %u\n",
> +                     class);
> +         }
> +      }
> +
> +   } /*if (p_qos_level)*/
> +
> +   /*
> +    * Determine if these values meet the user criteria
> +    * and adjust appropriately
> +    */
> +
> +   /* we silently ignore cases where only the MTU selector is defined */
> +   if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
> +        ( comp_mask & IB_PR_COMPMASK_MTU ) )
> +   {
> +      required_mtu = ib_path_rec_mtu( p_pr );
> +      switch( ib_path_rec_mtu_sel( p_pr ) )
> +      {
> +         case 0:    /* must be greater than */
> +            if( mtu <= required_mtu )
> +               status = IB_NOT_FOUND;
> +            break;
> +
> +         case 1:    /* must be less than */
> +            if( mtu >= required_mtu )
> +            {
> +               /* adjust to use the highest mtu
> +                  lower then the required one */
> +               if( required_mtu > 1 )
> +                  mtu = required_mtu - 1;
> +               else
> +                  status = IB_NOT_FOUND;
> +            }
> +            break;
> +
> +         case 2:    /* exact match */
> +            if( mtu < required_mtu )
> +               status = IB_NOT_FOUND;
> +            else
> +               mtu = required_mtu;
> +            break;
> +
> +         case 3:    /* largest available */
> +            /* can't be disqualified by this one */
> +            break;
> +
> +         default:
> +            /* if we're here, there's a bug in ib_path_rec_mtu_sel() */
> +            CL_ASSERT( FALSE );
> +            status = IB_ERROR;
> +            break;
> +      }
> +   }
> +   if (status != IB_SUCCESS)
> +      goto Exit;
> +
> +   /* we silently ignore cases where only the Rate selector is defined */
> +   if ( ( comp_mask & IB_PR_COMPMASK_RATESELEC ) &&
> +        ( comp_mask & IB_PR_COMPMASK_RATE ) )
> +   {
> +      required_rate = ib_path_rec_rate( p_pr );
> +      switch( ib_path_rec_rate_sel( p_pr ) )
> +      {
> +         case 0:    /* must be greater than */
> +            if( rate <= required_rate )
> +               status = IB_NOT_FOUND;
> +            break;
> +
> +         case 1:    /* must be less than */
> +            if( rate >= required_rate )
> +            {
> +               /* adjust the rate to use the highest rate
> +                  lower then the required one */
> +               if( required_rate > 2 )
> +                  rate = required_rate - 1;
> +               else
> +                  status = IB_NOT_FOUND;
> +            }
> +            break;
> +
> +         case 2:    /* exact match */
> +            if( rate < required_rate )
> +               status = IB_NOT_FOUND;
> +            else
> +               rate = required_rate;
> +            break;
> +
> +         case 3:    /* largest available */
> +            /* can't be disqualified by this one */
> +            break;
> +
> +         default:
> +            /* if we're here, there's a bug in ib_path_rec_mtu_sel() */
> +            CL_ASSERT( FALSE );
> +            status = IB_ERROR;
> +            break;
> +      }
> +   }
> +   if (status != IB_SUCCESS)
> +      goto Exit;
> +
> +   /* we silently ignore cases where only the PktLife selector is defined */
> +   if ( ( comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC ) &&
> +        ( comp_mask & IB_PR_COMPMASK_PKTLIFETIME ) )
> +   {
> +      required_pkt_life = ib_path_rec_pkt_life( p_pr );
> +      switch( ib_path_rec_pkt_life_sel( p_pr ) )
> +      {
> +         case 0:    /* must be greater than */
> +            if( pkt_life <= required_pkt_life )
> +               status = IB_NOT_FOUND;
> +            break;
> +
> +         case 1:    /* must be less than */
> +            if( pkt_life >= required_pkt_life )
> +            {
> +               /* adjust the lifetime to use the highest possible
> +                  lower then the required one */
> +               if( required_pkt_life > 1 )
> +                  pkt_life = required_pkt_life - 1;
> +               else
> +                  status = IB_NOT_FOUND;
> +            }
> +            break;
> +
> +         case 2:    /* exact match */
> +            if( pkt_life < required_pkt_life )
> +               status = IB_NOT_FOUND;
> +            else
> +               pkt_life = required_pkt_life;
> +            break;
> +
> +         case 3:    /* smallest available */
> +            /* can't be disqualified by this one */
> +            break;
> +
> +         default:
> +            /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
> +            CL_ASSERT( FALSE );
> +            status = IB_ERROR;
> +            break;
> +      }
> +   }
> +   if (status != IB_SUCCESS)
> +      goto Exit;
> +
> +   /* 
> +    * set Pkey for this path record request 
> +    */
> +
> +   shared_pkey = osm_physp_find_common_pkey( p_src_physp, p_dest_physp );
> +   if ( !shared_pkey )
> +   {
> +      osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +               "__osm_pr_rcv_get_path_parms_qos: ERR 1F07: "
> +               "Ports do not have any shared PKeys\n");
> +      status = IB_NOT_FOUND;
> +      goto Exit;
> +   }
> +
> +   if( (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) &&
> +       (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) )
> +   {
> +      if (p_qos_level && p_qos_level->pkey_set)
> +         pkey = p_qos_level->pkey;
> +      else
> +         pkey = shared_pkey;
> +   }
> +   else if (comp_mask & IB_PR_COMPMASK_PKEY) 
> +   {
> +      /* PathRecord requires specific PKey */
> +      if (p_qos_level && p_qos_level->pkey_set)
> +      {
> +         /* check that QoS pkey matches the required pkey */
> +         if (p_qos_level->pkey != p_pr->pkey)
> +         {
> +            osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                     "__osm_pr_rcv_get_path_parms_qos: "
> +                     "QoS PKey constraint (0x%04x) doesn't match required PKey (0x%04x)\n",
> +                     cl_ntoh16(p_qos_level->pkey), cl_ntoh16(p_pr->pkey));
> +            status = IB_NOT_FOUND;
> +            goto Exit;
> +         }
> +         pkey = p_qos_level->pkey;
> +      }
> +      else
> +         pkey = p_pr->pkey;
> +   }
> +   else
> +   {
> +      if (p_qos_level && p_qos_level->pkey_set)
> +         pkey = p_qos_level->pkey;
> +      else
> +         pkey = shared_pkey;
> +   }
> +
> +   /* 
> +    * PKey has been set. Now check that ports share this PKey.
> +    */
> +
> +   if ( (pkey != shared_pkey) &&
> +        (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) )
> +   {
> +      osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +               "__osm_pr_rcv_get_path_parms_qos: "
> +               "Ports do not share specified PKey 0x%04x\n",
> +               cl_ntoh16(pkey));
> +      status = IB_NOT_FOUND;
> +      goto Exit;
> +   }
> +
> +   /*
> +    * Done selecting PKey - Now select valid SL
> +    */
> +
> +   if (pkey) 
> +   {
> +      p_prtn = (osm_prtn_t *)cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl,
> +                                         pkey & cl_ntoh16((uint16_t)~0x8000));
> +      if ( p_prtn == (osm_prtn_t *)cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl) )
> +      {
> +         /* this may be possible when pkey tables are created somehow in
> +            previous runs or things are going wrong here */
> +         osm_log( p_rcv->p_log, OSM_LOG_ERROR,
> +                  "__osm_pr_rcv_get_path_parms_qos: ERR 1F08: "
> +                  "No partition found for PKey 0x%04x - using default SL %d\n",
> +                  cl_ntoh16(pkey), sl);
> +      }
> +      else
> +      {
> +         if (!valid_sls[p_prtn->sl])
> +         {
> +            /* partition forces to use SL that eventually leads to VL15 */
> +            if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +            {
> +               osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                        "__osm_pr_rcv_get_path_parms_qos: "
> +                        "Partition constraints (pkey=%u, sl=%u) lead to VL15\n",
> +                        pkey, p_prtn->sl);
> +            }
> +            status = IB_NOT_FOUND;
> +            goto Exit;
> +         }
> +         else
> +         {
> +            /* partition's SL is valid */
> +            if ( p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != p_prtn->sl) )
> +            {
> +               /* partition's SL doesn't match QoS Level SL */
> +               if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +               {
> +                  osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                           "__osm_pr_rcv_get_path_parms_qos: "
> +                           "Partition constraints (pkey=%u, sl=%u): SL doesn't match QoS SL(%u)\n",
> +                           pkey, p_prtn->sl, p_qos_level->sl);
> +               }
> +               status = IB_NOT_FOUND;
> +               goto Exit;
> +            }
> +            else
> +            {
> +               /* set SL to be partition's SL */
> +               sl = p_prtn->sl;
> +            }
> +         }
> +      }
> +   }
> +   else
> +   {
> +      /* 
> +       * No pkey (no partition) 
> +       */
> +      if (p_qos_level && p_qos_level->sl_set)
> +      {
> +         /* sl has been already set */
> +      }
> +      else
> +      {
> +         if (comp_mask & IB_PR_COMPMASK_SL) 
> +         {
> +            /* specific SL was requested */
> +            if (!valid_sls[ib_path_rec_sl(p_pr)])
> +            {
> +               if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +               {
> +                  osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                           "__osm_pr_rcv_get_path_parms_qos: "
> +                           "QoS constaraints: required SL (%u) leads to VL15\n",
> +                           ib_path_rec_sl(p_pr));
> +               }
> +               status = IB_NOT_FOUND;
> +               goto Exit;
> +            }
> +            /* set sl to whatever requested */
> +            sl = ib_path_rec_sl(p_pr);
> +         }
> +         else 
> +         {
> +            /* set sl to the first valid sl that won't lead to VL15 */
> +            sl = first_valid_sl;
> +         }
> +      }
> +   }
> +
> +   if ( (comp_mask & IB_PR_COMPMASK_SL) && 
> +        (ib_path_rec_sl( p_pr ) != sl) )
> +   {
> +      if( osm_log_is_active( p_rcv->p_log, OSM_LOG_DEBUG ) )
> +      {
> +         osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
> +                  "__osm_pr_rcv_get_path_parms_qos: "
> +                  "QoS constaraints: required SL (%u) doesn't match QoS constraints\n",
> +                  ib_path_rec_sl(p_pr));
> +      }
> +      status = IB_NOT_FOUND;
> +      goto Exit;
> +   }
> +
> +   /* reset pkey when raw traffic */
> +   if( (pkey) &&
> +       (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) &&
> +       (cl_ntoh32( p_pr->hop_flow_raw ) & (1<<31)) )
> +   {
> +      pkey = 0;
> +   }
> +
> +   p_parms->mtu = mtu;
> +   p_parms->rate = rate;
> +   p_parms->pkt_life = pkt_life;
> +   p_parms->pkey = pkey;
> +   p_parms->sl = sl;
> +   p_parms->class = class;
> +
> +  Exit:
> +   OSM_LOG_EXIT( p_rcv->p_log );
> +   return( status );
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
>  static void
>  __osm_pr_rcv_build_pr(
>    IN osm_pr_rcv_t*         const p_rcv,
> @@ -774,7 +1569,8 @@ __osm_pr_rcv_build_pr(
>  #endif
>  
>    p_pr->pkey = p_parms->pkey;
> -  p_pr->sl = cl_hton16(p_parms->sl);
> +  ib_path_rec_set_qos_class(p_pr,p_parms->class);
> +  ib_path_rec_set_sl(p_pr,p_parms->sl);
>    p_pr->mtu = (uint8_t)(p_parms->mtu | 0x80);
>    p_pr->rate = (uint8_t)(p_parms->rate | 0x80);
>  
> @@ -832,10 +1628,14 @@ __osm_pr_rcv_get_lid_pair_path(
>      goto Exit;
>    }
>  
> -  status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port,
> -                                        p_dest_port, dest_lid_ho,
> -                                        comp_mask, &path_parms );
> -
> +  if (p_rcv->p_subn->opt.no_qos)

Shouldn't this be based on p_rcv->p_subn.opt.qos_policy_file rather than
no_qos ? I think there are cases where the QoS will be used without the
QoS policy (higher level QoS support).

-- Hal

> +     status = __osm_pr_rcv_get_path_parms( p_rcv, p_pr, p_src_port,
> +                                           p_dest_port, dest_lid_ho,
> +                                           comp_mask, &path_parms );
> +  else
> +     status = __osm_pr_rcv_get_path_parms_qos( p_rcv, p_pr, p_src_port,
> +                                               p_dest_port, dest_lid_ho,
> +                                               comp_mask, &path_parms );
>    if( status != IB_SUCCESS )
>    {
>      cl_qlock_pool_put( &p_rcv->pr_pool, &p_pr_item->pool_item );
> @@ -849,6 +1649,16 @@ __osm_pr_rcv_get_lid_pair_path(
>                                                   comp_mask, &rev_path_parms );
>    path_parms.reversible = ( rev_path_status == IB_SUCCESS );
>  
> +  /*
> +   * ToDo: 
> +   * ToDo: The whole implementation of reversible path is wrong.
> +   * ToDo: It is not enough to know that the reversed path exist.
> +   * ToDo: We should also adjust limits (mtu, rate, etc...) by
> +   * ToDo: comparing path and reversed path's limits.
> +   * ToDo: Also, need to think about reversible path in QoS.
> +   * ToDo: 
> +   */       
> +
>    /* did we get a Reversible Path compmask ? */
>    /* 
>       NOTE that if the reversible component = 0, it is a don't care
> @@ -2053,7 +2863,7 @@ osm_pr_rcv_process(
>  	  /* SL, Hop Limit, and Flow Label */
>            ib_member_get_sl_flow_hop( p_mgrp->mcmember_rec.sl_flow_hop,
>                                       &sl, &flow_label, &hop_limit );
> -	  p_pr_item->path_rec.sl = cl_hton16( sl );
> +	  ib_path_rec_set_sl(&(p_pr_item->path_rec), sl);
>  #ifndef ROUTER_EXP
>            p_pr_item->path_rec.hop_flow_raw = cl_hton32(hop_limit) |
>                                               (flow_label << 8);





More information about the general mailing list