[openib-general] [PATCH] osm: fix bugs related to not passing OSM_SIGNAL_DONE_PENDING

Hal Rosenstock halr at voltaire.com
Mon Dec 18 08:07:43 PST 2006


Hi Eitan,

On Sat, 2006-12-16 at 17:12, Eitan Zahavi wrote:
> Hi Hal
> 
> This set of patches fixes issues of not providing back to state manager 
> OSM_SIGNAL_DONE_PENDING
> which breaks the state machine later in the sweep.
> 
> Eitan
> 
> Signed-off-by:  Eitan Zahavi <eitan at mellanox.co.il>
> 
>  osm/opensm/osm_pkey_mgr.c  |  112 
> ++++++++++++++++++++++++++++++++------------

This patch (here and other places) appear to be line wrapped.

> osm/opensm/osm_state_mgr.c |   11 +++--
>  osm/opensm/osm_ucast_mgr.c |   96 ++++++++++++++++++++++++--------------
>  4 files changed, 179 insertions(+), 88 deletions(-)

Is this patch 4 files or 3 ? (How was this patch generated ?)

Is this one patch or should it be 2 or 3 ? It looks to me there is an
incremental change to osm_state_mgr.c and perhaps 2 other ones which can
be separate (pkey and ucast_mgr).

Also, see below in osm_state_mgr.c for another minor comment.

> diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c
> index 48837bc..a33aec7 100644
> --- a/osm/opensm/osm_pkey_mgr.c
> +++ b/osm/opensm/osm_pkey_mgr.c
> @@ -212,8 +212,9 @@ pkey_mgr_update_pkey_entry(
>  
>  /**********************************************************************
>   **********************************************************************/
> -static ib_api_status_t
> +static boolean_t
>  pkey_mgr_enforce_partition(
> +  IN osm_log_t *p_log,
>    IN const osm_req_t *p_req,
>    IN const osm_physp_t *p_physp,
>    IN const boolean_t enforce)
> @@ -221,12 +222,33 @@ pkey_mgr_enforce_partition(
>    osm_madw_context_t context;
>    uint8_t payload[IB_SMP_DATA_SIZE];
>    ib_port_info_t *p_pi;
> +  ib_api_status_t status;
>  
>    if (!(p_pi = osm_physp_get_port_info_ptr( p_physp )))
> -    return IB_ERROR;
> +  {
> +     osm_log( p_log, OSM_LOG_ERROR,
> +              "pkey_mgr_enforce_partition: ERR 0507: "
> +              "No port info for "
> +              "node 0x%016" PRIx64 " port %u\n",
> +              cl_ntoh64(
> +                 osm_node_get_node_guid(
> +                    osm_physp_get_node_ptr( p_physp ))),
> +              osm_physp_get_port_num( p_physp ) );
> +     return FALSE;
> +  }
>  
> -  if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
> -    return IB_SUCCESS;
> +  if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
> +  {
> +     osm_log( p_log, OSM_LOG_DEBUG,
> +              "pkey_mgr_enforce_partition: "
> +              "No need to update PortInfo for "
> +              "node 0x%016" PRIx64 " port %u\n",
> +              cl_ntoh64(
> +                 osm_node_get_node_guid(
> +                    osm_physp_get_node_ptr( p_physp ))),
> +              osm_physp_get_port_num( p_physp ) );
> +    return FALSE;
> +  }
>  
>    memset( payload, 0, IB_SMP_DATA_SIZE );
>    memcpy( payload, p_pi, sizeof(ib_port_info_t) );
> @@ -248,11 +270,35 @@ pkey_mgr_enforce_partition(
>    context.pi_context.light_sweep = FALSE;
>    context.pi_context.active_transition = FALSE;
>  
> -  return osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
> -                      payload, sizeof(payload),
> -                      IB_MAD_ATTR_PORT_INFO,
> -                      cl_hton32( osm_physp_get_port_num( p_physp ) ),
> -                      CL_DISP_MSGID_NONE, &context );
> +  status = osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
> +        payload, sizeof(payload),
> +        IB_MAD_ATTR_PORT_INFO,
> +        cl_hton32( osm_physp_get_port_num( p_physp ) ),
> +        CL_DISP_MSGID_NONE, &context );
> +  if (status != IB_SUCCESS)
> +  {
> +     osm_log( p_log, OSM_LOG_ERROR,
> +              "pkey_mgr_enforce_partition: ERR 0520: "
> +              "Failed to set PortInfo for "
> +              "node 0x%016" PRIx64 " port %u\n",
> +              cl_ntoh64(
> +                 osm_node_get_node_guid(
> +                    osm_physp_get_node_ptr( p_physp ))),
> +              osm_physp_get_port_num( p_physp ) );
> +     return FALSE;
> +  }
> +  else
> +  {
> +     osm_log( p_log, OSM_LOG_DEBUG,
> +              "pkey_mgr_enforce_partition: "
> +              "Set PortInfo for "
> +              "node 0x%016" PRIx64 " port %u\n",
> +              cl_ntoh64(
> +                 osm_node_get_node_guid(
> +                    osm_physp_get_node_ptr( p_physp ))),
> +              osm_physp_get_port_num( p_physp ) );
> +   return TRUE;
> +  }
>  }
>  
>  /**********************************************************************
> @@ -369,15 +415,26 @@ static boolean_t pkey_mgr_update_port(
>  
>      status = pkey_mgr_update_pkey_entry( p_req, p_physp, new_block, 
> block_index );
>      if (status == IB_SUCCESS)
> -      ret_val = TRUE;
> +  {
> +   osm_log( p_log, OSM_LOG_DEBUG,
> +      "pkey_mgr_update_port: "
> +      "Updated "
> +      "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
> +      block_index,
> +      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +      osm_physp_get_port_num( p_physp ) );
> +   ret_val = TRUE;
> +  }
>      else
> -      osm_log( p_log, OSM_LOG_ERROR,
> -        "pkey_mgr_update_port: ERR 0506: "
> -        "pkey_mgr_update_pkey_entry() failed to update "
> -        "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
> -        block_index,
> -        cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> -        osm_physp_get_port_num( p_physp ) );
> +  {
> +   osm_log( p_log, OSM_LOG_ERROR,
> +      "pkey_mgr_update_port: ERR 0506: "
> +      "pkey_mgr_update_pkey_entry() failed to update "
> +      "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
> +      block_index,
> +      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +      osm_physp_get_port_num( p_physp ) );
> +  }
>    }
>  
>    return ret_val;
> @@ -405,8 +462,9 @@ pkey_mgr_update_peer_port(
>    uint16_t peer_max_blocks;
>    ib_api_status_t status = IB_SUCCESS;
>    boolean_t ret_val = FALSE;
> +  boolean_t port_info_set = FALSE;
>    ib_pkey_table_t empty_block;
> -
> + 
>    memset(&empty_block, 0, sizeof(ib_pkey_table_t));
>  
>    p_physp = osm_port_get_default_phys_ptr( p_port );
> @@ -439,18 +497,11 @@ pkey_mgr_update_peer_port(
>      enforce = FALSE;
>    }
>  
> -  if (pkey_mgr_enforce_partition( p_req, peer, enforce ) != IB_SUCCESS)
> -  {
> -    osm_log( p_log, OSM_LOG_ERROR,
> -      "pkey_mgr_update_peer_port: ERR 0507: "
> -      "pkey_mgr_enforce_partition() failed to update "
> -      "node 0x%016" PRIx64 " port %u\n",
> -      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> -      osm_physp_get_port_num( peer ) );
> -  }
> +  if (pkey_mgr_enforce_partition( p_log, p_req, peer, enforce))
> +   port_info_set = TRUE;
>  
>    if (enforce == FALSE)
> -    return FALSE;
> +  return port_info_set;
>  
>    p_peer_pkey_tbl->used_blocks = p_pkey_tbl->used_blocks;
>    for (block_index = 0; block_index < p_pkey_tbl->used_blocks; 
> block_index++)
> @@ -487,6 +538,7 @@ pkey_mgr_update_peer_port(
>               osm_physp_get_port_num( peer ) );
>    }
>  
> +  if (port_info_set) return TRUE;
>    return ret_val;
>  }
>  
> @@ -541,10 +593,10 @@ osm_pkey_mgr_process(
>        signal = OSM_SIGNAL_DONE_PENDING;
>      p_node = osm_port_get_parent_node( p_port );
>      if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) &&
> -  pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
> +   pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
>          &p_osm->subn, p_port,
>          !p_osm->subn.opt.no_partition_enforcement ) )
> -      signal = OSM_SIGNAL_DONE_PENDING;       
> +      signal = OSM_SIGNAL_DONE_PENDING;
>    }
>  
>   _err:
> diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
> index 9eac038..4e61259 100644
> --- a/osm/opensm/osm_state_mgr.c
> +++ b/osm/opensm/osm_state_mgr.c
> @@ -1853,6 +1853,7 @@ osm_state_mgr_process(
>  {
>     ib_api_status_t status;
>     osm_remote_sm_t *p_remote_sm;
> + osm_signal_t tmp_signal;
>  
>     CL_ASSERT( p_mgr );
>  
> @@ -2075,11 +2076,10 @@ osm_state_mgr_process(
>           case OSM_SIGNAL_CHANGE_DETECTED:
>              /*
>               * Nothing to do here.  One subnet change typcially
> -             * begets another....
> +             * begets another.... But needs to wait for all transactions
>               */
>              signal = OSM_SIGNAL_NONE;
> -            break;
> -

This is a repeat of your previous submitted patch to this file so isn't
needed.

-- Hal

> +    break;
>           case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
>              /*
>               * A change was detected on the subnet.
> @@ -2219,7 +2219,10 @@ osm_state_mgr_process(
>              signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
>  
>              /* the returned signal is always DONE */
> -            signal = osm_qos_setup(p_mgr->p_subn->p_osm);
> +            tmp_signal = osm_qos_setup(p_mgr->p_subn->p_osm);
> +
> +    if (tmp_signal == OSM_SIGNAL_DONE_PENDING)
> +     signal = OSM_SIGNAL_DONE_PENDING;
>  
>              /* try to restore SA DB (this should be before lid_mgr
>                 because we may want to disable clients reregistration
> diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c
> index e977253..39973de 100644
> --- a/osm/opensm/osm_ucast_mgr.c
> +++ b/osm/opensm/osm_ucast_mgr.c
> @@ -885,6 +885,9 @@ osm_ucast_mgr_set_fwd_table(
>    ib_switch_info_t si;
>    uint32_t block_id_ho = 0;
>    uint8_t block[IB_SMP_DATA_SIZE];
> +  boolean_t set_swinfo_require = FALSE;
> +  uint16_t lin_top;
> +  uint8_t life_state;
>  
>    CL_ASSERT( p_mgr );
>  
> @@ -904,43 +907,59 @@ osm_ucast_mgr_set_fwd_table(
>      Set the top of the unicast forwarding table.
>    */
>    si = *osm_switch_get_si_ptr( p_sw );
> -  si.lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
> +  lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
> +  if (si.lin_top != lin_top)
> +  {
> +   set_swinfo_require = TRUE;
> +      si.lin_top  = lin_top;
> +  }
>  
>    /* check to see if the change state bit is on. If it is - then we
>       need to clear it. */
> -   if( ib_switch_info_get_state_change( &si ) )
> -    si.life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
> -                      | ( si.life_state & IB_SWITCH_PSC ) )  & 0xfc;
> +  if ( ib_switch_info_get_state_change( &si ) )
> +      life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
> +                          | ( si.life_state & IB_SWITCH_PSC ) )  & 0xfc;
>    else
> -    si.life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
> +      life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
>  
> -  if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
> +  if (life_state != si.life_state)
>    {
> -    osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
> -             "osm_ucast_mgr_set_fwd_table: "
> -             "Setting switch FT top to LID 0x%X\n",
> -             osm_switch_get_max_lid_ho( p_sw ) );
> +      set_swinfo_require = TRUE;
> +      si.life_state = life_state;
>    }
> -
> -  context.si_context.light_sweep = FALSE;
> -  context.si_context.node_guid = osm_node_get_node_guid( p_node );
> -  context.si_context.set_method = TRUE;
> -
> -  status = osm_req_set( p_mgr->p_req,
> -                        p_path,
> -                        (uint8_t*)&si,
> -                        sizeof(si),
> -                        IB_MAD_ATTR_SWITCH_INFO,
> -                        0,
> -                        CL_DISP_MSGID_NONE,
> -                        &context );
> -
> -  if( status != IB_SUCCESS )
> + 
> +  if ( set_swinfo_require )
>    {
> -    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
> -             "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
> -             "Sending SwitchInfo attribute failed (%s)\n",
> -             ib_get_err_str( status ) );
> +      if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
> +      {
> +          osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
> +                      "osm_ucast_mgr_set_fwd_table: "
> +                      "Setting switch FT top to LID 0x%X\n",
> +                      osm_switch_get_max_lid_ho( p_sw ) );
> +      }
> +     
> +      context.si_context.light_sweep = FALSE;
> +      context.si_context.node_guid = osm_node_get_node_guid( p_node );
> +      context.si_context.set_method = TRUE;
> +     
> +      status = osm_req_set( p_mgr->p_req,
> +                                    p_path,
> +                                    (uint8_t*)&si,
> +                                    sizeof(si),
> +                                    IB_MAD_ATTR_SWITCH_INFO,
> +                                    0,
> +                                    CL_DISP_MSGID_NONE,
> +                                    &context );
> +     
> +      if( status != IB_SUCCESS )
> +      {
> +          osm_log( p_mgr->p_log, OSM_LOG_ERROR,
> +                      "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
> +                      "Sending SwitchInfo attribute failed (%s)\n",
> +                      ib_get_err_str( status ) );
> +      }
> +      else
> +          p_mgr->any_change = TRUE;
>    }
>  
>    /*
> @@ -1215,13 +1234,14 @@ osm_ucast_mgr_process(
>  
>    CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
>  
> +  p_mgr->any_change = FALSE;
> +
>    /*
>      If there are no switches in the subnet, we are done.
>    */
>    if (cl_qmap_count( p_sw_guid_tbl ) == 0)
>      goto Exit;
>  
> -  p_mgr->any_change = FALSE;
>    cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_clean_switch, NULL);
>  
>    if (!p_routing_eng->build_lid_matrices ||
> @@ -1248,14 +1268,20 @@ osm_ucast_mgr_process(
>    if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
>      __osm_ucast_mgr_dump_tables( p_mgr );
>  
> -  if (p_mgr->any_change)
> +  if (p_mgr->any_change)
> +  {
>       signal = OSM_SIGNAL_DONE_PENDING;
> +      osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
> +                 "osm_ucast_mgr_process: "
> +                 "LFT Tables configured on all switches\n");
> +  }
>    else
> +  {
> +      osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
> +                 "osm_ucast_mgr_process: "
> +                 "No need to set any LFT Tables on all switches\n");
>       signal = OSM_SIGNAL_DONE;
> -
> -  osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
> -          "osm_ucast_mgr_process: "
> -          "LFT Tables configured on all switches\n");
> +  }
>  
>   Exit:
>    CL_PLOCK_RELEASE( p_mgr->p_lock );
> 
> 





More information about the general mailing list