[openib-general] [PATCH] osm: fix bugs related to not passing OSM_SIGNAL_DONE_PENDING

Eitan Zahavi eitan at mellanox.co.il
Mon Dec 18 11:35:13 PST 2006


Hal Rosenstock wrote:
> Hi Eitan,
>
> On Sat, 2006-12-16 at 17:12, Eitan Zahavi wrote:
>   
>> Hi Hal
>>
>> This set of patches fixes issues of not providing back to state manager 
>> OSM_SIGNAL_DONE_PENDING
>> which breaks the state machine later in the sweep.
>>
>> Eitan
>>
>> Signed-off-by:  Eitan Zahavi <eitan at mellanox.co.il>
>>
>>  osm/opensm/osm_pkey_mgr.c  |  112 
>> ++++++++++++++++++++++++++++++++------------
>>     
>
> This patch (here and other places) appear to be line wrapped.
>   
Sorry about that - I did cut and paste. I will never do that again.

>   
>> osm/opensm/osm_state_mgr.c |   11 +++--
>>  osm/opensm/osm_ucast_mgr.c |   96 ++++++++++++++++++++++++--------------
>>  4 files changed, 179 insertions(+), 88 deletions(-)
>>     
>
> Is this patch 4 files or 3 ? (How was this patch generated ?)
>   
I did remove part of the patch since I already sent it separately.
> Is this one patch or should it be 2 or 3 ? It looks to me there is an
> incremental change to osm_state_mgr.c and perhaps 2 other ones which can
> be separate (pkey and ucast_mgr).
>   
I probably messed up the patch and can not tell. I will resend this 
patch after pulling from trunk again.
> Also, see below in osm_state_mgr.c for another minor comment.
>
>   
>> diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c
>> index 48837bc..a33aec7 100644
>> --- a/osm/opensm/osm_pkey_mgr.c
>> +++ b/osm/opensm/osm_pkey_mgr.c
>> @@ -212,8 +212,9 @@ pkey_mgr_update_pkey_entry(
>>  
>>  /**********************************************************************
>>   **********************************************************************/
>> -static ib_api_status_t
>> +static boolean_t
>>  pkey_mgr_enforce_partition(
>> +  IN osm_log_t *p_log,
>>    IN const osm_req_t *p_req,
>>    IN const osm_physp_t *p_physp,
>>    IN const boolean_t enforce)
>> @@ -221,12 +222,33 @@ pkey_mgr_enforce_partition(
>>    osm_madw_context_t context;
>>    uint8_t payload[IB_SMP_DATA_SIZE];
>>    ib_port_info_t *p_pi;
>> +  ib_api_status_t status;
>>  
>>    if (!(p_pi = osm_physp_get_port_info_ptr( p_physp )))
>> -    return IB_ERROR;
>> +  {
>> +     osm_log( p_log, OSM_LOG_ERROR,
>> +              "pkey_mgr_enforce_partition: ERR 0507: "
>> +              "No port info for "
>> +              "node 0x%016" PRIx64 " port %u\n",
>> +              cl_ntoh64(
>> +                 osm_node_get_node_guid(
>> +                    osm_physp_get_node_ptr( p_physp ))),
>> +              osm_physp_get_port_num( p_physp ) );
>> +     return FALSE;
>> +  }
>>  
>> -  if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
>> -    return IB_SUCCESS;
>> +  if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
>> +  {
>> +     osm_log( p_log, OSM_LOG_DEBUG,
>> +              "pkey_mgr_enforce_partition: "
>> +              "No need to update PortInfo for "
>> +              "node 0x%016" PRIx64 " port %u\n",
>> +              cl_ntoh64(
>> +                 osm_node_get_node_guid(
>> +                    osm_physp_get_node_ptr( p_physp ))),
>> +              osm_physp_get_port_num( p_physp ) );
>> +    return FALSE;
>> +  }
>>  
>>    memset( payload, 0, IB_SMP_DATA_SIZE );
>>    memcpy( payload, p_pi, sizeof(ib_port_info_t) );
>> @@ -248,11 +270,35 @@ pkey_mgr_enforce_partition(
>>    context.pi_context.light_sweep = FALSE;
>>    context.pi_context.active_transition = FALSE;
>>  
>> -  return osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
>> -                      payload, sizeof(payload),
>> -                      IB_MAD_ATTR_PORT_INFO,
>> -                      cl_hton32( osm_physp_get_port_num( p_physp ) ),
>> -                      CL_DISP_MSGID_NONE, &context );
>> +  status = osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
>> +        payload, sizeof(payload),
>> +        IB_MAD_ATTR_PORT_INFO,
>> +        cl_hton32( osm_physp_get_port_num( p_physp ) ),
>> +        CL_DISP_MSGID_NONE, &context );
>> +  if (status != IB_SUCCESS)
>> +  {
>> +     osm_log( p_log, OSM_LOG_ERROR,
>> +              "pkey_mgr_enforce_partition: ERR 0520: "
>> +              "Failed to set PortInfo for "
>> +              "node 0x%016" PRIx64 " port %u\n",
>> +              cl_ntoh64(
>> +                 osm_node_get_node_guid(
>> +                    osm_physp_get_node_ptr( p_physp ))),
>> +              osm_physp_get_port_num( p_physp ) );
>> +     return FALSE;
>> +  }
>> +  else
>> +  {
>> +     osm_log( p_log, OSM_LOG_DEBUG,
>> +              "pkey_mgr_enforce_partition: "
>> +              "Set PortInfo for "
>> +              "node 0x%016" PRIx64 " port %u\n",
>> +              cl_ntoh64(
>> +                 osm_node_get_node_guid(
>> +                    osm_physp_get_node_ptr( p_physp ))),
>> +              osm_physp_get_port_num( p_physp ) );
>> +   return TRUE;
>> +  }
>>  }
>>  
>>  /**********************************************************************
>> @@ -369,15 +415,26 @@ static boolean_t pkey_mgr_update_port(
>>  
>>      status = pkey_mgr_update_pkey_entry( p_req, p_physp, new_block, 
>> block_index );
>>      if (status == IB_SUCCESS)
>> -      ret_val = TRUE;
>> +  {
>> +   osm_log( p_log, OSM_LOG_DEBUG,
>> +      "pkey_mgr_update_port: "
>> +      "Updated "
>> +      "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> +      block_index,
>> +      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> +      osm_physp_get_port_num( p_physp ) );
>> +   ret_val = TRUE;
>> +  }
>>      else
>> -      osm_log( p_log, OSM_LOG_ERROR,
>> -        "pkey_mgr_update_port: ERR 0506: "
>> -        "pkey_mgr_update_pkey_entry() failed to update "
>> -        "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> -        block_index,
>> -        cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> -        osm_physp_get_port_num( p_physp ) );
>> +  {
>> +   osm_log( p_log, OSM_LOG_ERROR,
>> +      "pkey_mgr_update_port: ERR 0506: "
>> +      "pkey_mgr_update_pkey_entry() failed to update "
>> +      "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> +      block_index,
>> +      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> +      osm_physp_get_port_num( p_physp ) );
>> +  }
>>    }
>>  
>>    return ret_val;
>> @@ -405,8 +462,9 @@ pkey_mgr_update_peer_port(
>>    uint16_t peer_max_blocks;
>>    ib_api_status_t status = IB_SUCCESS;
>>    boolean_t ret_val = FALSE;
>> +  boolean_t port_info_set = FALSE;
>>    ib_pkey_table_t empty_block;
>> -
>> + 
>>    memset(&empty_block, 0, sizeof(ib_pkey_table_t));
>>  
>>    p_physp = osm_port_get_default_phys_ptr( p_port );
>> @@ -439,18 +497,11 @@ pkey_mgr_update_peer_port(
>>      enforce = FALSE;
>>    }
>>  
>> -  if (pkey_mgr_enforce_partition( p_req, peer, enforce ) != IB_SUCCESS)
>> -  {
>> -    osm_log( p_log, OSM_LOG_ERROR,
>> -      "pkey_mgr_update_peer_port: ERR 0507: "
>> -      "pkey_mgr_enforce_partition() failed to update "
>> -      "node 0x%016" PRIx64 " port %u\n",
>> -      cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> -      osm_physp_get_port_num( peer ) );
>> -  }
>> +  if (pkey_mgr_enforce_partition( p_log, p_req, peer, enforce))
>> +   port_info_set = TRUE;
>>  
>>    if (enforce == FALSE)
>> -    return FALSE;
>> +  return port_info_set;
>>  
>>    p_peer_pkey_tbl->used_blocks = p_pkey_tbl->used_blocks;
>>    for (block_index = 0; block_index < p_pkey_tbl->used_blocks; 
>> block_index++)
>> @@ -487,6 +538,7 @@ pkey_mgr_update_peer_port(
>>               osm_physp_get_port_num( peer ) );
>>    }
>>  
>> +  if (port_info_set) return TRUE;
>>    return ret_val;
>>  }
>>  
>> @@ -541,10 +593,10 @@ osm_pkey_mgr_process(
>>        signal = OSM_SIGNAL_DONE_PENDING;
>>      p_node = osm_port_get_parent_node( p_port );
>>      if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) &&
>> -  pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
>> +   pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
>>          &p_osm->subn, p_port,
>>          !p_osm->subn.opt.no_partition_enforcement ) )
>> -      signal = OSM_SIGNAL_DONE_PENDING;       
>> +      signal = OSM_SIGNAL_DONE_PENDING;
>>    }
>>  
>>   _err:
>> diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
>> index 9eac038..4e61259 100644
>> --- a/osm/opensm/osm_state_mgr.c
>> +++ b/osm/opensm/osm_state_mgr.c
>> @@ -1853,6 +1853,7 @@ osm_state_mgr_process(
>>  {
>>     ib_api_status_t status;
>>     osm_remote_sm_t *p_remote_sm;
>> + osm_signal_t tmp_signal;
>>  
>>     CL_ASSERT( p_mgr );
>>  
>> @@ -2075,11 +2076,10 @@ osm_state_mgr_process(
>>           case OSM_SIGNAL_CHANGE_DETECTED:
>>              /*
>>               * Nothing to do here.  One subnet change typcially
>> -             * begets another....
>> +             * begets another.... But needs to wait for all transactions
>>               */
>>              signal = OSM_SIGNAL_NONE;
>> -            break;
>> -
>>     
>
> This is a repeat of your previous submitted patch to this file so isn't
> needed.
>
>   
Yes I will resend.
> -- Hal
>
>   
>> +    break;
>>           case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
>>              /*
>>               * A change was detected on the subnet.
>> @@ -2219,7 +2219,10 @@ osm_state_mgr_process(
>>              signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
>>  
>>              /* the returned signal is always DONE */
>> -            signal = osm_qos_setup(p_mgr->p_subn->p_osm);
>> +            tmp_signal = osm_qos_setup(p_mgr->p_subn->p_osm);
>> +
>> +    if (tmp_signal == OSM_SIGNAL_DONE_PENDING)
>> +     signal = OSM_SIGNAL_DONE_PENDING;
>>  
>>              /* try to restore SA DB (this should be before lid_mgr
>>                 because we may want to disable clients reregistration
>> diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c
>> index e977253..39973de 100644
>> --- a/osm/opensm/osm_ucast_mgr.c
>> +++ b/osm/opensm/osm_ucast_mgr.c
>> @@ -885,6 +885,9 @@ osm_ucast_mgr_set_fwd_table(
>>    ib_switch_info_t si;
>>    uint32_t block_id_ho = 0;
>>    uint8_t block[IB_SMP_DATA_SIZE];
>> +  boolean_t set_swinfo_require = FALSE;
>> +  uint16_t lin_top;
>> +  uint8_t life_state;
>>  
>>    CL_ASSERT( p_mgr );
>>  
>> @@ -904,43 +907,59 @@ osm_ucast_mgr_set_fwd_table(
>>      Set the top of the unicast forwarding table.
>>    */
>>    si = *osm_switch_get_si_ptr( p_sw );
>> -  si.lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
>> +  lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
>> +  if (si.lin_top != lin_top)
>> +  {
>> +   set_swinfo_require = TRUE;
>> +      si.lin_top  = lin_top;
>> +  }
>>  
>>    /* check to see if the change state bit is on. If it is - then we
>>       need to clear it. */
>> -   if( ib_switch_info_get_state_change( &si ) )
>> -    si.life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
>> -                      | ( si.life_state & IB_SWITCH_PSC ) )  & 0xfc;
>> +  if ( ib_switch_info_get_state_change( &si ) )
>> +      life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
>> +                          | ( si.life_state & IB_SWITCH_PSC ) )  & 0xfc;
>>    else
>> -    si.life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
>> +      life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
>>  
>> -  if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
>> +  if (life_state != si.life_state)
>>    {
>> -    osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
>> -             "osm_ucast_mgr_set_fwd_table: "
>> -             "Setting switch FT top to LID 0x%X\n",
>> -             osm_switch_get_max_lid_ho( p_sw ) );
>> +      set_swinfo_require = TRUE;
>> +      si.life_state = life_state;
>>    }
>> -
>> -  context.si_context.light_sweep = FALSE;
>> -  context.si_context.node_guid = osm_node_get_node_guid( p_node );
>> -  context.si_context.set_method = TRUE;
>> -
>> -  status = osm_req_set( p_mgr->p_req,
>> -                        p_path,
>> -                        (uint8_t*)&si,
>> -                        sizeof(si),
>> -                        IB_MAD_ATTR_SWITCH_INFO,
>> -                        0,
>> -                        CL_DISP_MSGID_NONE,
>> -                        &context );
>> -
>> -  if( status != IB_SUCCESS )
>> + 
>> +  if ( set_swinfo_require )
>>    {
>> -    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
>> -             "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
>> -             "Sending SwitchInfo attribute failed (%s)\n",
>> -             ib_get_err_str( status ) );
>> +      if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
>> +      {
>> +          osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
>> +                      "osm_ucast_mgr_set_fwd_table: "
>> +                      "Setting switch FT top to LID 0x%X\n",
>> +                      osm_switch_get_max_lid_ho( p_sw ) );
>> +      }
>> +     
>> +      context.si_context.light_sweep = FALSE;
>> +      context.si_context.node_guid = osm_node_get_node_guid( p_node );
>> +      context.si_context.set_method = TRUE;
>> +     
>> +      status = osm_req_set( p_mgr->p_req,
>> +                                    p_path,
>> +                                    (uint8_t*)&si,
>> +                                    sizeof(si),
>> +                                    IB_MAD_ATTR_SWITCH_INFO,
>> +                                    0,
>> +                                    CL_DISP_MSGID_NONE,
>> +                                    &context );
>> +     
>> +      if( status != IB_SUCCESS )
>> +      {
>> +          osm_log( p_mgr->p_log, OSM_LOG_ERROR,
>> +                      "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
>> +                      "Sending SwitchInfo attribute failed (%s)\n",
>> +                      ib_get_err_str( status ) );
>> +      }
>> +      else
>> +          p_mgr->any_change = TRUE;
>>    }
>>  
>>    /*
>> @@ -1215,13 +1234,14 @@ osm_ucast_mgr_process(
>>  
>>    CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
>>  
>> +  p_mgr->any_change = FALSE;
>> +
>>    /*
>>      If there are no switches in the subnet, we are done.
>>    */
>>    if (cl_qmap_count( p_sw_guid_tbl ) == 0)
>>      goto Exit;
>>  
>> -  p_mgr->any_change = FALSE;
>>    cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_clean_switch, NULL);
>>  
>>    if (!p_routing_eng->build_lid_matrices ||
>> @@ -1248,14 +1268,20 @@ osm_ucast_mgr_process(
>>    if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
>>      __osm_ucast_mgr_dump_tables( p_mgr );
>>  
>> -  if (p_mgr->any_change)
>> +  if (p_mgr->any_change)
>> +  {
>>       signal = OSM_SIGNAL_DONE_PENDING;
>> +      osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> +                 "osm_ucast_mgr_process: "
>> +                 "LFT Tables configured on all switches\n");
>> +  }
>>    else
>> +  {
>> +      osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> +                 "osm_ucast_mgr_process: "
>> +                 "No need to set any LFT Tables on all switches\n");
>>       signal = OSM_SIGNAL_DONE;
>> -
>> -  osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> -          "osm_ucast_mgr_process: "
>> -          "LFT Tables configured on all switches\n");
>> +  }
>>  
>>   Exit:
>>    CL_PLOCK_RELEASE( p_mgr->p_lock );
>>
>>
>>     
>
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>   





More information about the general mailing list