[openib-general] [PATCH] osm: fix bugs related to not passing OSM_SIGNAL_DONE_PENDING
Eitan Zahavi
eitan at mellanox.co.il
Mon Dec 18 11:35:13 PST 2006
Hal Rosenstock wrote:
> Hi Eitan,
>
> On Sat, 2006-12-16 at 17:12, Eitan Zahavi wrote:
>
>> Hi Hal
>>
>> This set of patches fixes issues of not providing back to state manager
>> OSM_SIGNAL_DONE_PENDING
>> which breaks the state machine later in the sweep.
>>
>> Eitan
>>
>> Signed-off-by: Eitan Zahavi <eitan at mellanox.co.il>
>>
>> osm/opensm/osm_pkey_mgr.c | 112
>> ++++++++++++++++++++++++++++++++------------
>>
>
> This patch (here and other places) appear to be line wrapped.
>
Sorry about that - I did cut and paste. I will never do that again.
>
>> osm/opensm/osm_state_mgr.c | 11 +++--
>> osm/opensm/osm_ucast_mgr.c | 96 ++++++++++++++++++++++++--------------
>> 4 files changed, 179 insertions(+), 88 deletions(-)
>>
>
> Is this patch 4 files or 3 ? (How was this patch generated ?)
>
I did remove part of the patch since I already sent it separately.
> Is this one patch or should it be 2 or 3 ? It looks to me there is an
> incremental change to osm_state_mgr.c and perhaps 2 other ones which can
> be separate (pkey and ucast_mgr).
>
I probably messed up the patch and can not tell. I will resend this
patch after pulling from trunk again.
> Also, see below in osm_state_mgr.c for another minor comment.
>
>
>> diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c
>> index 48837bc..a33aec7 100644
>> --- a/osm/opensm/osm_pkey_mgr.c
>> +++ b/osm/opensm/osm_pkey_mgr.c
>> @@ -212,8 +212,9 @@ pkey_mgr_update_pkey_entry(
>>
>> /**********************************************************************
>> **********************************************************************/
>> -static ib_api_status_t
>> +static boolean_t
>> pkey_mgr_enforce_partition(
>> + IN osm_log_t *p_log,
>> IN const osm_req_t *p_req,
>> IN const osm_physp_t *p_physp,
>> IN const boolean_t enforce)
>> @@ -221,12 +222,33 @@ pkey_mgr_enforce_partition(
>> osm_madw_context_t context;
>> uint8_t payload[IB_SMP_DATA_SIZE];
>> ib_port_info_t *p_pi;
>> + ib_api_status_t status;
>>
>> if (!(p_pi = osm_physp_get_port_info_ptr( p_physp )))
>> - return IB_ERROR;
>> + {
>> + osm_log( p_log, OSM_LOG_ERROR,
>> + "pkey_mgr_enforce_partition: ERR 0507: "
>> + "No port info for "
>> + "node 0x%016" PRIx64 " port %u\n",
>> + cl_ntoh64(
>> + osm_node_get_node_guid(
>> + osm_physp_get_node_ptr( p_physp ))),
>> + osm_physp_get_port_num( p_physp ) );
>> + return FALSE;
>> + }
>>
>> - if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
>> - return IB_SUCCESS;
>> + if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
>> + {
>> + osm_log( p_log, OSM_LOG_DEBUG,
>> + "pkey_mgr_enforce_partition: "
>> + "No need to update PortInfo for "
>> + "node 0x%016" PRIx64 " port %u\n",
>> + cl_ntoh64(
>> + osm_node_get_node_guid(
>> + osm_physp_get_node_ptr( p_physp ))),
>> + osm_physp_get_port_num( p_physp ) );
>> + return FALSE;
>> + }
>>
>> memset( payload, 0, IB_SMP_DATA_SIZE );
>> memcpy( payload, p_pi, sizeof(ib_port_info_t) );
>> @@ -248,11 +270,35 @@ pkey_mgr_enforce_partition(
>> context.pi_context.light_sweep = FALSE;
>> context.pi_context.active_transition = FALSE;
>>
>> - return osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
>> - payload, sizeof(payload),
>> - IB_MAD_ATTR_PORT_INFO,
>> - cl_hton32( osm_physp_get_port_num( p_physp ) ),
>> - CL_DISP_MSGID_NONE, &context );
>> + status = osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
>> + payload, sizeof(payload),
>> + IB_MAD_ATTR_PORT_INFO,
>> + cl_hton32( osm_physp_get_port_num( p_physp ) ),
>> + CL_DISP_MSGID_NONE, &context );
>> + if (status != IB_SUCCESS)
>> + {
>> + osm_log( p_log, OSM_LOG_ERROR,
>> + "pkey_mgr_enforce_partition: ERR 0520: "
>> + "Failed to set PortInfo for "
>> + "node 0x%016" PRIx64 " port %u\n",
>> + cl_ntoh64(
>> + osm_node_get_node_guid(
>> + osm_physp_get_node_ptr( p_physp ))),
>> + osm_physp_get_port_num( p_physp ) );
>> + return FALSE;
>> + }
>> + else
>> + {
>> + osm_log( p_log, OSM_LOG_DEBUG,
>> + "pkey_mgr_enforce_partition: "
>> + "Set PortInfo for "
>> + "node 0x%016" PRIx64 " port %u\n",
>> + cl_ntoh64(
>> + osm_node_get_node_guid(
>> + osm_physp_get_node_ptr( p_physp ))),
>> + osm_physp_get_port_num( p_physp ) );
>> + return TRUE;
>> + }
>> }
>>
>> /**********************************************************************
>> @@ -369,15 +415,26 @@ static boolean_t pkey_mgr_update_port(
>>
>> status = pkey_mgr_update_pkey_entry( p_req, p_physp, new_block,
>> block_index );
>> if (status == IB_SUCCESS)
>> - ret_val = TRUE;
>> + {
>> + osm_log( p_log, OSM_LOG_DEBUG,
>> + "pkey_mgr_update_port: "
>> + "Updated "
>> + "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> + block_index,
>> + cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> + osm_physp_get_port_num( p_physp ) );
>> + ret_val = TRUE;
>> + }
>> else
>> - osm_log( p_log, OSM_LOG_ERROR,
>> - "pkey_mgr_update_port: ERR 0506: "
>> - "pkey_mgr_update_pkey_entry() failed to update "
>> - "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> - block_index,
>> - cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> - osm_physp_get_port_num( p_physp ) );
>> + {
>> + osm_log( p_log, OSM_LOG_ERROR,
>> + "pkey_mgr_update_port: ERR 0506: "
>> + "pkey_mgr_update_pkey_entry() failed to update "
>> + "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
>> + block_index,
>> + cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> + osm_physp_get_port_num( p_physp ) );
>> + }
>> }
>>
>> return ret_val;
>> @@ -405,8 +462,9 @@ pkey_mgr_update_peer_port(
>> uint16_t peer_max_blocks;
>> ib_api_status_t status = IB_SUCCESS;
>> boolean_t ret_val = FALSE;
>> + boolean_t port_info_set = FALSE;
>> ib_pkey_table_t empty_block;
>> -
>> +
>> memset(&empty_block, 0, sizeof(ib_pkey_table_t));
>>
>> p_physp = osm_port_get_default_phys_ptr( p_port );
>> @@ -439,18 +497,11 @@ pkey_mgr_update_peer_port(
>> enforce = FALSE;
>> }
>>
>> - if (pkey_mgr_enforce_partition( p_req, peer, enforce ) != IB_SUCCESS)
>> - {
>> - osm_log( p_log, OSM_LOG_ERROR,
>> - "pkey_mgr_update_peer_port: ERR 0507: "
>> - "pkey_mgr_enforce_partition() failed to update "
>> - "node 0x%016" PRIx64 " port %u\n",
>> - cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>> - osm_physp_get_port_num( peer ) );
>> - }
>> + if (pkey_mgr_enforce_partition( p_log, p_req, peer, enforce))
>> + port_info_set = TRUE;
>>
>> if (enforce == FALSE)
>> - return FALSE;
>> + return port_info_set;
>>
>> p_peer_pkey_tbl->used_blocks = p_pkey_tbl->used_blocks;
>> for (block_index = 0; block_index < p_pkey_tbl->used_blocks;
>> block_index++)
>> @@ -487,6 +538,7 @@ pkey_mgr_update_peer_port(
>> osm_physp_get_port_num( peer ) );
>> }
>>
>> + if (port_info_set) return TRUE;
>> return ret_val;
>> }
>>
>> @@ -541,10 +593,10 @@ osm_pkey_mgr_process(
>> signal = OSM_SIGNAL_DONE_PENDING;
>> p_node = osm_port_get_parent_node( p_port );
>> if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) &&
>> - pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
>> + pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
>> &p_osm->subn, p_port,
>> !p_osm->subn.opt.no_partition_enforcement ) )
>> - signal = OSM_SIGNAL_DONE_PENDING;
>> + signal = OSM_SIGNAL_DONE_PENDING;
>> }
>>
>> _err:
>> diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
>> index 9eac038..4e61259 100644
>> --- a/osm/opensm/osm_state_mgr.c
>> +++ b/osm/opensm/osm_state_mgr.c
>> @@ -1853,6 +1853,7 @@ osm_state_mgr_process(
>> {
>> ib_api_status_t status;
>> osm_remote_sm_t *p_remote_sm;
>> + osm_signal_t tmp_signal;
>>
>> CL_ASSERT( p_mgr );
>>
>> @@ -2075,11 +2076,10 @@ osm_state_mgr_process(
>> case OSM_SIGNAL_CHANGE_DETECTED:
>> /*
>> * Nothing to do here. One subnet change typcially
>> - * begets another....
>> + * begets another.... But needs to wait for all transactions
>> */
>> signal = OSM_SIGNAL_NONE;
>> - break;
>> -
>>
>
> This is a repeat of your previous submitted patch to this file so isn't
> needed.
>
>
Yes I will resend.
> -- Hal
>
>
>> + break;
>> case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
>> /*
>> * A change was detected on the subnet.
>> @@ -2219,7 +2219,10 @@ osm_state_mgr_process(
>> signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
>>
>> /* the returned signal is always DONE */
>> - signal = osm_qos_setup(p_mgr->p_subn->p_osm);
>> + tmp_signal = osm_qos_setup(p_mgr->p_subn->p_osm);
>> +
>> + if (tmp_signal == OSM_SIGNAL_DONE_PENDING)
>> + signal = OSM_SIGNAL_DONE_PENDING;
>>
>> /* try to restore SA DB (this should be before lid_mgr
>> because we may want to disable clients reregistration
>> diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c
>> index e977253..39973de 100644
>> --- a/osm/opensm/osm_ucast_mgr.c
>> +++ b/osm/opensm/osm_ucast_mgr.c
>> @@ -885,6 +885,9 @@ osm_ucast_mgr_set_fwd_table(
>> ib_switch_info_t si;
>> uint32_t block_id_ho = 0;
>> uint8_t block[IB_SMP_DATA_SIZE];
>> + boolean_t set_swinfo_require = FALSE;
>> + uint16_t lin_top;
>> + uint8_t life_state;
>>
>> CL_ASSERT( p_mgr );
>>
>> @@ -904,43 +907,59 @@ osm_ucast_mgr_set_fwd_table(
>> Set the top of the unicast forwarding table.
>> */
>> si = *osm_switch_get_si_ptr( p_sw );
>> - si.lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
>> + lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
>> + if (si.lin_top != lin_top)
>> + {
>> + set_swinfo_require = TRUE;
>> + si.lin_top = lin_top;
>> + }
>>
>> /* check to see if the change state bit is on. If it is - then we
>> need to clear it. */
>> - if( ib_switch_info_get_state_change( &si ) )
>> - si.life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
>> - | ( si.life_state & IB_SWITCH_PSC ) ) & 0xfc;
>> + if ( ib_switch_info_get_state_change( &si ) )
>> + life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
>> + | ( si.life_state & IB_SWITCH_PSC ) ) & 0xfc;
>> else
>> - si.life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
>> + life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
>>
>> - if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
>> + if (life_state != si.life_state)
>> {
>> - osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
>> - "osm_ucast_mgr_set_fwd_table: "
>> - "Setting switch FT top to LID 0x%X\n",
>> - osm_switch_get_max_lid_ho( p_sw ) );
>> + set_swinfo_require = TRUE;
>> + si.life_state = life_state;
>> }
>> -
>> - context.si_context.light_sweep = FALSE;
>> - context.si_context.node_guid = osm_node_get_node_guid( p_node );
>> - context.si_context.set_method = TRUE;
>> -
>> - status = osm_req_set( p_mgr->p_req,
>> - p_path,
>> - (uint8_t*)&si,
>> - sizeof(si),
>> - IB_MAD_ATTR_SWITCH_INFO,
>> - 0,
>> - CL_DISP_MSGID_NONE,
>> - &context );
>> -
>> - if( status != IB_SUCCESS )
>> +
>> + if ( set_swinfo_require )
>> {
>> - osm_log( p_mgr->p_log, OSM_LOG_ERROR,
>> - "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
>> - "Sending SwitchInfo attribute failed (%s)\n",
>> - ib_get_err_str( status ) );
>> + if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
>> + {
>> + osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
>> + "osm_ucast_mgr_set_fwd_table: "
>> + "Setting switch FT top to LID 0x%X\n",
>> + osm_switch_get_max_lid_ho( p_sw ) );
>> + }
>> +
>> + context.si_context.light_sweep = FALSE;
>> + context.si_context.node_guid = osm_node_get_node_guid( p_node );
>> + context.si_context.set_method = TRUE;
>> +
>> + status = osm_req_set( p_mgr->p_req,
>> + p_path,
>> + (uint8_t*)&si,
>> + sizeof(si),
>> + IB_MAD_ATTR_SWITCH_INFO,
>> + 0,
>> + CL_DISP_MSGID_NONE,
>> + &context );
>> +
>> + if( status != IB_SUCCESS )
>> + {
>> + osm_log( p_mgr->p_log, OSM_LOG_ERROR,
>> + "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
>> + "Sending SwitchInfo attribute failed (%s)\n",
>> + ib_get_err_str( status ) );
>> + }
>> + else
>> + p_mgr->any_change = TRUE;
>> }
>>
>> /*
>> @@ -1215,13 +1234,14 @@ osm_ucast_mgr_process(
>>
>> CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
>>
>> + p_mgr->any_change = FALSE;
>> +
>> /*
>> If there are no switches in the subnet, we are done.
>> */
>> if (cl_qmap_count( p_sw_guid_tbl ) == 0)
>> goto Exit;
>>
>> - p_mgr->any_change = FALSE;
>> cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_clean_switch, NULL);
>>
>> if (!p_routing_eng->build_lid_matrices ||
>> @@ -1248,14 +1268,20 @@ osm_ucast_mgr_process(
>> if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
>> __osm_ucast_mgr_dump_tables( p_mgr );
>>
>> - if (p_mgr->any_change)
>> + if (p_mgr->any_change)
>> + {
>> signal = OSM_SIGNAL_DONE_PENDING;
>> + osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> + "osm_ucast_mgr_process: "
>> + "LFT Tables configured on all switches\n");
>> + }
>> else
>> + {
>> + osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> + "osm_ucast_mgr_process: "
>> + "No need to set any LFT Tables on all switches\n");
>> signal = OSM_SIGNAL_DONE;
>> -
>> - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
>> - "osm_ucast_mgr_process: "
>> - "LFT Tables configured on all switches\n");
>> + }
>>
>> Exit:
>> CL_PLOCK_RELEASE( p_mgr->p_lock );
>>
>>
>>
>
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>
More information about the general
mailing list