[openib-general] [PATCH] osm: fix bugs related to not passing OSM_SIGNAL_DONE_PENDING
Eitan Zahavi
eitan at mellanox.co.il
Sat Dec 16 14:12:28 PST 2006
Hi Hal
This set of patches fixes issues of not providing back to state manager
OSM_SIGNAL_DONE_PENDING
which breaks the state machine later in the sweep.
Eitan
Signed-off-by: Eitan Zahavi <eitan at mellanox.co.il>
osm/opensm/osm_pkey_mgr.c | 112
++++++++++++++++++++++++++++++++------------
osm/opensm/osm_state_mgr.c | 11 +++--
osm/opensm/osm_ucast_mgr.c | 96 ++++++++++++++++++++++++--------------
4 files changed, 179 insertions(+), 88 deletions(-)
diff --git a/osm/opensm/osm_pkey_mgr.c b/osm/opensm/osm_pkey_mgr.c
index 48837bc..a33aec7 100644
--- a/osm/opensm/osm_pkey_mgr.c
+++ b/osm/opensm/osm_pkey_mgr.c
@@ -212,8 +212,9 @@ pkey_mgr_update_pkey_entry(
/**********************************************************************
**********************************************************************/
-static ib_api_status_t
+static boolean_t
pkey_mgr_enforce_partition(
+ IN osm_log_t *p_log,
IN const osm_req_t *p_req,
IN const osm_physp_t *p_physp,
IN const boolean_t enforce)
@@ -221,12 +222,33 @@ pkey_mgr_enforce_partition(
osm_madw_context_t context;
uint8_t payload[IB_SMP_DATA_SIZE];
ib_port_info_t *p_pi;
+ ib_api_status_t status;
if (!(p_pi = osm_physp_get_port_info_ptr( p_physp )))
- return IB_ERROR;
+ {
+ osm_log( p_log, OSM_LOG_ERROR,
+ "pkey_mgr_enforce_partition: ERR 0507: "
+ "No port info for "
+ "node 0x%016" PRIx64 " port %u\n",
+ cl_ntoh64(
+ osm_node_get_node_guid(
+ osm_physp_get_node_ptr( p_physp ))),
+ osm_physp_get_port_num( p_physp ) );
+ return FALSE;
+ }
- if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
- return IB_SUCCESS;
+ if ((p_pi->vl_enforce & 0xc) == (0xc)*(enforce == TRUE))
+ {
+ osm_log( p_log, OSM_LOG_DEBUG,
+ "pkey_mgr_enforce_partition: "
+ "No need to update PortInfo for "
+ "node 0x%016" PRIx64 " port %u\n",
+ cl_ntoh64(
+ osm_node_get_node_guid(
+ osm_physp_get_node_ptr( p_physp ))),
+ osm_physp_get_port_num( p_physp ) );
+ return FALSE;
+ }
memset( payload, 0, IB_SMP_DATA_SIZE );
memcpy( payload, p_pi, sizeof(ib_port_info_t) );
@@ -248,11 +270,35 @@ pkey_mgr_enforce_partition(
context.pi_context.light_sweep = FALSE;
context.pi_context.active_transition = FALSE;
- return osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
- payload, sizeof(payload),
- IB_MAD_ATTR_PORT_INFO,
- cl_hton32( osm_physp_get_port_num( p_physp ) ),
- CL_DISP_MSGID_NONE, &context );
+ status = osm_req_set( p_req, osm_physp_get_dr_path_ptr( p_physp ),
+ payload, sizeof(payload),
+ IB_MAD_ATTR_PORT_INFO,
+ cl_hton32( osm_physp_get_port_num( p_physp ) ),
+ CL_DISP_MSGID_NONE, &context );
+ if (status != IB_SUCCESS)
+ {
+ osm_log( p_log, OSM_LOG_ERROR,
+ "pkey_mgr_enforce_partition: ERR 0520: "
+ "Failed to set PortInfo for "
+ "node 0x%016" PRIx64 " port %u\n",
+ cl_ntoh64(
+ osm_node_get_node_guid(
+ osm_physp_get_node_ptr( p_physp ))),
+ osm_physp_get_port_num( p_physp ) );
+ return FALSE;
+ }
+ else
+ {
+ osm_log( p_log, OSM_LOG_DEBUG,
+ "pkey_mgr_enforce_partition: "
+ "Set PortInfo for "
+ "node 0x%016" PRIx64 " port %u\n",
+ cl_ntoh64(
+ osm_node_get_node_guid(
+ osm_physp_get_node_ptr( p_physp ))),
+ osm_physp_get_port_num( p_physp ) );
+ return TRUE;
+ }
}
/**********************************************************************
@@ -369,15 +415,26 @@ static boolean_t pkey_mgr_update_port(
status = pkey_mgr_update_pkey_entry( p_req, p_physp, new_block,
block_index );
if (status == IB_SUCCESS)
- ret_val = TRUE;
+ {
+ osm_log( p_log, OSM_LOG_DEBUG,
+ "pkey_mgr_update_port: "
+ "Updated "
+ "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
+ block_index,
+ cl_ntoh64( osm_node_get_node_guid( p_node ) ),
+ osm_physp_get_port_num( p_physp ) );
+ ret_val = TRUE;
+ }
else
- osm_log( p_log, OSM_LOG_ERROR,
- "pkey_mgr_update_port: ERR 0506: "
- "pkey_mgr_update_pkey_entry() failed to update "
- "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
- block_index,
- cl_ntoh64( osm_node_get_node_guid( p_node ) ),
- osm_physp_get_port_num( p_physp ) );
+ {
+ osm_log( p_log, OSM_LOG_ERROR,
+ "pkey_mgr_update_port: ERR 0506: "
+ "pkey_mgr_update_pkey_entry() failed to update "
+ "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
+ block_index,
+ cl_ntoh64( osm_node_get_node_guid( p_node ) ),
+ osm_physp_get_port_num( p_physp ) );
+ }
}
return ret_val;
@@ -405,8 +462,9 @@ pkey_mgr_update_peer_port(
uint16_t peer_max_blocks;
ib_api_status_t status = IB_SUCCESS;
boolean_t ret_val = FALSE;
+ boolean_t port_info_set = FALSE;
ib_pkey_table_t empty_block;
-
+
memset(&empty_block, 0, sizeof(ib_pkey_table_t));
p_physp = osm_port_get_default_phys_ptr( p_port );
@@ -439,18 +497,11 @@ pkey_mgr_update_peer_port(
enforce = FALSE;
}
- if (pkey_mgr_enforce_partition( p_req, peer, enforce ) != IB_SUCCESS)
- {
- osm_log( p_log, OSM_LOG_ERROR,
- "pkey_mgr_update_peer_port: ERR 0507: "
- "pkey_mgr_enforce_partition() failed to update "
- "node 0x%016" PRIx64 " port %u\n",
- cl_ntoh64( osm_node_get_node_guid( p_node ) ),
- osm_physp_get_port_num( peer ) );
- }
+ if (pkey_mgr_enforce_partition( p_log, p_req, peer, enforce))
+ port_info_set = TRUE;
if (enforce == FALSE)
- return FALSE;
+ return port_info_set;
p_peer_pkey_tbl->used_blocks = p_pkey_tbl->used_blocks;
for (block_index = 0; block_index < p_pkey_tbl->used_blocks;
block_index++)
@@ -487,6 +538,7 @@ pkey_mgr_update_peer_port(
osm_physp_get_port_num( peer ) );
}
+ if (port_info_set) return TRUE;
return ret_val;
}
@@ -541,10 +593,10 @@ osm_pkey_mgr_process(
signal = OSM_SIGNAL_DONE_PENDING;
p_node = osm_port_get_parent_node( p_port );
if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) &&
- pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
+ pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
&p_osm->subn, p_port,
!p_osm->subn.opt.no_partition_enforcement ) )
- signal = OSM_SIGNAL_DONE_PENDING;
+ signal = OSM_SIGNAL_DONE_PENDING;
}
_err:
diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
index 9eac038..4e61259 100644
--- a/osm/opensm/osm_state_mgr.c
+++ b/osm/opensm/osm_state_mgr.c
@@ -1853,6 +1853,7 @@ osm_state_mgr_process(
{
ib_api_status_t status;
osm_remote_sm_t *p_remote_sm;
+ osm_signal_t tmp_signal;
CL_ASSERT( p_mgr );
@@ -2075,11 +2076,10 @@ osm_state_mgr_process(
case OSM_SIGNAL_CHANGE_DETECTED:
/*
* Nothing to do here. One subnet change typcially
- * begets another....
+ * begets another.... But needs to wait for all transactions
*/
signal = OSM_SIGNAL_NONE;
- break;
-
+ break;
case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
/*
* A change was detected on the subnet.
@@ -2219,7 +2219,10 @@ osm_state_mgr_process(
signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
/* the returned signal is always DONE */
- signal = osm_qos_setup(p_mgr->p_subn->p_osm);
+ tmp_signal = osm_qos_setup(p_mgr->p_subn->p_osm);
+
+ if (tmp_signal == OSM_SIGNAL_DONE_PENDING)
+ signal = OSM_SIGNAL_DONE_PENDING;
/* try to restore SA DB (this should be before lid_mgr
because we may want to disable clients reregistration
diff --git a/osm/opensm/osm_ucast_mgr.c b/osm/opensm/osm_ucast_mgr.c
index e977253..39973de 100644
--- a/osm/opensm/osm_ucast_mgr.c
+++ b/osm/opensm/osm_ucast_mgr.c
@@ -885,6 +885,9 @@ osm_ucast_mgr_set_fwd_table(
ib_switch_info_t si;
uint32_t block_id_ho = 0;
uint8_t block[IB_SMP_DATA_SIZE];
+ boolean_t set_swinfo_require = FALSE;
+ uint16_t lin_top;
+ uint8_t life_state;
CL_ASSERT( p_mgr );
@@ -904,43 +907,59 @@ osm_ucast_mgr_set_fwd_table(
Set the top of the unicast forwarding table.
*/
si = *osm_switch_get_si_ptr( p_sw );
- si.lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
+ lin_top = cl_hton16( osm_switch_get_max_lid_ho( p_sw ) );
+ if (si.lin_top != lin_top)
+ {
+ set_swinfo_require = TRUE;
+ si.lin_top = lin_top;
+ }
/* check to see if the change state bit is on. If it is - then we
need to clear it. */
- if( ib_switch_info_get_state_change( &si ) )
- si.life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
- | ( si.life_state & IB_SWITCH_PSC ) ) & 0xfc;
+ if ( ib_switch_info_get_state_change( &si ) )
+ life_state = ( (p_mgr->p_subn->opt.packet_life_time <<3 )
+ | ( si.life_state & IB_SWITCH_PSC ) ) & 0xfc;
else
- si.life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
+ life_state = (p_mgr->p_subn->opt.packet_life_time <<3 ) & 0xf8;
- if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
+ if (life_state != si.life_state)
{
- osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
- "osm_ucast_mgr_set_fwd_table: "
- "Setting switch FT top to LID 0x%X\n",
- osm_switch_get_max_lid_ho( p_sw ) );
+ set_swinfo_require = TRUE;
+ si.life_state = life_state;
}
-
- context.si_context.light_sweep = FALSE;
- context.si_context.node_guid = osm_node_get_node_guid( p_node );
- context.si_context.set_method = TRUE;
-
- status = osm_req_set( p_mgr->p_req,
- p_path,
- (uint8_t*)&si,
- sizeof(si),
- IB_MAD_ATTR_SWITCH_INFO,
- 0,
- CL_DISP_MSGID_NONE,
- &context );
-
- if( status != IB_SUCCESS )
+
+ if ( set_swinfo_require )
{
- osm_log( p_mgr->p_log, OSM_LOG_ERROR,
- "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
- "Sending SwitchInfo attribute failed (%s)\n",
- ib_get_err_str( status ) );
+ if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
+ {
+ osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
+ "osm_ucast_mgr_set_fwd_table: "
+ "Setting switch FT top to LID 0x%X\n",
+ osm_switch_get_max_lid_ho( p_sw ) );
+ }
+
+ context.si_context.light_sweep = FALSE;
+ context.si_context.node_guid = osm_node_get_node_guid( p_node );
+ context.si_context.set_method = TRUE;
+
+ status = osm_req_set( p_mgr->p_req,
+ p_path,
+ (uint8_t*)&si,
+ sizeof(si),
+ IB_MAD_ATTR_SWITCH_INFO,
+ 0,
+ CL_DISP_MSGID_NONE,
+ &context );
+
+ if( status != IB_SUCCESS )
+ {
+ osm_log( p_mgr->p_log, OSM_LOG_ERROR,
+ "osm_ucast_mgr_set_fwd_table: ERR 3A06: "
+ "Sending SwitchInfo attribute failed (%s)\n",
+ ib_get_err_str( status ) );
+ }
+ else
+ p_mgr->any_change = TRUE;
}
/*
@@ -1215,13 +1234,14 @@ osm_ucast_mgr_process(
CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
+ p_mgr->any_change = FALSE;
+
/*
If there are no switches in the subnet, we are done.
*/
if (cl_qmap_count( p_sw_guid_tbl ) == 0)
goto Exit;
- p_mgr->any_change = FALSE;
cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_clean_switch, NULL);
if (!p_routing_eng->build_lid_matrices ||
@@ -1248,14 +1268,20 @@ osm_ucast_mgr_process(
if ( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
__osm_ucast_mgr_dump_tables( p_mgr );
- if (p_mgr->any_change)
+ if (p_mgr->any_change)
+ {
signal = OSM_SIGNAL_DONE_PENDING;
+ osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
+ "osm_ucast_mgr_process: "
+ "LFT Tables configured on all switches\n");
+ }
else
+ {
+ osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
+ "osm_ucast_mgr_process: "
+ "No need to set any LFT Tables on all switches\n");
signal = OSM_SIGNAL_DONE;
-
- osm_log(p_mgr->p_log, OSM_LOG_VERBOSE,
- "osm_ucast_mgr_process: "
- "LFT Tables configured on all switches\n");
+ }
Exit:
CL_PLOCK_RELEASE( p_mgr->p_lock );
More information about the general
mailing list