[openib-general] [PATCH] OpenSM: Improve handling of IB router ports
Hal Rosenstock
halr at voltaire.com
Fri Oct 6 11:28:03 PDT 2006
OpenSM: Improve handling of IB router ports
Signed-off-by: Hal Rosenstock <halr at voltaire.com>
---
Index: opensm/osm_drop_mgr.c
===================================================================
--- opensm/osm_drop_mgr.c (revision 9679)
+++ opensm/osm_drop_mgr.c (working copy)
@@ -266,10 +266,10 @@ __osm_drop_mgr_remove_port(
osm_node_unlink( p_node, (uint8_t)port_num,
p_remote_node, (uint8_t)remote_port_num );
- /* If the remote node is a ca - need to remove the remote port, since
- it is no longer reachable. This can be done if we reset the discovery
- count of the remote port. */
- if ( osm_node_get_type( p_remote_node ) == IB_NODE_TYPE_CA )
+ /* If the remote node is a CA or router - need to remove the remote port,
+ since it is no longer reachable. This can be done if we reset the
+ discovery count of the remote port. */
+ if ( osm_node_get_type( p_remote_node ) != IB_NODE_TYPE_SWITCH )
{
if ( p_remote_port != (osm_port_t*)cl_qmap_end( p_port_guid_tbl ) )
{
@@ -385,25 +385,6 @@ __osm_drop_mgr_remove_switch(
/**********************************************************************
**********************************************************************/
-static void
-__osm_drop_mgr_remove_router(
- IN const osm_drop_mgr_t* const p_mgr,
- IN osm_node_t* p_node )
-{
- OSM_LOG_ENTER( p_mgr->p_log, __osm_drop_mgr_remove_router );
-
- UNUSED_PARAM( p_mgr );
- UNUSED_PARAM( p_node );
-
- osm_log( p_mgr->p_log, OSM_LOG_ERROR,
- "__osm_drop_mgr_remove_router: ERR 0106: "
- "Routers are not supported\n" );
-
- OSM_LOG_EXIT( p_mgr->p_log );
-}
-
-/**********************************************************************
- **********************************************************************/
static boolean_t
__osm_drop_mgr_process_node(
IN const osm_drop_mgr_t* const p_mgr,
@@ -454,16 +435,13 @@ __osm_drop_mgr_process_node(
switch( osm_node_get_type( p_node ) )
{
case IB_NODE_TYPE_CA:
+ case IB_NODE_TYPE_ROUTER:
break;
case IB_NODE_TYPE_SWITCH:
__osm_drop_mgr_remove_switch( p_mgr, p_node );
break;
- case IB_NODE_TYPE_ROUTER:
- __osm_drop_mgr_remove_router( p_mgr, p_node );
- break;
-
default:
osm_log( p_mgr->p_log, OSM_LOG_ERROR,
"__osm_drop_mgr_process_node: ERR 0104: "
Index: opensm/osm_node_info_rcv.c
===================================================================
--- opensm/osm_node_info_rcv.c (revision 9679)
+++ opensm/osm_node_info_rcv.c (working copy)
@@ -601,6 +601,172 @@ __osm_ni_rcv_process_new_router(
__osm_ni_rcv_process_new_node( p_rcv, p_node, p_madw );
+ /*
+ A node guid of 0 is the corner case that indicates
+ we discovered our own node. Initialize the subnet
+ object with the SM's own port guid.
+ */
+ if( osm_madw_get_ni_context_ptr( p_madw )->node_guid == 0 )
+ {
+ p_rcv->p_subn->sm_port_guid = p_node->node_info.port_guid;
+ }
+
+ OSM_LOG_EXIT( p_rcv->p_log );
+}
+
+/**********************************************************************
+ Process a NodeInfo response for a router node that is already known.
+
+ If the reporting port's GUID is not yet in the subnet's port GUID
+ table, a new port object is created and inserted (and, when this SM
+ is master, queued on new_ports_list). Otherwise the port's directed
+ route path is refreshed from the received SMP. In both cases a
+ PortInfo Get is then issued for the reporting port.
+
+ The plock must be held before calling this function.
+**********************************************************************/
+static void
+__osm_ni_rcv_process_existing_router(
+ IN const osm_ni_rcv_t* const p_rcv,
+ IN osm_node_t* const p_node,
+ IN const osm_madw_t* const p_madw )
+{
+ ib_node_info_t *p_ni;
+ ib_smp_t *p_smp;
+ osm_port_t *p_port;
+ osm_port_t *p_port_check;
+ cl_qmap_t *p_guid_tbl;
+ osm_madw_context_t context;
+ uint8_t port_num;
+ osm_physp_t *p_physp;
+ ib_api_status_t status;
+ osm_dr_path_t *p_dr_path;
+ osm_bind_handle_t h_bind;
+ cl_status_t cl_status;
+
+ OSM_LOG_ENTER( p_rcv->p_log, __osm_ni_rcv_process_existing_router );
+
+ p_smp = osm_madw_get_smp_ptr( p_madw );
+ p_ni = (ib_node_info_t*)ib_smp_get_payload_ptr( p_smp );
+ port_num = ib_node_info_get_local_port_num( p_ni );
+ p_guid_tbl = &p_rcv->p_subn->port_guid_tbl;
+ h_bind = osm_madw_get_bind_handle( p_madw );
+
+ /*
+ Determine if we have encountered this node through a
+ previously undiscovered port. If so, build the new
+ port object.
+ */
+ p_port = (osm_port_t*)cl_qmap_get( p_guid_tbl, p_ni->port_guid );
+
+ if( p_port == (osm_port_t*)cl_qmap_end( p_guid_tbl ) )
+ {
+ osm_log( p_rcv->p_log, OSM_LOG_VERBOSE,
+ "__osm_ni_rcv_process_existing_router: "
+ "Creating new port object with GUID = 0x%" PRIx64 "\n",
+ cl_ntoh64( p_ni->port_guid ) );
+
+ osm_node_init_physp( p_node, p_madw );
+
+ p_port = osm_port_new( p_ni, p_node );
+ if( p_port == NULL )
+ {
+ osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+ "__osm_ni_rcv_process_existing_router: ERR 0D24: "
+ "Unable to create new port object\n" );
+ goto Exit;
+ }
+
+ /*
+ Add the new port object to the database.
+ */
+ p_port_check = (osm_port_t*)cl_qmap_insert( p_guid_tbl,
+ p_ni->port_guid, &p_port->map_item );
+ if( p_port_check != p_port )
+ {
+ /*
+ We should never be here!
+ Somehow, this port GUID already exists in the table.
+ The new object was not inserted, so it can be freed directly.
+ */
+ osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+ "__osm_ni_rcv_process_existing_router: ERR 0D22: "
+ "Port 0x%" PRIx64 " already in the database!\n",
+ cl_ntoh64( p_ni->port_guid ) );
+
+ osm_port_delete( &p_port );
+
+ goto Exit;
+ }
+
+ /* Only queue the port on new_ports_list (trap 64, C14-72.1.1) when we
+ are master. In any other state (e.g. discovering after waking from
+ standby or at startup) a port that is new to us is not necessarily
+ new on the subnet, so no trap should be generated for it. */
+ if ( p_rcv->p_subn->sm_state == IB_SMINFO_STATE_MASTER )
+ {
+ cl_status = cl_list_insert_tail( &p_rcv->p_subn->new_ports_list, p_port );
+ if( cl_status != CL_SUCCESS )
+ {
+ osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+ "__osm_ni_rcv_process_existing_router: ERR 0D28: "
+ "Error %s adding to list\n",
+ CL_STATUS_MSG( cl_status ) );
+ /* The port is already in the GUID table; unlink it before
+ freeing it so the table is not left with a dangling entry. */
+ cl_qmap_remove_item( p_guid_tbl, &p_port->map_item );
+ osm_port_delete( &p_port );
+ goto Exit;
+ }
+ else
+ {
+ /* Report the port GUID (not the node GUID) to match the message. */
+ osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+ "__osm_ni_rcv_process_existing_router: "
+ "Adding port GUID:0x%016" PRIx64 " to new_ports_list\n",
+ cl_ntoh64( p_ni->port_guid ) );
+ }
+ }
+
+ p_physp = osm_node_get_physp_ptr( p_node, port_num );
+ }
+ else
+ {
+ p_physp = osm_node_get_physp_ptr( p_node, port_num );
+
+ CL_ASSERT( p_physp );
+
+ if ( !osm_physp_is_valid( p_physp ) )
+ {
+ osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+ "__osm_ni_rcv_process_existing_router: ERR 0D29: "
+ "Invalid physical port. Aborting discovery\n");
+ goto Exit;
+ }
+
+ /*
+ Update the DR Path to the port,
+ in case the old one is no longer available.
+ */
+ p_dr_path = osm_physp_get_dr_path_ptr( p_physp );
+
+ osm_dr_path_init( p_dr_path, h_bind, p_smp->hop_count,
+ p_smp->initial_path );
+ }
+
+ /* Build the PortInfo request context from the received NodeInfo. */
+ context.pi_context.node_guid = p_ni->node_guid;
+ context.pi_context.port_guid = p_ni->port_guid;
+ context.pi_context.set_method = FALSE;
+ context.pi_context.update_master_sm_base_lid = FALSE;
+ context.pi_context.ignore_errors = FALSE;
+ context.pi_context.light_sweep = FALSE;
+
+ status = osm_req_get( p_rcv->p_gen_req,
+ osm_physp_get_dr_path_ptr( p_physp ),
+ IB_MAD_ATTR_PORT_INFO,
+ cl_hton32( port_num ),
+ CL_DISP_MSGID_NONE,
+ &context );
+
+ if( status != IB_SUCCESS )
+ {
+ osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+ "__osm_ni_rcv_process_existing_router: ERR 0D23: "
+ "Failure initiating PortInfo request (%s)\n",
+ ib_get_err_str(status));
+ }
+
Exit:
OSM_LOG_EXIT( p_rcv->p_log );
}
@@ -937,7 +1103,7 @@ __osm_ni_rcv_process_existing(
switch( p_ni->node_type )
{
case IB_NODE_TYPE_ROUTER:
- /* Not supported yet. */
+ __osm_ni_rcv_process_existing_router( p_rcv, p_node, p_madw );
break;
case IB_NODE_TYPE_CA:
Index: opensm/osm_ucast_updn.c
===================================================================
--- opensm/osm_ucast_updn.c (revision 9679)
+++ opensm/osm_ucast_updn.c (working copy)
@@ -222,7 +222,7 @@ __updn_bfs_by_node(
}
else
{
- /* This is an HCA - need to take its remote port */
+ /* This is a CA or router - need to take its remote port */
p_remote_physp = p_physp->p_remote_physp;
/*
make sure that the following occur:
@@ -1042,7 +1042,7 @@ osm_updn_find_root_nodes_by_min_hop(
cl_list_init( p_ca_list, 10 );
*/
- /* Find the Maximum number of Cas for histogram normalization */
+ /* Find the Maximum number of CAs (and routers) for histogram normalization */
osm_log (&(osm.log), OSM_LOG_VERBOSE,
"osm_updn_find_root_nodes_by_min_hop: "
"Find the number of CA and store them in cl_list\n");
@@ -1050,7 +1050,7 @@ osm_updn_find_root_nodes_by_min_hop(
while( p_next_port != (osm_port_t*)cl_qmap_end( &osm.subn.port_guid_tbl ) ) {
p_port = p_next_port;
p_next_port = (osm_port_t*)cl_qmap_next( &p_next_port->map_item );
- if ( osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA )
+ if ( osm_node_get_type(p_port->p_node) != IB_NODE_TYPE_SWITCH )
{
p_physp = osm_port_get_default_phys_ptr(p_port);
self_lid_ho = cl_ntoh16( osm_physp_get_base_lid(p_physp) );
Index: opensm/osm_state_mgr.c
===================================================================
--- opensm/osm_state_mgr.c (revision 9679)
+++ opensm/osm_state_mgr.c (working copy)
@@ -941,6 +941,7 @@ __osm_state_mgr_sweep_hop_1(
switch ( osm_node_get_type( p_node ) )
{
case IB_NODE_TYPE_CA:
+ case IB_NODE_TYPE_ROUTER:
context.ni_context.node_guid = osm_node_get_node_guid( p_node );
context.ni_context.port_num = port_num;
@@ -1002,8 +1003,8 @@ __osm_state_mgr_sweep_hop_1(
default:
osm_log( p_mgr->p_log, OSM_LOG_ERROR,
- "__osm_state_mgr_sweep_hop_1: ERR 3313: "
- "Current supported node types that host SM are CA or SW only\n" );
+ "__osm_state_mgr_sweep_hop_1: ERR 3313: Node type %d. "
+ "Current supported node types that host SM are CA, router, or SW\n", osm_node_get_type( p_node ) );
}
Exit:
More information about the general
mailing list