[openib-general] [PATCH] OpenSM: Improve handling of IB router ports

Hal Rosenstock halr at voltaire.com
Fri Oct 6 11:28:03 PDT 2006


OpenSM: Improve handling of IB router ports

Signed-off-by: Hal Rosenstock <halr at voltaire.com>
---

Index: opensm/osm_drop_mgr.c
===================================================================
--- opensm/osm_drop_mgr.c	(revision 9679)
+++ opensm/osm_drop_mgr.c	(working copy)
@@ -266,10 +266,10 @@ __osm_drop_mgr_remove_port(
         osm_node_unlink( p_node, (uint8_t)port_num,
                          p_remote_node, (uint8_t)remote_port_num );
 
-        /* If the remote node is a ca - need to remove the remote port, since
-           it is no longer reachable. This can be done if we reset the discovery
-           count of the remote port. */
-        if ( osm_node_get_type( p_remote_node ) == IB_NODE_TYPE_CA )
+        /* If the remote node is ca or router - need to remove the remote port,
+           since it is no longer reachable. This can be done if we reset the
+           discovery count of the remote port. */
+        if ( osm_node_get_type( p_remote_node ) != IB_NODE_TYPE_SWITCH )
         {
           if ( p_remote_port != (osm_port_t*)cl_qmap_end( p_port_guid_tbl ) )
           {
@@ -385,25 +385,6 @@ __osm_drop_mgr_remove_switch(
 
 /**********************************************************************
  **********************************************************************/
-static void
-__osm_drop_mgr_remove_router(
-  IN const osm_drop_mgr_t* const p_mgr,
-  IN osm_node_t* p_node )
-{
-  OSM_LOG_ENTER( p_mgr->p_log, __osm_drop_mgr_remove_router );
-
-  UNUSED_PARAM( p_mgr );
-  UNUSED_PARAM( p_node );
-
-  osm_log( p_mgr->p_log, OSM_LOG_ERROR,
-           "__osm_drop_mgr_remove_router: ERR 0106: "
-           "Routers are not supported\n" );
-
-  OSM_LOG_EXIT( p_mgr->p_log );
-}
-
-/**********************************************************************
- **********************************************************************/
 static boolean_t
 __osm_drop_mgr_process_node(
   IN const osm_drop_mgr_t* const p_mgr,
@@ -454,16 +435,13 @@ __osm_drop_mgr_process_node(
   switch( osm_node_get_type( p_node ) )
   {
   case IB_NODE_TYPE_CA:
+  case IB_NODE_TYPE_ROUTER:
     break;
 
   case IB_NODE_TYPE_SWITCH:
     __osm_drop_mgr_remove_switch( p_mgr, p_node );
     break;
 
-  case IB_NODE_TYPE_ROUTER:
-    __osm_drop_mgr_remove_router( p_mgr, p_node );
-    break;
-
   default:
     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
              "__osm_drop_mgr_process_node: ERR 0104: "
Index: opensm/osm_node_info_rcv.c
===================================================================
--- opensm/osm_node_info_rcv.c	(revision 9679)
+++ opensm/osm_node_info_rcv.c	(working copy)
@@ -601,6 +601,172 @@ __osm_ni_rcv_process_new_router(
 
   __osm_ni_rcv_process_new_node( p_rcv, p_node, p_madw );
 
+  /*
+    A node guid of 0 is the corner case that indicates
+    we discovered our own node.  Initialize the subnet
+    object with the SM's own port guid.
+  */
+  if( osm_madw_get_ni_context_ptr( p_madw )->node_guid == 0 )
+  {
+    p_rcv->p_subn->sm_port_guid = p_node->node_info.port_guid;
+  }
+
+  OSM_LOG_EXIT( p_rcv->p_log );
+}
+
+/**********************************************************************
+ Process a NodeInfo reply for a router node that is already known.
+ The plock must be held before calling this function.
+**********************************************************************/
+static void
+__osm_ni_rcv_process_existing_router(
+  IN const osm_ni_rcv_t* const p_rcv,
+  IN osm_node_t* const p_node,
+  IN const osm_madw_t* const p_madw )
+{
+  ib_node_info_t *p_ni;
+  ib_smp_t *p_smp;
+  osm_port_t *p_port;
+  osm_port_t *p_port_check;
+  cl_qmap_t *p_guid_tbl;
+  osm_madw_context_t context;
+  uint8_t port_num;
+  osm_physp_t *p_physp;
+  ib_api_status_t status;
+  osm_dr_path_t *p_dr_path;
+  osm_bind_handle_t h_bind;
+  cl_status_t cl_status;
+
+  OSM_LOG_ENTER( p_rcv->p_log, __osm_ni_rcv_process_existing_router );
+
+  p_smp = osm_madw_get_smp_ptr( p_madw );
+  p_ni = (ib_node_info_t*)ib_smp_get_payload_ptr( p_smp );
+  port_num = ib_node_info_get_local_port_num( p_ni );
+  p_guid_tbl = &p_rcv->p_subn->port_guid_tbl;
+  h_bind = osm_madw_get_bind_handle( p_madw );
+
+  /*
+    Determine if we have encountered this node through a
+    previously undiscovered port.  If so, build the new
+    port object.
+  */
+  p_port = (osm_port_t*)cl_qmap_get( p_guid_tbl, p_ni->port_guid );
+
+  if( p_port == (osm_port_t*)cl_qmap_end( p_guid_tbl ) )
+  {
+    osm_log( p_rcv->p_log, OSM_LOG_VERBOSE,
+             "__osm_ni_rcv_process_existing_router: "
+             "Creating new port object with GUID = 0x%" PRIx64 "\n",
+             cl_ntoh64( p_ni->port_guid ) );
+
+    osm_node_init_physp( p_node, p_madw );
+
+    p_port = osm_port_new( p_ni, p_node );
+    if( p_port == NULL )
+    {
+      osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+               "__osm_ni_rcv_process_existing_router: ERR 0D24: "
+               "Unable to create new port object\n" );
+      goto Exit;
+    }
+
+    /*
+      Add the new port object to the database.
+    */
+    p_port_check = (osm_port_t*)cl_qmap_insert( p_guid_tbl,
+                                                p_ni->port_guid, &p_port->map_item );
+    if( p_port_check != p_port )
+    {
+      /*
+        We should never be here!
+        Somehow, this port GUID already exists in the table.
+      */
+      osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+               "__osm_ni_rcv_process_existing_router: ERR 0D22: "
+               "Port 0x%" PRIx64 " already in the database!\n",
+               cl_ntoh64( p_ni->port_guid ) );
+
+      osm_port_delete( &p_port );
+      goto Exit;
+    }
+
+    /* If we are a master, then this means the port is new on the subnet.
+       Add it to the new_ports_list - need to send trap 64 on these ports.
+       The condition that we are master is true, since if we are in discovering
+       state (meaning we woke up from standby or we are just initializing),
+       then these ports may be new to us, but are not new on the subnet.
+       If we are master, then the subnet as we know it is the updated one,
+       and any new ports we encounter should cause trap 64. C14-72.1.1 */
+    if ( p_rcv->p_subn->sm_state == IB_SMINFO_STATE_MASTER )
+    {
+      cl_status = cl_list_insert_tail( &p_rcv->p_subn->new_ports_list, p_port );
+      if( cl_status != CL_SUCCESS )
+      {
+        osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+                 "__osm_ni_rcv_process_existing_router: ERR 0D28: "
+                 "Error %s adding to list\n",
+                 CL_STATUS_MSG( cl_status ) );
+        /* The port is already linked into the GUID table; unlink it
+           before freeing it so the table keeps no dangling entry. */
+        cl_qmap_remove_item( p_guid_tbl, &p_port->map_item );
+        osm_port_delete( &p_port );
+        goto Exit;
+      }
+      osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+               "__osm_ni_rcv_process_existing_router: "
+               "Adding port GUID:0x%016" PRIx64 " to new_ports_list\n",
+               cl_ntoh64( p_ni->port_guid ) );
+    }
+
+    p_physp = osm_node_get_physp_ptr( p_node, port_num );
+  }
+  else
+  {
+    p_physp = osm_node_get_physp_ptr( p_node, port_num );
+
+    CL_ASSERT( p_physp );
+
+    if ( !osm_physp_is_valid( p_physp ) )
+    {
+        osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+                 "__osm_ni_rcv_process_existing_router: ERR 0D29: "
+                 "Invalid physical port. Aborting discovery\n");
+        goto Exit;
+    }
+
+    /*
+      Update the DR Path to the port,
+      in case the old one is no longer available.
+    */
+    p_dr_path = osm_physp_get_dr_path_ptr( p_physp );
+
+    osm_dr_path_init( p_dr_path, h_bind, p_smp->hop_count,
+                      p_smp->initial_path );
+  }
+
+  context.pi_context.node_guid = p_ni->node_guid;
+  context.pi_context.port_guid = p_ni->port_guid;
+  context.pi_context.set_method = FALSE;
+  context.pi_context.update_master_sm_base_lid = FALSE;
+  context.pi_context.ignore_errors = FALSE;
+  context.pi_context.light_sweep = FALSE;
+
+  status = osm_req_get( p_rcv->p_gen_req,
+                        osm_physp_get_dr_path_ptr( p_physp ),
+                        IB_MAD_ATTR_PORT_INFO,
+                        cl_hton32( port_num ),
+                        CL_DISP_MSGID_NONE,
+                        &context );
+
+  if( status != IB_SUCCESS )
+  {
+    osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+             "__osm_ni_rcv_process_existing_router: ERR 0D23: "
+             "Failure initiating PortInfo request (%s)\n",
+             ib_get_err_str(status));
+  }
+
+ Exit:
   OSM_LOG_EXIT( p_rcv->p_log );
 }
 
@@ -937,7 +1103,7 @@ __osm_ni_rcv_process_existing(
   switch( p_ni->node_type )
   {
   case IB_NODE_TYPE_ROUTER:
-    /* Not supported yet. */
+    __osm_ni_rcv_process_existing_router( p_rcv, p_node, p_madw );    
     break;
 
   case IB_NODE_TYPE_CA:
Index: opensm/osm_ucast_updn.c
===================================================================
--- opensm/osm_ucast_updn.c	(revision 9679)
+++ opensm/osm_ucast_updn.c	(working copy)
@@ -222,7 +222,7 @@ __updn_bfs_by_node(
   }
   else
   {
-    /* This is an HCA - need to take its remote port */
+    /* This is a CA or router - need to take its remote port */
     p_remote_physp = p_physp->p_remote_physp;
     /*
       make sure that the following occur:
@@ -1042,7 +1042,7 @@ osm_updn_find_root_nodes_by_min_hop(
      cl_list_init( p_ca_list, 10 );
   */
 
-  /* Find the Maximum number of Cas for histogram normalization */
+  /* Find the Maximum number of CAs (and routers) for histogram normalization */
   osm_log (&(osm.log), OSM_LOG_VERBOSE,
            "osm_updn_find_root_nodes_by_min_hop: "
            "Find the number of CA and store them in cl_list\n");
@@ -1050,7 +1050,7 @@ osm_updn_find_root_nodes_by_min_hop(
   while( p_next_port != (osm_port_t*)cl_qmap_end( &osm.subn.port_guid_tbl ) ) {
     p_port = p_next_port;
     p_next_port = (osm_port_t*)cl_qmap_next( &p_next_port->map_item );
-    if ( osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA )
+    if ( osm_node_get_type(p_port->p_node) != IB_NODE_TYPE_SWITCH )
     {
       p_physp = osm_port_get_default_phys_ptr(p_port);
       self_lid_ho = cl_ntoh16( osm_physp_get_base_lid(p_physp) );
Index: opensm/osm_state_mgr.c
===================================================================
--- opensm/osm_state_mgr.c	(revision 9679)
+++ opensm/osm_state_mgr.c	(working copy)
@@ -941,6 +941,7 @@ __osm_state_mgr_sweep_hop_1(
    switch ( osm_node_get_type( p_node ) )
    {
    case IB_NODE_TYPE_CA:
+   case IB_NODE_TYPE_ROUTER:
       context.ni_context.node_guid = osm_node_get_node_guid( p_node );
       context.ni_context.port_num = port_num;
 
@@ -1002,8 +1003,8 @@ __osm_state_mgr_sweep_hop_1(
 
    default:
       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
-               "__osm_state_mgr_sweep_hop_1: ERR 3313: "
-               "Current supported node types that host SM are CA or SW only\n" );
+               "__osm_state_mgr_sweep_hop_1: ERR 3313: Node type %d. "
+               "Current supported node types that host SM are CA, router, or SW\n", osm_node_get_type( p_node ) );
    }
 
  Exit:







More information about the general mailing list