[openib-general] [PATCH] opensm: updn performance improvements

Sasha Khapyorsky sashak at voltaire.com
Sat Feb 24 12:13:42 PST 2007


There are various performance improvements for up/down routing engine:
- updn_node object which is referenced by switch's priv pointer
- ranking for switches only
- replace time consuming cl_list by cl_qlist
- reuse already collected up/down related information (in updn_node
  structure) instead of rediscovering
- eliminate many inner loops
- mask time consuming logging
- elminate using two lists with BFS
- minor cleaups

Now up/down looks 5-6 times faster.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 osm/opensm/osm_ucast_updn.c |  743 +++++++++++++++----------------------------
 1 files changed, 257 insertions(+), 486 deletions(-)

diff --git a/osm/opensm/osm_ucast_updn.c b/osm/opensm/osm_ucast_updn.c
index 8b86958..e8282f4 100644
--- a/osm/opensm/osm_ucast_updn.c
+++ b/osm/opensm/osm_ucast_updn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  *
@@ -40,8 +40,6 @@
  *
  * Environment:
  *      Linux User Mode
- *
- * $Revision: 1.0 $
  */
 
 #if HAVE_CONFIG_H
@@ -61,25 +59,10 @@
 /* direction */
 typedef enum _updn_switch_dir
 {
-    UP = 0,
-    DOWN
+  UP = 0,
+  DOWN
 } updn_switch_dir_t;
 
-/* This enum respresent available states in the UPDN algorithm */
-typedef enum _updn_state
-{
-    UPDN_INIT = 0,
-    UPDN_RANK,
-    UPDN_MIN_HOP_CALC,
-} updn_state_t;
-
-/* Rank value of this node */
-typedef struct _updn_rank
-{
-  cl_map_item_t map_item;
-  uint8_t rank;
-} updn_rank_t;
-
 /* Histogram element - the number of occurences of the same hop value */
 typedef struct _updn_hist
 {
@@ -87,12 +70,6 @@ typedef struct _updn_hist
   uint32_t bar_value;
 } updn_hist_t;
 
-typedef struct _updn_next_step
-{
-  updn_switch_dir_t state;
-  osm_switch_t *p_sw;
-} updn_next_step_t;
-
 /* guids list */
 typedef struct _updn_input
 {
@@ -100,17 +77,26 @@ typedef struct _updn_input
   uint64_t *guid_list;
 } updn_input_t;
 
+struct updn_node {
+  cl_list_item_t list;
+  osm_switch_t *sw;
+  updn_switch_dir_t dir;
+  unsigned rank;
+  unsigned is_root;
+  unsigned visited;
+};
+
 /* updn structure */
 typedef struct _updn
 {
-  updn_state_t   state;
   boolean_t      auto_detect_root_nodes;
-  cl_qmap_t      guid_rank_tbl;
   updn_input_t   updn_ucast_reg_inputs;
-  cl_list_t *    p_root_nodes;
+  cl_list_t     *p_root_nodes;
   osm_opensm_t  *p_osm;
 } updn_t;
 
+#define NOISE_L(log, fmt, arg...)
+
 /* ///////////////////////////////// */
 /*  Statics                          */
 /* ///////////////////////////////// */
@@ -122,27 +108,17 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t *p_updn);
    remote ports */
 static updn_switch_dir_t
 __updn_get_dir(
-  IN updn_t *p_updn,
-  IN uint8_t cur_rank,
-  IN uint8_t rem_rank,
+  IN unsigned cur_rank,
+  IN unsigned rem_rank,
   IN uint64_t cur_guid,
-  IN uint64_t rem_guid )
+  IN uint64_t rem_guid,
+  IN unsigned cur_is_root,
+  IN unsigned rem_is_root )
 {
-  uint32_t i = 0, max_num_guids = p_updn->updn_ucast_reg_inputs.num_guids;
-  uint64_t *p_guid = p_updn->updn_ucast_reg_inputs.guid_list;
-  boolean_t cur_is_root = FALSE, rem_is_root = FALSE;
-
   /* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect
-     directly, but in case they are we assign to root node an UP direction to allow UPDN discover
+     directly, but in case they are we assign to root node an UP direction to allow UPDN to discover
      the subnet correctly (and not from the point of view of the last root node).
   */
-  for ( i = 0; i < max_num_guids; i++ )
-  {
-    if (cur_guid == p_guid[i])
-      cur_is_root = TRUE;
-    if (rem_guid == p_guid[i])
-      rem_is_root = TRUE;
-  }
   if (cur_is_root && rem_is_root)
     return UP;
 
@@ -162,58 +138,18 @@ __updn_get_dir(
 
 /**********************************************************************
  **********************************************************************/
-/* This function creates a new element of updn_next_step_t type then return its
-   pointer, Null if malloc has failed */
-static updn_next_step_t*
-__updn_create_updn_next_step_t(
-  IN updn_switch_dir_t state,
-  IN osm_switch_t* const p_sw )
-{
-  updn_next_step_t *p_next_step;
-
-  p_next_step = (updn_next_step_t*) malloc(sizeof(*p_next_step));
-  if (p_next_step)
-  {
-    memset(p_next_step, 0, sizeof(*p_next_step));
-    p_next_step->state = state;
-    p_next_step->p_sw = p_sw;
-  }
-
-  return p_next_step;
-}
-
-/**********************************************************************
- **********************************************************************/
-/* This function updates an element in the qmap list by guid index and rank value */
+/* This function updates rank value for a node */
 /* Return 0 if no need to further update 1 if brought a new value */
 static int
 __updn_update_rank(
-  IN cl_qmap_t *p_guid_rank_tbl,
-  IN ib_net64_t guid,
-  IN uint8_t rank )
+  IN struct updn_node *u,
+  IN unsigned rank )
 {
-  updn_rank_t *p_updn_rank;
-
-  p_updn_rank = (updn_rank_t*) cl_qmap_get(p_guid_rank_tbl, guid);
-  if (p_updn_rank == (updn_rank_t*) cl_qmap_end(p_guid_rank_tbl))
+  if (u->rank > rank)
   {
-    p_updn_rank = (updn_rank_t*) malloc(sizeof(updn_rank_t));
-
-    CL_ASSERT(p_updn_rank);
-
-    p_updn_rank->rank = rank;
-
-    cl_qmap_insert(p_guid_rank_tbl, guid, &p_updn_rank->map_item);
+    u->rank = rank;
     return 1;
   }
-  else
-  {
-    if (p_updn_rank->rank > rank)
-    {
-      p_updn_rank->rank = rank;
-      return 1;
-    }
-  }
   return 0;
 }
 
@@ -223,20 +159,18 @@ __updn_update_rank(
  **********************************************************************/
 static int
 __updn_bfs_by_node(
-  IN updn_t *p_updn,
+  IN osm_log_t *p_log,
   IN osm_subn_t *p_subn,
-  IN osm_port_t *p_port,
-  IN cl_qmap_t *p_guid_rank_tbl )
+  IN osm_port_t *p_port )
 {
   /* Init local vars */
   osm_switch_t *p_self_node = NULL;
   uint8_t pn, pn_rem;
   osm_physp_t *p_physp, *p_remote_physp;
-  cl_list_t *p_currList, *p_nextList;
+  cl_qlist_t list;
   uint16_t root_lid;
-  updn_next_step_t *p_updn_switch, *p_tmp;
+  struct updn_node *u;
   updn_switch_dir_t next_dir, current_dir;
-  osm_log_t *p_log = &p_updn->p_osm->log;
 
   OSM_LOG_ENTER( p_log, __updn_bfs_by_node );
 
@@ -248,21 +182,6 @@ __updn_bfs_by_node(
     return 1;
   }
 
-  /* Init the list pointers */
-  p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t));
-  if (!p_nextList)
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "__updn_bfs_by_node: ERR AA14: "
-             "No memory for p_nextList\n" );
-    OSM_LOG_EXIT( p_log );
-    return 1;
-  }
-
-  cl_list_construct( p_nextList );
-  cl_list_init( p_nextList, 10 );
-  p_currList = p_nextList;
-
   /* The Root BFS - lid  */
   root_lid = cl_ntoh16(osm_physp_get_base_lid( p_physp ));
   /* printf ("-V- BFS through lid : 0x%x\n", root_lid); */
@@ -273,7 +192,7 @@ __updn_bfs_by_node(
   if (p_port->p_node->sw)
   {
     p_self_node = p_port->p_node->sw;
-    /* Update its Min Hop Table */
+    /* Update it's Min Hop Table */
     osm_log( p_log, OSM_LOG_DEBUG,
              "__updn_bfs_by_node: "
              "Update Min Hop Table of GUID 0x%" PRIx64 "\n",
@@ -282,7 +201,7 @@ __updn_bfs_by_node(
   }
   else
   {
-    /* This is a CA or router - need to take its remote port */
+    /* This is a CA or router - need to take it's remote port */
     p_remote_physp = p_physp->p_remote_physp;
     /*
       make sure that the following occur:
@@ -304,7 +223,7 @@ __updn_bfs_by_node(
       else
       {
         p_self_node = p_remote_physp->p_node->sw;
-        /* Update its Min Hop Table */
+        /* Update it's Min Hop Table */
         /* NOTE : Check if there is a function which prints the Min Hop Table */
         osm_log( p_log, OSM_LOG_DEBUG,
                  "__updn_bfs_by_node: "
@@ -322,201 +241,111 @@ __updn_bfs_by_node(
            "Starting from switch - port GUID 0x%" PRIx64 "\n",
            cl_ntoh64(p_self_node->p_node->node_info.port_guid) );
 
-  /* Update list with the updn_next_step_t new element */
-  /* NOTE : When inserting an item which is a pointer to a struct, does remove
-     action also free its memory */
-  if (!(p_tmp=__updn_create_updn_next_step_t(UP, p_self_node)))
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "__updn_bfs_by_node:  ERR AA08: "
-             "Could not create updn_next_step_t\n" );
-    return 1;
-  }
+  /* Update current list with the new element */
+  u = p_self_node->priv;
+  u->dir = UP;
 
-  cl_list_insert_tail(p_currList, p_tmp);
+  cl_qlist_init(&list);
+  cl_qlist_insert_tail(&list, &u->list);
 
   /* BFS the list till no next element */
-  osm_log( p_log, OSM_LOG_VERBOSE,
-           "__updn_bfs_by_node: "
-           "BFS the subnet [\n" );
-
-  while (!cl_is_list_empty(p_currList))
+  while (!cl_is_qlist_empty(&list))
   {
-    osm_log( p_log, OSM_LOG_DEBUG,
+    ib_net64_t remote_guid, current_guid;
+
+    NOISE_L( p_log, OSM_LOG_DEBUG,
              "__updn_bfs_by_node: "
              "Starting a new iteration with %zu elements in current list\n",
-             cl_list_count(p_currList) );
-    /* Init the switch directed list */
-    p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t));
-    if (!p_nextList)
-    {
-      osm_log( p_log, OSM_LOG_ERROR,
-               "__updn_bfs_by_node: ERR AA15: "
-               "No memory for p_nextList\n" );
-      OSM_LOG_EXIT( p_log );
-      return 1;
-    }
+             cl_qlist_count(&list) );
 
-    cl_list_construct( p_nextList );
-    cl_list_init( p_nextList, 10 );
-    /* Go over all current list items till it's empty */
-    /* printf ("-V- In inner while\n"); */
-    p_updn_switch = (updn_next_step_t*)cl_list_remove_head( p_currList );
-    /* While there is a pointer to updn struct we continue to BFS */
-    while (p_updn_switch)
+    u = (struct updn_node *)cl_qlist_remove_head(&list);
+    u->visited = 0; /* cleanup */
+    current_dir = u->dir;
+    current_guid = osm_node_get_node_guid(u->sw->p_node);
+    NOISE_L( p_log, OSM_LOG_DEBUG,
+             "__updn_bfs_by_node: "
+             "Visiting port GUID 0x%" PRIx64 "\n",
+             cl_ntoh64(current_guid) );
+    /* Go over all ports of the switch and find unvisited remote nodes */
+    for ( pn = 0; pn < osm_switch_get_num_ports(u->sw); pn++ )
     {
-      current_dir = p_updn_switch->state;
-      osm_log( p_log, OSM_LOG_DEBUG,
+      osm_node_t *p_remote_node;
+      struct updn_node *rem_u;
+      uint8_t current_min_hop, remote_min_hop, set_hop_return_value;
+      osm_switch_t *p_remote_sw;
+
+      p_remote_node = osm_node_get_remote_node(u->sw->p_node, pn, &pn_rem);
+      /* If no remote node OR remote node is not a SWITCH
+         continue to next pn */
+      if( !p_remote_node || !p_remote_node->sw )
+        continue;
+      /* Fetch remote guid only after validation of remote node */
+      remote_guid = osm_node_get_node_guid(p_remote_node);
+      p_remote_sw = p_remote_node->sw;
+      rem_u = p_remote_sw->priv;
+      /* Decide which direction to mark it (UP/DOWN) */
+      next_dir = __updn_get_dir(u->rank, rem_u->rank,
+                                current_guid, remote_guid,
+                                u->is_root, rem_u->is_root);
+
+      NOISE_L( p_log, OSM_LOG_DEBUG,
                "__updn_bfs_by_node: "
-               "Visiting port GUID 0x%" PRIx64 "\n",
-               cl_ntoh64(p_updn_switch->p_sw->p_node->node_info.port_guid) );
-      /* Go over all ports of the switch and find unvisited remote nodes */
-      for ( pn = 0; pn < osm_switch_get_num_ports(p_updn_switch->p_sw); pn++ )
+               "move from 0x%016" PRIx64 " rank: %u "
+               "to 0x%016" PRIx64" rank: %u\n",
+               cl_ntoh64(current_guid), u->rank,
+               cl_ntoh64(remote_guid), rem->rank );
+      /* Check if this is a legal step : the only illegal step is going
+         from DOWN to UP */
+      if ((current_dir == DOWN) && (next_dir == UP))
       {
-        /* printf("-V- Inner for in port num 0x%X\n", pn); */
-        osm_node_t *p_remote_node;
-        cl_list_iterator_t updn_switch_iterator;
-        boolean_t HasVisited = FALSE;
-        ib_net64_t remote_guid,current_guid;
-        updn_rank_t *p_rem_rank, *p_cur_rank;
-        uint8_t current_min_hop, remote_min_hop, set_hop_return_value;
-        osm_switch_t *p_remote_sw;
-
-        current_guid = osm_node_get_node_guid(p_updn_switch->p_sw->p_node);
-        p_remote_node = osm_node_get_remote_node( p_updn_switch->p_sw->p_node,
-                                                  pn, &pn_rem );
-        /* If no remote node OR remote node is not a SWITCH
-           continue to next pn */
-        if( !p_remote_node ||
-            (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) )
-          continue;
-        /* Fetch remote guid only after validation of remote node */
-        remote_guid = osm_node_get_node_guid(p_remote_node);
-        /* printf ("-V- Current guid : 0x%" PRIx64 " Remote guid : 0x%" PRIx64 "\n", */
-        /* cl_ntoh64(current_guid), cl_ntoh64(remote_guid)); */
-        p_remote_sw = p_remote_node->sw;
-        p_rem_rank = (updn_rank_t*)cl_qmap_get(p_guid_rank_tbl, remote_guid);
-        p_cur_rank = (updn_rank_t*)cl_qmap_get(p_guid_rank_tbl, current_guid);
-        /* Decide which direction to mark it (UP/DOWN) */
-        next_dir = __updn_get_dir (p_updn, p_cur_rank->rank, p_rem_rank->rank,
-                                   current_guid, remote_guid);
-
         osm_log( p_log, OSM_LOG_DEBUG,
                  "__updn_bfs_by_node: "
-                 "move from 0x%016" PRIx64 " rank: %u "
-                 "to 0x%016" PRIx64" rank: %u\n",
-                 cl_ntoh64(current_guid), p_cur_rank->rank,
-                 cl_ntoh64(remote_guid), p_rem_rank->rank );
-        /* Check if this is a legal step : the only illegal step is going
-           from DOWN to UP */
-        if ((current_dir == DOWN) && (next_dir == UP))
+                 "Avoiding move from 0x%016" PRIx64 " to 0x%016" PRIx64"\n",
+                 cl_ntoh64(current_guid), cl_ntoh64(remote_guid) );
+        /* Illegal step */
+        continue;
+      }
+      /* Set MinHop value for the current lid */
+      current_min_hop = osm_switch_get_least_hops(u->sw, root_lid);
+      /* Check hop count if better insert into NextState list && update
+         the remote node Min Hop Table */
+      remote_min_hop = osm_switch_get_hop_count(p_remote_sw, root_lid, pn_rem);
+      if (current_min_hop + 1 < remote_min_hop)
+      {
+        NOISE_L( p_log, OSM_LOG_DEBUG,
+                 "__updn_bfs_by_node (less): "
+                 "Setting Min Hop Table of switch: 0x%" PRIx64
+                 "\n\t\tCurrent hop count is: %d, next hop count: %d"
+                 "\n\tlid to set: 0x%x"
+                 "\n\tport number: 0x%X"
+                 "\n\thops number: %d\n",
+                 cl_ntoh64(remote_guid), remote_min_hop,current_min_hop + 1,
+                 root_lid, pn_rem, current_min_hop + 1 );
+        set_hop_return_value = osm_switch_set_hops(p_remote_sw, root_lid, pn_rem, current_min_hop + 1);
+        if (set_hop_return_value)
         {
-          osm_log( p_log, OSM_LOG_DEBUG,
-                   "__updn_bfs_by_node: "
-                   "Avoiding move from 0x%016" PRIx64 " to 0x%016" PRIx64"\n",
-                   cl_ntoh64(current_guid), cl_ntoh64(remote_guid) );
-          /* Illegal step */
-          continue;
+          osm_log( p_log, OSM_LOG_ERROR,
+                   "__updn_bfs_by_node (less) ERR AA01: "
+                   "Invalid value returned from set min hop is: %d\n",
+                   set_hop_return_value );
         }
-        /* Set MinHop value for the current lid */
-        current_min_hop = osm_switch_get_least_hops(p_updn_switch->p_sw,root_lid);
-        /* Check hop count if better insert into NextState list && update
-           the remote node Min Hop Table */
-        remote_min_hop = osm_switch_get_hop_count(p_remote_sw, root_lid, pn_rem);
-        if (current_min_hop + 1 < remote_min_hop)
-        {
-          osm_log( p_log, OSM_LOG_DEBUG,
-                   "__updn_bfs_by_node (less): "
-                   "Setting Min Hop Table of switch: 0x%" PRIx64
-                   "\n\t\tCurrent hop count is: %d, next hop count: %d"
-                   "\n\tlid to set: 0x%x"
-                   "\n\tport number: 0x%X"
-                   " \n\thops number: %d\n",
-                   cl_ntoh64(remote_guid), remote_min_hop,current_min_hop + 1,
-                   root_lid, pn_rem, current_min_hop + 1 );
-          set_hop_return_value = osm_switch_set_hops(p_remote_sw, root_lid, pn_rem, current_min_hop + 1);
-          if (set_hop_return_value)
-          {
-            osm_log( p_log, OSM_LOG_ERROR,
-                     "__updn_bfs_by_node (less) ERR AA01: "
-                     "Invalid value returned from set min hop is: %d\n",
-                     set_hop_return_value );
-          }
-          /* Check if remote port is allready has been visited */
-          updn_switch_iterator = cl_list_head(p_nextList);
-          while( updn_switch_iterator != cl_list_end(p_nextList) )
-          {
-            updn_next_step_t *p_updn;
-            p_updn = (updn_next_step_t*)cl_list_obj(updn_switch_iterator);
-            /* Mark HasVisited only if:
-               1. Same node guid
-               2. Same direction
-            */
-            if ((p_updn->p_sw->p_node == p_remote_node) && (p_updn->state == next_dir))
-              HasVisited = TRUE;
-            updn_switch_iterator = cl_list_next(updn_switch_iterator);
-          }
-          if (!HasVisited)
-          {
-            /* Insert updn_switch item into the next list */
-            if(!(p_tmp=__updn_create_updn_next_step_t(next_dir, p_remote_sw)))
-            {
-              osm_log( p_log, OSM_LOG_ERROR,
-                       "__updn_bfs_by_node: ERR AA11: "
-                       "Could not create updn_next_step_t\n" );
-              return 1;
-            }
-            osm_log( p_log, OSM_LOG_DEBUG,
-                     "__updn_bfs_by_node: "
-                     "Inserting new element to the next list: guid=0x%" PRIx64 " %s\n",
-                     cl_ntoh64(p_tmp->p_sw->p_node->node_info.port_guid),
-                     (p_tmp->state == UP ? "UP" : "DOWN")
-                     );
-            cl_list_insert_tail(p_nextList, p_tmp);
-          }
-          /* If the same value only update entry - at the min hop table */
-        } else if (current_min_hop + 1 == osm_switch_get_hop_count(p_remote_sw,
-                                                                   root_lid,
-                                                                   pn_rem))
+        /* Check if remote port has already been visited */
+        if (!rem_u->visited)
         {
-          osm_log( p_log, OSM_LOG_DEBUG,
-                   "__updn_bfs_by_node (equal): "
-                   "Setting Min Hop Table of switch: 0x%" PRIx64
-                   "\n\t\tCurrent hop count is: %d, next hop count: %d"
-                   "\n\tlid to set: 0x%x"
-                   "\n\tport number: 0x%X"
-                   "\n\thops number: %d\n",
-                   cl_ntoh64(remote_guid),
-                   osm_switch_get_hop_count(p_remote_sw, root_lid, pn_rem),
-                   current_min_hop + 1, root_lid, pn_rem, current_min_hop + 1 );
-          set_hop_return_value = osm_switch_set_hops(p_remote_sw, root_lid, pn_rem, current_min_hop + 1);
-
-          if (set_hop_return_value)
-          {
-            osm_log( p_log, OSM_LOG_ERROR,
-                     "__updn_bfs_by_node (less) ERR AA12: "
-                     "Invalid value returned from set min hop is: %d\n",
-                     set_hop_return_value );
-          }
+          /* Insert updn_switch item into the next list */
+          rem_u->dir = next_dir;
+          rem_u->visited = 1;
+          NOISE_L( p_log, OSM_LOG_DEBUG,
+                   "__updn_bfs_by_node: "
+                   "Inserting new element to the next list: guid=0x%" PRIx64 " %s\n",
+                   cl_ntoh64(rem_u->sw->p_node->node_info.port_guid),
+                   (rem_u->dir == UP ? "UP" : "DOWN"));
+          cl_qlist_insert_tail(&list, &rem_u->list);
         }
       }
-      free (p_updn_switch);
-      p_updn_switch = (updn_next_step_t*)cl_list_remove_head( p_currList );
     }
-    /* Cleanup p_currList */
-    cl_list_destroy( p_currList );
-    free (p_currList);
-
-    /* Reassign p_currList to p_nextList */
-    p_currList = p_nextList;
   }
-  /* Cleanup p_currList - Had the pointer to cl_list_t */
-  cl_list_destroy( p_currList );
-  free (p_currList);
 
-  osm_log( p_log, OSM_LOG_VERBOSE,
-           "__updn_bfs_by_node: "
-           "BFS the subnet ]\n" );
   OSM_LOG_EXIT( p_log );
   return 0;
 }
@@ -527,23 +356,8 @@ static void
 updn_destroy(
   IN updn_t* const p_updn )
 {
-  cl_map_item_t *p_map_item;
   uint64_t *p_guid_list_item;
 
-  /* Destroy the updn struct */
-  p_map_item = cl_qmap_head( &p_updn->guid_rank_tbl);
-  while( p_map_item != cl_qmap_end( &p_updn->guid_rank_tbl ))
-  {
-    osm_log ( &p_updn->p_osm->log, OSM_LOG_DEBUG,
-              "updn_destroy: "
-              "guid = 0x%" PRIx64 " rank = %u\n",
-              cl_ntoh64(cl_qmap_key(p_map_item)),
-              ((updn_rank_t *)p_map_item)->rank );
-    cl_qmap_remove_item( &p_updn->guid_rank_tbl, p_map_item );
-    free( (updn_rank_t *)p_map_item);
-    p_map_item = cl_qmap_head( &p_updn->guid_rank_tbl );
-  }
-
   /* free the array of guids */
   if (p_updn->updn_ucast_reg_inputs.guid_list)
     free(p_updn->updn_ucast_reg_inputs.guid_list);
@@ -592,8 +406,6 @@ updn_init(
   OSM_LOG_ENTER( &p_osm->log, updn_init );
 
   p_updn->p_osm = p_osm;
-  p_updn->state = UPDN_INIT;
-  cl_qmap_init( &p_updn->guid_rank_tbl );
   p_list = (cl_list_t*)malloc(sizeof(cl_list_t));
   if (!p_list)
   {
@@ -691,171 +503,99 @@ updn_subn_rank(
   IN updn_t* p_updn )
 {
   /* Init local vars */
-  osm_port_t *p_root_port = NULL;
-  uint16_t tbl_size;
+  osm_switch_t *p_sw;
   uint8_t rank = base_rank;
-  osm_physp_t *p_physp, *p_remote_physp, *p_physp_temp;
-  cl_list_t *p_currList,*p_nextList;
+  osm_physp_t *p_physp, *p_remote_physp;
+  cl_qlist_t list;
   cl_status_t did_cause_update;
+  struct updn_node *u, *remote_u;
   uint8_t num_ports, port_num;
   osm_log_t *p_log = &p_updn->p_osm->log;
 
   OSM_LOG_ENTER( p_log, updn_subn_rank );
 
-  osm_log( p_log, OSM_LOG_VERBOSE,
-           "updn_subn_rank: "
-           "Ranking starts from GUID 0x%" PRIx64 "\n", root_guid );
-
-  /* Init the list pointers */
-  p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t));
-  if (!p_nextList)
+  p_sw = osm_get_switch_by_guid(&p_updn->p_osm->subn, cl_hton64(root_guid));
+  if(!p_sw)
   {
     osm_log( p_log, OSM_LOG_ERROR,
-             "updn_subn_rank: ERR AA15: "
-             "No memory for p_nextList\n" );
+             "updn_subn_rank: ERR AA05: "
+             "Wrong switch GUID 0x%" PRIx64 "\n", root_guid );
     OSM_LOG_EXIT( p_log );
     return 1;
   }
 
-  cl_list_construct( p_nextList );
-  cl_list_init( p_nextList, 10 );
-  p_currList = p_nextList;
+  osm_log( p_log, OSM_LOG_VERBOSE,
+           "updn_subn_rank: "
+           "Ranking starts from GUID 0x%" PRIx64 "\n", root_guid );
 
-  /* Check valid subnet & guid */
-  tbl_size = (uint16_t)(cl_qmap_count(&p_updn->p_osm->subn.port_guid_tbl));
-  if (tbl_size == 0)
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "updn_subn_rank: ERR AA04: "
-             "Port guid table is empty, cannot perform ranking\n" );
-    OSM_LOG_EXIT( p_log );
-    return 1;
-  }
+  u = p_sw->priv;
+  u->is_root = 1;
 
-  p_root_port = (osm_port_t*) cl_qmap_get(&p_updn->p_osm->subn.port_guid_tbl,
-                                          cl_ntoh64(root_guid));
-  if( p_root_port == (osm_port_t*)cl_qmap_end( &p_updn->p_osm->subn.port_guid_tbl ) )
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "updn_subn_rank: ERR AA05: "
-             "Wrong guid value: 0x%" PRIx64 "\n", root_guid );
-    OSM_LOG_EXIT( p_log );
-    return 1;
-  }
-
-  /* Rank the first chosen guid anyway since its the base rank */
+  /* Rank the first guid chosen anyway since it's the base rank */
   osm_log( p_log, OSM_LOG_DEBUG,
            "updn_subn_rank: "
            "Ranking port GUID 0x%" PRIx64 "\n", root_guid );
 
-  __updn_update_rank(&p_updn->guid_rank_tbl, cl_ntoh64(root_guid), rank);
-  /*
-    HACK: We are assuming SM is running on HCA, so when getting the default
-    port we'll get the port connected to the rest of the subnet. If SM is
-    running on SWITCH - we should try to get a dr path from all switch ports.
-  */
-  p_physp = osm_port_get_default_phys_ptr( p_root_port );
-  CL_ASSERT( p_physp );
-  CL_ASSERT( osm_physp_is_valid( p_physp ) );
-  /* We can safely add the node to the list */
-  cl_list_insert_tail(p_nextList, p_physp);
-  /* Assign pointer to the list for BFS */
-  p_currList = p_nextList;
-
-  /* BFS the list till its empty */
-  osm_log( p_log, OSM_LOG_VERBOSE,
-           "updn_subn_rank: "
-           "BFS the subnet [\n" );
+  __updn_update_rank(u, rank);
+
+  cl_qlist_init(&list);
+  cl_qlist_insert_tail(&list, &u->list);
 
-  while (!cl_is_list_empty(p_currList))
+  /* BFS the list till it's empty */
+  while (!cl_is_qlist_empty(&list))
   {
     rank++;
-    p_nextList = (cl_list_t*)malloc(sizeof(cl_list_t));
-    if (!p_nextList)
-    {
-      osm_log( p_log, OSM_LOG_ERROR,
-               "updn_subn_rank: ERR AA16: "
-               "No memory for p_nextList\n" );
-      OSM_LOG_EXIT( p_log );
-      return 1;
-    }
 
-    cl_list_construct( p_nextList );
-    cl_list_init( p_nextList, 10 );
-    p_physp = (osm_physp_t*)cl_list_remove_head( p_currList );
-    /* Go over all remote nodes and rank them (if not allready visited) till
-       no elemtent in the list p_currList */
-    while ( p_physp != NULL )
+    u = (struct updn_node *)cl_qlist_remove_head(&list);
+    /* Go over all remote nodes and rank them (if not already visited) */
+    p_sw = u->sw;
+    num_ports = osm_switch_get_num_ports(p_sw);
+    osm_log( p_log, OSM_LOG_DEBUG,
+             "updn_subn_rank: "
+             "Handling switch GUID 0x%" PRIx64 "\n",
+             cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)) );
+    for (port_num = 1; port_num < num_ports; port_num++)
     {
-      num_ports = osm_node_get_num_physp( p_physp->p_node );
-      osm_log( p_log, OSM_LOG_DEBUG,
-               "updn_subn_rank: "
-               "Handling port GUID 0x%" PRIx64 "\n",
-               cl_ntoh64(p_physp->port_guid) );
-      for (port_num = 1; port_num < num_ports; port_num++)
+      ib_net64_t port_guid;
+
+      /* Current port fetched in order to get remote side */
+      p_physp = osm_node_get_physp_ptr( p_sw->p_node, port_num );
+      p_remote_physp = p_physp->p_remote_physp;
+
+      /*
+        make sure that all the following occur on p_remote_physp:
+        1. The port isn't NULL
+        2. The port is a valid port
+        3. It is a switch
+      */
+      if ( p_remote_physp &&
+           osm_physp_is_valid( p_remote_physp ) &&
+           p_remote_physp->p_node->sw )
       {
-        ib_net64_t port_guid;
-
-        /* Current port fetched in order to get remote side */
-        p_physp_temp = osm_node_get_physp_ptr( p_physp->p_node, port_num );
-        p_remote_physp = p_physp_temp->p_remote_physp;
-
-        /*
-          make sure that all the following occur on p_remote_physp:
-          1. The port isn't NULL
-          2. The port is a valid port
-        */
-        if ( p_remote_physp &&
-             osm_physp_is_valid ( p_remote_physp ))
-        {
-          port_guid = p_remote_physp->port_guid;
-          osm_log( p_log, OSM_LOG_DEBUG,
-                   "updn_subn_rank: "
-                   "Visiting remote port GUID 0x%" PRIx64 "\n",
-                   cl_ntoh64(port_guid) );
-          /* Was it visited ?
-             Only if the pointer equal to cl_qmap_end its not
-             found in the list */
-          osm_log( p_log, OSM_LOG_DEBUG,
-                   "updn_subn_rank: "
-                   "Ranking port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid) );
-          did_cause_update = __updn_update_rank(&p_updn->guid_rank_tbl, port_guid, rank);
-
-          osm_log( p_log, OSM_LOG_VERBOSE,
-                   "updn_subn_rank: "
-                   "Rank of port GUID 0x%" PRIx64 " = %u\n", cl_ntoh64(port_guid),
-                   ((updn_rank_t*)cl_qmap_get(&p_updn->guid_rank_tbl, port_guid))->rank
-                   );
-
-          if (did_cause_update)
-          {
-            cl_list_insert_tail(p_nextList, p_remote_physp);
-          }
-        }
+        remote_u = p_remote_physp->p_node->sw->priv;
+        port_guid = p_remote_physp->port_guid;
+        NOISE_L( p_log, OSM_LOG_DEBUG,
+                 "updn_subn_rank: "
+                 "Ranking port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid) );
+        did_cause_update = __updn_update_rank(remote_u, rank);
+
+        osm_log( p_log, OSM_LOG_DEBUG,
+                 "updn_subn_rank: "
+                 "Rank of port GUID 0x%" PRIx64 " = %u\n",
+                 cl_ntoh64(port_guid),
+                 remote_u->rank );
+
+        if (did_cause_update)
+          cl_qlist_insert_tail(&list, &remote_u->list);
       }
-      /* Propagte through the next item in the p_currList */
-      p_physp = (osm_physp_t*)cl_list_remove_head( p_currList );
     }
-    /* First free the allocation of cl_list pointer then reallocate */
-    cl_list_destroy( p_currList );
-    free(p_currList);
-    /* p_currList is empty - need to assign it to p_nextList */
-    p_currList = p_nextList;
   }
 
-  osm_log( p_log, OSM_LOG_VERBOSE,
-           "updn_subn_rank: "
-           "BFS the subnet ]\n" );
-
-  cl_list_destroy( p_currList );
-  free(p_currList);
-
   /* Print Summary of ranking */
   osm_log( p_log, OSM_LOG_VERBOSE,
            "updn_subn_rank: "
            "Rank Info :\n\t Root Guid = 0x%" PRIx64 "\n\t Max Node Rank = %d\n",
-           cl_ntoh64(p_root_port->guid), rank );
-  p_updn->state = UPDN_RANK;
+           root_guid, rank );
   OSM_LOG_EXIT( p_log );
   return 0;
 }
@@ -875,25 +615,6 @@ __osm_subn_set_up_down_min_hop_table(
 
   OSM_LOG_ENTER( p_log, __osm_subn_set_up_down_min_hop_table );
 
-  if (p_updn->state == UPDN_INIT)
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "__osm_subn_set_up_down_min_hop_table: ERR AA06: "
-             "Calculating Min Hop only allowed after ranking\n" );
-    OSM_LOG_EXIT( p_log );
-    return 1;
-  }
-
-  /* Check if its a non switched subnet .. */
-  if ( cl_is_qmap_empty( &p_subn->sw_guid_tbl ) )
-  {
-    osm_log( p_log, OSM_LOG_ERROR,
-             "__osm_subn_set_up_down_min_hop_table: ERR AA10: "
-             "This is a non switched subnet, cannot perform UPDN algorithm\n" );
-    OSM_LOG_EXIT( p_log );
-    return 1;
-  }
-
   /* Go over all the switches in the subnet - for each init their Min Hop
      Table */
   osm_log( p_log, OSM_LOG_VERBOSE,
@@ -927,8 +648,7 @@ __osm_subn_set_up_down_min_hop_table(
              "__osm_subn_set_up_down_min_hop_table: "
              "BFS through port GUID 0x%" PRIx64 "\n",
              cl_ntoh64(port_guid) );
-    if(__updn_bfs_by_node(p_updn, p_subn, p_port,
-                          &p_updn->guid_rank_tbl))
+    if(__updn_bfs_by_node(p_log, p_subn, p_port))
     {
       OSM_LOG_EXIT( p_log );
       return 1;
@@ -952,7 +672,6 @@ __osm_subn_calc_up_down_min_hop_table(
   IN updn_t* p_updn )
 {
   uint8_t idx = 0;
-  cl_map_item_t *p_map_item;
   int status;
 
   OSM_LOG_ENTER( &p_updn->p_osm->log, osm_subn_calc_up_down_min_hop_table );
@@ -965,7 +684,18 @@ __osm_subn_calc_up_down_min_hop_table(
     osm_log( &p_updn->p_osm->log, OSM_LOG_ERROR,
              "__osm_subn_calc_up_down_min_hop_table: ERR AA0A: "
              "No guids were given or number of guids is 0\n" );
-    return 1;
+    status = -1;
+    goto _exit;
+  }
+
+  /* Check if it's not a switched subnet */
+  if ( cl_is_qmap_empty( &p_updn->p_osm->subn.sw_guid_tbl ) )
+  {
+    osm_log( &p_updn->p_osm->log, OSM_LOG_ERROR,
+             "__osm_subn_calc_up_down_min_hop_table: ERR AAOB: "
+             "This is not a switched subnet, cannot perform UPDN algorithm\n" );
+    status = -1;
+    goto _exit;
   }
 
   for (idx = 0; idx < num_guids; idx++)
@@ -980,27 +710,16 @@ __osm_subn_calc_up_down_min_hop_table(
 
   status = __osm_subn_set_up_down_min_hop_table(p_updn);
 
-  /* Cleanup updn rank tbl */
-  p_map_item = cl_qmap_head( &p_updn->guid_rank_tbl);
-  while( p_map_item != cl_qmap_end( &p_updn->guid_rank_tbl ))
-  {
-    osm_log( &p_updn->p_osm->log, OSM_LOG_DEBUG,
-             "__osm_subn_calc_up_down_min_hop_table: "
-             "guid = 0x%" PRIx64 " rank = %u\n",
-             cl_ntoh64(cl_qmap_key(p_map_item)),
-             ((updn_rank_t *)p_map_item)->rank );
-    cl_qmap_remove_item( &p_updn->guid_rank_tbl, p_map_item);
-    free( (updn_rank_t *)p_map_item);
-    p_map_item = cl_qmap_head( &p_updn->guid_rank_tbl);
-  }
-
+ _exit:
   OSM_LOG_EXIT( &p_updn->p_osm->log );
   return status;
 }
 
 /**********************************************************************
  **********************************************************************/
-static void expand_lid_matrices_for_lmc(osm_subn_t *p_subn)
+static void
+expand_lid_matrices_for_lmc(
+  osm_subn_t *p_subn )
 {
   cl_map_item_t *p_next_port, *p_next_sw;
   osm_port_t *p_port;
@@ -1009,7 +728,8 @@ static void expand_lid_matrices_for_lmc(osm_subn_t *p_subn)
   uint8_t port, num_ports;
 
   p_next_port = cl_qmap_head( &p_subn->port_guid_tbl );
-  while (p_next_port != cl_qmap_end(&p_subn->port_guid_tbl)) {
+  while (p_next_port != cl_qmap_end(&p_subn->port_guid_tbl))
+  {
     p_port = (osm_port_t *)p_next_port;
     p_next_port = cl_qmap_next(p_next_port);
     if (p_port->p_node->sw &&
@@ -1019,7 +739,8 @@ static void expand_lid_matrices_for_lmc(osm_subn_t *p_subn)
     if (!min_lid || min_lid == max_lid)
       continue;
     p_next_sw = cl_qmap_head(&p_subn->sw_guid_tbl);
-    while (p_next_sw != cl_qmap_end(&p_subn->sw_guid_tbl)) {
+    while (p_next_sw != cl_qmap_end(&p_subn->sw_guid_tbl))
+    {
       p_sw = (osm_switch_t *)p_next_sw;
       p_next_sw = cl_qmap_next(p_next_sw);
       num_ports = osm_switch_get_num_ports(p_sw);
@@ -1034,20 +755,62 @@ static void expand_lid_matrices_for_lmc(osm_subn_t *p_subn)
 
 /**********************************************************************
  **********************************************************************/
+static struct updn_node *
+create_updn_node(
+  osm_switch_t *sw )
+{
+  struct updn_node *u;
+
+  u = malloc(sizeof(*u));
+  if (!u)
+    return NULL;
+  memset(u, 0, sizeof(*u));
+  u->sw = sw;
+  u->rank = 0xffffffff;
+  return u;
+}
+
+static void
+delete_updn_node(
+  struct updn_node *u )
+{
+  u->sw->priv = NULL;
+  free(u);
+}
+
+/**********************************************************************
+ **********************************************************************/
 /* UPDN callback function */
 static int
 __osm_updn_call(
   void *ctx )
 {
   updn_t *p_updn = ctx;
+  cl_map_item_t *p_item;
+  osm_switch_t *p_sw;
 
   OSM_LOG_ENTER( &p_updn->p_osm->log, __osm_updn_call );
 
+  p_item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
+  while(p_item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl))
+  {
+    p_sw = (osm_switch_t *)p_item;
+    p_item = cl_qmap_next(p_item);
+    p_sw->priv = create_updn_node(p_sw);
+    if (!p_sw->priv)
+    {
+      osm_log( &(p_updn->p_osm->log), OSM_LOG_ERROR,
+               "__osm_updn_call: ERR AA0C: "
+               " cannot create updn node\n" );
+      OSM_LOG_EXIT( &p_updn->p_osm->log );
+      return -1;
+    }
+  }
+
   /* First auto detect root nodes - if required */
   if ( p_updn->auto_detect_root_nodes )
   {
     osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
-    /* printf ("-V- b4 osm_updn_find_root_nodes_by_min_hop\n"); */
     __osm_updn_find_root_nodes_by_min_hop( p_updn );
   }
   /* printf ("-V- after osm_updn_find_root_nodes_by_min_hop\n"); */
@@ -1066,8 +829,16 @@ __osm_updn_call(
   else
     osm_log( &p_updn->p_osm->log, OSM_LOG_INFO,
              "__osm_updn_call: "
-             "disable UPDN algorithm, no root nodes were found\n" );
+             "disabling UPDN algorithm, no root nodes were found\n" );
   
+  p_item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
+  while(p_item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl))
+  {
+    p_sw = (osm_switch_t *)p_item;
+    p_item = cl_qmap_next(p_item);
+    delete_updn_node(p_sw->priv);
+  }
+
   OSM_LOG_EXIT( &p_updn->p_osm->log );
   return 0;
 }
@@ -1137,7 +908,7 @@ __osm_updn_find_root_nodes_by_min_hop(
 
   osm_log( &p_osm->log, OSM_LOG_DEBUG,
            "__osm_updn_find_root_nodes_by_min_hop: "
-           "current number of ports in the subnet is %d\n",
+           "Current number of ports in the subnet is %d\n",
            cl_qmap_count(&p_osm->subn.port_guid_tbl) );
   /* Init the required vars */
   cl_qmap_init( &min_hop_hist );
@@ -1159,7 +930,7 @@ __osm_updn_find_root_nodes_by_min_hop(
   /* Find the Maximum number of CAs (and routers) for histogram normalization */
   osm_log( &p_osm->log, OSM_LOG_VERBOSE,
            "__osm_updn_find_root_nodes_by_min_hop: "
-           "Find the number of CAs and store them in cl_list\n" );
+           "Finding the number of CAs and storing them in cl_map\n" );
   p_next_port = (osm_port_t*)cl_qmap_head( &p_osm->subn.port_guid_tbl );
   while( p_next_port != (osm_port_t*)cl_qmap_end( &p_osm->subn.port_guid_tbl ) ) {
     p_port = p_next_port;
@@ -1177,13 +948,13 @@ __osm_updn_find_root_nodes_by_min_hop(
       cl_map_insert( &ca_by_lid_map, self_lid_ho, (void *)0x1);
       osm_log( &p_osm->log, OSM_LOG_DEBUG,
                "__osm_updn_find_root_nodes_by_min_hop: "
-               "Inserting into array GUID 0x%" PRIx64 ", Lid: 0x%X\n",
+               "Inserting GUID 0x%" PRIx64 ", Lid: 0x%X into array\n",
                cl_ntoh64(osm_port_get_guid(p_port)), self_lid_ho );
     }
   }
   osm_log( &p_osm->log, OSM_LOG_DEBUG,
            "__osm_updn_find_root_nodes_by_min_hop: "
-           "Found %u CA, %u SW in the subnet\n", numCas, numSws );
+           "Found %u CAs, %u SWs in the subnet\n", numCas, numSws );
   p_next_sw = (osm_switch_t*)cl_qmap_head( &p_osm->subn.sw_guid_tbl );
   osm_log( &p_osm->log, OSM_LOG_VERBOSE,
            "__osm_updn_find_root_nodes_by_min_hop: "
@@ -1201,7 +972,7 @@ __osm_updn_find_root_nodes_by_min_hop(
     p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
 
     /* Clear Min Hop Table && FWD Tbls - This should caused opensm to
-       rebuild its FWD tables , post setting Min Hop Tables */
+       rebuild it's FWD tables, post setting Min Hop Tables */
     max_lid_ho = osm_switch_get_max_lid_ho(p_sw);
     /* Get base lid of switch by retrieving port 0 lid of node pointer */
     self_lid_ho = cl_ntoh16( osm_node_get_base_lid( p_sw->p_node, 0 ) );
@@ -1285,7 +1056,7 @@ __osm_updn_find_root_nodes_by_min_hop(
                numHopBarsOverThd1, numHopBarsOverThd2 );
     }
 
-    /* destroy the qmap table and all its content - no longer needed */
+    /* destroy the qmap table and all it's content - no longer needed */
     osm_log( &p_osm->log, OSM_LOG_DEBUG,
              "__osm_updn_find_root_nodes_by_min_hop: "
              "Cleanup: delete histogram "
-- 
1.5.0.1.26.gf5a92





More information about the general mailing list