[ofa-general] [PATCH] osm: some improvements to fat-tree routing

Yevgeny Kliteynik kliteyn at dev.mellanox.co.il
Sun Jul 15 04:56:32 PDT 2007


Hi Sasha

This patch adds a small improvement to fat-tree routing for
asymmetrical (or unusual) trees:
1. When routing down-going routes (by climbing up the tree), 
   first selecting the least loaded group, and then least loaded
   port in the selected group.
2. When routing up-going routes (by descending down the tree), 
   scan groups by indexing order, but the start group is selected
   by round-robin.

Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il>
---
 opensm/opensm/osm_ucast_ftree.c |   79 ++++++++++++++++++++++++++-------------
 1 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
index 38bee8a..cfe5435 100644
--- a/opensm/opensm/osm_ucast_ftree.c
+++ b/opensm/opensm/osm_ucast_ftree.c
@@ -179,6 +179,7 @@ typedef struct ftree_port_group_t_
    ftree_hca_or_sw remote_hca_or_sw;  /* pointer to remote hca/switch */
    cl_ptr_vector_t ports;             /* vector of ports to the same lid */
    boolean_t       is_cn;             /* whether this port is a compute node */
+   uint32_t        counter_down;      /* number of allocated routs downwards */
 } ftree_port_group_t;
 
 /***************************************************
@@ -200,6 +201,7 @@ typedef struct ftree_sw_t_
    uint8_t                up_port_groups_num;
    ftree_fwd_tbl_t        lft_buf;
    boolean_t              is_leaf;
+   int                    down_port_groups_idx;
 } ftree_sw_t;
 
 /***************************************************
@@ -681,6 +683,8 @@ __osm_ftree_sw_create(
    p_sw->lft_buf = (ftree_fwd_tbl_t)cl_pool_get(&p_ftree->sw_fwd_tbl_pool);
    memset(p_sw->lft_buf, OSM_NO_PATH, FTREE_FWD_TBL_LEN);
 
+   p_sw->down_port_groups_idx = -1;
+
    return p_sw;
 } /* __osm_ftree_sw_create() */
 
@@ -2145,6 +2149,7 @@ __osm_ftree_fabric_route_upgoing_by_going_down(
    ftree_port_t        * p_min_port;
    uint16_t              i;
    uint16_t              j;
+   uint16_t              k;
 
    /* we shouldn't enter here if both real_lid and main_path are false */
    CL_ASSERT(is_real_lid || is_main_path);
@@ -2153,9 +2158,23 @@ __osm_ftree_fabric_route_upgoing_by_going_down(
    if (p_sw->down_port_groups_num == 0)
        return;
 
-   /* foreach down-going port group (in indexing order) */
-   for (i = 0; i < p_sw->down_port_groups_num; i++)
+   /* promote the index that indicates which group should we
+      start with when going through all the downgoing groups */
+   if (p_sw->down_port_groups_idx == -1)
+      p_sw->down_port_groups_idx = 0;
+   else
+      p_sw->down_port_groups_idx = 
+            (p_sw->down_port_groups_idx + 1) % p_sw->down_port_groups_num;
+
+   /* foreach down-going port group (in indexing order)
+      starting with the least loaded group */
+   for ( k = 0; k < p_sw->down_port_groups_num; k++ )
    {
+      if ( k == 0 )
+         i = p_sw->down_port_groups_idx;
+      else
+         i = (i+1) % p_sw->down_port_groups_num;
+
       p_group = p_sw->down_port_groups[i];
 
       /* Skip this port group unless it points to a switch */
@@ -2352,34 +2371,40 @@ __osm_ftree_fabric_route_downgoing_by_going_up(
    if (p_sw->rank == 0)
       return;
 
-   /* Find the least loaded port of all the upgoing port groups
-      (in indexing order of the remote switches). */
+   /* Find the least loaded upgoing port group */
    p_min_group = NULL;
-   p_min_port = NULL;
    for (i = 0; i < p_sw->up_port_groups_num; i++)
    {
       p_group = p_sw->up_port_groups[i];
+      if (!p_min_group)
+      {
+         /* first group that we're checking - use 
+            it as a group with the lowest load */
+         p_min_group = p_group;
+      }
+      else if ( p_group->counter_down < p_min_group->counter_down  )
+      {
+         /* this group is less loaded - use it as min */
+         p_min_group = p_group;
+      }
+   }
 
-      ports_num = (uint16_t)cl_ptr_vector_get_size(&p_group->ports);
-      for (j = 0; j < ports_num; j++)
+   /* Find the least loaded upgoing port in the selected group */
+   p_min_port = NULL;
+   ports_num = (uint16_t)cl_ptr_vector_get_size(&p_min_group->ports);
+   for (j = 0; j < ports_num; j++)
+   {
+      cl_ptr_vector_at(&p_min_group->ports, j, (void **)&p_port);
+      if (!p_min_port)
       {
-         cl_ptr_vector_at(&p_group->ports, j, (void **)&p_port);
-         if (!p_min_group)
-         {
-            /* first port that we're checking - use
-               it as a port with the lowest load */
-            p_min_group = p_group;
-            p_min_port = p_port;
-         }
-         else
-         {
-            if ( p_port->counter_down < p_min_port->counter_down  )
-            {
-               /* this port is less loaded - use it as min */
-               p_min_group = p_group;
-               p_min_port = p_port;
-            }
-         }
+         /* first port that we're checking - use
+            it as a port with the lowest load */
+         p_min_port = p_port;
+      }
+      else if ( p_port->counter_down < p_min_port->counter_down  )
+      {
+         /* this port is less loaded - use it as min */
+         p_min_port = p_port;
       }
    }
 
@@ -2435,8 +2460,10 @@ __osm_ftree_fabric_route_downgoing_by_going_up(
                  __osm_ftree_tuple_to_str(p_remote_sw->tuple));
       }
       /* The number of downgoing routes is tracked in the
-         p_port->counter_down counter of the port that belongs to
-         the lower side of the link (on switch with higher rank) */
+         p_group->counter_down p_port->counter_down counters of the
+         group and port that belong to the lower side of the link
+         (on switch with higher rank) */
+      p_min_group->counter_down++;
       p_min_port->counter_down++;
       if (is_real_lid)
       {
-- 
1.5.1.4




More information about the general mailing list