[ofa-general] [PATCH 2/4] Enhanced qsort comparison function so it looks at the next switch level.

Nicolas Morey Chaisemartin nicolas.morey-chaisemartin at ext.bull.net
Tue Mar 10 06:59:45 PDT 2009


Signed-off-by: Nicolas Morey-Chaisemartin <nicolas.morey-chaisemartin at ext.bull.net>
---
 opensm/opensm/osm_ucast_ftree.c |   56 ++++++++++++++++++++++++++++++++++++---
 1 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
index 7ca7f35..0ba4999 100644
--- a/opensm/opensm/osm_ucast_ftree.c
+++ b/opensm/opensm/osm_ucast_ftree.c
@@ -173,6 +173,7 @@ typedef struct ftree_sw_t_ {
 	uint8_t up_port_groups_num;
 	boolean_t is_leaf;
 	unsigned down_port_groups_idx;
+	uint32_t min_counter_down;
 } ftree_sw_t;
 
 /***************************************************
@@ -1900,8 +1901,33 @@ static void __osm_ftree_set_sw_fwd_table(IN cl_map_item_t * const p_map_item,
 				    p_sw->p_osm_sw);
 }
 
+static void
+__osm_ftree_recalculate_min_counter_down(ftree_sw_t *p_sw){	/* refresh the cached p_sw->min_counter_down */
+	uint32_t min= (1<<30);	/* starting sentinel; also the stored result when the switch has no down port groups */
+	uint32_t i;
+	for(i=0;i < p_sw->down_port_groups_num; i++) {	/* scan every down port group of this switch */
+		if(p_sw->down_port_groups[i]->counter_down < min){
+			min = p_sw->down_port_groups[i]->counter_down;
+		}
+	}
+	p_sw->min_counter_down = min;	/* smallest counter_down seen, never above the 1<<30 sentinel */
+	return;
+}
 
+static uint32_t
+__osm_ftree_find_lowest_loaded_group_on_sw(ftree_sw_t *p_sw){	/* return the cached minimum counter_down of p_sw */
+	/* Disabled on-demand scan; the cached p_sw->min_counter_down is returned instead:
+	uint32_t min= (1<<30); uint32_t i;
+	for(i=0;i < p_sw->down_port_groups_num; i++) {
+		if(p_sw->down_port_groups[i]->counter_down < min){
+			min = p_sw->down_port_groups[i]->counter_down;
+		}
+		}*/
+	return p_sw->min_counter_down;	/* value written by __osm_ftree_recalculate_min_counter_down() */
+}
 
+/* This is for downgoing_by_going_up.
+ * If the loads are equal, look at the remote switches and prefer the least loaded one */
 static int
 __osm_ftree_port_group_compare_load_down(const void* p1,const void* p2){
 	int temp = (*((ftree_port_group_t**)p1))->counter_down -(*((ftree_port_group_t**)p2))->counter_down ;
@@ -1909,9 +1935,26 @@ __osm_ftree_port_group_compare_load_down(const void* p1,const void* p2){
 		return 1;
 	if(temp < 0)
 		return -1;
-	return 0;
+
+	/* Compare the lowest down-group load of each remote sw; the lower one sorts first */
+	do{
+		uint32_t load1=__osm_ftree_find_lowest_loaded_group_on_sw((*((ftree_port_group_t**)p1))->remote_hca_or_sw.p_sw);
+		uint32_t load2=__osm_ftree_find_lowest_loaded_group_on_sw((*((ftree_port_group_t**)p2))->remote_hca_or_sw.p_sw);
+		temp = load1-load2;
+		if(temp > 0)
+			return 1;
+		if(temp < 0)
+			return -1;
+	}while(0);
+
+	/* If they are both equal, order by remote port GUID (the bigger GUID sorts later) */
+	if(((*((ftree_port_group_t**)p1)))->remote_port_guid > ((*((ftree_port_group_t**)p2)))->remote_port_guid)
+		return 1;
+
+	return -1;
 }
 
+/* This is for upgoing_by_going_down. There is not much balancing to do, so don't bother looking at the next rank */
 static int
 __osm_ftree_port_group_compare_load_up(const void* p1,const void* p2){
 	int temp = (*((ftree_port_group_t**)p1))->counter_up -(*((ftree_port_group_t**)p2))->counter_up ;
@@ -2212,8 +2255,8 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 	 * going port and explore the other pots on secondary routes more easily (and quickly) */
 	qsort(p_sw->up_port_groups,p_sw->up_port_groups_num,sizeof(*(p_sw->up_port_groups)), __osm_ftree_port_group_compare_load_down);
 
-
 	p_min_group = p_sw->up_port_groups[0];
+
 	/* Find the least loaded upgoing port in the selected group */
 	p_min_port = NULL;
 	ports_num = (uint16_t) cl_ptr_vector_get_size(&p_min_group->ports);
@@ -2277,12 +2320,17 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 				__osm_ftree_tuple_to_str(p_sw->tuple),
 				__osm_ftree_tuple_to_str(p_remote_sw->tuple));
 		}
+
 		/* The number of downgoing routes is tracked in the
 		   p_group->counter_down p_port->counter_down counters of the
 		   group and port that belong to the lower side of the link
 		   (on switch with higher rank) */
 		p_min_group->counter_down++;
 		p_min_port->counter_down++;
+		if(p_min_group->counter_down == (p_min_group->remote_hca_or_sw.p_sw->min_counter_down+1)){
+			__osm_ftree_recalculate_min_counter_down(p_min_group->remote_hca_or_sw.p_sw);
+		}
+
 		if (is_real_lid) {
 			/* This LID may already be in the LFT in the reverse_hop feature is used */
 			/* We update the LFT only if this LID isn't already present. */
@@ -2371,7 +2419,7 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 		/* skip if target lid has been already set on remote switch fwd tbl (with a bigger hop count)*/
 		if (p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] != OSM_NO_PATH)
 			if((target_rank -p_remote_sw->rank + 2*reverse_hops) >=
-			   osm_switch_get_least_hops(p_remote_sw->p_osm_sw, cl_ntoh16(target_lid)))
+			     osm_switch_get_least_hops(p_remote_sw->p_osm_sw, cl_ntoh16(target_lid)))
 				continue;
 
 		if (p_sw->is_leaf) {
@@ -2387,7 +2435,7 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 		   We can safely assume that switch will initiate very
 		   few traffic, so there's no point waisting runtime on
 		   trying to balance these routes - always pick port 0. */
-		/* GET MIN PORT HERE */
+
 		p_min_port = NULL;
 		ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports);
 		for (j = 0; j < ports_num; j++) {
-- 
1.6.2-rc2.GIT





More information about the general mailing list