[ofa-general] [PATCH v2] osm_ucast_ftree.c Allow horizontal links between switches of max rank

Line.Holen at Sun.COM Line.Holen at Sun.COM
Tue Jun 16 06:09:18 PDT 2009


This patch makes it legal to have cross links (horizontal links) between
switches at max rank. These switches have the same rank, so the hop count
can no longer be calculated from rank alone.
The horizontal links are treated as downlinks: switch A has a downlink to B
while B has a downlink to A. Tests on LIDs and on the number of hops make sure
that we don't loop back and forth across the link.

Signed-off-by: Frank Olaf Sem-Jacobsen <frankose at simula.no>
Signed-off-by: Line Holen <Line.Holen at sun.com>

---

Changes compared to v1 include the hop value sent as an argument to
set_hops_on_remote_sw(). In addition, counter_up is decremented
if the LFT entry is changed due to a shorter path.

diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
index 8ed2f74..9ab4aef 100644
--- a/opensm/opensm/osm_ucast_ftree.c
+++ b/opensm/opensm/osm_ucast_ftree.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2009 Simula Research Laboratory. All rights reserved.
  * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved.
@@ -1495,7 +1496,8 @@ static void fabric_make_indexing(IN ftree_fabric_t * p_ftree)
 				p_remote_sw =
 				    p_sw->down_port_groups[i]->remote_hca_or_sw.
 				    p_sw;
-				if (tuple_assigned(p_remote_sw->tuple)) {
+				if (tuple_assigned(p_remote_sw->tuple) ||
+				    (p_sw->rank == p_remote_sw->rank)) {
 					/* this switch has been already indexed */
 					continue;
 				}
@@ -1884,6 +1886,53 @@ static void set_sw_fwd_table(IN cl_map_item_t * const p_map_item,
 
 /***************************************************
  ***************************************************/
+/*
+ * This function will count down counter_up on the remote side of
+ * the indicated switch and port */
+static void decrement_counter_up(IN ftree_fabric_t * p_ftree,
+				IN ftree_sw_t * p_sw, IN uint8_t port)
+{
+	ftree_port_t *p_port;
+	ftree_port_group_t *p_group;
+	ftree_sw_t *neighbor_sw;
+	uint8_t neighbor_port_num;
+	osm_node_t *neighbor_node;
+	int i, j;
+
+	neighbor_node = osm_node_get_remote_node(p_sw->p_osm_sw->p_node, port, &neighbor_port_num);
+	if (!neighbor_node)
+		return;
+	if (osm_node_get_type(neighbor_node) != IB_NODE_TYPE_SWITCH)
+		return;
+
+	neighbor_sw = fabric_get_sw_by_guid(p_ftree,
+					    osm_node_get_node_guid(neighbor_node));
+	if (!neighbor_sw)
+		return;
+
+	/* Find the port on the neighbor_sw that has port_num = neighbor_port_num
+	   This is the port that needs to decrement counter_up
+	   This port should be part of the downgroups */
+	for (i = 0; i < neighbor_sw->down_port_groups_num; i++) {
+		p_group = neighbor_sw->down_port_groups[i];
+		for (j = 0;
+		     j < (uint16_t) cl_ptr_vector_get_size(&p_group->ports);
+		     j++) {
+			cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port);
+			if (p_port->port_num == neighbor_port_num) {
+				p_port->counter_up--;
+				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
+				    "Decremented counter_up on %s port %d\n",
+				    neighbor_sw->p_osm_sw->p_node->print_desc,
+				    neighbor_port_num);
+				return;
+			}
+		}
+	}
+}
+
+/***************************************************
+ ***************************************************/
 
 /*
  * Function: assign-up-going-port-by-descending-down
@@ -1903,12 +1952,11 @@ fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
 				   IN ftree_sw_t * p_sw,
 				   IN ftree_sw_t * p_prev_sw,
 				   IN uint16_t target_lid,
-				   IN uint8_t target_rank,
 				   IN boolean_t is_real_lid,
 				   IN boolean_t is_main_path,
 				   IN boolean_t is_target_a_sw,
-				   IN uint8_t highest_rank_in_route,
-				   IN uint16_t reverse_hops)
+				   IN uint16_t reverse_hops,
+				   IN uint8_t current_hops)
 {
 	ftree_sw_t *p_remote_sw;
 	uint16_t ports_num;
@@ -1919,6 +1967,8 @@ fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
 	uint16_t j;
 	uint16_t k;
 	boolean_t created_route = FALSE;
+	uint8_t least_hops;
+	uint8_t old_lft_val = OSM_NO_PATH;
 
 	/* we shouldn't enter here if both real_lid and main_path are false */
 	CL_ASSERT(is_real_lid || is_main_path);
@@ -1968,14 +2018,15 @@ fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
 		   Set on the remote switch how to get to the target_lid -
 		   set LFT(target_lid) on the remote switch to the remote port */
 		p_remote_sw = p_group->remote_hca_or_sw.p_sw;
+		least_hops = sw_get_least_hops(p_remote_sw, target_lid);
 
-		if (sw_get_least_hops(p_remote_sw, target_lid) != OSM_NO_PATH) {
+		if ((least_hops != OSM_NO_PATH) && (least_hops <= current_hops)) {
 			/* Loop in the fabric - we already routed the remote switch
 			   on our way UP, and now we see it again on our way DOWN */
 			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
 				"Loop of lenght %d in the fabric:\n                             "
 				"Switch %s (LID %u) closes loop through switch %s (LID %u)\n",
-				(p_remote_sw->rank - highest_rank_in_route) * 2,
+				current_hops,
 				tuple_to_str(p_remote_sw->tuple),
 				p_group->base_lid,
 				tuple_to_str(p_sw->tuple),
@@ -2019,24 +2070,22 @@ fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
 
 		/* setting fwd tbl port only if this is real LID */
 		if (is_real_lid) {
+			old_lft_val = p_remote_sw->p_osm_sw->new_lft[target_lid];
 			p_remote_sw->p_osm_sw->new_lft[target_lid] =
 			    p_min_port->remote_port_num;
+
 			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
-				"Switch %s: set path to CA LID %u through port %u\n",
+				"Switch %s: set path to CA LID %u through port %u, hops %u\n",
 				tuple_to_str(p_remote_sw->tuple),
 				target_lid,
-				p_min_port->remote_port_num);
+				p_min_port->remote_port_num,
+				current_hops + 1);
 
 			/* On the remote switch that is pointed by the p_group,
 			   set hops for ALL the ports in the remote group. */
 
 			set_hops_on_remote_sw(p_group, target_lid,
-					      ((target_rank -
-						highest_rank_in_route) +
-					       (p_remote_sw->rank -
-						highest_rank_in_route) +
-					       reverse_hops * 2),
-					      is_target_a_sw);
+					      current_hops + 1, is_target_a_sw);
 		}
 
 		/* The number of upgoing routes is tracked in the
@@ -2044,19 +2093,30 @@ fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
 		   the upper side of the link (on switch with lower rank).
 		   Counter is promoted only if we're routing LID on the main
 		   path (whether it's a real LID or a dummy one). */
-		if (is_main_path)
+		if (is_main_path) {
 			p_min_port->counter_up++;
 
+			/* If we have changed a previously set lft, then we need
+			   to reduce counter_up on the old link */
+			if (old_lft_val != OSM_NO_PATH) {
+				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
+				    "lft on %s already set, need to decrement counter "
+				    "for old lft setup\n",
+				    p_remote_sw->p_osm_sw->p_node->print_desc);
+				decrement_counter_up(p_ftree, p_remote_sw, old_lft_val);
+			}
+		}
+
 		/* Recursion step:
 		   Assign upgoing ports by stepping down, starting on REMOTE switch */
 		created_route |= fabric_route_upgoing_by_going_down(p_ftree, p_remote_sw,	/* remote switch - used as a route-upgoing alg. start point */
-								    NULL,	/* prev. position - NULL to mark that we went down and not up */
+								    p_sw,	/* prev. position */
 								    target_lid,	/* LID that we're routing to */
-								    target_rank,	/* rank of the LID that we're routing to */
 								    is_real_lid,	/* whether the target LID is real or dummy */
 								    is_main_path,	/* whether this is path to HCA that should by tracked by counters */
 								    is_target_a_sw,	/* Wheter target lid is a switch or not */
-								    highest_rank_in_route, reverse_hops);	/* highest visited point in the tree before going down */
+								    reverse_hops,
+								    current_hops + 1);
 	}
 	/* done scanning all the down-going port groups */
 
@@ -2087,12 +2147,12 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 					       IN ftree_sw_t * p_sw,
 					       IN ftree_sw_t * p_prev_sw,
 					       IN uint16_t target_lid,
-					       IN uint8_t target_rank,
 					       IN boolean_t is_real_lid,
 					       IN boolean_t is_main_path,
 					       IN boolean_t is_target_a_sw,
 					       IN uint16_t reverse_hop_credit,
-					       IN uint16_t reverse_hops)
+					       IN uint16_t reverse_hops,
+					       IN uint8_t current_hops)
 {
 	ftree_sw_t *p_remote_sw;
 	uint16_t ports_num;
@@ -2110,12 +2170,11 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 	fabric_route_upgoing_by_going_down(p_ftree, p_sw,	/* local switch - used as a route-upgoing alg. start point */
 					   p_prev_sw,	/* switch that we went up from (NULL means that we went down) */
 					   target_lid,	/* LID that we're routing to */
-					   target_rank,	/* rank of the LID that we're routing to */
 					   is_real_lid,	/* whether this target LID is real or dummy */
 					   is_main_path,	/* whether this path to HCA should by tracked by counters */
 					   is_target_a_sw,	/* Wheter target lid is a switch or not */
-					   p_sw->rank,	/* the highest visited point in the tree before going down */
-					   reverse_hops);	/* Number of reverse_hops done up to this point */
+					   reverse_hops,	/* Number of reverse_hops done up to this point */
+					   current_hops);
 
 	/* recursion stop condition - if it's a root switch, */
 	if (p_sw->rank == 0) {
@@ -2140,12 +2199,12 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 				fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
 								   p_sw,	/* this switch - prev. position switch for the function */
 								   target_lid,	/* LID that we're routing to */
-								   target_rank,	/* rank of the LID that we're routing to */
 								   is_real_lid,	/* whether this target LID is real or dummy */
 								   is_main_path,	/* whether this is path to HCA that should by tracked by counters */
 								   is_target_a_sw,	/* Wheter target lid is a switch or not */
 								   reverse_hop_credit - 1,	/* Remaining reverse_hops allowed */
-								   reverse_hops + 1);	/* Number of reverse_hops done up to this point */
+								   reverse_hops + 1,	/* Number of reverse_hops done up to this point */
+								   current_hops + 1);
 			}
 
 		}
@@ -2244,17 +2303,16 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 				    new_lft[target_lid] =
 				    p_min_port->remote_port_num;
 				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
-					"Switch %s: set path to CA LID %u through port %u\n",
+					"Switch %s: set path to CA LID %u through port %u, hops %u\n",
 					tuple_to_str(p_remote_sw->tuple),
 					target_lid,
-					p_min_port->remote_port_num);
+					p_min_port->remote_port_num, current_hops + 1);
 			}
 			/* On the remote switch that is pointed by the min_group,
 			   set hops for ALL the ports in the remote group. */
 
 			set_hops_on_remote_sw(p_min_group, target_lid,
-					      target_rank - p_remote_sw->rank +
-					      2 * reverse_hops, is_target_a_sw);
+					      current_hops + 1, is_target_a_sw);
 		}
 
 		/* Recursion step:
@@ -2262,12 +2320,12 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 		fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
 						   p_sw,	/* this switch - prev. position switch for the function */
 						   target_lid,	/* LID that we're routing to */
-						   target_rank,	/* rank of the LID that we're routing to */
 						   is_real_lid,	/* whether this target LID is real or dummy */
 						   is_main_path,	/* whether this is path to HCA that should by tracked by counters */
 						   is_target_a_sw,	/* Wheter target lid is a switch or not */
 						   reverse_hop_credit,	/* Remaining reverse_hops allowed */
-						   reverse_hops);	/* Number of reverse_hops done up to this point */
+						   reverse_hops,	/* Number of reverse_hops done up to this point */
+						   current_hops + 1);
 	}
 
 	/* we're done for the third case */
@@ -2335,22 +2393,20 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 
 		/* On the remote switch that is pointed by the p_group,
 		   set hops for ALL the ports in the remote group. */
-
 		set_hops_on_remote_sw(p_group, target_lid,
-				      target_rank - p_remote_sw->rank +
-				      2 * reverse_hops, is_target_a_sw);
+				      current_hops + 1, is_target_a_sw);
 
 		/* Recursion step:
 		   Assign downgoing ports by stepping up, starting on REMOTE switch. */
 		fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
 						   p_sw,	/* this switch - prev. position switch for the function */
 						   target_lid,	/* LID that we're routing to */
-						   target_rank,	/* rank of the LID that we're routing to */
 						   TRUE,	/* whether the target LID is real or dummy */
 						   FALSE,	/* whether this is path to HCA that should by tracked by counters */
 						   is_target_a_sw,	/* Wheter target lid is a switch or not */
 						   reverse_hop_credit,	/* Remaining reverse_hops allowed */
-						   reverse_hops);	/* Number of reverse_hops done up to this point */
+						   reverse_hops,	/* Number of reverse_hops done up to this point */
+						   current_hops + 1);
 	}
 
 	/* If we don't have any reverse hop credits, we are done */
@@ -2374,12 +2430,12 @@ static void fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
 		fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
 						   p_sw,	/* this switch - prev. position switch for the function */
 						   target_lid,	/* LID that we're routing to */
-						   target_rank,	/* rank of the LID that we're routing to */
 						   TRUE,	/* whether the target LID is real or dummy */
 						   TRUE,	/* whether this is path to HCA that should by tracked by counters */
 						   is_target_a_sw,	/* Wheter target lid is a switch or not */
 						   reverse_hop_credit - 1,	/* Remaining reverse_hops allowed */
-						   reverse_hops + 1);	/* Number of reverse_hops done up to this point */
+						   reverse_hops + 1,	/* Number of reverse_hops done up to this point */
+						   current_hops + 1);
 	}
 
 }				/* ftree_fabric_route_downgoing_by_going_up() */
@@ -2451,7 +2507,7 @@ static void fabric_route_to_cns(IN ftree_fabric_t * p_ftree)
 			    p_port->port_num;
 
 			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
-				"Switch %s: set path to CN LID %u through port %u\n",
+				"Switch %s: set path to CN LID %u through port %u, hop 1\n",
 				tuple_to_str(p_sw->tuple),
 				hca_lid, p_port->port_num);
 
@@ -2464,12 +2520,12 @@ static void fabric_route_to_cns(IN ftree_fabric_t * p_ftree)
 			fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
 							   NULL,	/* prev. position switch */
 							   hca_lid,	/* LID that we're routing to */
-							   p_sw->rank + 1,	/* rank of the LID that we're routing to */
 							   TRUE,	/* whether this HCA LID is real or dummy */
 							   TRUE,	/* whether this path to HCA should by tracked by counters */
 							   FALSE,	/* wheter target lid is a switch or not */
 							   0,	/* Number of reverse hops allowed */
-							   0);	/* Number of reverse hops done yet */
+							   0,	/* Number of reverse hops done yet */
+							   1);
 
 			/* count how many real targets have been routed from this leaf switch */
 			routed_targets_on_leaf++;
@@ -2492,12 +2548,12 @@ static void fabric_route_to_cns(IN ftree_fabric_t * p_ftree)
 				fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
 								   NULL,	/* prev. position switch */
 								   0,	/* LID that we're routing to - ignored for dummy HCA */
-								   0,	/* rank of the LID that we're routing to - ignored for dummy HCA */
 								   FALSE,	/* whether this HCA LID is real or dummy */
 								   TRUE,	/* whether this path to HCA should by tracked by counters */
 								   FALSE,	/* Wheter the target LID is a switch or not */
 								   0,	/* Number of reverse hops allowed */
-								   0);	/* Number of reverse hops done yet */
+								   0,	/* Number of reverse hops done yet */
+								   1);
 			}
 		}
 	}
@@ -2579,12 +2635,12 @@ static void fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree)
 			fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
 							   NULL,	/* prev. position switch */
 							   hca_lid,	/* LID that we're routing to */
-							   p_sw->rank + 1,	/* rank of the LID that we're routing to */
 							   TRUE,	/* whether this HCA LID is real or dummy */
 							   TRUE,	/* whether this path to HCA should by tracked by counters */
 							   FALSE,	/* Wheter the target LID is a switch or not */
 							   p_hca_port_group->is_io ? p_ftree->p_osm->subn.opt.max_reverse_hops : 0,	/* Number or reverse hops allowed */
-							   0);	/* Number or reverse hops done yet */
+							   0,	/* Number or reverse hops done yet */
+							   1);
 		}
 		/* done with all the port groups of this HCA - go to next HCA */
 	}
@@ -2632,12 +2688,12 @@ static void fabric_route_to_switches(IN ftree_fabric_t * p_ftree)
 		fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
 						   NULL,	/* prev. position switch */
 						   p_sw->base_lid,	/* LID that we're routing to */
-						   p_sw->rank,	/* rank of the LID that we're routing to */
 						   TRUE,	/* whether the target LID is a real or dummy */
 						   FALSE,	/* whether this path to HCA should by tracked by counters */
 						   TRUE,	/* Wheter the target LID is a switch or not */
 						   0,	/* Number of reverse hops allowed */
-						   0);	/* Number of reverse hops done yet */
+						   0,	/* Number of reverse hops done yet */
+						   0);
 	}
 
 	OSM_LOG_EXIT(&p_ftree->p_osm->log);
@@ -3058,7 +3114,8 @@ static int fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree,
 
 			p_remote_hca_or_sw = (void *)p_remote_sw;
 
-			if (abs(p_sw->rank - p_remote_sw->rank) != 1) {
+			if ((abs(p_sw->rank - p_remote_sw->rank) != 1) &&
+			    (p_sw->rank != p_ftree->max_switch_rank)) {
 				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
 					"ERR AB16: "
 					"Illegal link between switches with ranks %u and %u:\n"



More information about the general mailing list