[ofa-general] [PATCH] opensm/osm_mesh.c: Improve VL utilization

Hal Rosenstock hnrose at comcast.net
Wed Jun 17 13:10:26 PDT 2009


Add steps to the algorithm to further condition the netlist of switches
that is handed to lash to analyze.

(1)  The previous version sorted out all the links out of switches into
dimension order’in the order the dimensions were found which is driven by
the arbitrary choice of links out of the seed’switch. This takes the
additional step of waiting until after the lengths along each dimension
are measured then reordering the links out of each switch so that dimensions
with the largest lengths are ordered first. i.e. if the mesh analysis discovers
that the mesh is a 5x8x6 torus then the coordinates are reordered so that based
on the order of links on each switch the mesh is an 8x6x5 torus.

(2) The previous version left the order of switches in the switch list the
same as when they were discovered by the SMPs which is some sort of depth
first tree walk modified by the random return of MADs if multiples are
outstanding. This takes the additional step of reordering the switches
so that they are presented in odometer’order again with the dimension
with the largest lengths changing fastest.

With this algorithm, we always see what we believe is the optimal result
for each size of toroidal mesh.

As a footnote, when we later studied the effect of broken or imperfect meshes,
we found that the results are dependent on the choice of origin for the mesh
which clearly doesn't matter for a perfect torus with nodes sorted as described
here. Thus there is some additional work required to try to understand how to
select the optimal origin for meshes with defects. We saw that the number of
VLs required was higher when the origin was near a defect so choosing one as
far as possible from the defects seems to be the right idea. This still needs
to be done since it will remove some fairly rare defect cases which now require 9 VLs to route.

Signed-off-by: Robert Pearson <rpearson at systemfabricworks.com>
Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>
---
diff --git a/opensm/opensm/osm_mesh.c b/opensm/opensm/osm_mesh.c
index 1867876..f2dd2f8 100644
--- a/opensm/opensm/osm_mesh.c
+++ b/opensm/opensm/osm_mesh.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008,2009      System Fabric Works, Inc.
+ * Copyright (c) 2008,2009      System Fabric Works, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -1026,12 +1026,15 @@ static inline int ltmag(int a, int b)
 	return (a1 < b1) || (a1 == b1 && a > b);
 }
 
+static int dim_order[MAX_DIMENSION];
+static int dim_reverse[MAX_DIMENSION];
+
 /*
- * reorder_links
+ * reorder_node_links
  *
  * reorder the links out of a switch in sign/dimension order
  */
-static int reorder_links(lash_t *p_lash, int sw)
+static int reorder_node_links(lash_t *p_lash, mesh_t *mesh, int sw)
 {
 	osm_log_t *p_log = &p_lash->p_osm->log;
 	switch_t *s = p_lash->switches[sw];
@@ -1039,9 +1042,10 @@ static int reorder_links(lash_t *p_lash, int sw)
 	int n = node->num_links;
 	link_t **links;
 	int *axes;
-	int i, j;
+	int i, j, k, l;
 	int c;
 	int next = 0;
+	int dimension = mesh->dimension;
 
 	if (!(links = calloc(n, sizeof(link_t *)))) {
 		OSM_LOG(p_log, OSM_LOG_ERROR, "Failed allocating temp array - out of memory\n");
@@ -1057,19 +1061,23 @@ static int reorder_links(lash_t *p_lash, int sw)
 	/*
 	 * find the links with axes
 	 */
-	for (j = 1; j <= 2*node->dimension; j++) {
-		c = j;
-		if (node->coord[(c-1)/2] > 0)
-			c = opposite(s, c);
+	for (i = 0; i < dimension; i++) {
+		j = dim_order[i];
+		for (k = 1; k <= 2; k++) {
+			c = 2*j + k;
 
-		for (i = 0; i < n; i++) {
-			if (!node->links[i])
-				continue;
-			if (node->axes[i] == c) {
-				links[next] = node->links[i];
-				axes[next] = node->axes[i];
-				node->links[i] = NULL;
-				next++;
+			if (node->coord[j] > 0)
+				c = opposite(s, c);
+
+			for (l = 0; l < n; l++) {
+				if (!node->links[l])
+					continue;
+				if (node->axes[l] == c) {
+					links[next] = node->links[l];
+					axes[next] = node->axes[l];
+					node->links[l] = NULL;
+					next++;
+				}
 			}
 		}
 	}
@@ -1099,9 +1107,9 @@ static int reorder_links(lash_t *p_lash, int sw)
 }
 
 /*
- * measure geometry
+ * make_coord
  */
-static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed)
+static int make_coord(lash_t *p_lash, mesh_t *mesh, int seed)
 {
 	osm_log_t *p_log = &p_lash->p_osm->log;
 	int i, j, k;
@@ -1111,8 +1119,6 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed)
 	int dimension = mesh->dimension;
 	int num_switches = p_lash->num_switches;
 	int assigned_axes = 0, unassigned_axes = 0;
-	int max[MAX_DIMENSION];
-	int min[MAX_DIMENSION];
 
 	OSM_LOG_ENTER(p_log);
 
@@ -1120,6 +1126,13 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed)
 		s = p_lash->switches[sw];
 
 		s->node->coord = calloc(dimension, sizeof(int));
+		if (!s->node->coord) {
+			OSM_LOG(p_log, OSM_LOG_ERROR,
+				"Failed allocating coord - out of memory\n");
+			OSM_LOG_EXIT(p_log);
+			return -1;
+		}
+
 		for (i = 0; i < dimension; i++)
 			s->node->coord[i] = (sw == seed) ? 0 : LARGE;
 
@@ -1163,14 +1176,36 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed)
 		}
 	} while (change);
 
-	for (sw = 0; sw < num_switches; sw++) {
-		if (reorder_links(p_lash, sw)) {
-			OSM_LOG_EXIT(p_log);
-			return -1;
-		}
-	}
+	OSM_LOG_EXIT(p_log);
+	return 0;
+}
+
+/*
+ * measure geometry
+ */
+static int measure_geometry(lash_t *p_lash, mesh_t *mesh)
+{
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	int i, j;
+	int sw;
+	switch_t *s;
+	int dimension = mesh->dimension;
+	int num_switches = p_lash->num_switches;
+	int max[MAX_DIMENSION];
+	int min[MAX_DIMENSION];
+	int size[MAX_DIMENSION];
+	int max_size;
+	int max_index;
+
+	OSM_LOG_ENTER(p_log);
 
 	mesh->size = calloc(dimension, sizeof(int));
+	if (!mesh->size) {
+		OSM_LOG(p_log, OSM_LOG_ERROR,
+			"Failed allocating size - out of memory\n");
+		OSM_LOG_EXIT(p_log);
+		return -1;
+	}
 
 	for (i = 0; i < dimension; i++) {
 		max[i] = -LARGE;
@@ -1191,7 +1226,49 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed)
 	}
 
 	for (i = 0; i < dimension; i++)
-		mesh->size[i] = max[i] - min[i] + 1;
+		mesh->size[i] = size[i] = max[i] - min[i] + 1;
+
+	/*
+	 * find an order of dimensions that places largest
+	 * sizes first since this seems to work best with LASH
+	 */
+	for (j = 0; j < dimension; j++) {
+		max_size = -1;
+		max_index = -1;
+
+		for (i = 0; i < dimension; i++) {
+			if (size[i] > max_size) {
+				max_size = size[i];
+				max_index = i;
+			}
+		}
+
+		dim_order[j] = max_index;
+		dim_reverse[max_index] = j;
+		size[max_index] = -1;
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return 0;
+}
+
+/*
+ * reorder links
+ */
+static int reorder_links(lash_t *p_lash, mesh_t *mesh)
+{
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	int sw;
+	int num_switches = p_lash->num_switches;
+
+	OSM_LOG_ENTER(p_log);
+
+	for (sw = 0; sw < num_switches; sw++) {
+		if (reorder_node_links(p_lash, mesh, sw)) {
+			OSM_LOG_EXIT(p_log);
+			return -1;
+		}
+	}
 
 	OSM_LOG_EXIT(p_log);
 	return 0;
@@ -1387,7 +1464,13 @@ int osm_do_mesh_analysis(lash_t *p_lash)
 	if (s->node->type) {
 		make_geometry(p_lash, max_class_type);
 
-		if (measure_geometry(p_lash, mesh, max_class_type))
+		if (make_coord(p_lash, mesh, max_class_type))
+			goto err;
+
+		if (measure_geometry(p_lash, mesh))
+			goto err;
+
+		if (reorder_links(p_lash, mesh))
 			goto err;
 
 		p = buf;



More information about the general mailing list