[ofa-general] [PATCH] opensm: routing chaining

Sasha Khapyorsky sashak at voltaire.com
Sun Sep 28 13:42:44 PDT 2008


From: Albert Chu <chu11 at llnl.gov>

Routing chaining is the ability to configure the order in which routing
algorithms are applied in opensm, e.g.

	-R ftree,updn,minhop

This tries ftree routing first. If ftree fails, updn is tried; if updn
also fails, minhop is tried.

To make this possible, the routing code had to be restructured, because
it can no longer assume that only one routing engine is specified.

Always set up at least one routing engine explicitly, rather than
assuming an implicit "fallthrough" minhop routing engine.  If every
configured routing engine fails, minhop is used as a last resort.  A
next pointer is added to struct osm_routing_engine, and routing engines
are now kept as a linked list instead of a single struct.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 opensm/include/opensm/osm_opensm.h    |   10 ++-
 opensm/include/opensm/osm_subnet.h    |    7 +-
 opensm/include/opensm/osm_ucast_mgr.h |    2 +-
 opensm/man/opensm.8.in                |    8 ++-
 opensm/opensm/main.c                  |   10 ++-
 opensm/opensm/osm_opensm.c            |  121 +++++++++++++++++++++++----------
 opensm/opensm/osm_subnet.c            |   11 ++-
 opensm/opensm/osm_ucast_file.c        |   19 ++---
 opensm/opensm/osm_ucast_ftree.c       |   35 ++++------
 opensm/opensm/osm_ucast_lash.c        |   16 ++--
 opensm/opensm/osm_ucast_mgr.c         |  119 +++++++++++++++++++++-----------
 opensm/opensm/osm_ucast_updn.c        |   10 ++--
 12 files changed, 226 insertions(+), 142 deletions(-)

diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h
index 5d45724..c121be4 100644
--- a/opensm/include/opensm/osm_opensm.h
+++ b/opensm/include/opensm/osm_opensm.h
@@ -126,6 +126,7 @@ struct osm_routing_engine {
 	int (*ucast_build_fwd_tables) (void *context);
 	void (*ucast_dump_tables) (void *context);
 	void (*delete) (void *context);
+	struct osm_routing_engine *next;
 };
 /*
 * FIELDS
@@ -148,6 +149,9 @@ struct osm_routing_engine {
 *	delete
 *		The delete method, may be used for routing engine
 *		internals cleanup.
+*
+*	next
+*		Pointer to next routing engine in the list.
 */
 
 /****s* OpenSM: OpenSM/osm_opensm_t
@@ -178,7 +182,7 @@ typedef struct osm_opensm {
 	osm_log_t log;
 	cl_dispatcher_t disp;
 	cl_plock_t lock;
-	struct osm_routing_engine routing_engine;
+	struct osm_routing_engine *routing_engine_list;
 	osm_routing_engine_type_t routing_engine_used;
 	osm_stats_t stats;
 	osm_console_t console;
@@ -221,8 +225,8 @@ typedef struct osm_opensm {
 *	lock
 *		Shared lock guarding most OpenSM structures.
 *
-*	routing_engine
-*		Routing engine; will be initialized then used.
+*	routing_engine_list
+*		List of routing engines that should be tried for use.
 *
 *	routing_engine_used
 *		Indicates which routing engine was used to route a subnet.
diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index f90f7ea..0c7f3b9 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -182,7 +182,7 @@ typedef struct osm_subn_opt {
 	char *port_prof_ignore_file;
 	boolean_t port_profile_switch_nodes;
 	boolean_t sweep_on_trap;
-	char *routing_engine_name;
+	char *routing_engine_names;
 	boolean_t connect_roots;
 	char *lid_matrix_dump_file;
 	char *lfts_file;
@@ -353,9 +353,8 @@ typedef struct osm_subn_opt {
 *	sweep_on_trap
 *		Received traps will initiate a new sweep.
 *
-*	routing_engine_name
-*		Name of used routing engine
-*		(other than default Min Hop Algorithm)
+*	routing_engine_names
+*		Name of routing engine(s) to use.
 *
 *	connect_roots
 *		The option which will enforce root to root connectivity with
diff --git a/opensm/include/opensm/osm_ucast_mgr.h b/opensm/include/opensm/osm_ucast_mgr.h
index 1dc9a37..59ba9fa 100644
--- a/opensm/include/opensm/osm_ucast_mgr.h
+++ b/opensm/include/opensm/osm_ucast_mgr.h
@@ -264,7 +264,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr,
 *
 * SYNOPSIS
 */
-void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
+int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
 /*
 * PARAMETERS
 *	p_mgr
diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
index 565c5f8..6790d11 100644
--- a/opensm/man/opensm.8.in
+++ b/opensm/man/opensm.8.in
@@ -9,7 +9,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
 [\-F | \-\-config <file_name>] [\-c(reate-config) <file_name>]
 [\-g(uid) <GUID in hex>] [\-l(mc) <LMC>]
 [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)]
-[\-R <engine name> | \-\-routing_engine <engine name>]
+[\-R <engine name(s)> | \-\-routing_engine <engine name(s)>]
 [\-z | \-\-connect_roots]
 [\-M <file name> | \-\-lid_matrix_file <file name>]
 [\-U <file name> | \-\-lfts_file <file name>]
@@ -116,8 +116,10 @@ Without -r, OpenSM attempts to preserve existing
 LID assignments resolving multiple use of same LID.
 .TP
 \fB\-R\fR, \fB\-\-routing_engine\fR
-This option chooses routing engine instead of Min Hop
-algorithm (default).
+This option chooses routing engine(s) to use instead of Min Hop
+algorithm (default).  Multiple routing engines can be specified
+separated by commas so that specific ordering of routing algorithms
+will be tried if earlier routing engines fail.
 Supported engines: minhop, updn, file, ftree, lash, dor
 .TP
 \fB\-z\fR, \fB\-\-connect_roots\fR
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 01bfddf..2f53157 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -177,8 +177,10 @@ static void show_usage(void)
 	       "          LID assignments resolving multiple use of same LID.\n\n");
 	printf("-R\n"
 	       "--routing_engine <engine name>\n"
-	       "          This option chooses routing engine instead of Min Hop\n"
-	       "          algorithm (default).\n"
+	       "          This option chooses routing engine(s) to use instead of default\n"
+	       "          Min Hop algorithm.  Multiple routing engines can be specified\n"
+	       "          separated by commas so that specific ordering of routing\n"
+	       "          algorithms will be tried if earlier routing engines fail.\n"
 	       "          Supported engines: updn, file, ftree, lash, dor\n\n");
 	printf("-z\n"
 	       "--connect_roots\n"
@@ -851,8 +853,8 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'R':
-			opt.routing_engine_name = optarg;
-			printf(" Activate \'%s\' routing engine\n", optarg);
+			opt.routing_engine_names = optarg;
+			printf(" Activate \'%s\' routing engine(s)\n", optarg);
 			break;
 
 		case 'z':
diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index d17fed3..4970d0c 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -61,24 +61,23 @@
 
 struct routing_engine_module {
 	const char *name;
-	int (*setup) (osm_opensm_t * p_osm);
+	int (*setup) (struct osm_routing_engine *, osm_opensm_t *);
 };
 
-extern int osm_ucast_updn_setup(osm_opensm_t * p_osm);
-extern int osm_ucast_file_setup(osm_opensm_t * p_osm);
-extern int osm_ucast_ftree_setup(osm_opensm_t * p_osm);
-extern int osm_ucast_lash_setup(osm_opensm_t * p_osm);
-
-static int osm_ucast_null_setup(osm_opensm_t * p_osm);
+extern int osm_ucast_minhop_setup(struct osm_routing_engine *, osm_opensm_t *);
+extern int osm_ucast_updn_setup(struct osm_routing_engine *, osm_opensm_t *);
+extern int osm_ucast_file_setup(struct osm_routing_engine *, osm_opensm_t *);
+extern int osm_ucast_ftree_setup(struct osm_routing_engine *, osm_opensm_t *);
+extern int osm_ucast_lash_setup(struct osm_routing_engine *, osm_opensm_t *);
+extern int osm_ucast_dor_setup(struct osm_routing_engine *, osm_opensm_t *);
 
 const static struct routing_engine_module routing_modules[] = {
-	{"null", osm_ucast_null_setup},
-	{"minhop", osm_ucast_null_setup},
+	{"minhop", osm_ucast_minhop_setup},
 	{"updn", osm_ucast_updn_setup},
 	{"file", osm_ucast_file_setup},
 	{"ftree", osm_ucast_ftree_setup},
 	{"lash", osm_ucast_lash_setup},
-	{"dor", osm_ucast_null_setup},
+	{"dor", osm_ucast_dor_setup},
 	{NULL, NULL}
 };
 
@@ -135,33 +134,77 @@ osm_routing_engine_type_t osm_routing_engine_type(IN const char *str)
 
 /**********************************************************************
  **********************************************************************/
-static int setup_routing_engine(osm_opensm_t * p_osm, const char *name)
+static void append_routing_engine(osm_opensm_t *osm,
+				  struct osm_routing_engine *routing_engine)
 {
-	const struct routing_engine_module *r;
+	struct osm_routing_engine *r;
+
+	routing_engine->next = NULL;
+
+	if (!osm->routing_engine_list) {
+		osm->routing_engine_list = routing_engine;
+		return;
+	}
+
+	r = osm->routing_engine_list;
+	while (r->next)
+		r = r->next;
 
-	for (r = routing_modules; r->name && *r->name; r++) {
-		if (!strcmp(r->name, name)) {
-			p_osm->routing_engine.name = r->name;
-			if (r->setup(p_osm)) {
-				OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
+	r->next = routing_engine;
+}
+
+static void setup_routing_engine(osm_opensm_t *osm, const char *name)
+{
+	struct osm_routing_engine *re;
+	const struct routing_engine_module *m;
+
+	for (m = routing_modules; m->name && *m->name; m++) {
+		if (!strcmp(m->name, name)) {
+			re = malloc(sizeof(struct osm_routing_engine));
+			if (!re) {
+				OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
+					"memory allocation failed\n");
+				return;
+			}
+			memset(re, 0, sizeof(struct osm_routing_engine));
+
+			re->name = m->name;
+			if (m->setup(re, osm)) {
+				OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
 					"setup of routing"
 					" engine \'%s\' failed\n", name);
-				return -2;
+				return;
 			}
-			OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
-				"\'%s\' routing engine set up\n",
-				p_osm->routing_engine.name);
-			return 0;
+			OSM_LOG(&osm->log, OSM_LOG_DEBUG,
+				"\'%s\' routing engine set up\n", re->name);
+			append_routing_engine(osm, re);
+			return;
 		}
 	}
-	return -1;
+
+	OSM_LOG(&osm->log, OSM_LOG_ERROR,
+		"cannot find or setup routing engine \'%s\'", name);
 }
 
-static int osm_ucast_null_setup(osm_opensm_t * p_osm)
+static void setup_routing_engines(osm_opensm_t *osm, const char *engine_names)
 {
-	OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
-		"nothing yet - using default (minhop) routing engine\n");
-	return 0;
+	char *name, *str, *p;
+
+	if (!engine_names || !*engine_names) {
+		setup_routing_engine(osm, "minhop");
+		return;
+	}
+
+	str = strdup(engine_names);
+	name = strtok_r(str, ", \t\n", &p);
+	while (name && *name) {
+		setup_routing_engine(osm, name);
+		name = strtok_r(NULL, ", \t\n", &p);
+	}
+	free(str);
+
+	if (!osm->routing_engine_list)
+		setup_routing_engine(osm, "minhop");
 }
 
 /**********************************************************************
@@ -181,6 +224,20 @@ void osm_opensm_construct(IN osm_opensm_t * const p_osm)
 
 /**********************************************************************
  **********************************************************************/
+static void destroy_routing_engines(osm_opensm_t *osm)
+{
+	struct osm_routing_engine *r, *next;
+
+	next = osm->routing_engine_list;
+	while (next) {
+		r = next;
+		next = r->next;
+		if (r->delete)
+			r->delete(r->context);
+		free(r);
+	}
+}
+
 void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
 {
 	/* in case of shutdown through exit proc - no ^C */
@@ -218,8 +275,7 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
 	osm_sa_db_file_dump(p_osm);
 
 	/* do the destruction in reverse order as init */
-	if (p_osm->routing_engine.delete)
-		p_osm->routing_engine.delete(p_osm->routing_engine.context);
+	destroy_routing_engines(p_osm);
 	osm_sa_destroy(&p_osm->sa);
 	osm_sm_destroy(&p_osm->sm);
 #ifdef ENABLE_OSM_PERF_MGR
@@ -371,12 +427,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
 		goto Exit;
 #endif				/* ENABLE_OSM_PERF_MGR */
 
-	if (p_opt->routing_engine_name &&
-	    setup_routing_engine(p_osm, p_opt->routing_engine_name))
-		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
-			"cannot find or setup routing engine"
-			" \'%s\'. Default will be used instead\n",
-			p_opt->routing_engine_name);
+	setup_routing_engines(p_osm, p_opt->routing_engine_names);
 
 	p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
 
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 278aa3d..a39ce75 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -442,7 +442,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt)
 	p_opt->port_prof_ignore_file = NULL;
 	p_opt->port_profile_switch_nodes = FALSE;
 	p_opt->sweep_on_trap = TRUE;
-	p_opt->routing_engine_name = NULL;
+	p_opt->routing_engine_names = NULL;
 	p_opt->connect_roots = FALSE;
 	p_opt->lid_matrix_dump_file = NULL;
 	p_opt->lfts_file = NULL;
@@ -1264,7 +1264,7 @@ int osm_subn_parse_conf_file(char *file_name, osm_subn_opt_t * const p_opts)
 				    p_key, p_val, &p_opts->sweep_on_trap);
 
 		opts_unpack_charp("routing_engine",
-				  p_key, p_val, &p_opts->routing_engine_name);
+				  p_key, p_val, &p_opts->routing_engine_names);
 
 		opts_unpack_boolean("connect_roots",
 				    p_key, p_val, &p_opts->connect_roots);
@@ -1521,9 +1521,12 @@ int osm_subn_write_conf_file(char *file_name, IN osm_subn_opt_t *const p_opts)
 
 	fprintf(opts_file,
 		"# Routing engine\n"
+		"# Multiple routing engines can be specified separated by\n"
+		"# commas so that specific ordering of routing algorithms will\n"
+		"# be tried if earlier routing engines fail.\n"
 		"# Supported engines: minhop, updn, file, ftree, lash, dor\n"
-		"routing_engine %s\n\n", p_opts->routing_engine_name ?
-		p_opts->routing_engine_name : null_str);
+		"routing_engine %s\n\n", p_opts->routing_engine_names ?
+		p_opts->routing_engine_names : null_str);
 
 	fprintf(opts_file,
 		"# Connect roots (use FALSE if unsure)\n"
diff --git a/opensm/opensm/osm_ucast_file.c b/opensm/opensm/osm_ucast_file.c
index 3d00cb2..cbd65c1 100644
--- a/opensm/opensm/osm_ucast_file.c
+++ b/opensm/opensm/osm_ucast_file.c
@@ -135,14 +135,13 @@ static int do_ucast_file_load(void *context)
 		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
 			"LFTs file name is not given; "
 			"using default routing algorithm\n");
-		return -1;
+		return 1;
 	}
 
 	file = fopen(file_name, "r");
 	if (!file) {
 		OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6302: "
-			"cannot open ucast dump file \'%s\'; "
-			"using default routing algorithm\n", file_name);
+			"cannot open ucast dump file \'%s\': %m\n", file_name);
 		return -1;
 	}
 
@@ -270,15 +269,13 @@ static int do_lid_matrix_file_load(void *context)
 		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
 			"lid matrix file name is not given; "
 			"using default lid matrix generation algorithm\n");
-		return -1;
+		return 1;
 	}
 
 	file = fopen(file_name, "r");
 	if (!file) {
 		OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6305: "
-			"cannot open lid matrix file \'%s\'; "
-			"using default lid matrix generation algorithm\n",
-			file_name);
+			"cannot open lid matrix file \'%s\': %m\n", file_name);
 		return -1;
 	}
 
@@ -389,10 +386,10 @@ static int do_lid_matrix_file_load(void *context)
 	return 0;
 }
 
-int osm_ucast_file_setup(osm_opensm_t * p_osm)
+int osm_ucast_file_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
 {
-	p_osm->routing_engine.context = (void *)p_osm;
-	p_osm->routing_engine.build_lid_matrices = do_lid_matrix_file_load;
-	p_osm->routing_engine.ucast_build_fwd_tables = do_ucast_file_load;
+	r->context = osm;
+	r->build_lid_matrices = do_lid_matrix_file_load;
+	r->ucast_build_fwd_tables = do_ucast_file_load;
 	return 0;
 }
diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
index 1d3233c..15168b7 100644
--- a/opensm/opensm/osm_ucast_ftree.c
+++ b/opensm/opensm/osm_ucast_ftree.c
@@ -3552,8 +3552,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
 	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n");
 	if (__osm_ftree_fabric_rank(p_ftree) != 0) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Failed ranking the tree  - "
-			"fat-tree routing falls back to default routing\n");
+			"Failed ranking the tree\n");
 		status = -1;
 		goto Exit;
 	}
@@ -3567,14 +3566,12 @@ static int __osm_ftree_construct_fabric(IN void *context)
 		"Populating CA & switch ports\n");
 	if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric topology is not a fat-tree - "
-			"routing falls back to default routing\n");
+			"Fabric topology is not a fat-tree\n");
 		status = -1;
 		goto Exit;
 	} else if (p_ftree->cn_num == 0) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric has no valid compute nodes - "
-			"routing falls back to default routing\n");
+			"Fabric has no valid compute nodes\n");
 		status = -1;
 		goto Exit;
 	}
@@ -3586,8 +3583,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
 	if (__osm_ftree_fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK ||
 	    __osm_ftree_fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric rank is %u (should be between %u and %u) - "
-			"fat-tree routing falls back to default routing\n",
+			"Fabric rank is %u (should be between %u and %u)\n",
 			__osm_ftree_fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK,
 			FAT_TREE_MAX_RANK);
 		status = -1;
@@ -3600,8 +3596,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
 	   validation - it checks that all the CNs are at the same rank. */
 	if (__osm_ftree_fabric_mark_leaf_switches(p_ftree)) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric topology is not a fat-tree - "
-			"routing falls back to default routing\n");
+			"Fabric topology is not a fat-tree\n");
 		status = -1;
 		goto Exit;
 	}
@@ -3619,8 +3614,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
 	   In any case, the first and the last switches in the array are REAL leafs. */
 	if (__osm_ftree_fabric_create_leaf_switch_array(p_ftree)) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric topology is not a fat-tree - "
-			"routing falls back to default routing\n");
+			"Fabric topology is not a fat-tree\n");
 		status = -1;
 		goto Exit;
 	}
@@ -3640,8 +3634,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
 	if (!__osm_ftree_fabric_roots_provided(p_ftree) &&
 	    !__osm_ftree_fabric_validate_topology(p_ftree)) {
 		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
-			"Fabric topology is not a fat-tree - "
-			"routing falls back to default routing\n");
+			"Fabric topology is not a fat-tree\n");
 		status = -1;
 		goto Exit;
 	}
@@ -3726,7 +3719,7 @@ static void __osm_ftree_delete(IN void *context)
 /***************************************************
  ***************************************************/
 
-int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
+int osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
 {
 	ftree_fabric_t *p_ftree = __osm_ftree_fabric_create();
 	if (!p_ftree)
@@ -3734,12 +3727,10 @@ int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
 
 	p_ftree->p_osm = p_osm;
 
-	p_osm->routing_engine.context = (void *)p_ftree;
-	p_osm->routing_engine.build_lid_matrices = __osm_ftree_construct_fabric;
-	p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_do_routing;
-	p_osm->routing_engine.delete = __osm_ftree_delete;
+	r->context = (void *)p_ftree;
+	r->build_lid_matrices = __osm_ftree_construct_fabric;
+	r->ucast_build_fwd_tables = __osm_ftree_do_routing;
+	r->delete = __osm_ftree_delete;
+
 	return 0;
 }
-
-/***************************************************
- ***************************************************/
diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
index b985e9a..ce3982f 100644
--- a/opensm/opensm/osm_ucast_lash.c
+++ b/opensm/opensm/osm_ucast_lash.c
@@ -785,7 +785,7 @@ static int init_lash_structures(lash_t * p_lash)
 	unsigned vl_min = p_lash->vl_min;
 	unsigned num_switches = p_lash->num_switches;
 	osm_log_t *p_log = &p_lash->p_osm->log;
-	int status = IB_SUCCESS;
+	int status = 0;
 	unsigned int i, j, k;
 
 	OSM_LOG_ENTER(p_log);
@@ -852,7 +852,7 @@ static int init_lash_structures(lash_t * p_lash)
 	goto Exit;
 
 Exit_Mem_Error:
-	status = IB_ERROR;
+	status = -1;
 	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
 		"Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n",
 		errno, ENOMEM);
@@ -875,7 +875,7 @@ static int lash_core(lash_t * p_lash)
 	int stop = 0, output_link, i_next_switch;
 	int output_link2, i_next_switch2;
 	int cycle_found2 = 0;
-	int status = IB_SUCCESS;
+	int status = 0;
 	int *switch_bitmap = NULL;	/* Bitmap to check if we have processed this pair */
 
 	OSM_LOG_ENTER(p_log);
@@ -1028,7 +1028,7 @@ static int lash_core(lash_t * p_lash)
 	goto Exit;
 
 Error_Not_Enough_Lanes:
-	status = IB_ERROR;
+	status = -1;
 	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
 		"Lane requirements (%d) exceed available lanes (%d)\n",
 		p_lash->vl_min, lanes_needed);
@@ -1360,15 +1360,15 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, osm_port_t * p_src_port,
 	return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
 }
 
-int osm_ucast_lash_setup(osm_opensm_t * p_osm)
+int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
 {
 	lash_t *p_lash = lash_create(p_osm);
 	if (!p_lash)
 		return -1;
 
-	p_osm->routing_engine.context = p_lash;
-	p_osm->routing_engine.ucast_build_fwd_tables = lash_process;
-	p_osm->routing_engine.delete = lash_delete;
+	r->context = p_lash;
+	r->ucast_build_fwd_tables = lash_process;
+	r->delete = lash_delete;
 
 	return 0;
 }
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index 9d0ad13..935846c 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -216,7 +216,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
 	uint8_t port;
 	boolean_t is_ignored_by_port_prof;
 	ib_net64_t node_guid;
-	struct osm_routing_engine *p_routing_eng;
 	unsigned start_from = 1;
 
 	OSM_LOG_ENTER(p_mgr->p_log);
@@ -253,8 +252,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
 
 	node_guid = osm_node_get_node_guid(p_sw->p_node);
 
-	p_routing_eng = &p_mgr->p_subn->p_osm->routing_engine;
-
 	/*
 	   The lid matrix contains the number of hops to each
 	   lid from each port.  From this information we determine
@@ -269,18 +266,9 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
 		/* do not try to overwrite the ppro of non existing port ... */
 		is_ignored_by_port_prof = TRUE;
 
-		/* Up/Down routing can cause unreachable routes between some
-		   switches so we do not report that as an error in that case */
-		if (!p_routing_eng->build_lid_matrices) {
-			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A08: "
-				"No path to get to LID %u from switch 0x%"
-				PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
-			/* trigger a new sweep - try again ... */
-			p_mgr->p_subn->subnet_initialization_error = TRUE;
-		} else
-			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
-				"No path to get to LID %u from switch 0x%"
-				PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"No path to get to LID %u from switch 0x%" PRIx64 "\n",
+			lid_ho, cl_ntoh64(node_guid));
 	} else {
 		osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
 
@@ -583,7 +571,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
 
 /**********************************************************************
  **********************************************************************/
-void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
+int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
 {
 	uint32_t i;
 	uint32_t iteration_max;
@@ -646,6 +634,8 @@ void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
 		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
 			"Min-hop propagated in %d steps\n", i);
 	}
+
+	return 0;
 }
 
 /**********************************************************************
@@ -752,7 +742,7 @@ static void clear_prof_ignore_flag(cl_map_item_t * const p_map_item, void *ctx)
 	}
 }
 
-static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
+static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
 {
 	cl_qlist_init(&p_mgr->port_order_list);
 
@@ -786,27 +776,56 @@ static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
 			   __osm_ucast_mgr_process_tbl, p_mgr);
 
 	cl_qlist_remove_all(&p_mgr->port_order_list);
+
+	return 0;
 }
 
 /**********************************************************************
  **********************************************************************/
+static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t *osm)
+{
+	int ret;
+
+	OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
+		"building routing with \'%s\' routing algorithm...\n", r->name);
+
+	if (!r->build_lid_matrices ||
+	    (ret = r->build_lid_matrices(r->context)) > 0)
+		ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
+
+	if (ret < 0) {
+		OSM_LOG(&osm->log, OSM_LOG_ERROR,
+			"%s: cannot build lid matrices.\n", r->name);
+		return ret;
+	}
+
+	if (!r->ucast_build_fwd_tables ||
+	    (ret = r->ucast_build_fwd_tables(r->context)) > 0)
+		ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
+
+	if (ret < 0) {
+		OSM_LOG(&osm->log, OSM_LOG_ERROR,
+			"%s: cannot build fwd tables.\n", r->name);
+		return ret;
+	}
+
+	osm->routing_engine_used = osm_routing_engine_type(r->name);
+
+	return 0;
+}
+
 osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
 {
 	osm_opensm_t *p_osm;
 	struct osm_routing_engine *p_routing_eng;
 	osm_signal_t signal = OSM_SIGNAL_DONE;
 	cl_qmap_t *p_sw_guid_tbl;
-	int blm = 0;
-	int ubft = 0;
 
 	OSM_LOG_ENTER(p_mgr->p_log);
 
 	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
 	p_osm = p_mgr->p_subn->p_osm;
-	p_routing_eng = &p_osm->routing_engine;
-
-	p_mgr->is_dor = p_routing_eng->name
-	    && (strcmp(p_routing_eng->name, "dor") == 0);
+	p_routing_eng = p_osm->routing_engine_list;
 
 	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
 
@@ -819,28 +838,19 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
 
 	p_mgr->any_change = FALSE;
 
-	if (!p_routing_eng->build_lid_matrices ||
-	    (blm = p_routing_eng->build_lid_matrices(p_routing_eng->context)))
-		osm_ucast_mgr_build_lid_matrices(p_mgr);
+	p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
+	while (p_routing_eng) {
+		if (!ucast_mgr_route(p_routing_eng, p_osm))
+			break;
+		p_routing_eng = p_routing_eng->next;
+	}
 
-	/*
-	   Now that the lid matrices have been built, we can
-	   build and download the switch forwarding tables.
-	 */
-	if (!p_routing_eng->ucast_build_fwd_tables ||
-	    (ubft =
-	     p_routing_eng->ucast_build_fwd_tables(p_routing_eng->context)))
+	if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
+		/* If configured routing algorithm failed, use default MinHop */
+		osm_ucast_mgr_build_lid_matrices(p_mgr);
 		ucast_mgr_build_lfts(p_mgr);
-
-	/* 'file' routing engine has one unique logic corner case */
-	if (p_routing_eng->name && (strcmp(p_routing_eng->name, "file") == 0)
-	    && (!blm || !ubft))
-		p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_FILE;
-	else if (!blm && !ubft)
-		p_osm->routing_engine_used =
-		    osm_routing_engine_type(p_routing_eng->name);
-	else
 		p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
+	}
 
 	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
 		"%s tables configured on all switches\n",
@@ -861,3 +871,28 @@ Exit:
 	OSM_LOG_EXIT(p_mgr->p_log);
 	return (signal);
 }
+
+static int ucast_build_lid_matrices(void *context)
+{
+	return osm_ucast_mgr_build_lid_matrices(context);
+}
+
+static int ucast_build_lfts(void *context)
+{
+	return ucast_mgr_build_lfts(context);
+}
+
+int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
+{
+	r->context = &osm->sm.ucast_mgr;
+	r->build_lid_matrices = ucast_build_lid_matrices;
+	r->ucast_build_fwd_tables = ucast_build_lfts;
+	return 0;
+}
+
+int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
+{
+	osm_ucast_minhop_setup(r, osm);
+	osm->sm.ucast_mgr.is_dor = 1;
+	return 0;
+}
diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
index 90e9af8..4fdcc78 100644
--- a/opensm/opensm/osm_ucast_updn.c
+++ b/opensm/opensm/osm_ucast_updn.c
@@ -643,7 +643,7 @@ static int __osm_updn_call(void *ctx)
 	} else {
 		OSM_LOG(&p_updn->p_osm->log, OSM_LOG_INFO,
 			"disabling UPDN algorithm, no root nodes were found\n");
-		ret = 1;
+		ret = -1;
 	}
 
 	if (osm_log_is_active(&p_updn->p_osm->log, OSM_LOG_ROUTING))
@@ -669,7 +669,7 @@ static void __osm_updn_delete(void *context)
 	free(context);
 }
 
-int osm_ucast_updn_setup(osm_opensm_t * p_osm)
+int osm_ucast_updn_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
 {
 	updn_t *p_updn;
 
@@ -680,9 +680,9 @@ int osm_ucast_updn_setup(osm_opensm_t * p_osm)
 
 	p_updn->p_osm = p_osm;
 
-	p_osm->routing_engine.context = p_updn;
-	p_osm->routing_engine.delete = __osm_updn_delete;
-	p_osm->routing_engine.build_lid_matrices = __osm_updn_call;
+	r->context = p_updn;
+	r->delete = __osm_updn_delete;
+	r->build_lid_matrices = __osm_updn_call;
 
 	return 0;
 }
-- 
1.6.0.2.287.g3791f




More information about the general mailing list