[ofa-general] [PATCH] opensm/updn: --connect_roots option
Sasha Khapyorsky
sashak at voltaire.com
Thu Jun 21 14:29:20 PDT 2007
With this option up/down preserves route paths (based on min hops
knowledge) between root switches. This makes up/down IBA compliant
(where all to all connectivity is required), OTOH this violates up/down
deadlock free algorithm. By default this option is 'off'.
Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
opensm/include/opensm/osm_subnet.h | 6 ++++++
opensm/man/opensm.8 | 8 +++++++-
opensm/opensm/main.c | 15 ++++++++++++++-
opensm/opensm/osm_subnet.c | 10 ++++++++++
opensm/opensm/osm_ucast_updn.c | 27 ++++++++++++++++++++++++++-
5 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 2ee5689..43b1589 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -276,6 +276,7 @@ typedef struct _osm_subn_opt
boolean_t sweep_on_trap;
osm_testability_modes_t testability_mode;
char * routing_engine_name;
+ boolean_t connect_roots;
char * lid_matrix_dump_file;
char * ucast_dump_file;
char * root_guid_file;
@@ -445,6 +446,11 @@ typedef struct _osm_subn_opt
* Name of used routing engine
* (other than default Min Hop Algorithm)
*
+* connect_roots
+* The option which will enforce root to root connectivity with
+* up/down routing engine (even if this violates "pure" deadlock
+* free up/down algorithm)
+*
* lid_matrix_dump_file
* Name of the lid matrix dump file from where switch
* lid matrices (min hops tables) will be loaded
diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8
index 4d35689..40e0235 100644
--- a/opensm/man/opensm.8
+++ b/opensm/man/opensm.8
@@ -5,7 +5,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
.SH SYNOPSIS
.B opensm
-[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
+[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-z | \-\-connect_roots] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
.SH DESCRIPTION
.PP
@@ -94,6 +94,12 @@ This option chooses routing engine instead of Min Hop
algorithm (default).
Supported engines: updn, file, ftree, lash
.TP
+\fB\-z\fR, \fB\-\-connect_roots\fR
+This option enforces a routing engine (currently up/down
+only) to make connectivity between root switches and in
+this way to be fully IBA compliant. In many cases this can
+violate "pure" deadlock free algorithm, so use it carefully.
+.TP
\fB\-M\fR, \fB\-\-lid_matrix_file\fR
This option specifies the name of the lid matrix dump file
from where switch lid matrices (min hops tables will be
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 0d5e0eb..e182276 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -175,6 +175,13 @@ show_usage(void)
" This option chooses routing engine instead of Min Hop\n"
" algorithm (default).\n"
" Supported engines: updn, file, ftree\n\n");
+ printf( "-z\n"
+ "--connect_roots\n"
+ " This option enforces a routing engine (currently\n"
+ " up/down only) to make connectivity between root switches\n"
+ " and in this way to be fully IBA complaint. In many cases\n"
+ " this can violate \"pure\" deadlock free algorithm, so\n"
+ " use it carefully.\n\n");
printf( "-M\n"
"--lid_matrix_file <file name>\n"
" This option specifies the name of the lid matrix dump file\n"
@@ -591,7 +598,7 @@ main(
char *ignore_guids_file_name = NULL;
uint32_t val;
const char * const short_option =
- "i:f:ed:g:l:L:s:t:a:u:R:M:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
+ "i:f:ed:g:l:L:s:t:a:u:R:zM:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
/*
In the array below, the 2nd parameter specifies the number
@@ -625,6 +632,7 @@ main(
{ "priority", 1, NULL, 'p'},
{ "smkey", 1, NULL, 'k'},
{ "routing_engine",1, NULL, 'R'},
+ { "connect_roots", 0, NULL, 'z'},
{ "lid_matrix_file",1, NULL, 'M'},
{ "ucast_file", 1, NULL, 'U'},
{ "sadb_file", 1, NULL, 'S'},
@@ -876,6 +884,11 @@ main(
printf(" Activate \'%s\' routing engine\n", optarg);
break;
+ case 'z':
+ opt.connect_roots = TRUE;
+ printf(" Connect roots option is on\n");
+ break;
+
case 'M':
opt.lid_matrix_dump_file = optarg;
printf(" Lid matrix dump file is \'%s\'\n", optarg);
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 82d66f9..8f429ae 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -500,6 +500,7 @@ osm_subn_set_default_opt(
p_opt->sweep_on_trap = TRUE;
p_opt->testability_mode = OSM_TEST_MODE_NONE;
p_opt->routing_engine_name = NULL;
+ p_opt->connect_roots = FALSE;
p_opt->lid_matrix_dump_file = NULL;
p_opt->ucast_dump_file = NULL;
p_opt->root_guid_file = NULL;
@@ -1290,6 +1291,10 @@ osm_subn_parse_conf_file(
"routing_engine",
p_key, p_val, &p_opts->routing_engine_name);
+ __osm_subn_opts_unpack_boolean(
+ "connect_roots",
+ p_key, p_val, &p_opts->connect_roots);
+
__osm_subn_opts_unpack_charp(
"log_file", p_key, p_val, &p_opts->log_file);
@@ -1545,6 +1550,11 @@ osm_subn_write_conf_file(
"# Routing engine\n"
"routing_engine %s\n\n",
p_opts->routing_engine_name);
+ if (p_opts->connect_roots)
+ fprintf( opts_file,
+ "# Connect roots (use FALSE if unsure)\n"
+ "connect_roots %s\n\n",
+ p_opts->connect_roots ? "TRUE" : "FALSE");
if (p_opts->lid_matrix_dump_file)
fprintf( opts_file,
"# Lid matrix dump file name\n"
diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
index af5ee4e..db8e60a 100644
--- a/opensm/opensm/osm_ucast_updn.c
+++ b/opensm/opensm/osm_ucast_updn.c
@@ -449,6 +449,24 @@ updn_subn_rank(
/**********************************************************************
**********************************************************************/
+/* hack: preserve min hops entries to any other root switches */
+static void
+updn_clear_root_hops(updn_t *p_updn, osm_switch_t *p_sw)
+{
+ osm_port_t *p_port;
+ unsigned i;
+
+ for ( i = 0 ; i < p_sw->num_hops ; i++ )
+ if (p_sw->hops[i]) {
+ p_port = cl_ptr_vector_get(&p_updn->p_osm->subn.port_lid_tbl, i);
+ if (!p_port || !p_port->p_node->sw ||
+ ((struct updn_node *)p_port->p_node->sw->priv)->rank != 0)
+ memset(p_sw->hops[i], 0xff, p_sw->num_ports);
+ }
+}
+
+/**********************************************************************
+ **********************************************************************/
static int
__osm_subn_set_up_down_min_hop_table(
IN updn_t* p_updn )
@@ -471,7 +489,10 @@ __osm_subn_set_up_down_min_hop_table(
p_sw = p_next_sw;
p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
/* Clear Min Hop Table */
- osm_switch_clear_hops(p_sw);
+ if (p_subn->opt.connect_roots && !((struct updn_node *)p_sw->priv)->rank)
+ updn_clear_root_hops(p_updn, p_sw);
+ else
+ osm_switch_clear_hops(p_sw);
}
osm_log( p_log, OSM_LOG_VERBOSE,
@@ -607,6 +628,10 @@ __osm_updn_call(
osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
__osm_updn_find_root_nodes_by_min_hop( p_updn );
}
+ else if (p_updn->p_osm->subn.opt.connect_roots &&
+ p_updn->updn_ucast_reg_inputs.num_guids > 1)
+ osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
+
/* printf ("-V- after osm_updn_find_root_nodes_by_min_hop\n"); */
/* Only if there are assigned root nodes do the algorithm, otherwise perform do nothing */
if ( p_updn->updn_ucast_reg_inputs.num_guids > 0)
--
1.5.2.2.277.g07b8
More information about the general
mailing list