[ofa-general] [PATCH] opensm/updn: --connect_roots option
Yevgeny Kliteynik
kliteyn at dev.mellanox.co.il
Sun Jul 15 06:36:30 PDT 2007
Hi Sasha,
Sasha Khapyorsky wrote:
> With this option up/down preserves route paths (based on min hops
> knowledge) between root switches. This makes up/down IBA compliant
> (where all to all connectivity is required), OTOH this violates up/down
> deadlock free algorithm. By default this option is 'off'.
>
> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
If I understand you correctly, this patch does what it says - connects
*roots*. But what if other switches are not connected because of the up/down
constraints?
For instance, the fabric can actually be built of several sub-trees that are
connected only at the leaf switch rank, so there is no up/down path between any
two switches from different sub-trees at ranks 0 up to (but not including) the leaf rank.
Moreover, I can think of a topology where some CA-to-CA paths will be missing too.
A similar problem exists in fat-tree routing.
Thoughts?
-- Yevgeny
> ---
> opensm/include/opensm/osm_subnet.h | 6 ++++++
> opensm/man/opensm.8 | 8 +++++++-
> opensm/opensm/main.c | 15 ++++++++++++++-
> opensm/opensm/osm_subnet.c | 10 ++++++++++
> opensm/opensm/osm_ucast_updn.c | 27 ++++++++++++++++++++++++++-
> 5 files changed, 63 insertions(+), 3 deletions(-)
>
> diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
> index 2ee5689..43b1589 100644
> --- a/opensm/include/opensm/osm_subnet.h
> +++ b/opensm/include/opensm/osm_subnet.h
> @@ -276,6 +276,7 @@ typedef struct _osm_subn_opt
> boolean_t sweep_on_trap;
> osm_testability_modes_t testability_mode;
> char * routing_engine_name;
> + boolean_t connect_roots;
> char * lid_matrix_dump_file;
> char * ucast_dump_file;
> char * root_guid_file;
> @@ -445,6 +446,11 @@ typedef struct _osm_subn_opt
> * Name of used routing engine
> * (other than default Min Hop Algorithm)
> *
> +* connect_roots
> +* The option which will enforce root to root connectivity with
> +* up/down routing engine (even if this violates "pure" deadlock
> +* free up/down algorithm)
> +*
> * lid_matrix_dump_file
> * Name of the lid matrix dump file from where switch
> * lid matrices (min hops tables) will be loaded
> diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8
> index 4d35689..40e0235 100644
> --- a/opensm/man/opensm.8
> +++ b/opensm/man/opensm.8
> @@ -5,7 +5,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
>
> .SH SYNOPSIS
> .B opensm
> -[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
> +[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-z | \-\-connect_roots] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
>
> .SH DESCRIPTION
> .PP
> @@ -94,6 +94,12 @@ This option chooses routing engine instead of Min Hop
> algorithm (default).
> Supported engines: updn, file, ftree, lash
> .TP
> +\fB\-z\fR, \fB\-\-connect_roots\fR
> +This option enforces a routing engine (currently up/down
> +only) to make connectivity between root switches and in
> +this way to be fully IBA compliant. In many cases this can
> +violate "pure" deadlock free algorithm, so use it carefully.
> +.TP
> \fB\-M\fR, \fB\-\-lid_matrix_file\fR
> This option specifies the name of the lid matrix dump file
> from where switch lid matrices (min hops tables will be
> diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
> index 0d5e0eb..e182276 100644
> --- a/opensm/opensm/main.c
> +++ b/opensm/opensm/main.c
> @@ -175,6 +175,13 @@ show_usage(void)
> " This option chooses routing engine instead of Min Hop\n"
> " algorithm (default).\n"
> " Supported engines: updn, file, ftree\n\n");
> + printf( "-z\n"
> + "--connect_roots\n"
> + " This option enforces a routing engine (currently\n"
> + " up/down only) to make connectivity between root switches\n"
> + " and in this way to be fully IBA compliant. In many cases\n"
> + " this can violate \"pure\" deadlock free algorithm, so\n"
> + " use it carefully.\n\n");
> printf( "-M\n"
> "--lid_matrix_file <file name>\n"
> " This option specifies the name of the lid matrix dump file\n"
> @@ -591,7 +598,7 @@ main(
> char *ignore_guids_file_name = NULL;
> uint32_t val;
> const char * const short_option =
> - "i:f:ed:g:l:L:s:t:a:u:R:M:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
> + "i:f:ed:g:l:L:s:t:a:u:R:zM:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
>
> /*
> In the array below, the 2nd parameter specifies the number
> @@ -625,6 +632,7 @@ main(
> { "priority", 1, NULL, 'p'},
> { "smkey", 1, NULL, 'k'},
> { "routing_engine",1, NULL, 'R'},
> + { "connect_roots", 0, NULL, 'z'},
> { "lid_matrix_file",1, NULL, 'M'},
> { "ucast_file", 1, NULL, 'U'},
> { "sadb_file", 1, NULL, 'S'},
> @@ -876,6 +884,11 @@ main(
> printf(" Activate \'%s\' routing engine\n", optarg);
> break;
>
> + case 'z':
> + opt.connect_roots = TRUE;
> + printf(" Connect roots option is on\n");
> + break;
> +
> case 'M':
> opt.lid_matrix_dump_file = optarg;
> printf(" Lid matrix dump file is \'%s\'\n", optarg);
> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
> index 82d66f9..8f429ae 100644
> --- a/opensm/opensm/osm_subnet.c
> +++ b/opensm/opensm/osm_subnet.c
> @@ -500,6 +500,7 @@ osm_subn_set_default_opt(
> p_opt->sweep_on_trap = TRUE;
> p_opt->testability_mode = OSM_TEST_MODE_NONE;
> p_opt->routing_engine_name = NULL;
> + p_opt->connect_roots = FALSE;
> p_opt->lid_matrix_dump_file = NULL;
> p_opt->ucast_dump_file = NULL;
> p_opt->root_guid_file = NULL;
> @@ -1290,6 +1291,10 @@ osm_subn_parse_conf_file(
> "routing_engine",
> p_key, p_val, &p_opts->routing_engine_name);
>
> + __osm_subn_opts_unpack_boolean(
> + "connect_roots",
> + p_key, p_val, &p_opts->connect_roots);
> +
> __osm_subn_opts_unpack_charp(
> "log_file", p_key, p_val, &p_opts->log_file);
>
> @@ -1545,6 +1550,11 @@ osm_subn_write_conf_file(
> "# Routing engine\n"
> "routing_engine %s\n\n",
> p_opts->routing_engine_name);
> + if (p_opts->connect_roots)
> + fprintf( opts_file,
> + "# Connect roots (use FALSE if unsure)\n"
> + "connect_roots %s\n\n",
> + p_opts->connect_roots ? "TRUE" : "FALSE");
> if (p_opts->lid_matrix_dump_file)
> fprintf( opts_file,
> "# Lid matrix dump file name\n"
> diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
> index af5ee4e..db8e60a 100644
> --- a/opensm/opensm/osm_ucast_updn.c
> +++ b/opensm/opensm/osm_ucast_updn.c
> @@ -449,6 +449,24 @@ updn_subn_rank(
>
> /**********************************************************************
> **********************************************************************/
> +/* hack: preserve min hops entries to any other root switches */
> +static void
> +updn_clear_root_hops(updn_t *p_updn, osm_switch_t *p_sw)
> +{
> + osm_port_t *p_port;
> + unsigned i;
> +
> + for ( i = 0 ; i < p_sw->num_hops ; i++ )
> + if (p_sw->hops[i]) {
> + p_port = cl_ptr_vector_get(&p_updn->p_osm->subn.port_lid_tbl, i);
> + if (!p_port || !p_port->p_node->sw ||
> + ((struct updn_node *)p_port->p_node->sw->priv)->rank != 0)
> + memset(p_sw->hops[i], 0xff, p_sw->num_ports);
> + }
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> static int
> __osm_subn_set_up_down_min_hop_table(
> IN updn_t* p_updn )
> @@ -471,7 +489,10 @@ __osm_subn_set_up_down_min_hop_table(
> p_sw = p_next_sw;
> p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
> /* Clear Min Hop Table */
> - osm_switch_clear_hops(p_sw);
> + if (p_subn->opt.connect_roots && !((struct updn_node *)p_sw->priv)->rank)
> + updn_clear_root_hops(p_updn, p_sw);
> + else
> + osm_switch_clear_hops(p_sw);
> }
>
> osm_log( p_log, OSM_LOG_VERBOSE,
> @@ -607,6 +628,10 @@ __osm_updn_call(
> osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
> __osm_updn_find_root_nodes_by_min_hop( p_updn );
> }
> + else if (p_updn->p_osm->subn.opt.connect_roots &&
> + p_updn->updn_ucast_reg_inputs.num_guids > 1)
> + osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
> +
> /* printf ("-V- after osm_updn_find_root_nodes_by_min_hop\n"); */
> /* Only if there are assigned root nodes do the algorithm, otherwise perform do nothing */
> if ( p_updn->updn_ucast_reg_inputs.num_guids > 0)
More information about the general
mailing list