[ofa-general] [PATCH] opensm/updn: --connect_roots option

Sasha Khapyorsky sashak at voltaire.com
Thu Jun 21 14:29:20 PDT 2007


With this option up/down preserves route paths (based on min hops
knowledge) between root switches. This makes up/down IBA compliant
(where all-to-all connectivity is required); OTOH this violates the up/down
deadlock-free algorithm. By default this option is 'off'.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 opensm/include/opensm/osm_subnet.h |    6 ++++++
 opensm/man/opensm.8                |    8 +++++++-
 opensm/opensm/main.c               |   15 ++++++++++++++-
 opensm/opensm/osm_subnet.c         |   10 ++++++++++
 opensm/opensm/osm_ucast_updn.c     |   27 ++++++++++++++++++++++++++-
 5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 2ee5689..43b1589 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -276,6 +276,7 @@ typedef struct _osm_subn_opt
   boolean_t                sweep_on_trap;
   osm_testability_modes_t  testability_mode;
   char *                   routing_engine_name;
+  boolean_t                connect_roots;
   char *                   lid_matrix_dump_file;
   char *                   ucast_dump_file;
   char *                   root_guid_file;
@@ -445,6 +446,11 @@ typedef struct _osm_subn_opt
 *		Name of used routing engine
 *		(other than default Min Hop Algorithm)
 *
+*	connect_roots
+*		The option which will enforce root to root connectivity with
+*		up/down routing engine (even if this violates "pure" deadlock
+*		free up/down algorithm)
+*
 *	lid_matrix_dump_file
 *		Name of the lid matrix dump file from where switch
 *		lid matrices (min hops tables) will be loaded
diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8
index 4d35689..40e0235 100644
--- a/opensm/man/opensm.8
+++ b/opensm/man/opensm.8
@@ -5,7 +5,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
 
 .SH SYNOPSIS
 .B opensm
-[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
+[\-c(ache-options)] [\-g(uid)[=]<GUID in hex>] [\-l(mc) <LMC>] [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)] [\-R <engine name> | \-\-routing_engine <engine name>] [\-z | \-\-connect_roots] [\-M <file name> | \-\-lid_matrix_file <file name>] [\-U <file name> | \-ucast_file <file name>] [\-S | \-\-sadb_file <file name>] [\-a | \-\-root_guid_file <path to file>] [\-u | \-\-cn_guid_file <path to file>] [\-o(nce)] [\-s(weep) <interval>] [\-t(imeout) <milliseconds>] [\-maxsmps <number>] [\-console [off | local | socket]] [\-console-port <port>] [\-i(gnore-guids) <equalize-ignore-guids-file>] [\-f | \-\-log_file] [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)] [\-P(config)] [\-Q | \-qos] [\-N | \-no_part_enforce] [\-y | \-stay_on_fatal] [\-B | \-daemon] [\-I | \-inactive] [\-perfmgr] [\-perfmgr_sweep_time_s <seconds>] [\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>] [\-h(elp)] [\-?]
 
 .SH DESCRIPTION
 .PP
@@ -94,6 +94,12 @@ This option chooses routing engine instead of Min Hop
 algorithm (default).
 Supported engines: updn, file, ftree, lash
 .TP
+\fB\-z\fR, \fB\-\-connect_roots\fR
+This option enforces a routing engine (currently up/down
+only) to make connectivity between root switches and in
+this way to be fully IBA compliant. In many cases this can
+violate "pure" deadlock free algorithm, so use it carefully.
+.TP
 \fB\-M\fR, \fB\-\-lid_matrix_file\fR
 This option specifies the name of the lid matrix dump file
 from where switch lid matrices (min hops tables will be
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 0d5e0eb..e182276 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -175,6 +175,13 @@ show_usage(void)
           "          This option chooses routing engine instead of Min Hop\n"
           "          algorithm (default).\n"
           "          Supported engines: updn, file, ftree\n\n");
+  printf( "-z\n"
+          "--connect_roots\n"
+          "          This option enforces a routing engine (currently\n"
+          "          up/down only) to make connectivity between root switches\n"
+          "          and in this way to be fully IBA complaint. In many cases\n"
+          "          this can violate \"pure\" deadlock free algorithm, so\n"
+          "          use it carefully.\n\n");
   printf( "-M\n"
           "--lid_matrix_file <file name>\n"
           "          This option specifies the name of the lid matrix dump file\n"
@@ -591,7 +598,7 @@ main(
   char                 *ignore_guids_file_name = NULL;
   uint32_t              val;
   const char * const    short_option =
-	  "i:f:ed:g:l:L:s:t:a:u:R:M:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
+	  "i:f:ed:g:l:L:s:t:a:u:R:zM:U:S:P:NBIQvVhorcyxp:n:q:k:C:";
 
   /*
     In the array below, the 2nd parameter specifies the number
@@ -625,6 +632,7 @@ main(
       {  "priority",      1, NULL, 'p'},
       {  "smkey",         1, NULL, 'k'},
       {  "routing_engine",1, NULL, 'R'},
+      {  "connect_roots", 0, NULL, 'z'},
       {  "lid_matrix_file",1, NULL, 'M'},
       {  "ucast_file",    1, NULL, 'U'},
       {  "sadb_file",     1, NULL, 'S'},
@@ -876,6 +884,11 @@ main(
       printf(" Activate \'%s\' routing engine\n", optarg);
       break;
 
+    case 'z':
+      opt.connect_roots = TRUE;
+      printf(" Connect roots option is on\n");
+      break;
+
     case 'M':
       opt.lid_matrix_dump_file = optarg;
       printf(" Lid matrix dump file is \'%s\'\n", optarg);
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 82d66f9..8f429ae 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -500,6 +500,7 @@ osm_subn_set_default_opt(
   p_opt->sweep_on_trap = TRUE;
   p_opt->testability_mode = OSM_TEST_MODE_NONE;
   p_opt->routing_engine_name = NULL;
+  p_opt->connect_roots = FALSE;
   p_opt->lid_matrix_dump_file = NULL;
   p_opt->ucast_dump_file = NULL;
   p_opt->root_guid_file = NULL;
@@ -1290,6 +1291,10 @@ osm_subn_parse_conf_file(
         "routing_engine",
         p_key, p_val, &p_opts->routing_engine_name);
 
+      __osm_subn_opts_unpack_boolean(
+        "connect_roots",
+        p_key, p_val, &p_opts->connect_roots);
+
       __osm_subn_opts_unpack_charp(
         "log_file", p_key, p_val, &p_opts->log_file);
 
@@ -1545,6 +1550,11 @@ osm_subn_write_conf_file(
              "# Routing engine\n"
              "routing_engine %s\n\n",
              p_opts->routing_engine_name);
+  if (p_opts->connect_roots)
+    fprintf( opts_file,
+             "# Connect roots (use FALSE if unsure)\n"
+             "connect_roots %s\n\n",
+             p_opts->connect_roots ? "TRUE" : "FALSE");
   if (p_opts->lid_matrix_dump_file)
     fprintf( opts_file,
              "# Lid matrix dump file name\n"
diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
index af5ee4e..db8e60a 100644
--- a/opensm/opensm/osm_ucast_updn.c
+++ b/opensm/opensm/osm_ucast_updn.c
@@ -449,6 +449,24 @@ updn_subn_rank(
 
 /**********************************************************************
  **********************************************************************/
+/* hack: preserve min hops entries to any other root switches */
+static void
+updn_clear_root_hops(updn_t *p_updn, osm_switch_t *p_sw)
+{
+  osm_port_t *p_port;
+  unsigned i;
+
+  for ( i = 0 ; i < p_sw->num_hops ; i++ )
+    if (p_sw->hops[i]) {
+      p_port = cl_ptr_vector_get(&p_updn->p_osm->subn.port_lid_tbl, i);
+      if (!p_port || !p_port->p_node->sw ||
+          ((struct updn_node *)p_port->p_node->sw->priv)->rank != 0)
+        memset(p_sw->hops[i], 0xff, p_sw->num_ports);
+    }
+}
+
+/**********************************************************************
+ **********************************************************************/
 static int
 __osm_subn_set_up_down_min_hop_table(
   IN updn_t* p_updn )
@@ -471,7 +489,10 @@ __osm_subn_set_up_down_min_hop_table(
     p_sw = p_next_sw;
     p_next_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
     /* Clear Min Hop Table */
-    osm_switch_clear_hops(p_sw);
+    if (p_subn->opt.connect_roots && !((struct updn_node *)p_sw->priv)->rank)
+      updn_clear_root_hops(p_updn, p_sw);
+    else
+      osm_switch_clear_hops(p_sw);
   }
 
   osm_log( p_log, OSM_LOG_VERBOSE,
@@ -607,6 +628,10 @@ __osm_updn_call(
     osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
     __osm_updn_find_root_nodes_by_min_hop( p_updn );
   }
+  else if (p_updn->p_osm->subn.opt.connect_roots &&
+           p_updn->updn_ucast_reg_inputs.num_guids > 1)
+    osm_ucast_mgr_build_lid_matrices( &p_updn->p_osm->sm.ucast_mgr );
+
   /* printf ("-V- after osm_updn_find_root_nodes_by_min_hop\n"); */
   /* Only if there are assigned root nodes do the algorithm, otherwise perform do nothing */
   if ( p_updn->updn_ucast_reg_inputs.num_guids > 0)
-- 
1.5.2.2.277.g07b8




More information about the general mailing list