[openib-general] [PATCH] Opensm - duplicated guids issue

Yael Kalka yael at mellanox.co.il
Sun Dec 4 05:02:50 PST 2005


Hi Hal,

Currently, if OpenSM discovers duplicated GUIDs or a 12x link with badly
configured lane reversal, it only writes an error to the log file.
This issue, though, is much more problematic, since it will cause
part of the subnet to be left uninitialized.
The following patch handles the issue more fully. First, it also
issues an error message to the /var/log/messages file.
Second, it adds an option flag to the SM (exit_on_fatal) that defines
whether or not to exit in such a case. By default the SM exits; the new
-y / --stay_on_fatal command-line option makes it keep running.

Thanks,
Yael

Signed-off-by:  Yael Kalka <yael at mellanox.co.il>

Index: include/opensm/osm_subnet.h
===================================================================
--- include/opensm/osm_subnet.h	(revision 4288)
+++ include/opensm/osm_subnet.h	(working copy)
@@ -235,6 +235,7 @@ typedef struct _osm_subn_opt
   osm_testability_modes_t  testability_mode;
   boolean_t                updn_activate;
   char *                   updn_guid_file;
+  boolean_t                exit_on_fatal;
 } osm_subn_opt_t;
 /*
 * FIELDS
@@ -372,6 +373,13 @@ typedef struct _osm_subn_opt
 *  updn_guid_file
 *     Pointer to name of the UPDN guid file given by User
 *
+*  exit_on_fatal
+*     If TRUE (default) - SM will exit on fatal subnet initialization issues.
+*     If FALSE - SM will not exit.
+*     Fatal initialization issues:
+*     a. SM recognizes 2 different nodes with the same guid, or 12x link with
+*        lane reversal badly configured.
+*
 * SEE ALSO
 *	Subnet object
 *********/
Index: opensm/osm_subnet.c
===================================================================
--- opensm/osm_subnet.c	(revision 4288)
+++ opensm/osm_subnet.c	(working copy)
@@ -440,6 +440,7 @@ osm_subn_set_default_opt(
   p_opt->testability_mode = OSM_TEST_MODE_NONE;
   p_opt->updn_activate = FALSE;
   p_opt->updn_guid_file = NULL;
+  p_opt->exit_on_fatal = TRUE;
 }
 
 /**********************************************************************
@@ -765,6 +766,10 @@ osm_subn_parse_conf_file(
       __osm_subn_opts_unpack_charp( 
         "updn_guid_file" ,
         p_key, p_val, &p_opts->updn_guid_file);
+
+      __osm_subn_opts_unpack_boolean(
+        "exit_on_fatal",
+        p_key, p_val, &p_opts->exit_on_fatal);
     }
   }
   fclose(opts_file);
@@ -930,14 +935,17 @@ osm_subn_write_conf_file(
     "# If TRUE if OpenSM should disable multicast support\n"
     "no_multicast_option %s\n\n"
     "# No multicast routing is performed if TRUE\n"
-    "disable_multicast %s\n\n",
+    "disable_multicast %s\n\n"
+    "# If TRUE opensm will exit on fatal initialization issues\n"
+    "exit_on_fatal %s\n\n",
     p_opts->log_flags,
     p_opts->force_log_flush ? "TRUE" : "FALSE",
     p_opts->log_file,
     p_opts->accum_log_file ? "TRUE" : "FALSE",
     p_opts->dump_files_dir,
     p_opts->no_multicast_option ? "TRUE" : "FALSE",
-    p_opts->disable_multicast ? "TRUE" : "FALSE"
+    p_opts->disable_multicast ? "TRUE" : "FALSE",
+    p_opts->exit_on_fatal ? "TRUE" : "FALSE"
     );
   
   /* optional string attributes ... */
Index: opensm/osm_node_info_rcv.c
===================================================================
--- opensm/osm_node_info_rcv.c	(revision 4288)
+++ opensm/osm_node_info_rcv.c	(working copy)
@@ -198,6 +198,14 @@ __osm_ni_rcv_set_links(
                      p_ni_context->port_num,
                      dr_new_path
                      );
+
+            osm_log( p_rcv->p_log, OSM_LOG_SYS,
+                     "Errors on subnet. SM found duplicated guids or 12x " 
+                     "link with lane reversal badly configured. "
+                     "Use osm log for more details.\n");
+
+            if ( p_rcv->p_subn->opt.exit_on_fatal == TRUE )
+              exit( 1 );
           }
 
           /* 
Index: opensm/main.c
===================================================================
--- opensm/main.c	(revision 4288)
+++ opensm/main.c	(working copy)
@@ -178,6 +178,12 @@ show_usage(void)
           "          This option will cause deletion of the log file\n"
           "          (if it previously exists). By default, the log file\n"
           "          is accumulative.\n\n");
+  printf( "-y\n"
+          "--stay_on_fatal\n"
+          "          This option will cause SM not to exit on fatal initialization\n"
+          "          issues: If SM discovers duplicated guids or 12x link with\n"
+          "          lane reversal badly configured.\n"
+          "          By default, the SM will exit.\n\n");
   printf( "-v\n"
           "--verbose\n"
           "          This option increases the log verbosity level.\n"
@@ -460,7 +466,7 @@ main(
   boolean_t             cache_options = FALSE;
   char                 *ignore_guids_file_name = NULL;
   uint32_t              val;
-  const char * const    short_option = "i:f:ed:g:l:s:t:a:uvVhorc";
+  const char * const    short_option = "i:f:ed:g:l:s:t:a:uvVhorcy";
 
   /*
     In the array below, the 2nd parameter specified the number
@@ -492,6 +498,7 @@ main(
       {  "updn",          0, NULL, 'u'},
       {  "add_guid_file", 1, NULL, 'a'},
       {  "cache-options", 0, NULL, 'c'},
+      {  "stay_on_fatal", 0, NULL, 'y'},
       {  NULL,            0, NULL,  0 }  /* Required at the end of the array */
     };
 
@@ -665,6 +672,11 @@ main(
       printf(" Creating new log file\n");
       break;
 
+    case 'y':
+      opt.exit_on_fatal = FALSE;
+      printf(" Staying on fatal initialization\n");
+      break;
+
     case 'v':
       log_flags = (log_flags <<1 )|1;
       printf(" Verbose option -v (log flags = 0x%X)\n", log_flags );




More information about the general mailing list