[openib-general] opensm segfaults after CL_INSUFFICIENT_MEMORY

Bernhard Fischer blist at aon.at
Fri Feb 11 07:31:39 PST 2005


On Thu, Feb 10, 2005 at 03:26:45PM -0500, Hal Rosenstock wrote:
>On Thu, 2005-02-10 at 14:44, Bernhard Fischer wrote:
>> Hi,
>> 
>> I'm seeing the segfault below when i try to run opensm.

>
>> Any ideas?
>> (gdb) run
>> Starting program: /usr/local/ib/bin/opensm 
>> [Thread debugging using libthread_db enabled]
>> [New Thread -1209392032 (LWP 7991)]
>> __init: failed to create timer provider status (CL_INSUFFICIENT_MEMORY) 
>
>First malloc is failing to create the timer
>
>> Program received signal SIGSEGV, Segmentation fault.
>> [Switching to Thread -1209392032 (LWP 7991)]
>> 0xb7f0b597 in memset () from /lib/tls/libc.so.6

>Initialization continues (not sure it should) and attempting to clear a

I'd say it should not continue, but exit instead.

>static object fails. Even if initialization didn't continue, opensm
>would not run properly.
>
>The first problem is why the malloc fails for the timer provider. The
>second problem is pretty strange as the system callback object is local
>and should not be an address that causes a segmentation violation.
>
>Is this built with the autotools version ?

Disregard the second problem; it was caused by pathological compiler
settings (wrong regparm for that system).

Please do, however, fix the path for ENOMEM.
-------------- next part --------------
diff -X ./excl -rdup ./gen2.oorig/trunk/src/userspace/management/osm/complib/cl_complib.c ./gen2/trunk/src/userspace/management/osm/complib/cl_complib.c
--- ./gen2.oorig/trunk/src/userspace/management/osm/complib/cl_complib.c	2005-02-08 16:27:20.000000000 +0100
+++ ./gen2/trunk/src/userspace/management/osm/complib/cl_complib.c	2005-02-11 13:23:08.000000000 +0100
@@ -73,6 +73,12 @@ void
 complib_exit(void);
 
 void
+complib_fini(void);
+
+void
+complib_init(void);
+
+void
 __attribute (( constructor ))
 complib_init(void)
 {
@@ -86,7 +92,7 @@ complib_init(void)
 	{
 		cl_msg_out( "__init: failed to init syshelper (%s) \n",
 									CL_STATUS_MSG( status ) );
-
+		exit(1);
 	}
 
 	/*
@@ -98,6 +104,7 @@ complib_init(void)
 	{
 		cl_msg_out( "__init: failed to create timer provider status (%s) \n",
 									CL_STATUS_MSG( status ) );
+		exit(1);
 	}
 
 	/*
@@ -108,9 +115,8 @@ complib_init(void)
 	{
 		cl_msg_out( "__init: failed to initialize syscall back (%s) \n",
 									CL_STATUS_MSG( status ) );
+		exit(1);
 	}
-
-	return;
 }
 
 void
diff -X ./excl -rdup ./gen2.oorig/trunk/src/userspace/management/osm/opensm/main.c ./gen2/trunk/src/userspace/management/osm/opensm/main.c
--- ./gen2.oorig/trunk/src/userspace/management/osm/opensm/main.c	2005-02-08 16:27:11.000000000 +0100
+++ ./gen2/trunk/src/userspace/management/osm/opensm/main.c	2005-02-11 15:45:46.000000000 +0100
@@ -74,6 +74,7 @@ osm_opensm_t osm;
 volatile int osm_exit_flag = 0;
 
 #define GUID_ARRAY_SIZE 64
+#define INVALID_GUID (0xFFFFFFFFFFFFFFFFULL) /* 1 ? */
 
 /**********************************************************************
  **********************************************************************/
@@ -101,13 +102,13 @@ show_usage(void)
           "          -d5  - increase vendor debug level.\n"
           "          -d10.. Put OpenSM in testability mode.\n"
           "          Without -d, no debug options are enabled.\n\n" );
-  printf( "-g <GUID in hex>\n"
-          "--guid <GUID in hex>\n"
+  printf( "-g<[=]GUID in hex>\n"
+          "--guid=<GUID in hex>\n"
           "          This option specifies the local port GUID value\n"
           "          with which OpenSM should bind.  OpenSM may be\n"
           "          bound to 1 port at a time.\n"
-          "          Without -g, OpenSM displays a menu of possible\n"
-          "          port GUIDs and waits for user input.\n\n" );
+          "          Without a guid, OpenSM displays a list of possible\n"
+          "          port GUIDs and exits.\n\n" );
   printf( "-h\n"
           "--help\n"
           "          Display this usage info then exit.\n\n" );
@@ -229,6 +230,36 @@ show_menu(void)
 
 /**********************************************************************
  **********************************************************************/
+static void print_all_guids(IN osm_opensm_t *p_osm);
+static void
+print_all_guids(
+  IN osm_opensm_t *p_osm )
+{
+	ib_api_status_t status;
+	uint32_t num_ports = GUID_ARRAY_SIZE;
+	ib_port_attr_t attr_array[GUID_ARRAY_SIZE];
+	int i;
+
+	/*
+		Call the transport layer for a list of local port
+		GUID values.
+	*/
+	status = osm_vendor_get_all_port_attr(p_osm->p_vendor, attr_array, &num_ports );
+	if ( status != IB_SUCCESS )
+	{
+		printf( "\nError from osm_vendor_get_all_port_attr (%x)\n", status);
+		return;
+	}
+
+	printf("\nListing GUIDs:\n");
+	for (i = 1; i < num_ports; i++) { /* excluding logical mgmnt port */
+		printf("Port %i: 0x%"PRIx64"\n", i, cl_hton64(attr_array[i].port_guid));
+	}
+}
+
+
+/**********************************************************************
+ **********************************************************************/
 ib_net64_t
 get_port_guid(
   IN osm_opensm_t *p_osm, uint64_t port_guid )
@@ -242,6 +273,7 @@ get_port_guid(
 		Call the transport layer for a list of local port
 		GUID values.
 	*/
+// "local ports" is(?) phys, shouldn't this exclude port 0 then ?
 	status = osm_vendor_get_all_port_attr(p_osm->p_vendor, attr_array, &num_ports );
 	if ( status != IB_SUCCESS )
 	{
@@ -353,11 +385,11 @@ main(
   boolean_t          run_once_flag = FALSE;
   boolean_t          mem_track = FALSE;
   int32_t		vendor_debug = 0;
-  uint32_t             next_option;
+  int32_t             next_option;
   unsigned int         exitTimeout;
   char *ignore_guids_file_name = NULL;
   uint32_t             val;
-  const char * const short_option = "i:f:d:g:l:s:t:vVhor";
+  const char * const short_option = "i:f:d:g::l:s:t:vVhor";
   /*
     In the array below, the 2nd parameter specified the number
     of arguments as follows:
@@ -368,7 +400,7 @@ main(
   const struct option long_option[] =
     {
       {  "debug", 1, NULL, 'd'},
-      {  "guid",     1, NULL, 'g'},
+      {  "guid",  2, NULL, 'g'},
       {  "ignore_guids", 1,   NULL, 'i'},
       {  "lmc",      1, NULL, 'l'},
       {  "sweep", 1, NULL, 's'},
@@ -437,8 +469,11 @@ main(
       /*
         Specifies port guid with which to bind.
       */
-      guid = cl_hton64( strtoull( optarg, NULL, 16 ));
-      printf(" Guid <0x%"PRIx64">\n", cl_hton64( guid ));
+      if (optarg) {
+	guid = cl_hton64( strtoull( optarg, NULL, 16 ));
+	printf(" Guid <0x%"PRIx64">\n", cl_hton64( guid ));
+      } else
+	guid = INVALID_GUID;
       break;
 
     case 's':
@@ -567,6 +602,7 @@ main(
 
     case 'h':
     case '?':
+    case ':':
       show_usage();
       break;
 
@@ -609,7 +645,13 @@ main(
     complib_exit();
     return( status );
   }
-  
+
+  if (cl_hton64(guid) == cl_hton64(INVALID_GUID)) {
+	print_all_guids( &osm );
+	complib_exit();
+	return( status );
+  }
+
   /*
     If the user didn't specify a GUID on the command line,
     then get a port GUID value with which to bind.


More information about the general mailing list