[ofw] RE: Bugzilla 1233: machine crashes when adding a new partition.

Leonid Keller leonid at mellanox.co.il
Thu Jun 11 09:21:38 PDT 2009


Committed in 2225.


________________________________

	From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Leonid Keller
	Sent: Monday, June 01, 2009 8:55 PM
	To: James Yang
	Cc: ofw at lists.openfabrics.org
	Subject: [ofw] RE: Bugzilla 1233: machine crashes when adding a
new partition.
	
	
	The patch sets default device information on all new devices
created by the partition manager.
	The patch also renames _Create_Static_Devices to
__create_static_devices for style consistency.
	 
	Index: bus/kernel/bus_driver.c
	
===================================================================
	--- bus/kernel/bus_driver.c (revision 2223)
	+++ bus/kernel/bus_driver.c (working copy)
	@@ -124,7 +124,39 @@
	  IN    UNICODE_STRING    *p_registry_path );
	 
	 
	+child_device_info_t g_default_device_info;
	 
	+static void __create_default_dev_info(child_device_info_t
*pNewDevList)
	+{
	+ UNICODE_STRING    keyValue;
	+
	+ /* DeviceId*/
	+ RtlInitUnicodeString(&keyValue, L"IBA\\IPoIBP\0");  
	+ pNewDevList->device_id_size = keyValue.Length + sizeof(WCHAR);
	+ RtlStringCchCopyW( pNewDevList->device_id, 
	+  sizeof(pNewDevList->device_id)/sizeof(wchar_t),
keyValue.Buffer );
	+ /* HardwareId*/
	+ RtlInitUnicodeString(&keyValue, L"IBA\\IPoIBP\0\0"); 
	+ pNewDevList->hardware_id_size = keyValue.Length +
2*sizeof(WCHAR);
	+ RtlStringCchCopyW( pNewDevList->hardware_id, 
	+  sizeof(pNewDevList->hardware_id)/sizeof(wchar_t),
keyValue.Buffer );
	+ /* CompatibleId*/
	+ RtlInitUnicodeString(&keyValue,
L"IBA\\SID_1000066a00020000\0\0"); 
	+ pNewDevList->compatible_id_size = keyValue.Length +
2*sizeof(WCHAR); //2 
	+ RtlStringCchCopyW( pNewDevList->compatible_id, 
	+  sizeof(pNewDevList->compatible_id)/sizeof(wchar_t),
keyValue.Buffer );
	+ /* Device Description */
	+ RtlInitUnicodeString(&keyValue, L"OpenIB IPoIB Adapter");
	+ pNewDevList->description_size = keyValue.Length +
sizeof(WCHAR);
	+ RtlStringCchCopyW( pNewDevList->description, 
	+  sizeof(pNewDevList->description)/sizeof(wchar_t),
keyValue.Buffer );
	+ /* Pkey */
	+ RtlInitUnicodeString(&keyValue, L"FFFF");  /* Pkey */
	+ RtlStringCchCopyW( pNewDevList->pkey, 
	+  sizeof(pNewDevList->pkey)/sizeof(wchar_t), keyValue.Buffer );
	+}
	+
	+
	 static void
	 __read_machine_name( void )
	 {
	@@ -250,8 +282,7 @@
	 
	 }
	 
	-
	-NTSTATUS _Create_Static_Devices(PUNICODE_STRING p_param_path)
	+static NTSTATUS __create_static_devices(PUNICODE_STRING
p_param_path)
	 {
	  RTL_QUERY_REGISTRY_TABLE table[2];
	  UNICODE_STRING    keyPath;
	@@ -273,7 +304,7 @@
	  {
	   BUS_TRACE(BUS_DBG_ERROR ,("Not enough memory for
key_path_buffer.\n") );
	   status = STATUS_UNSUCCESSFUL;
	-  goto _Create_Static_Devices_exit;
	+  goto __create_static_devices_exit;
	  }
	 
	  key_value_buffer = key_path_buffer + BUF_SIZE;
	@@ -325,7 +356,7 @@
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Not enough memory for
key_path_buffer.\n") );
	     status = STATUS_UNSUCCESSFUL;
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	    pNewDevList->next_device_info = NULL;
	 
	@@ -358,7 +389,7 @@
	    if(!NT_SUCCESS(status))
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Failed to read DeviceId.\n") );
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	    pNewDevList->io_device_info.device_id_size = keyValue.Length
+ sizeof(WCHAR);
	 
	@@ -381,7 +412,7 @@
	    if(!NT_SUCCESS(status))
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Failed to read HardwareId.\n")
);
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	    pNewDevList->io_device_info.hardware_id_size =
keyValue.Length + 2*sizeof(WCHAR);
	 
	@@ -404,7 +435,7 @@
	    if(!NT_SUCCESS(status))
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Failed to read CompatibleId.\n")
);
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	    pNewDevList->io_device_info.compatible_id_size =
keyValue.Length + 2*sizeof(WCHAR); //2 null
	 
	@@ -427,7 +458,7 @@
	    if(!NT_SUCCESS(status))
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Failed to read Description.\n")
);
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	 
	    pNewDevList->io_device_info.description_size =
keyValue.Length + sizeof(WCHAR);
	@@ -441,7 +472,7 @@
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Id or description size is too
big.\n") );
	     status = STATUS_UNSUCCESSFUL;
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	 
	    /* Get Pkey */
	@@ -463,7 +494,7 @@
	    if(!NT_SUCCESS(status))
	    {
	     BUS_TRACE(BUS_DBG_ERROR ,("Failed to read PartitionKey.\n")
);
	-    goto _Create_Static_Devices_exit;
	+    goto __create_static_devices_exit;
	    }
	 
	    while(*curChild) curChild++;
	@@ -471,7 +502,7 @@
	   }
	  }
	 
	-_Create_Static_Devices_exit:
	+__create_static_devices_exit:
	  if(key_path_buffer)
	  {
	   cl_free(key_path_buffer);
	@@ -729,7 +760,7 @@
	     BUS_TRACE(BUS_DBG_ERROR ,
	        ("Failed to build pkey configuration\n"));
	 
	-   if(!NT_SUCCESS(_Create_Static_Devices(&param_path))){
	+   if(!NT_SUCCESS(__create_static_devices(&param_path))){
	     BUS_TRACE(BUS_DBG_ERROR ,
	        ("Failed to create devices\n"));
	    }
	@@ -1031,6 +1062,9 @@
	   return status;
	  }
	 
	+ /* create default device descrition for Partition Manager */
	+ __create_default_dev_info( &g_default_device_info );
	+ 
	  /* Setup the entry points. */
	  p_driver_obj->MajorFunction[IRP_MJ_CREATE] = bus_drv_open;
	  p_driver_obj->MajorFunction[IRP_MJ_CLEANUP] = bus_drv_cleanup;
	Index: bus/kernel/bus_port_mgr.c
	
===================================================================
	--- bus/kernel/bus_port_mgr.c (revision 2223)
	+++ bus/kernel/bus_port_mgr.c (working copy)
	@@ -1014,6 +1014,7 @@
	  return ( success_cnt ? CL_SUCCESS : CL_ERROR );
	 }
	 
	+extern child_device_info_t g_default_device_info;
	 
	
/***********************************************************************
*************
	 * name : port_mgr_pkey_add
	@@ -1103,6 +1104,7 @@
	   pkey_port_ext->pdo.b_reported_missing = FALSE;
	   pkey_port_ext->pdo.b_hibernating = FALSE;
	   pkey_port_ext->pdo.p_po_work_item = NULL;
	+  pkey_port_ext->pdo.p_pdo_device_info =
&g_default_device_info;
	   BUS_TRACE( BUS_DBG_PNP, ("Created device for %s: PDO %p,ext
%p, present %d, missing %d .\n",
	    pkey_port_ext->pdo.cl_ext.vfptr_pnp_po->identity,
p_pdo[cnt], pkey_port_ext, pkey_port_ext->pdo.b_present, 
	    pkey_port_ext->pdo.b_reported_missing ) );
	


________________________________

		From: James Yang [mailto:jyang at xsigo.com] 
		Sent: Sunday, May 24, 2009 10:10 PM
		To: Leonid Keller
		Cc: ofw at lists.openfabrics.org
		Subject: RE: Bugzilla 1233: machine crashes when adding
a new partition.
		
		

		Hi Leonid,

		 

		In this case, since the PDO is not created based on the
information saved in the registry, I think one quick fix is to initialize
pdo.p_pdo_device_info to fixed information used by the partition manager
during _port_mgr_pkey_add() PDO creation. I have never used part_man, but my
understanding is that it creates new devices through an IOCTL call.

		 

		Thanks,

		James

		 

		 

		
________________________________


		From: Leonid Keller [mailto:leonid at mellanox.co.il] 
		Sent: Sunday, May 24, 2009 8:09 AM
		To: James Yang
		Cc: ofw at lists.openfabrics.org
		Subject: Bugzilla 1233: machine crashes when adding a
new partition.

		 

		We've come across the fact that the driver crashes after
executing 'part_man add guid part-ID'.

		I looked into it and found out that the problem was
introduced 16/02 by 1965 openib patch - "added support for creating
vendor defined devices.".

		The problem arises from the fact that
port_query_device_id (as well as port_query_hardware_ids,
port_query_compatible_ids et al.) now takes the data for the reply from
its PDO and not from a built-in string. The PDO, in turn, gets the data
at startup from the list of statically (in .inf) configured vendor
devices.

		The data are saved in pdo.p_pdo_device_info, which is
NULL for the devices added by part_man.

		This NULL causes the crash.

		 

		call stack is:
		1: kd> k
		Child-SP          RetAddr           Call Site
		fffffadf`a6292a38 fffff800`010d673e
nt!DbgBreakPointWithStatus
		fffffadf`a6292a40 fffff800`010d7d0e
nt!KiBugCheckDebugBreak+0x1e
		fffffadf`a6292aa0 fffff800`0102ea54 nt!KeBugCheck2+0x640
		fffffadf`a62930e0 fffff800`013731b1
nt!KeBugCheckEx+0x104
		fffffadf`a6293120 fffff800`010556ab
nt!PspSystemThreadStartup+0x270
		fffffadf`a6293170 fffff800`010549fd
nt!_C_specific_handler+0x9b
		fffffadf`a6293200 fffff800`01054f93
nt!RtlpExecuteHandlerForException+0xd
		fffffadf`a6293230 fffff800`0100b901
nt!RtlDispatchException+0x2c0
		fffffadf`a62938f0 fffff800`0102e76f
nt!KiDispatchException+0xd9
		fffffadf`a6293ef0 fffff800`0102d5e1 nt!KiExceptionExit
		fffffadf`a6294070 fffffadf`a53350dc nt!KiPageFault+0x1e1
		fffffadf`a6294200 fffffadf`a534f041
ibbus!port_query_device_id+0x2ec
		[s:\builds\4329\trunk\core\bus\kernel\bus_port_mgr.c @
1598]
		fffffadf`a6294250 fffffadf`a53496bd
ibbus!__query_id+0x3d1
		[s:\builds\4329\trunk\core\complib\kernel\cl_pnp_po.c @
1053]
		fffffadf`a62942b0 fffff800`0133ed04 ibbus!cl_pnp+0x284d
		[s:\builds\4329\trunk\core\complib\kernel\cl_pnp_po.c @
312]
		fffffadf`a62943c0 fffff800`0133e621
nt!IopSynchronousCall+0x144
		fffffadf`a6294430 fffff800`01331c87
nt!PpQueryDeviceID+0x91
		fffffadf`a62944f0 fffff800`0133591b
nt!PiProcessNewDeviceNode+0x67
		fffffadf`a6294890 fffff800`013376e5
nt!PipProcessDevNodeTree+0x223
		fffffadf`a6294c20 fffff800`010cc9d8
nt!PiProcessReenumeration+0x85
		fffffadf`a6294c70 fffff800`0103768a
nt!PipDeviceActionWorker+0x368

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090611/5ddd1b91/attachment.html>


More information about the ofw mailing list