[ofw] Add Assert to debug BS while deleting WDF device

Uri Habusha urih at mellanox.co.il
Mon Dec 6 05:08:59 PST 2010


Okay. I get it. I'm not sure it's required but I can live with that.

However I want to keep the ASSERT so in running time we can check the assumptions. I made some changes in the code please review.

Index: winmad/kernel/wm_driver.c
===================================================================
--- winmad/kernel/wm_driver.c	(revision 3010)
+++ winmad/kernel/wm_driver.c	(working copy)
@@ -41,7 +41,7 @@
 WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WM_IB_DEVICE, WmIbDeviceGetContext)
 WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WM_PROVIDER, WmProviderGetContext)
 
-WDFDEVICE				ControlDevice;
+WDFDEVICE				ControlDevice = NULL;
 static LIST_ENTRY		DevList;
 static LIST_ENTRY		ProvList;
 static KGUARDED_MUTEX	Lock;
@@ -195,11 +195,13 @@
 	WdfDeviceInitSetFileObjectConfig(pinit, &fileconfig, &attr);
 
 	WDF_OBJECT_ATTRIBUTES_INIT(&attr);
+	ASSERT(ControlDevice == NULL);
 	status = WdfDeviceCreate(&pinit, &attr, &ControlDevice);
 	if (!NT_SUCCESS(status)) {
 		goto err1;
 	}
 
+	ASSERT(ControlDevice != NULL);
 	status = WdfDeviceCreateSymbolicLink(ControlDevice, &symlink);
 	if (!NT_SUCCESS(status)) {
 		goto err2;
@@ -220,6 +222,7 @@
 
 err2:
 	WdfObjectDelete(ControlDevice);
+	ControlDevice = NULL;
 	return;
 err1:
 	WdfDeviceInitFree(pinit);
@@ -299,7 +302,8 @@
 
 	status = WdfFdoQueryForInterface(Device, &GUID_RDMA_INTERFACE_VERBS,
 									 (PINTERFACE) &dev->VerbsInterface,
-									 sizeof(dev->VerbsInterface), VerbsVersion(2, 0),
+ 									 sizeof(dev->VerbsInterface), 
+ 									 VerbsVersion(VERBS_MAJOR_VER, VERBS_MINOR_VER), 
 									 NULL);
 	if (!NT_SUCCESS(status)) {
 		return status;
@@ -340,15 +344,18 @@
 	WM_REGISTRATION		*reg;
 	LIST_ENTRY			*entry;
 	BOOLEAN				destroy;
-	WDFDEVICE			ctrldev;
+	WDFDEVICE			ctrldev = NULL;
 
 	pdev = WmIbDeviceGetContext(Device);
 
 	KeAcquireGuardedMutex(&Lock);
 	RemoveEntryList(&pdev->Entry);
 	destroy = IsListEmpty(&DevList);
-	ctrldev = ControlDevice;
-
+	if (destroy) {
+		ctrldev = ControlDevice;
+		ControlDevice = NULL;
+	}
+	
 	for (entry = ProvList.Flink; entry != &ProvList; entry = entry->Flink) {
 		prov = CONTAINING_RECORD(entry, WM_PROVIDER, Entry);
 		WmProviderRemoveHandler(prov, pdev);
@@ -364,7 +371,7 @@
 		ExFreePoolWithTag(pdev->pPortArray, 'pimw');
 	}
 
-	if (destroy) {
+	if (destroy && (ctrldev != NULL)) {
 		WdfObjectDelete(ctrldev);
 	}
 
Index: winverbs/kernel/wv_driver.c
===================================================================
--- winverbs/kernel/wv_driver.c	(revision 3010)
+++ winverbs/kernel/wv_driver.c	(working copy)
@@ -48,7 +48,7 @@
 
 WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WV_RDMA_DEVICE, WvRdmaDeviceGetContext)
 
-WDFDEVICE				ControlDevice;
+WDFDEVICE				ControlDevice = NULL;
 static LIST_ENTRY		DevList;
 static LIST_ENTRY		ProvList;
 static KGUARDED_MUTEX	Lock;
@@ -435,12 +435,16 @@
 	WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE(&attr, WV_PROVIDER);
 	WdfDeviceInitSetFileObjectConfig(pinit, &fileconfig, &attr);
 
+	ASSERT(ControlDevice == NULL);
+
 	WDF_OBJECT_ATTRIBUTES_INIT(&attr);
 	status = WdfDeviceCreate(&pinit, &attr, &ControlDevice);
 	if (!NT_SUCCESS(status)) {
 		goto err1;
 	}
 
+	ASSERT(ControlDevice != NULL);
+
 	status = WdfDeviceCreateSymbolicLink(ControlDevice, &symlink);
 	if (!NT_SUCCESS(status)) {
 		goto err2;
@@ -459,6 +463,7 @@
 
 err2:
 	WdfObjectDelete(ControlDevice);
+	ControlDevice = NULL;
 	return;
 err1:
 	WdfDeviceInitFree(pinit);
@@ -477,7 +482,8 @@
 
 	status = WdfFdoQueryForInterface(Device, &GUID_RDMA_INTERFACE_VERBS,
 									 (PINTERFACE) &dev->Interface,
-									 sizeof(dev->Interface), VerbsVersion(2, 0),
+									 sizeof(dev->Interface), 
+									 VerbsVersion(VERBS_MAJOR_VER, VERBS_MINOR_VER), 
 									 NULL);
 	if (!NT_SUCCESS(status)) {
 		return status;
@@ -536,7 +542,7 @@
 	WV_PROVIDER				*prov;
 	LIST_ENTRY				*entry;
 	BOOLEAN					destroy;
-	WDFDEVICE				ctrldev;
+	WDFDEVICE				ctrldev = NULL;
 
 	pdev = WvRdmaDeviceGetContext(Device);
 	if (pdev->hDevice == NULL) {
@@ -546,8 +552,11 @@
 	KeAcquireGuardedMutex(&Lock);
 	RemoveEntryList(&pdev->Entry);
 	destroy = IsListEmpty(&DevList);
-	ctrldev = ControlDevice;
-
+	if (destroy) {
+		ctrldev = ControlDevice;
+		ControlDevice = NULL;
+	}
+	
 	for (entry = ProvList.Flink; entry != &ProvList; entry = entry->Flink) {
 		prov = CONTAINING_RECORD(entry, WV_PROVIDER, Entry);
 		WvProviderRemoveHandler(prov, pdev);
@@ -567,7 +576,10 @@
 			IbCmInterface.InterfaceHeader.InterfaceDereference(IbCmInterface.
 															   InterfaceHeader.Context);
 		}
-		WdfObjectDelete(ctrldev);
+
+		if (ctrldev!= NULL) {
+			WdfObjectDelete(ctrldev);
+		}
 	}
 }


-----Original Message-----
From: Hefty, Sean [mailto:sean.hefty at intel.com] 
Sent: Monday, December 06, 2010 4:05 AM
To: Uri Habusha; ofw at lists.openfabrics.org; Smith, Stan; Fab Tillier
Subject: RE: Add Assert to debug BS while deleting WDF device

> [Uri Habusha] So if destroy is only set once under lock, why you need to
> save ControlDevice variable. You can use it directly.

Because we may be trying to create a new control device from a simultaneous call to power entry.

Once we release the lock in power exit, the device list is now empty.  If power entry is called, it will add a new device to the list, which will create the control device.  Power exit must destroy the correct control device, not the new one.  That's why we save ControlDevice to a local variable.

Now, if power management IRPs are completely serialized across different devices, then this can be simplified, but I don't believe this to be the case.  This also means that we could see something like:

enter power entry for device A
-- we create control device 1
return from power entry for device A
enter power exit for device A
-- we need to destroy control device 1

enter power entry for device B
-- we need to create control device 2
return from power entry for device B
enter power exit for device B
-- we need to destroy control device 2

-- now we have 2 threads in power exit, both of which are destroying control device --

-- control device 1 or 2 must be destroyed by here (order is unknown)
return from power exit for device A or B (order is unknown)
-- control device 1 or 2 must be destroyed by here (order is unknown)
return from power exit for device A or B (order is unknown)

Let me see if additional locking can be provided to completely serialize the device add/remove power entry/exit handling.

In any case, this is the reason why the code is structured the way that it is.  However, this does not appear to be the reason why WdfDeviceCreateSymbolicLink is failing.  To handle the blue screen, the patch I sent to set ControlDevice=NULL on failure with a check in power exit should be sufficient.  Any change to locking would simply be to improve the readability of the code.

- Sean
-------------- next part --------------
A non-text attachment was scrubbed...
Name: winmad.patch
Type: application/octet-stream
Size: 4564 bytes
Desc: winmad.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101206/39e62650/attachment.obj>


More information about the ofw mailing list