[ofw] [PATCH] winverbs: fix crash accessing freed memory from async thread

Sean Hefty sean.hefty at intel.com
Thu Sep 17 10:26:30 PDT 2009


If an application exits while asynchronous accept processing is queued,
it's possible for the async processing to access the IbCmId after it has
been freed.  A similar problem to this was fixed that dealt with accessing
the verbs QP handle.

A simpler, more generic solution to this problem is to handle application
exit in the same manner as device removal, and lock the winverb provider
lookup lists with exclusive access.  Asynchronous operations that are in
process will run to completion, and future operations will be blocked until
the provider cleanup thread has completed.  Once they run, they will fail
to acquire a reference on the desired object, which should result in a
graceful failure.

This avoids more complicated locking to use handles belonging to the lower
level code.  If a reference on an object can be acquired, the handle will
be available for use until the reference is released.  To handle IB CM
callbacks, additional state checking is required to avoid processing
CM events when we're trying to destroy the endpoint.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
This should be added to WinOF 2.1


Index: core/winverbs/kernel/wv_ep.c
===================================================================
--- core/winverbs/kernel/wv_ep.c	(revision 2406)
+++ core/winverbs/kernel/wv_ep.c	(working copy)
@@ -57,7 +57,7 @@
 	KeAcquireGuardedMutex(&pProvider->Lock);
 	WvProviderDisableRemove(pProvider);
 	ep = IndexListAt(&pProvider->EpIndex, (SIZE_T) Id);
-	if (ep != NULL) {
+	if (ep != NULL && ep->State != WvEpDestroying) {
 		WvEpGet(ep);
 	} else {
 		ep = NULL;
@@ -185,14 +185,14 @@
 	pEndpoint->State = WvEpDestroying;
 	WdfObjectReleaseLock(pEndpoint->Queue);
 
-	if (pEndpoint->pIbCmId != NULL) {
-		IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);
-	}
-
 	if (InterlockedDecrement(&pEndpoint->Ref) > 0) {
 		KeWaitForSingleObject(&pEndpoint->Event, Executive, KernelMode,
FALSE, NULL);
 	}
 
+	if (pEndpoint->pIbCmId != NULL) {
+		IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);
+	}
+
 	WdfIoQueuePurgeSynchronously(pEndpoint->Queue);
 	WdfObjectDelete(pEndpoint->Queue);
 	ExFreePoolWithTag(pEndpoint, 'pevw');
@@ -373,10 +373,6 @@
 	pEndpoint->Attributes.Param.Connect.DataLength = len;
 }
 
-/*
- * The QP transition to error may be done from an async worker thread.
- * Synchronize against application exit.
- */
 static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
 								UINT8
*pVerbsData, UINT32 VerbsSize)
 {
@@ -384,12 +380,6 @@
 	ib_api_status_t		ib_status;
 	NTSTATUS			status;
 
-	KeAcquireGuardedMutex(&pQp->Lock);
-	if (pQp->hVerbsQp == NULL) {
-		status = STATUS_NOT_FOUND;
-		goto out;
-	}
-
 	attr.req_state = IB_QPS_ERROR;
 	ib_status = pQp->pVerbs->ndi_modify_qp(pQp->hVerbsQp, &attr, NULL,
 
VerbsSize, pVerbsData);
@@ -399,8 +389,6 @@
 		status = STATUS_UNSUCCESSFUL;
 	}
 
-out:
-	KeReleaseGuardedMutex(&pQp->Lock);
 	return status;
 }
 
@@ -505,10 +493,19 @@
 	ep = pId->context;
 	switch (pEvent->type) {
 	case iba_cm_req_error:
+		WdfObjectAcquireLock(ep->Queue);
+		if (ep->State == WvEpActiveConnect) {
+			ep->State = WvEpDisconnected;
+			WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);
+		}
+		WdfObjectReleaseLock(ep->Queue);
+		break;
 	case iba_cm_rep_error:
 		WdfObjectAcquireLock(ep->Queue);
-		ep->State = WvEpDisconnected;
-		WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);
+		if (ep->State == WvEpPassiveConnect) {
+			ep->State = WvEpDisconnected;
+			WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);
+		}
 		WdfObjectReleaseLock(ep->Queue);
 		break;
 	case iba_cm_dreq_error:
@@ -557,8 +554,10 @@
 		break;
 	default:
 		WdfObjectAcquireLock(ep->Queue);
-		ep->State = WvEpDisconnected;
-		WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);
+		if (ep->State != WvEpDestroying) {
+			ep->State = WvEpDisconnected;
+			WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);
+		}
 		WdfObjectReleaseLock(ep->Queue);
 		break;
 	}
Index: core/winverbs/kernel/wv_provider.c
===================================================================
--- core/winverbs/kernel/wv_provider.c	(revision 2388)
+++ core/winverbs/kernel/wv_provider.c	(working copy)
@@ -53,6 +53,38 @@
 	}
 }
 
+// See comment above WvProviderRemoveHandler.
+static void WvProviderLockRemove(WV_PROVIDER *pProvider)
+{
+	KeAcquireGuardedMutex(&pProvider->Lock);
+	pProvider->Exclusive++;
+	KeClearEvent(&pProvider->SharedEvent);
+	while (pProvider->Active > 0) {
+		KeReleaseGuardedMutex(&pProvider->Lock);
+DbgPrint("WvProviderLockRemove - someone is active - waiting\n");
+		KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive,
KernelMode,
+							  FALSE, NULL);
+DbgPrint("WvProviderLockRemove - signaled - continuing\n");
+		KeAcquireGuardedMutex(&pProvider->Lock);
+	}
+	pProvider->Active++;
+	KeReleaseGuardedMutex(&pProvider->Lock);
+}
+
+// See comment above WvProviderRemoveHandler.
+static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)
+{
+	KeAcquireGuardedMutex(&pProvider->Lock);
+	pProvider->Exclusive--;
+	pProvider->Active--;
+	if (pProvider->Exclusive > 0) {
+		KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);
+	} else if (pProvider->Pending > 0) {
+		KeSetEvent(&pProvider->SharedEvent, 0, FALSE);
+	}
+	KeReleaseGuardedMutex(&pProvider->Lock);
+}
+
 NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider)
 {
 	NTSTATUS status;
@@ -83,6 +115,15 @@
 	return STATUS_SUCCESS;
 }
 
+/*
+ * We could be processing asynchronous requests or have them queued
+ * when cleaning up.  To synchronize with async request processing,
+ * acquire the provider lock with exclusive access until we're done
+ * destroying all resoureces.  This will allow active requests to
+ * complete their processing, but prevent queued requests from
+ * running until cleanup is done.  At that point, queue requests will
+ * be unable to acquire any winverbs resources.
+ */
 void WvProviderCleanup(WV_PROVIDER *pProvider)
 {
 	WV_DEVICE				*dev;
@@ -95,6 +136,7 @@
 	WV_ENDPOINT				*ep;
 	SIZE_T					i;
 
+	WvProviderLockRemove(pProvider);
 	IndexListForEach(&pProvider->EpIndex, i) {
 		ep = (WV_ENDPOINT *) IndexListAt(&pProvider->EpIndex, i);
 		if (ep->State == WvEpListening) {
@@ -131,6 +173,7 @@
 	while ((dev = IndexListRemoveHead(&pProvider->DevIndex)) != NULL) {
 		WvDeviceFree(dev);
 	}
+	WvProviderUnlockRemove(pProvider);
 
 	if (InterlockedDecrement(&pProvider->Ref) > 0) {
 		KeWaitForSingleObject(&pProvider->Event, Executive, KernelMode,
FALSE, NULL);
@@ -147,36 +190,6 @@
 	WorkQueueDestroy(&pProvider->WorkQueue);
 }
 
-// See comment above WvProviderRemoveHandler.
-static void WvProviderLockRemove(WV_PROVIDER *pProvider)
-{
-	KeAcquireGuardedMutex(&pProvider->Lock);
-	pProvider->Exclusive++;
-	KeClearEvent(&pProvider->SharedEvent);
-	while (pProvider->Active > 0) {
-		KeReleaseGuardedMutex(&pProvider->Lock);
-		KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive,
KernelMode,
-							  FALSE, NULL);
-		KeAcquireGuardedMutex(&pProvider->Lock);
-	}
-	pProvider->Active++;
-	KeReleaseGuardedMutex(&pProvider->Lock);
-}
-
-// See comment above WvProviderRemoveHandler.
-static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)
-{
-	KeAcquireGuardedMutex(&pProvider->Lock);
-	pProvider->Exclusive--;
-	pProvider->Active--;
-	if (pProvider->Exclusive > 0) {
-		KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);
-	} else if (pProvider->Pending > 0) {
-		KeSetEvent(&pProvider->SharedEvent, 0, FALSE);
-	}
-	KeReleaseGuardedMutex(&pProvider->Lock);
-}
-
 /*
  * Must hold pProvider->Lock.  Function may release and re-acquire.
  * See comment above WvProviderRemoveHandler.
Index: core/winverbs/kernel/wv_qp.c
===================================================================
--- core/winverbs/kernel/wv_qp.c	(revision 2406)
+++ core/winverbs/kernel/wv_qp.c	(working copy)
@@ -519,21 +519,11 @@
 	WdfRequestComplete(Request, status);
 }
 
-/*
- * Synchronize freeing the QP due to application exit with asynchronous
- * disconnect processing transitioning the QP into the error state.
- */
 void WvQpFree(WV_QUEUE_PAIR *pQp)
 {
 	WV_MULTICAST		*mc;
 	LIST_ENTRY			*entry;
-	ib_qp_handle_t		hqp;
 
-	KeAcquireGuardedMutex(&pQp->Lock);
-	hqp = pQp->hVerbsQp;
-	pQp->hVerbsQp = NULL;
-	KeReleaseGuardedMutex(&pQp->Lock);
-
 	while ((entry = RemoveHeadList(&pQp->McList)) != &pQp->McList) {
 		mc = CONTAINING_RECORD(entry, WV_MULTICAST, Entry);
 		if (mc->hVerbsMc != NULL) {
@@ -546,8 +536,8 @@
 		KeWaitForSingleObject(&pQp->Event, Executive, KernelMode, FALSE,
NULL);
 	}
 
-	if (hqp != NULL) {
-		pQp->pVerbs->destroy_qp(hqp, 0);
+	if (pQp->hVerbsQp != NULL) {
+		pQp->pVerbs->destroy_qp(pQp->hVerbsQp, 0);
 	}
 
 	if (pQp->pSrq != NULL) {

-------------- next part --------------
A non-text attachment was scrubbed...
Name: wv-async-free-race.diff
Type: application/octet-stream
Size: 7528 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090917/7fb81a0e/attachment.obj>


More information about the ofw mailing list