[ofw] [RFC] [PATCH 2/2 v2] winverbs: transition QP to error on disconnect
Sean Hefty
sean.hefty at intel.com
Thu May 21 14:42:15 PDT 2009
The QP transition into the error state must occur after a DREQ has been
received and acknowledged by the user (by a subsequent call to Disconnect),
or after a DREP has been received. The current winverbs API requires the
user to call QP:Modify after their NotifyDisconnect completes. This
presents challenges to implementing an ND provider, which expects a single
function call to perform both operations.
Unlike during connection establishment, the QP transition to error must
sometimes be delayed until after a CM callback. Because CM callbacks
run at DISPATCH_LEVEL, we must queue the modify call to a system worker
thread. To complicate matters, the QP, the EP, or both may be destroyed
before the queued work runs, or a device removal may have occurred. To
handle this, the work handler must acquire the EP and QP and verify that
they are still valid.
Regardless of the outcome of the disconnect attempt or other failures,
the driver tries to transition the QP to error. This results in some
minor checks to ensure that the correct status is reported to the user.
A couple of additional changes were made to the Accept path to keep the
code consistent, since both Accept and Disconnect have active/passive
code paths.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/kernel/wv_driver.c
branches\winverbs\core\winverbs/kernel/wv_driver.c
--- trunk\core\winverbs/kernel/wv_driver.c 2009-04-28 14:53:06.404875000 -0700
+++ branches\winverbs\core\winverbs/kernel/wv_driver.c 2009-05-21 13:16:20.913604400 -0700
@@ -47,7 +47,6 @@
#include "wv_ep.h"
WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WV_RDMA_DEVICE, WvRdmaDeviceGetContext)
-WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WV_PROVIDER, WvProviderGetContext)
WDFDEVICE ControlDevice;
static LIST_ENTRY DevList;
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/kernel/wv_ep.c
branches\winverbs\core\winverbs/kernel/wv_ep.c
--- trunk\core\winverbs/kernel/wv_ep.c 2009-04-01 15:51:01.289540300 -0700
+++ branches\winverbs\core\winverbs/kernel/wv_ep.c 2009-05-21 13:18:17.651802400 -0700
@@ -369,6 +369,121 @@ static void WvEpSaveReject(WV_ENDPOINT *
pEndpoint->Attributes.Param.Connect.DataLength = len;
}
+static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
+ UINT8 *pVerbsData, UINT32 VerbsSize)
+{
+ ib_qp_mod_t attr;
+ ib_api_status_t ib_status;
+ NTSTATUS status;
+
+ attr.req_state = IB_QPS_ERROR;
+ ib_status = pQp->pVerbs->ndi_modify_qp(pQp->hVerbsQp, &attr, NULL,
+ VerbsSize, pVerbsData);
+ if (ib_status != IB_SUCCESS) {
+ return STATUS_UNSUCCESSFUL;
+ }
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS WvEpDisconnectQp(WV_PROVIDER *pProvider, UINT64 QpId,
+ UINT8 *pVerbsData, UINT32 VerbsSize)
+{
+ WV_QUEUE_PAIR *qp;
+ NTSTATUS status;
+
+ if (QpId == 0) {
+ return STATUS_SUCCESS;
+ }
+
+ qp = WvQpAcquire(pProvider, QpId);
+ if (qp == NULL) {
+ return STATUS_NOT_FOUND;
+ }
+
+ status = WvEpModifyQpErr(qp, pVerbsData, VerbsSize);
+ WvQpRelease(qp);
+
+ return status;
+}
+
+static void WvEpDisconnectHandler(PDEVICE_OBJECT DeviceObject, PVOID Context)
+{
+ WV_PROVIDER *prov;
+ WDFREQUEST request;
+ PIRP irp;
+ WV_IO_EP_DISCONNECT *pattr;
+ UINT8 *out;
+ size_t outlen = 0;
+ NTSTATUS status;
+
+ request = (WDFREQUEST) Context;
+ irp = WdfRequestWdmGetIrp(request);
+ prov = WvProviderGetContext(WdfRequestGetFileObject(request));
+
+ status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_DISCONNECT),
+ &pattr, NULL);
+ if (!NT_SUCCESS(status)) {
+ goto complete;
+ }
+
+ status = WdfRequestRetrieveOutputBuffer(request, 0, &out, &outlen);
+ if (!NT_SUCCESS(status) && status != STATUS_BUFFER_TOO_SMALL) {
+ goto complete;
+ }
+
+ status = (NTSTATUS) (ULONG_PTR) irp->Tail.Overlay.DriverContext[1];
+ if (NT_SUCCESS(status)) {
+ status = WvEpDisconnectQp(prov, pattr->QpId, out, outlen);
+ } else {
+ WvEpDisconnectQp(prov, pattr->QpId, out, outlen);
+ }
+
+complete:
+ IoFreeWorkItem(irp->Tail.Overlay.DriverContext[0]);
+ WdfRequestCompleteWithInformation(request, status, outlen);
+}
+
+static void WvEpCompleteDisconnect(WV_ENDPOINT *pEndpoint, NTSTATUS DiscStatus)
+{
+ WDFREQUEST request;
+ WDF_REQUEST_PARAMETERS param;
+ WDFDEVICE device;
+ PIRP irp;
+ PIO_WORKITEM work;
+ NTSTATUS status;
+
+ WdfObjectAcquireLock(pEndpoint->Queue);
+ pEndpoint->State = WvEpDisconnected;
+
+ status = WdfIoQueueRetrieveNextRequest(pEndpoint->Queue, &request);
+ while (NT_SUCCESS(status)) {
+ WdfObjectReleaseLock(pEndpoint->Queue);
+
+ WDF_REQUEST_PARAMETERS_INIT(&param);
+ WdfRequestGetParameters(request, &param);
+ if (param.Parameters.DeviceIoControl.IoControlCode == WV_IOCTL_EP_DISCONNECT) {
+ device = WdfFileObjectGetDevice(WdfRequestGetFileObject(request));
+ work = IoAllocateWorkItem(WdfDeviceWdmGetDeviceObject(device));
+ if (work == NULL) {
+ WdfRequestComplete(request, !NT_SUCCESS(DiscStatus) ?
+ DiscStatus : STATUS_INSUFFICIENT_RESOURCES);
+ } else {
+ irp = WdfRequestWdmGetIrp(request);
+ irp->Tail.Overlay.DriverContext[0] = work;
+ irp->Tail.Overlay.DriverContext[1] = (PVOID) (ULONG_PTR) DiscStatus;
+ IoQueueWorkItem(work, WvEpDisconnectHandler, DelayedWorkQueue, request);
+ }
+ } else {
+ WdfRequestComplete(request, DiscStatus);
+ }
+
+ WdfObjectAcquireLock(pEndpoint->Queue);
+ status = WdfIoQueueRetrieveNextRequest(pEndpoint->Queue, &request);
+ }
+ WdfObjectReleaseLock(pEndpoint->Queue);
+}
+
static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent)
{
WV_ENDPOINT *ep;
@@ -377,12 +492,14 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_i
switch (pEvent->type) {
case iba_cm_req_error:
case iba_cm_rep_error:
- case iba_cm_dreq_error:
WdfObjectAcquireLock(ep->Queue);
ep->State = WvEpDisconnected;
WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);
WdfObjectReleaseLock(ep->Queue);
break;
+ case iba_cm_dreq_error:
+ WvEpCompleteDisconnect(ep, STATUS_IO_TIMEOUT);
+ break;
case iba_cm_rep_received:
WdfObjectAcquireLock(ep->Queue);
if (ep->State == WvEpActiveConnect) {
@@ -394,8 +511,8 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_i
case iba_cm_rtu_received:
WdfObjectAcquireLock(ep->Queue);
if (ep->State == WvEpPassiveConnect) {
- WvCompleteRequestsWithInformation(ep->Queue, STATUS_SUCCESS);
ep->State = WvEpConnected;
+ WvCompleteRequestsWithInformation(ep->Queue, STATUS_SUCCESS);
}
WdfObjectReleaseLock(ep->Queue);
break;
@@ -403,18 +520,15 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_i
WdfObjectAcquireLock(ep->Queue);
if (ep->State == WvEpConnected) {
ep->State = WvEpPassiveDisconnect;
+ WvCompleteRequests(ep->Queue, STATUS_SUCCESS);
+ WdfObjectReleaseLock(ep->Queue);
} else {
- ep->State = WvEpDisconnected;
- IbCmInterface.CM.send_drep(pId, NULL, 0);
+ WdfObjectReleaseLock(ep->Queue);
+ WvEpCompleteDisconnect(ep, STATUS_SUCCESS);
}
- WvCompleteRequests(ep->Queue, STATUS_SUCCESS);
- WdfObjectReleaseLock(ep->Queue);
break;
case iba_cm_drep_received:
- WdfObjectAcquireLock(ep->Queue);
- ep->State = WvEpDisconnected;
- WvCompleteRequests(ep->Queue, STATUS_SUCCESS);
- WdfObjectReleaseLock(ep->Queue);
+ WvEpCompleteDisconnect(ep, STATUS_SUCCESS);
break;
case iba_cm_rej_received:
WdfObjectAcquireLock(ep->Queue);
@@ -619,7 +733,9 @@ static NTSTATUS WvEpAcceptActive(WDFREQU
pEndpoint->State = WvEpConnected;
status = IbCmInterface.CM.send_rtu(pEndpoint->pIbCmId, pAttr->Param.Data,
pAttr->Param.DataLength);
- if (!NT_SUCCESS(status)) {
+ if (NT_SUCCESS(status)) {
+ WdfRequestCompleteWithInformation(Request, status, VerbsSize);
+ } else {
pEndpoint->State = WvEpDisconnected;
}
@@ -715,16 +831,13 @@ void WvEpAccept(WV_PROVIDER *pProvider,
goto complete;
}
+ /* EP state is re-checked under lock in WvEpAccept* calls */
switch (ep->State) {
case WvEpActiveConnect:
status = WvEpAcceptActive(Request, out, outlen, ep, pattr);
break;
case WvEpPassiveConnect:
status = WvEpAcceptPassive(Request, out, outlen, ep, pattr);
- if (NT_SUCCESS(status)) {
- WvEpRelease(ep);
- return;
- }
break;
default:
status = STATUS_NOT_SUPPORTED;
@@ -733,7 +846,9 @@ void WvEpAccept(WV_PROVIDER *pProvider,
WvEpRelease(ep);
complete:
- WdfRequestComplete(Request, status);
+ if (!NT_SUCCESS(status)) {
+ WdfRequestComplete(Request, status);
+ }
}
void WvEpReject(WV_PROVIDER *pProvider, WDFREQUEST Request)
@@ -771,43 +886,112 @@ complete:
WdfRequestComplete(Request, status);
}
+static NTSTATUS WvEpDisconnectActive(WDFREQUEST Request,
+ UINT8 *pVerbsData, size_t VerbsSize,
+ WV_ENDPOINT *pEndpoint,
+ WV_IO_EP_DISCONNECT *pAttr)
+{
+ NTSTATUS status, failure;
+
+ WdfObjectAcquireLock(pEndpoint->Queue);
+ if (pEndpoint->State != WvEpConnected) {
+ status = STATUS_NOT_SUPPORTED;
+ goto release;
+ }
+
+ pEndpoint->State = WvEpActiveDisconnect;
+ IbCmInterface.CM.send_dreq(pEndpoint->pIbCmId, NULL, 0);
+
+ status = WdfRequestForwardToIoQueue(Request, pEndpoint->Queue);
+ if (!NT_SUCCESS(status)) {
+ pEndpoint->State = WvEpDisconnected;
+ WvCompleteRequests(pEndpoint->Queue, STATUS_UNSUCCESSFUL);
+ WdfObjectReleaseLock(pEndpoint->Queue);
+
+ failure = status;
+ status = WvEpDisconnectQp(pEndpoint->pProvider, pAttr->QpId,
+ pVerbsData, VerbsSize);
+ if (NT_SUCCESS(status)) {
+ WdfRequestCompleteWithInformation(Request, failure, VerbsSize);
+ }
+ return status;
+ }
+
+release:
+ WdfObjectReleaseLock(pEndpoint->Queue);
+ return status;
+}
+
+static NTSTATUS WvEpDisconnectPassive(WDFREQUEST Request,
+ UINT8 *pVerbsData, size_t VerbsSize,
+ WV_ENDPOINT *pEndpoint,
+ WV_IO_EP_DISCONNECT *pAttr)
+{
+ NTSTATUS status;
+
+ WdfObjectAcquireLock(pEndpoint->Queue);
+ if (pEndpoint->State != WvEpPassiveDisconnect) {
+ WdfObjectReleaseLock(pEndpoint->Queue);
+ return STATUS_NOT_SUPPORTED;
+ }
+
+ pEndpoint->State = WvEpDisconnected;
+ WdfObjectReleaseLock(pEndpoint->Queue);
+
+ IbCmInterface.CM.send_drep(pEndpoint->pIbCmId, NULL, 0);
+
+ status = WvEpDisconnectQp(pEndpoint->pProvider, pAttr->QpId,
+ pVerbsData, VerbsSize);
+ if (NT_SUCCESS(status)) {
+ WdfRequestCompleteWithInformation(Request, status, VerbsSize);
+ }
+
+ return status;
+}
+
void WvEpDisconnect(WV_PROVIDER *pProvider, WDFREQUEST Request)
{
- UINT64 *id;
+ WV_IO_EP_DISCONNECT *pattr;
WV_ENDPOINT *ep;
NTSTATUS status;
+ UINT8 *out;
+ size_t outlen;
- status = WdfRequestRetrieveInputBuffer(Request, sizeof(UINT64), &id, NULL);
+ status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_DISCONNECT),
+ &pattr, NULL);
if (!NT_SUCCESS(status)) {
goto complete;
}
- ep = WvEpAcquire(pProvider, *id);
+ status = WdfRequestRetrieveOutputBuffer(Request, 0, &out, &outlen);
+ if (!NT_SUCCESS(status) && status != STATUS_BUFFER_TOO_SMALL) {
+ goto complete;
+ }
+
+ ep = WvEpAcquire(pProvider, pattr->Id);
if (ep == NULL) {
status = STATUS_NOT_FOUND;
goto complete;
}
- WdfObjectAcquireLock(ep->Queue);
+ /* EP state is re-checked under lock in WvEpDisconnect* calls */
switch (ep->State) {
case WvEpConnected:
- ep->State = WvEpActiveDisconnect;
- status = IbCmInterface.CM.send_dreq(ep->pIbCmId, NULL, 0);
+ status = WvEpDisconnectActive(Request, out, outlen, ep, pattr);
break;
case WvEpPassiveDisconnect:
- ep->State = WvEpDisconnected;
- IbCmInterface.CM.send_drep(ep->pIbCmId, NULL, 0);
- status = STATUS_SUCCESS;
+ status = WvEpDisconnectPassive(Request, out, outlen, ep, pattr);
break;
default:
status = STATUS_NOT_SUPPORTED;
break;
}
- WdfObjectReleaseLock(ep->Queue);
WvEpRelease(ep);
complete:
- WdfRequestComplete(Request, status);
+ if (!NT_SUCCESS(status)) {
+ WdfRequestComplete(Request, status);
+ }
}
void WvEpDisconnectNotify(WV_PROVIDER *pProvider, WDFREQUEST Request)
@@ -1052,23 +1236,15 @@ void WvEpMulticastLeave(WV_PROVIDER *pPr
void WvEpCancelListen(WV_ENDPOINT *pListen)
{
WV_ENDPOINT *ep;
- WDFREQUEST request;
- NTSTATUS status;
WdfObjectAcquireLock(pListen->Queue);
- status = WdfIoQueueRetrieveNextRequest(pListen->Queue, &request);
-
- while (NT_SUCCESS(status)) {
- WdfRequestComplete(request, STATUS_CANCELLED);
- status = WdfIoQueueRetrieveNextRequest(pListen->Queue, &request);
- }
+ WvCompleteRequests(pListen->Queue, STATUS_CANCELLED);
while (!IsListEmpty(&pListen->Entry)) {
ep = CONTAINING_RECORD(RemoveHeadList(&pListen->Entry), WV_ENDPOINT, Entry);
ep->State = WvEpIdle;
WvEpPut(ep);
}
-
WdfObjectReleaseLock(pListen->Queue);
}
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/kernel/wv_provider.h
branches\winverbs\core\winverbs/kernel/wv_provider.h
--- trunk\core\winverbs/kernel/wv_provider.h 2009-01-24 11:41:22.140625000 -0800
+++ branches\winverbs\core\winverbs/kernel/wv_provider.h 2009-05-21 13:16:36.711086000 -0700
@@ -66,6 +66,8 @@ typedef struct _WV_PROVIDER
} WV_PROVIDER;
+WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(WV_PROVIDER, WvProviderGetContext)
+
void WvProviderGet(WV_PROVIDER *pProvider);
void WvProviderPut(WV_PROVIDER *pProvider);
void WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider);
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/user/wv_ep.cpp
branches\winverbs\core\winverbs/user/wv_ep.cpp
--- trunk\core\winverbs/user/wv_ep.cpp 2009-03-25 12:11:40.637466100 -0700
+++ branches\winverbs\core\winverbs/user/wv_ep.cpp 2009-05-18 18:04:11.438130700 -0700
@@ -339,13 +339,27 @@ Accept(IWVConnectQueuePair* pQp, WV_CONN
}
STDMETHODIMP CWVConnectEndpoint::
-Disconnect(void)
+Disconnect(IWVConnectQueuePair* pQp, OVERLAPPED* pOverlapped)
{
- DWORD bytes;
- HRESULT hr;
+ CWVConnectQueuePair *qp = (CWVConnectQueuePair *) pQp;
+ WV_IO_EP_DISCONNECT attr;
+ DWORD bytes;
+ HRESULT hr;
+ void *pout;
+ DWORD size;
+
+ attr.Id = m_Id;
+ if (pQp != NULL) {
+ attr.QpId = qp->m_Id;
+ qp->m_pVerbs->nd_modify_qp(qp->m_hVerbsQp, &pout, &size);
+ } else {
+ attr.QpId = 0;
+ pout = NULL;
+ size = 0;
+ }
if (WvDeviceIoControl(m_hFile, WV_IOCTL_EP_DISCONNECT,
- &m_Id, sizeof m_Id, NULL, 0, &bytes, NULL)) {
+ &attr, sizeof attr, pout, size, &bytes, pOverlapped)) {
hr = WV_SUCCESS;
} else {
hr = HRESULT_FROM_WIN32(GetLastError());
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/user/wv_ep.h
branches\winverbs\core\winverbs/user/wv_ep.h
--- trunk\core\winverbs/user/wv_ep.h 2008-12-01 12:41:07.938683200 -0800
+++ branches\winverbs\core\winverbs/user/wv_ep.h 2009-05-18 14:14:09.218872800 -0700
@@ -61,7 +61,7 @@ public:
WV_CONNECT_PARAM* pParam, OVERLAPPED* pOverlapped);
STDMETHODIMP Accept(IWVConnectQueuePair* pQp, WV_CONNECT_PARAM* pParam,
OVERLAPPED* pOverlapped);
- STDMETHODIMP Disconnect();
+ STDMETHODIMP Disconnect(IWVConnectQueuePair* pQp, OVERLAPPED* pOverlapped);
STDMETHODIMP NotifyDisconnect(OVERLAPPED* pOverlapped);
STDMETHODIMP Query(WV_CONNECT_ATTRIBUTES* pAttributes);
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\core\winverbs/wv_ioctl.h
branches\winverbs\core\winverbs/wv_ioctl.h
--- trunk\core\winverbs/wv_ioctl.h 2009-03-27 03:04:48.793750000 -0700
+++ branches\winverbs\core\winverbs/wv_ioctl.h 2009-05-18 15:43:44.348140100 -0700
@@ -272,7 +272,7 @@ enum {
#define WV_IOCTL_EP_ACCEPT WV_IOCTL(WV_IO_FUNCTION_BASE + \
WV_IO_FUNCTION_EP_ACCEPT)
-// UINT64 Id / none
+// WV_IO_EP_DISCONNECT/ verbs specific
#define WV_IOCTL_EP_DISCONNECT WV_IOCTL(WV_IO_FUNCTION_BASE + \
WV_IO_FUNCTION_EP_DISCONNECT)
@@ -737,4 +737,11 @@ typedef struct _WV_IO_EP_ATTRIBUTES
} WV_IO_EP_ATTRIBUTES;
+typedef struct _WV_IO_EP_DISCONNECT
+{
+ UINT64 Id;
+ UINT64 QpId;
+
+} WV_IO_EP_DISCONNECT;
+
#endif // _WV_IOCTL_H_
Only in branches\winverbs\inc\kernel\rdma: _ntstatus_.h
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\inc\user\rdma/winverbs.h
branches\winverbs\inc\user\rdma/winverbs.h
--- trunk\inc\user\rdma/winverbs.h 2009-03-27 03:05:27.028125000 -0700
+++ branches\winverbs\inc\user\rdma/winverbs.h 2009-05-18 15:46:31.724211300 -0700
@@ -1156,9 +1156,9 @@ DECLARE_INTERFACE_(IWVEndpoint, IWVOverl
#undef INTERFACE
#define INTERFACE IWVConnectEndpoint
-// {ac670274-1934-4759-a39c-eee01a8130b3}
-DEFINE_GUID(IID_IWVConnectEndpoint, 0xac670274, 0x1934, 0x4759,
- 0xa3, 0x9c, 0xee, 0xe0, 0x1a, 0x81, 0x30, 0xb3);
+// {B7C6708E-EA9C-4cff-B2CF-35BCA9A3739A}
+DEFINE_GUID(IID_IWVConnectEndpoint, 0xb7c6708e, 0xea9c, 0x4cff,
+ 0xb2, 0xcf, 0x35, 0xbc, 0xa9, 0xa3, 0x73, 0x9a);
DECLARE_INTERFACE_(IWVConnectEndpoint, IWVEndpoint)
{
@@ -1236,7 +1236,9 @@ DECLARE_INTERFACE_(IWVConnectEndpoint, I
) PURE;
STDMETHOD(Disconnect)(
- THIS
+ THIS_
+ __in_opt IWVConnectQueuePair* pQp,
+ __in_opt OVERLAPPED* pOverlapped
) PURE;
STDMETHOD(NotifyDisconnect)(
More information about the ofw
mailing list