[ofw] [PATCH] winverbs: process connect and accept asynchronously

Sean Hefty sean.hefty at intel.com
Fri Jun 5 16:04:53 PDT 2009


Allow processing of EP:Connect and EP:Accept calls asynchronously.  The
librdmacm uses events to report the completion of rdma_connect and
rdma_accept calls, which allows users of that interface to take advantage
of asynchronous operation.  Modify the winverbs kernel driver to
queue connect/accept calls to a system thread for better parallelism.

This improves the measured connection rate of rdma_cmatose by 3%.  The
connection rate includes address resolution, route resolution, PD/CQ/QP
creation and state transitions, memory registration, posting of receive
buffers, and CM message exchanges.  In effect, this patch improves only
the parallelism of modify QP.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
I tested a similar patch for the disconnect path, but because the
librdmacm handles rdma_disconnect synchronously, the driver change slightly
lowers that call's performance.

I'm still working on measuring the individual steps of connection
establishment under load.

Index: core/winverbs/kernel/wv_ep.c
===================================================================
--- core/winverbs/kernel/wv_ep.c	(revision 2201)
+++ core/winverbs/kernel/wv_ep.c	(working copy)
@@ -409,7 +409,6 @@
 
 static void WvEpDisconnectHandler(WORK_ENTRY *pWork)
 {
-	WORK_ENTRY			*work;
 	WV_PROVIDER			*prov;
 	WDFREQUEST			request;
 	WV_IO_EP_DISCONNECT	*pattr;
@@ -543,8 +542,10 @@
 	return STATUS_SUCCESS;
 }
 
-void WvEpConnect(WV_PROVIDER *pProvider, WDFREQUEST Request)
+void WvEpConnectHandler(WORK_ENTRY *pWork)
 {
+	WV_PROVIDER			*prov;
+	WDFREQUEST			request;
 	WV_IO_EP_CONNECT	*pattr;
 	WV_ENDPOINT			*ep;
 	WV_QUEUE_PAIR		*qp;
@@ -552,7 +553,10 @@
 	NTSTATUS			status;
 	UINT8				data[IB_REQ_PDATA_SIZE];
 
-	status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_CONNECT),
+	request = (WDFREQUEST) pWork->Context;
+	prov = WvProviderGetContext(WdfRequestGetFileObject(request));
+
+	status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_CONNECT),
 										   &pattr, NULL);
 	if (!NT_SUCCESS(status)) {
 		goto complete;
@@ -563,13 +567,13 @@
 		goto complete;
 	}
 
-	ep = WvEpAcquire(pProvider, pattr->Id);
+	ep = WvEpAcquire(prov, pattr->Id);
 	if (ep == NULL) {
 		status = STATUS_NOT_FOUND;
 		goto complete;
 	}
 
-	qp = WvQpAcquire(pProvider, pattr->QpId);
+	qp = WvQpAcquire(prov, pattr->QpId);
 	if (qp == NULL) {
 		status = STATUS_NOT_FOUND;
 		goto release;
@@ -617,7 +621,7 @@
 	ep->State = WvEpActiveConnect;
 	status = IbCmInterface.CM.send_req(ep->pIbCmId, &req);
 	if (NT_SUCCESS(status)) {
-		status = WdfRequestForwardToIoQueue(Request, ep->Queue);
+		status = WdfRequestForwardToIoQueue(request, ep->Queue);
 	}
 
 	if (!NT_SUCCESS(status)) {
@@ -629,10 +633,25 @@
 	WvEpRelease(ep);
 complete:
 	if (!NT_SUCCESS(status)) {
-		WdfRequestComplete(Request, status);
+		WdfRequestComplete(request, status);
 	}
 }
 
+static void WvEpProcessAsync(WV_PROVIDER *pProvider, WDFREQUEST Request,
+							 void (*AsyncHandler)(struct _WORK_ENTRY *Work))
+{
+	WORK_ENTRY	*work;
+
+	work = WorkEntryFromIrp(WdfRequestWdmGetIrp(Request));
+	WorkEntryInit(work, AsyncHandler, Request);
+	WorkQueueInsert(&pProvider->WorkQueue, work);
+}
+
+void WvEpConnect(WV_PROVIDER *pProvider, WDFREQUEST Request)
+{
+	WvEpProcessAsync(pProvider, Request, WvEpConnectHandler);
+}
+
 static NTSTATUS WvEpModifyQpRtr(WV_ENDPOINT *pEndpoint, WV_QUEUE_PAIR *pQp,
 								UINT64 ResponderResources, UINT32 Psn,
 								UINT8 *pVerbsData, UINT32 VerbsSize)
@@ -792,21 +811,26 @@
 	return status;
 }
 
-void WvEpAccept(WV_PROVIDER *pProvider, WDFREQUEST Request)
+void WvEpAcceptHandler(WORK_ENTRY *pWork)
 {
+	WV_PROVIDER			*prov;
+	WDFREQUEST			request;
 	WV_IO_EP_ACCEPT		*pattr;
 	WV_ENDPOINT			*ep;
 	NTSTATUS			status;
 	UINT8				*out;
 	size_t				outlen;
 
-	status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_ACCEPT),
+	request = (WDFREQUEST) pWork->Context;
+	prov = WvProviderGetContext(WdfRequestGetFileObject(request));
+
+	status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_ACCEPT),
 										   &pattr, NULL);
 	if (!NT_SUCCESS(status)) {
 		goto complete;
 	}
 
-	status = WdfRequestRetrieveOutputBuffer(Request, 0, &out, &outlen);
+	status = WdfRequestRetrieveOutputBuffer(request, 0, &out, &outlen);
 	if (!NT_SUCCESS(status) && status != STATUS_BUFFER_TOO_SMALL) {
 		goto complete;
 	}
@@ -816,7 +840,7 @@
 		goto complete;
 	}
 
-	ep = WvEpAcquire(pProvider, pattr->Id);
+	ep = WvEpAcquire(prov, pattr->Id);
 	if (ep == NULL) {
 		status = STATUS_NOT_FOUND;
 		goto complete;
@@ -825,10 +849,10 @@
 	/* EP state is re-checked under lock in WvEpAccept* calls */
 	switch (ep->State) {
 	case WvEpActiveConnect:
-		status = WvEpAcceptActive(Request, out, outlen, ep, pattr);
+		status = WvEpAcceptActive(request, out, outlen, ep, pattr);
 		break;
 	case WvEpPassiveConnect:
-		status = WvEpAcceptPassive(Request, out, outlen, ep, pattr);
+		status = WvEpAcceptPassive(request, out, outlen, ep, pattr);
 		break;
 	default:
 		status = STATUS_NOT_SUPPORTED;
@@ -838,10 +862,15 @@
 	WvEpRelease(ep);
 complete:
 	if (!NT_SUCCESS(status)) {
-		WdfRequestComplete(Request, status);
+		WdfRequestComplete(request, status);
 	}
 }
 
+void WvEpAccept(WV_PROVIDER *pProvider, WDFREQUEST Request)
+{
+	WvEpProcessAsync(pProvider, Request, WvEpAcceptHandler);
+}
+
 void WvEpReject(WV_PROVIDER *pProvider, WDFREQUEST Request)
 {
 	WV_IO_ID			*id;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: wvep.patch
Type: application/octet-stream
Size: 4422 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090605/d6dc1e40/attachment.obj>


More information about the ofw mailing list