[ofw] [PATCH 3/4] ib/cm: poll for CM REQ events
Sean Hefty
sean.hefty at intel.com
Mon Jan 11 22:37:51 PST 2010
Replace the callback mechanism for reporting connection
requests with one that requires the user to poll for the
events. This allows queuing REQs in the CM until the user
is ready to process the events.
A callback mechanism is still provided, but it now serves only to notify
the user that REQ events are ready to be retrieved.
This change improves the connection rate for winverbs when
the user retrieves only a small number of requests at a time.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
This change should be added to WinOF 2.2 once committed to the trunk.
trunk/core/al/kernel/al_cm.c | 53 ++++++++------
trunk/core/al/kernel/al_cm_cep.c | 55 ++++++---------
trunk/core/winverbs/kernel/wv_ep.c | 134 +++++++++++++++++++++---------------
trunk/inc/kernel/iba/ib_cm_ifc.h | 30 +++++++-
4 files changed, 158 insertions(+), 114 deletions(-)
diff --git a/trunk/core/al/kernel/al_cm.c b/trunk/core/al/kernel/al_cm.c
index 177bb9e..8ba28f1 100644
--- a/trunk/core/al/kernel/al_cm.c
+++ b/trunk/core/al/kernel/al_cm.c
@@ -99,36 +99,44 @@ cm_cep_handler(const ib_al_handle_t h_al, const net32_t cid)
static void
cm_listen_handler(const ib_al_handle_t h_al, const net32_t cid)
{
+ iba_cm_id *id;
+ iba_cm_event event;
+
+ id = (iba_cm_id *) kal_cep_get_context(h_al, cid, NULL, NULL);
+ memset(&event, 0, sizeof event);
+ event.type = iba_cm_req_received;
+ id->callback(id, &event);
+}
+
+static NTSTATUS
+cm_get_request(iba_cm_id *p_listen_id, iba_cm_id **pp_id, iba_cm_event *p_event)
+{
void *context;
net32_t new_cid;
ib_mad_element_t *mad;
- iba_cm_id *id, *listen_id;
- iba_cm_event event;
+ ib_api_status_t ib_status;
NTSTATUS status;
- while (al_cep_poll(h_al, cid, &context, &new_cid, &mad) == IB_SUCCESS) {
-
- listen_id = (iba_cm_id *) context;
+ ib_status = al_cep_poll(gh_al, p_listen_id->cid, &context, &new_cid, &mad);
+ if (ib_status != IB_SUCCESS) {
+ return STATUS_NO_MORE_ENTRIES;
+ }
- id = cm_alloc_id(listen_id->callback, listen_id);
- if (id == NULL) {
- kal_cep_destroy(h_al, new_cid, STATUS_NO_MORE_ENTRIES);
- ib_put_mad(mad);
- continue;
- }
+ *pp_id = cm_alloc_id(p_listen_id->callback, p_listen_id);
+ if (*pp_id == NULL) {
+ kal_cep_destroy(gh_al, new_cid, STATUS_NO_MORE_ENTRIES);
+ status = STATUS_NO_MEMORY;
+ goto out;
+ }
- kal_cep_config(h_al, new_cid, cm_cep_handler, id, cm_destroy_handler);
- id->cid = new_cid;
+ kal_cep_config(gh_al, new_cid, cm_cep_handler, *pp_id, cm_destroy_handler);
+ (*pp_id)->cid = new_cid;
+ kal_cep_format_event(gh_al, new_cid, mad, p_event);
+ status = STATUS_SUCCESS;
- kal_cep_format_event(h_al, id->cid, mad, &event);
- status = id->callback(id, &event);
- if (!NT_SUCCESS(status)) {
- kal_cep_config(h_al, new_cid, NULL, NULL, NULL);
- kal_cep_destroy(h_al, id->cid, status);
- cm_free_id(id);
- }
- ib_put_mad(mad);
- }
+out:
+ ib_put_mad(mad);
+ return status;
}
static NTSTATUS
@@ -367,6 +375,7 @@ void cm_get_interface(iba_cm_interface *p_ifc)
p_ifc->create_id = cm_create_id;
p_ifc->destroy_id = cm_destroy_id;
p_ifc->listen = cm_listen;
+ p_ifc->get_request = cm_get_request;
p_ifc->send_req = cm_send_req;
p_ifc->send_rep = cm_send_rep;
p_ifc->send_rtu = cm_send_rtu;
diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c
index 86c5412..9a1d138 100644
--- a/trunk/core/al/kernel/al_cm_cep.c
+++ b/trunk/core/al/kernel/al_cm_cep.c
@@ -5978,38 +5978,32 @@ __format_event_req(kcep_t *p_cep, mad_cm_req_t *p_mad, iba_cm_req_event *p_req)
p_req->remote_ca_guid = p_cep->remote_ca_guid;
p_req->pkey_index = p_cep->av[0].pkey_index;
p_req->port_num = p_cep->av[0].attr.port_num;
- p_req->req.service_id = p_mad->sid;
-
- p_req->req.qpn = conn_req_get_lcl_qpn(p_mad);
- p_req->req.qp_type = conn_req_get_qp_type(p_mad);
- p_req->req.starting_psn = conn_req_get_starting_psn(p_mad);
-
- p_req->req.p_pdata = p_mad->pdata;
- p_req->req.pdata_len = IB_REQ_PDATA_SIZE;
-
- p_req->req.max_cm_retries = conn_req_get_max_cm_retries(p_mad);
- p_req->req.resp_res = conn_req_get_init_depth(p_mad);
- p_req->req.init_depth = conn_req_get_resp_res(p_mad);
- p_req->req.remote_resp_timeout = conn_req_get_resp_timeout(p_mad);
- p_req->req.flow_ctrl = (uint8_t) conn_req_get_flow_ctrl(p_mad);
- p_req->req.local_resp_timeout = conn_req_get_lcl_resp_timeout(p_mad);
- p_req->req.rnr_retry_cnt = conn_req_get_rnr_retry_cnt(p_mad);
- p_req->req.retry_cnt = conn_req_get_retry_cnt(p_mad);
- p_req->req.srq = 0; // TODO: fix mad_cm_req_t
-
- // We can re-use the MAD buffer if we're careful to read out the data
- // that we need before it's overwritten.
- p_req->req.p_primary_path = (ib_path_rec_t *) p_mad;
- __format_path(p_req->req.p_primary_path, &p_mad->primary_path,
+ p_req->service_id = p_mad->sid;
+
+ p_req->qpn = conn_req_get_lcl_qpn(p_mad);
+ p_req->qp_type = conn_req_get_qp_type(p_mad);
+ p_req->starting_psn = conn_req_get_starting_psn(p_mad);
+
+ cl_memcpy(p_req->pdata, p_mad->pdata, IB_REQ_PDATA_SIZE);
+
+ p_req->max_cm_retries = conn_req_get_max_cm_retries(p_mad);
+ p_req->resp_res = conn_req_get_init_depth(p_mad);
+ p_req->init_depth = conn_req_get_resp_res(p_mad);
+ p_req->remote_resp_timeout = conn_req_get_resp_timeout(p_mad);
+ p_req->flow_ctrl = (uint8_t) conn_req_get_flow_ctrl(p_mad);
+ p_req->local_resp_timeout = conn_req_get_lcl_resp_timeout(p_mad);
+ p_req->rnr_retry_cnt = conn_req_get_rnr_retry_cnt(p_mad);
+ p_req->retry_cnt = conn_req_get_retry_cnt(p_mad);
+ p_req->srq = 0; // TODO: fix mad_cm_req_t
+
+ __format_path(&p_req->primary_path, &p_mad->primary_path,
p_mad->pkey, conn_req_get_mtu(p_mad));
if (p_mad->alternate_path.remote_lid != 0) {
- p_req->req.p_alt_path = p_req->req.p_primary_path + 1;
- __format_path(p_req->req.p_alt_path, &p_mad->alternate_path,
- p_req->req.p_primary_path->pkey,
- p_req->req.p_primary_path->mtu);
+ __format_path(&p_req->alt_path, &p_mad->alternate_path,
+ p_req->primary_path.pkey, p_req->primary_path.mtu);
} else {
- p_req->req.p_alt_path = NULL;
+ cl_memclr(&p_req->alt_path, sizeof p_req->alt_path);
}
}
@@ -6647,7 +6641,6 @@ kal_cep_get_context(
AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("[ CID = %d\n", cid) );
CL_ASSERT( h_al );
- CL_ASSERT( pfn_addref );
KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
p_cep = __lookup_cep( h_al, cid );
@@ -6658,7 +6651,7 @@ kal_cep_get_context(
goto out;
}
- if( p_cep->pfn_cb != pfn_cb )
+ if( pfn_cb && p_cep->pfn_cb != pfn_cb )
{
AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
("CEP callback mismatch for cid %d, h_al %p\n", cid, h_al ));
@@ -6666,7 +6659,7 @@ kal_cep_get_context(
}
context = p_cep->context;
- if( context != NULL )
+ if( pfn_addref && context != NULL )
{
pfn_addref( context );
}
diff --git a/trunk/core/winverbs/kernel/wv_ep.c b/trunk/core/winverbs/kernel/wv_ep.c
index 3d5c6ce..3ad91ac 100644
--- a/trunk/core/winverbs/kernel/wv_ep.c
+++ b/trunk/core/winverbs/kernel/wv_ep.c
@@ -1110,60 +1110,75 @@ complete:
WdfRequestComplete(Request, status);
}
-static NTSTATUS WvEpIbListenHandler(iba_cm_id *pId, iba_cm_event *pEvent)
+static void WvEpGetIbRequest(WV_ENDPOINT *pListen)
{
- WV_ENDPOINT *listen, *ep;
+ WV_ENDPOINT *ep;
WDFREQUEST request;
NTSTATUS status;
IB_CMA_HEADER *hdr;
+ iba_cm_id *id;
+ iba_cm_event event;
- listen = ((iba_cm_id *) pId->context)->context;
+ WdfObjectAcquireLock(pListen->Queue);
+ while (1) {
+ status = WdfIoQueueRetrieveNextRequest(pListen->Queue, &request);
+ if (!NT_SUCCESS(status)) {
+ break;
+ }
- WdfObjectAcquireLock(listen->Queue);
- status = WdfIoQueueRetrieveNextRequest(listen->Queue, &request);
- if (!NT_SUCCESS(status)) {
- goto release;
+ status = IbCmInterface.CM.get_request(pListen->pIbCmId, &id, &event);
+ if (!NT_SUCCESS(status)) {
+ WdfRequestRequeue(request);
+ break;
+ }
+
+ ASSERT(!IsListEmpty(&pListen->Entry));
+ ep = CONTAINING_RECORD(RemoveHeadList(&pListen->Entry), WV_ENDPOINT, Entry);
+ ep->pIbCmId = id;
+ id->callback = WvEpIbCmHandler;
+ id->context = ep;
+
+ hdr = (IB_CMA_HEADER *) event.data.req.pdata;
+ if ((hdr->IpVersion >> 4) == 4) {
+ ep->Attributes.LocalAddress.SockAddr.In.SinFamily = WV_AF_INET;
+ ep->Attributes.LocalAddress.SockAddr.In.SinAddr = hdr->DstAddress.Ip4.Address;
+ ep->Attributes.PeerAddress.SockAddr.In.SinFamily = WV_AF_INET;
+ ep->Attributes.PeerAddress.SockAddr.In.SinAddr = hdr->SrcAddress.Ip4.Address;
+ } else {
+ ep->Attributes.LocalAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;
+ RtlCopyMemory(ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr,
+ hdr->DstAddress.Ip6Address, 16);
+ ep->Attributes.PeerAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;
+ RtlCopyMemory(ep->Attributes.PeerAddress.SockAddr.In6.Sin6Addr,
+ hdr->SrcAddress.Ip6Address, 16);
+ }
+ ep->Attributes.Device.DeviceGuid = event.data.req.local_ca_guid;
+ ep->Attributes.Device.Pkey = event.data.req.primary_path.pkey;
+ ep->Attributes.Device.PortNumber = event.data.req.port_num;
+ ep->Attributes.Param.Connect.ResponderResources = event.data.req.resp_res;
+ ep->Attributes.Param.Connect.InitiatorDepth = event.data.req.init_depth;
+ ep->Attributes.Param.Connect.RetryCount = event.data.req.retry_cnt;
+ ep->Attributes.Param.Connect.RnrRetryCount = event.data.req.rnr_retry_cnt;
+ ep->Attributes.Param.Connect.DataLength = sizeof(ep->Attributes.Param.Connect.Data);
+ RtlCopyMemory(ep->Attributes.Param.Connect.Data, hdr + 1,
+ sizeof(ep->Attributes.Param.Connect.Data));
+ ep->Route = event.data.req.primary_path;
+
+ ep->State = WvEpPassiveConnect;
+ WvEpPut(ep);
+
+ WdfRequestComplete(request, STATUS_SUCCESS);
}
+ WdfObjectReleaseLock(pListen->Queue);
+}
- ASSERT(!IsListEmpty(&listen->Entry));
- ep = CONTAINING_RECORD(RemoveHeadList(&listen->Entry), WV_ENDPOINT, Entry);
- ep->pIbCmId = pId;
- pId->callback = WvEpIbCmHandler;
- pId->context = ep;
+static NTSTATUS WvEpIbListenHandler(iba_cm_id *pId, iba_cm_event *pEvent)
+{
+ WV_ENDPOINT *listen;
- hdr = pEvent->data.req.req.p_pdata;
- if ((hdr->IpVersion >> 4) == 4) {
- ep->Attributes.LocalAddress.SockAddr.In.SinFamily = WV_AF_INET;
- ep->Attributes.LocalAddress.SockAddr.In.SinAddr = hdr->DstAddress.Ip4.Address;
- ep->Attributes.PeerAddress.SockAddr.In.SinFamily = WV_AF_INET;
- ep->Attributes.PeerAddress.SockAddr.In.SinAddr = hdr->SrcAddress.Ip4.Address;
- } else {
- ep->Attributes.LocalAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;
- RtlCopyMemory(ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr,
- hdr->DstAddress.Ip6Address, 16);
- ep->Attributes.PeerAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;
- RtlCopyMemory(ep->Attributes.PeerAddress.SockAddr.In6.Sin6Addr,
- hdr->SrcAddress.Ip6Address, 16);
- }
- ep->Attributes.Device.DeviceGuid = pEvent->data.req.local_ca_guid;
- ep->Attributes.Device.Pkey = pEvent->data.req.req.p_primary_path->pkey;
- ep->Attributes.Device.PortNumber = pEvent->data.req.port_num;
- ep->Attributes.Param.Connect.ResponderResources = pEvent->data.req.req.resp_res;
- ep->Attributes.Param.Connect.InitiatorDepth = pEvent->data.req.req.init_depth;
- ep->Attributes.Param.Connect.RetryCount = pEvent->data.req.req.retry_cnt;
- ep->Attributes.Param.Connect.RnrRetryCount = pEvent->data.req.req.rnr_retry_cnt;
- ep->Attributes.Param.Connect.DataLength = sizeof(ep->Attributes.Param.Connect.Data);
- RtlCopyMemory(ep->Attributes.Param.Connect.Data, hdr + 1,
- sizeof(ep->Attributes.Param.Connect.Data));
- ep->Route = *pEvent->data.req.req.p_primary_path;
-
- ep->State = WvEpPassiveConnect;
- WvEpPut(ep);
-
- WdfRequestComplete(request, STATUS_SUCCESS);
-release:
- WdfObjectReleaseLock(listen->Queue);
- return status;
+ listen = pId->context;
+ WvEpGetIbRequest(listen);
+ return STATUS_SUCCESS;
}
void WvEpListen(WV_PROVIDER *pProvider, WDFREQUEST Request)
@@ -1235,24 +1250,24 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_GET_REQUEST),
&req, NULL);
if (!NT_SUCCESS(status)) {
- goto complete;
+ goto err1;
}
listen = WvEpAcquire(pProvider, req->Id);
if (listen == NULL) {
status = STATUS_NOT_FOUND;
- goto complete;
+ goto err1;
}
if (listen->State != WvEpListening) {
status = STATUS_NOT_SUPPORTED;
- goto release1;
+ goto err2;
}
ep = WvEpAcquire(pProvider, req->EpId);
if (ep == NULL) {
status = STATUS_NOT_FOUND;
- goto release1;
+ goto err2;
}
WdfObjectAcquireLock(ep->Queue);
@@ -1262,9 +1277,8 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
status = STATUS_CONNECTION_IN_USE;
}
WdfObjectReleaseLock(ep->Queue);
-
if (!NT_SUCCESS(status)) {
- goto release2;
+ goto err3;
}
WdfObjectAcquireLock(listen->Queue);
@@ -1274,15 +1288,21 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
WvEpGet(ep);
}
WdfObjectReleaseLock(listen->Queue);
+ if (!NT_SUCCESS(status)) {
+ goto err3;
+ }
-release2:
WvEpRelease(ep);
-release1:
+ WvEpGetIbRequest(listen);
WvEpRelease(listen);
-complete:
- if (!NT_SUCCESS(status)) {
- WdfRequestComplete(Request, status);
- }
+ return;
+
+err3:
+ WvEpRelease(ep);
+err2:
+ WvEpRelease(listen);
+err1:
+ WdfRequestComplete(Request, status);
}
void WvEpLookup(WV_PROVIDER *pProvider, WDFREQUEST Request)
diff --git a/trunk/inc/kernel/iba/ib_cm_ifc.h b/trunk/inc/kernel/iba/ib_cm_ifc.h
index 0949482..e895406 100644
--- a/trunk/inc/kernel/iba/ib_cm_ifc.h
+++ b/trunk/inc/kernel/iba/ib_cm_ifc.h
@@ -73,7 +73,27 @@ typedef struct _iba_cm_req
typedef struct _iba_cm_req_event
{
- iba_cm_req req;
+ ib_net64_t service_id;
+
+ ib_path_rec_t primary_path;
+ ib_path_rec_t alt_path;
+
+ net32_t qpn;
+ ib_qp_type_t qp_type;
+ net32_t starting_psn;
+
+ uint8_t pdata[IB_REQ_PDATA_SIZE];
+
+ uint8_t max_cm_retries;
+ uint8_t resp_res;
+ uint8_t init_depth;
+ uint8_t remote_resp_timeout;
+ uint8_t flow_ctrl;
+ uint8_t local_resp_timeout;
+ uint8_t rnr_retry_cnt;
+ uint8_t retry_cnt;
+ uint8_t srq;
+
net64_t local_ca_guid;
net64_t remote_ca_guid;
uint16_t pkey_index;
@@ -239,6 +259,8 @@ typedef struct _iba_cm_interface
NTSTATUS (*listen)(iba_cm_id *p_id, net64_t service_id, void *p_compare_buf,
uint8_t compare_len, uint8_t compare_offset);
+ NTSTATUS (*get_request)(iba_cm_id *p_listen_id, iba_cm_id **pp_id,
+ iba_cm_event *p_event);
NTSTATUS (*send_req)(iba_cm_id *p_id, iba_cm_req *p_req);
NTSTATUS (*send_rep)(iba_cm_id *p_id, iba_cm_rep *p_rep);
@@ -282,9 +304,9 @@ static inline UINT8 IbaCmVersionMinor(USHORT Version)
return (UINT8) Version;
}
-// {EACC1466-BB2D-4478-B5BE-40EDF7EE08AB}
-DEFINE_GUID(GUID_INFINIBAND_INTERFACE_CM, 0xeacc1466, 0xbb2d, 0x4478,
- 0xb5, 0xbe, 0x40, 0xed, 0xf7, 0xee, 0x8, 0xab);
+// {6A11D060-8957-49e6-BE2A-01EDF1BD22B3}
+DEFINE_GUID(GUID_INFINIBAND_INTERFACE_CM, 0x6a11d060, 0x8957, 0x49e6,
+ 0xbe, 0x2a, 0x1, 0xed, 0xf1, 0xbd, 0x22, 0xb3);
typedef struct _INFINIBAND_INTERFACE_CM
{
More information about the ofw mailing list