[ofw] [PATCH 4/4] librdmacm: fix connection scalability limit
Sean Hefty
sean.hefty at intel.com
Tue Mar 10 16:53:47 PDT 2009
Use the COMP_CHANNEL abstraction as a common framework for event
reporting and to provide better scalability.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
With these changes, I can consistently establish 3000 connections between
two systems. Work is still needed to identify performance bottlenecks,
but at least the connections form now.
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\librdmacm/examples/cmatose/cmatose.c
branches\winverbs\ulp\librdmacm/examples/cmatose/cmatose.c
--- trunk\ulp\librdmacm/examples/cmatose/cmatose.c 2009-01-15 15:47:32.196611000 -0800
+++ branches\winverbs\ulp\librdmacm/examples/cmatose/cmatose.c 2009-03-10 12:38:47.383414700 -0700
@@ -433,7 +433,7 @@ static int connect_events(void)
cma_handler(event->id, event);
rdma_ack_cm_event(event);
} else {
- printf("cmatose: failure in rdma_get_cm_event in connect events\n");
+ printf("cmatose:rdma_get_cm_event connect events error 0x%x\n", err);
ret = err;
}
}
@@ -452,7 +452,7 @@ static int disconnect_events(void)
cma_handler(event->id, event);
rdma_ack_cm_event(event);
} else {
- printf("cmatose: failure in rdma_get_cm_event in disconnect events\n");
+ printf("cmatose: rdma_get_cm_event disconnect events error 0x%x\n", err);
ret = err;
}
}
@@ -537,7 +537,7 @@ static int run_server(void)
goto out;
}
- ret = rdma_listen(listen_id, 0);
+ ret = rdma_listen(listen_id, connections);
if (ret) {
printf("cmatose: failure trying to listen: 0x%x\n", ret);
goto out;
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\librdmacm/include/rdma/rdma_cma.h
branches\winverbs\ulp\librdmacm/include/rdma/rdma_cma.h
--- trunk\ulp\librdmacm/include/rdma/rdma_cma.h 2009-01-12 14:41:27.573634000 -0800
+++ branches\winverbs\ulp\librdmacm/include/rdma/rdma_cma.h 2009-03-10 01:57:47.109375000 -0700
@@ -121,7 +121,7 @@ struct rdma_route
struct rdma_event_channel
{
- uint32_t timeout;
+ COMP_CHANNEL channel;
};
struct rdma_cm_id
@@ -133,13 +133,12 @@ struct rdma_cm_id
struct rdma_route route;
enum rdma_port_space ps;
uint8_t port_num;
+ COMP_ENTRY comp_entry;
union {
IWVConnectEndpoint *connect;
IWVDatagramEndpoint *datagram;
} ep;
- OVERLAPPED overlap;
- uint32_t events_completed;
};
struct rdma_conn_param
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\librdmacm/src/cma.cpp branches\winverbs\ulp\librdmacm/src/cma.cpp
--- trunk\ulp\librdmacm/src/cma.cpp 2009-03-02 13:19:41.110716900 -0800
+++ branches\winverbs\ulp\librdmacm/src/cma.cpp 2009-03-10 15:51:13.085375000 -0700
@@ -34,12 +34,12 @@
#include <rdma/rdma_cma.h>
#include <infiniband/verbs.h>
+#include <comp_channel.h>
#include <iba/ibat.h>
#include "cma.h"
+#include "..\..\..\etc\user\comp_channel.cpp"
-IWVProvider *prov;
-__declspec(dllexport)
-IWVProvider *ibv_get_winverbs(void);
+static struct ibv_windata windata;
enum cma_state
{
@@ -64,22 +64,12 @@ struct cma_id_private
{
struct rdma_cm_id id;
enum cma_state state;
- int channel_index;
struct cma_device *cma_dev;
int backlog;
int index;
struct rdma_cm_id **req_list;
};
-struct cma_event_channel
-{
- struct rdma_event_channel channel;
- CRITICAL_SECTION lock;
- struct cma_id_private *id[MAXIMUM_WAIT_OBJECTS];
- HANDLE event[MAXIMUM_WAIT_OBJECTS];
- int count;
-};
-
struct cma_device
{
struct ibv_context *verbs;
@@ -107,9 +97,9 @@ static void ucma_cleanup(void)
delete cma_dev_array;
cma_dev_cnt = 0;
}
- if (prov != NULL) {
- prov->Release();
- prov = NULL;
+ if (windata.prov != NULL) {
+ ibv_release_windata(&windata, IBV_WINDATA_VERSION);
+ windata.prov = NULL;
}
}
@@ -125,9 +115,8 @@ static int ucma_init(void)
goto out;
}
- prov = ibv_get_winverbs();
- if (prov == NULL) {
- ret = -1;
+ ret = ibv_get_windata(&windata, IBV_WINDATA_VERSION);
+ if (ret) {
goto err;
}
@@ -211,75 +200,26 @@ void rdma_free_devices(struct ibv_contex
__declspec(dllexport)
struct rdma_event_channel *rdma_create_event_channel(void)
{
- struct cma_event_channel *chan;
+ struct rdma_event_channel *channel;
if (!cma_dev_cnt && ucma_init()) {
return NULL;
}
- chan = new struct cma_event_channel;
- if (chan == NULL) {
+ channel = new struct rdma_event_channel;
+ if (channel == NULL) {
return NULL;
}
- InitializeCriticalSection(&chan->lock);
- chan->count = 0;
- chan->channel.timeout = INFINITE;
-
- return &chan->channel;
+ CompChannelInit(windata.comp_mgr, &channel->channel, INFINITE);
+ return channel;
}
__declspec(dllexport)
void rdma_destroy_event_channel(struct rdma_event_channel *channel)
{
- struct cma_event_channel *chan;
-
- chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel);
- DeleteCriticalSection(&chan->lock);
- delete chan;
-}
-
-static int cma_event_channel_insert_id(struct rdma_event_channel *channel,
- struct cma_id_private *id_priv)
-{
- struct cma_event_channel *chan;
- int ret = 0;
-
- chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel);
-
- EnterCriticalSection(&chan->lock);
- if (chan->count == MAXIMUM_WAIT_OBJECTS) {
- ret = -1;
- goto out;
- }
-
- chan->id[chan->count] = id_priv;
- chan->event[chan->count] = id_priv->id.overlap.hEvent;
- id_priv->channel_index = chan->count++;
-out:
- LeaveCriticalSection(&chan->lock);
- return ret;
-}
-
-/*
- * TODO: we cannot call cma_event_channel_remove_id() while another
- * thread is calling rdma_get_event(). If this is needed, then we
- * need to halt the rdma_get_event() thread, modify the event list,
- * then restart the rdma_get_event() thread.
- */
-static void cma_event_channel_remove_id(struct rdma_event_channel *channel,
- struct cma_id_private *id_priv)
-{
- struct cma_event_channel *chan;
-
- chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel);
-
- EnterCriticalSection(&chan->lock);
- chan->count--;
- chan->id[id_priv->channel_index] = chan->id[chan->count];
- chan->event[id_priv->channel_index] = chan->event[chan->count];
- chan->id[id_priv->channel_index]->channel_index = id_priv->channel_index;
- LeaveCriticalSection(&chan->lock);
+ CompChannelCleanup(&channel->channel);
+ delete channel;
}
__declspec(dllexport)
@@ -301,43 +241,26 @@ int rdma_create_id(struct rdma_event_cha
}
RtlZeroMemory(id_priv, sizeof(struct cma_id_private));
- id_priv->id.overlap.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
- if (id_priv->id.overlap.hEvent == NULL) {
- goto err1;
- }
-
id_priv->id.context = context;
id_priv->id.channel = channel;
id_priv->id.ps = ps;
+ CompEntryInit(&channel->channel, &id_priv->id.comp_entry);
if (ps == RDMA_PS_TCP) {
- hr = prov->CreateConnectEndpoint(&id_priv->id.ep.connect);
+ hr = windata.prov->CreateConnectEndpoint(&id_priv->id.ep.connect);
} else {
- hr = prov->CreateDatagramEndpoint(&id_priv->id.ep.datagram);
+ hr = windata.prov->CreateDatagramEndpoint(&id_priv->id.ep.datagram);
}
if (FAILED(hr)) {
- goto err2;
- }
-
- hr = cma_event_channel_insert_id(channel, id_priv);
- if (FAILED(hr)) {
- goto err3;
+ goto err;
}
*id = &id_priv->id;
return 0;
-err3:
- if (ps == RDMA_PS_TCP) {
- id_priv->id.ep.connect->Release();
- } else {
- id_priv->id.ep.datagram->Release();
- }
-err2:
- CloseHandle(id_priv->id.overlap.hEvent);
-err1:
+err:
delete id_priv;
- return -1;
+ return hr;
}
static void ucma_destroy_listen(struct cma_id_private *id_priv)
@@ -354,7 +277,7 @@ static void ucma_destroy_listen(struct c
__declspec(dllexport)
int rdma_destroy_id(struct rdma_cm_id *id)
{
- struct cma_id_private *id_priv;
+ struct cma_id_private *id_priv;
id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
if (id->ps == RDMA_PS_TCP) {
@@ -363,7 +286,7 @@ int rdma_destroy_id(struct rdma_cm_id *i
id->ep.datagram->CancelOverlappedRequests();
}
- cma_event_channel_remove_id(id->channel, id_priv);
+ CompChannelRemoveEntry(&id->channel->channel, &id->comp_entry);
if (id_priv->backlog > 0) {
ucma_destroy_listen(id_priv);
@@ -538,7 +461,8 @@ int rdma_resolve_addr(struct rdma_cm_id
RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
id_priv->state = cma_addr_resolve;
- SetEvent(id->overlap.hEvent);
+
+ CompEntryPost(&id->comp_entry);
return 0;
}
@@ -563,7 +487,8 @@ int rdma_resolve_route(struct rdma_cm_id
id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
id_priv->state = cma_route_resolve;
- SetEvent(id->overlap.hEvent);
+
+ CompEntryPost(&id->comp_entry);
return 0;
}
@@ -583,7 +508,7 @@ static int ucma_modify_qp_init(struct cm
return hr;
}
- qp_attr.pkey_index = (uint16_t) index;
+ qp_attr.pkey_index = index;
return ibv_modify_qp(qp, &qp_attr, (enum ibv_qp_attr_mask)
(IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT));
}
@@ -689,7 +614,7 @@ int rdma_connect(struct rdma_cm_id *id,
id_priv->state = cma_active_connect;
hr = id->ep.connect->Connect(id->qp->conn_handle, &id->route.addr.dst_addr,
- &attr, &id->overlap);
+ &attr, &id->comp_entry.Overlap);
if (FAILED(hr) && hr != WV_IO_PENDING) {
id_priv->state = cma_route_resolve;
return hr;
@@ -715,13 +640,16 @@ static int ucma_get_request(struct cma_i
if (listen->id.ps == RDMA_PS_TCP) {
hr = listen->id.ep.connect->GetRequest(id_priv->id.ep.connect,
- &id_priv->id.overlap);
+ &id_priv->id.comp_entry.Overlap);
} else {
hr = listen->id.ep.datagram->GetRequest(id_priv->id.ep.datagram,
- &id_priv->id.overlap);
+ &id_priv->id.comp_entry.Overlap);
+ }
+ if (FAILED(hr) && hr != WV_IO_PENDING) {
+ return hr;
}
- return (FAILED(hr) && hr != WV_IO_PENDING) ? hr : 0;
+ return 0;
}
__declspec(dllexport)
@@ -731,7 +659,7 @@ int rdma_listen(struct rdma_cm_id *id, i
HRESULT hr;
int i;
- if (backlog <= 0 || backlog > CMA_DEFAULT_BACKLOG) {
+ if (backlog <= 0) {
backlog = CMA_DEFAULT_BACKLOG;
}
@@ -784,7 +712,8 @@ int rdma_accept(struct rdma_cm_id *id, s
}
id_priv->state = cma_accepting;
- hr = id->ep.connect->Accept(id->qp->conn_handle, &attr, &id->overlap);
+ hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,
+ &id->comp_entry.Overlap);
if (FAILED(hr) && hr != WV_IO_PENDING) {
id_priv->state = cma_disconnected;
return hr;
@@ -848,9 +777,10 @@ int rdma_ack_cm_event(struct rdma_cm_eve
static int ucma_process_conn_req(struct cma_event *event)
{
struct cma_id_private *listen;
- HRESULT hr = 0;
+ struct cma_event_channel *chan;
listen = (struct cma_id_private *) event->id_priv->id.context;
+ ucma_get_request(listen, event->id_priv->index);
if (SUCCEEDED(event->event.status)) {
event->event.status = ucma_query_connect(&event->id_priv->id,
@@ -860,25 +790,18 @@ static int ucma_process_conn_req(struct
if (SUCCEEDED(event->event.status)) {
event->event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event->id_priv->state = cma_passive_connect;
-
- listen->req_list[event->id_priv->index] = NULL;
- ucma_get_request(listen, event->id_priv->index);
} else {
- hr = listen->id.ep.connect->GetRequest(event->id_priv->id.ep.connect,
- &event->id_priv->id.overlap);
- if (hr == WV_IO_PENDING) {
- hr = 0;
- }
+ rdma_destroy_id(&event->id_priv->id);
}
- return hr;
+ return event->event.status;
}
static int ucma_process_conn_resp(struct cma_event *event)
{
- struct rdma_cm_id *id;
- WV_CONNECT_PARAM attr;
- HRESULT hr;
+ struct rdma_cm_id *id;
+ WV_CONNECT_PARAM attr;
+ HRESULT hr;
if (FAILED(event->event.status)) {
goto err;
@@ -888,7 +811,8 @@ static int ucma_process_conn_resp(struct
event->id_priv->state = cma_accepting;
id = &event->id_priv->id;
- hr = id->ep.connect->Accept(id->qp->conn_handle, &attr, &id->overlap);
+ hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,
+ &id->comp_entry.Overlap);
if (FAILED(hr) && hr != WV_IO_PENDING) {
event->event.status = hr;
goto err;
@@ -906,16 +830,18 @@ err:
static void ucma_process_establish(struct cma_event *event)
{
+ struct cma_id_private *id_priv = event->id_priv;
+
if (SUCCEEDED(event->event.status)) {
- event->event.status = ucma_query_connect(&event->id_priv->id,
+ event->event.status = ucma_query_connect(&id_priv->id,
&event->event.param.conn);
}
if (SUCCEEDED(event->event.status)) {
event->event.event = RDMA_CM_EVENT_ESTABLISHED;
- event->id_priv->state = cma_connected;
- event->id_priv->id.ep.connect->NotifyDisconnect(&event->id_priv->id.overlap);
+ id_priv->state = cma_connected;
+ id_priv->id.ep.connect->NotifyDisconnect(&id_priv->id.comp_entry.Overlap);
} else {
event->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
event->id_priv->state = cma_disconnected;
@@ -962,12 +888,10 @@ __declspec(dllexport)
int rdma_get_cm_event(struct rdma_event_channel *channel,
struct rdma_cm_event **event)
{
- struct cma_event_channel *chan;
struct cma_event *evt;
- struct cma_id_private *id_priv;
struct rdma_cm_id *id;
- DWORD bytes;
- HRESULT hr;
+ COMP_ENTRY *entry;
+ DWORD bytes, ret;
evt = new struct cma_event;
if (evt == NULL) {
@@ -977,28 +901,20 @@ int rdma_get_cm_event(struct rdma_event_
do {
RtlZeroMemory(evt, sizeof(struct cma_event));
- chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel);
- hr = WaitForMultipleObjects(chan->count, chan->event, FALSE,
- chan->channel.timeout);
- if (hr == WAIT_TIMEOUT) {
- return hr;
- } else if (hr == WAIT_FAILED) {
- return HRESULT_FROM_WIN32(GetLastError());
+ ret = CompChannelPoll(&channel->channel, &entry);
+ if (ret) {
+ return ret;
}
- EnterCriticalSection(&chan->lock);
- evt->id_priv = chan->id[hr];
- LeaveCriticalSection(&chan->lock);
-
- id = &evt->id_priv->id;
+ id = CONTAINING_RECORD(entry, struct rdma_cm_id, comp_entry);
+ evt->id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
evt->event.id = id;
evt->event.param.conn.private_data = evt->private_data;
- if (id->ep.connect->GetOverlappedResult(&id->overlap, &bytes, FALSE) == 0) {
- evt->event.status = HRESULT_FROM_WIN32(GetLastError());
- }
+ evt->event.status = id->ep.connect->
+ GetOverlappedResult(&entry->Overlap, &bytes, FALSE);
- hr = ucma_process_event(evt);
- } while (FAILED(hr));
+ ret = ucma_process_event(evt);
+ } while (ret);
*event = &evt->event;
return 0;
@@ -1069,16 +985,6 @@ int rdma_set_option(struct rdma_cm_id *i
__declspec(dllexport)
int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
{
- struct cma_id_private *id_priv;
-
- id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
- cma_event_channel_remove_id(id->channel, id_priv);
- /*
- * TODO: To support calling this routine while processing events on the old
- * channel, we need to wait for all old events to be acknowledged.
- */
id->channel = channel;
- cma_event_channel_insert_id(channel, id_priv);
-
return 0;
}
More information about the ofw
mailing list