[ofw] [PATCH 3/4] libibverbs: use comp_channel code to enhance scalability

Sean Hefty sean.hefty at intel.com
Tue Mar 10 16:51:37 PDT 2009


Use the COMP_CHANNEL abstraction as a common framework for event
reporting and to provide better scalability.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\libibverbs/include/infiniband/verbs.h
branches\winverbs\ulp\libibverbs/include/infiniband/verbs.h
--- trunk\ulp\libibverbs/include/infiniband/verbs.h	2009-03-02 16:31:21.146500000 -0800
+++ branches\winverbs\ulp\libibverbs/include/infiniband/verbs.h	2009-03-10 15:52:23.913500000 -0700
@@ -37,6 +37,8 @@
 
 #include <windows.h>
 #include <rdma\winverbs.h>
+#include <errno.h>
+#include <comp_channel.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,6 +53,16 @@ typedef unsigned __int16	uint16_t;
 typedef unsigned __int32	uint32_t;
 typedef unsigned __int64	uint64_t;
 
+#define EOVERFLOW		WV_BUFFER_OVERFLOW
+#define EISCONN			WV_CONNECTION_ACTIVE
+#define ECONNREFUSED	WV_CONNECTION_REFUSED
+#define ETIMEDOUT		WV_TIMEOUT
+#define ENETUNREACH		WV_HOST_UNREACHABLE
+#define EADDRINUSE		WV_ADDRESS_ALREADY_EXISTS
+#define EALREADY		WV_IO_PENDING
+#define EAFNOSUPPORT	WV_INVALID_ADDRESS
+#define EWOULDBLOCK		WV_DEVICE_BUSY
+
 union ibv_gid
 {
 	uint8_t			raw[16];
@@ -636,8 +648,8 @@ struct ibv_qp
 
 struct ibv_comp_channel
 {
-	struct ibv_context     *context;
-	uint32_t				timeout;
+	struct ibv_context		*context;
+	COMP_CHANNEL			comp_channel;
 };
 
 struct ibv_cq
@@ -646,9 +658,8 @@ struct ibv_cq
 	struct ibv_comp_channel *channel;
 	void					*cq_context;
 	IWVCompletionQueue		*handle;
-	OVERLAPPED				overlap;
 	int						cqe;
-	uint32_t				comp_events_completed;
+	COMP_ENTRY				comp_entry;
 };
 
 struct ibv_ah
@@ -678,7 +689,7 @@ struct ibv_context
 {
 	struct ibv_device		*device;
 	IWVDevice				*cmd_if;
-	uint32_t				timeout;
+	COMP_CHANNEL			channel;
 };
 
 /**
@@ -1103,6 +1114,23 @@ const char *ibv_port_state_str(enum ibv_
 __declspec(dllexport)
 const char *ibv_event_type_str(enum ibv_event_type event);
 
+/*
+ * Windows specific structures and interfaces
+ */
+struct ibv_windata
+{
+	IWVProvider		*prov;
+	COMP_MANAGER	*comp_mgr;
+};
+
+#define IBV_WINDATA_VERSION 1
+
+__declspec(dllexport)
+int ibv_get_windata(struct ibv_windata *windata, int version);
+
+__declspec(dllexport)
+void ibv_release_windata(struct ibv_windata *windata, int version);
+
 #ifdef __cplusplus
 }
 #endif
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\libibverbs/src/device.cpp
branches\winverbs\ulp\libibverbs/src/device.cpp
--- trunk\ulp\libibverbs/src/device.cpp	2008-11-26 23:45:19.234375000 -0800
+++ branches\winverbs\ulp\libibverbs/src/device.cpp	2009-03-10 15:52:33.397875000 -0700
@@ -31,8 +31,10 @@
 #include <stdio.h>
 #include <infiniband/verbs.h>
 #include <rdma/winverbs.h>
+#include "..\..\..\etc\user\comp_channel.cpp"
 
 IWVProvider *prov;
+COMP_MANAGER comp_mgr;
 
 struct verbs_device
 {
@@ -43,35 +45,53 @@ struct verbs_device
 
 struct verbs_port
 {
-	OVERLAPPED			overlap;
+	COMP_ENTRY			comp_entry;
 	DWORD				event_flag;
+	uint8_t				port_num;
 };
 
-#define EVENT_PORT_NONE	0xFF
-
 struct verbs_context
 {
 	struct ibv_context	context;
 	struct verbs_device	device;
-	HANDLE				*event;
 	struct verbs_port	*port;
-	uint8_t				event_port_index;
+	verbs_port			*event_port;
 };
 
-__declspec(dllexport)
-IWVProvider *ibv_get_winverbs(void)
+static int ibv_init(void)
 {
 	HRESULT hr;
 
 	if (prov == NULL) {
 		hr = WvGetObject(IID_IWVProvider, (LPVOID*) &prov);
 		if (FAILED(hr)) {
-			return NULL;
+			return -1;
 		}
+		CompManagerOpen(&comp_mgr);
+		CompManagerMonitor(&comp_mgr, prov->GetFileHandle(), 0);
+	}
+	return 0;
+}
+
+__declspec(dllexport)
+int ibv_get_windata(struct ibv_windata *windata, int version)
+{
+	int ret;
+
+	if (version != IBV_WINDATA_VERSION || ibv_init()) {
+		return -1;
 	}
 
 	prov->AddRef();
-	return prov;
+	windata->prov = prov;
+	windata->comp_mgr = &comp_mgr;
+	return 0;
+}
+
+__declspec(dllexport)
+void ibv_release_windata(struct ibv_windata *windata, int version)
+{
+	windata->prov->Release();
 }
 
 __declspec(dllexport)
@@ -84,11 +104,8 @@ struct ibv_device **ibv_get_device_list(
 	SIZE_T size, cnt;
 	HRESULT hr;
 
-	if (prov == NULL) {
-		hr = WvGetObject(IID_IWVProvider, (LPVOID*) &prov);
-		if (FAILED(hr)) {
-			goto err1;
-		}	
+	if (ibv_init()) {
+		goto err1;
 	}
 
 	cnt = 0;
@@ -179,45 +196,30 @@ struct ibv_context *ibv_open_device(stru
 		return NULL;
 	}
 	memcpy(&vcontext->device, vdev, sizeof(struct verbs_device));
-	vcontext->event_port_index = EVENT_PORT_NONE;
-	vcontext->context.timeout = INFINITE;
+	vcontext->event_port = NULL;
+	CompChannelInit(&comp_mgr, &vcontext->context.channel, INFINITE);
 
 	vcontext->port = new struct verbs_port[vdev->phys_port_cnt];
 	if (vcontext->port == NULL) {
 		goto err1;
 	}
 
-	vcontext->event = new HANDLE[vdev->phys_port_cnt];
-	if (vcontext->event == NULL) {
-		goto err2;
-	}
-
 	hr = prov->OpenDevice(vdev->guid, &vcontext->context.cmd_if);
 	if (FAILED(hr)) {
-		goto err3;
+		goto err2;
 	}
 
 	for (i = 0; i < vdev->phys_port_cnt; i++) {
-		vcontext->event[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
-		if (vcontext->event[i] == NULL) {
-			goto err4;
-		}
-		vcontext->port[i].overlap.hEvent = vcontext->event[i];
+		vcontext->port[i].port_num = (uint8_t) i + 1;
 		vcontext->port[i].event_flag = 0;
-
-		vcontext->context.cmd_if->Notify((UINT8) i + 1,
-										 &vcontext->port[i].overlap,
+		CompEntryInit(&vcontext->context.channel, &vcontext->port[i].comp_entry);
+		vcontext->context.cmd_if->Notify(vcontext->port[i].port_num,
+										 &vcontext->port[i].comp_entry.Overlap,
 										 &vcontext->port[i].event_flag);
 	}
 
 	return &vcontext->context;
 
-err4:
-	while (--i >= 0) {
-		CloseHandle(vcontext->event[i]);
-	}
-err3:
-	delete vcontext->event;
 err2:
 	delete vcontext->port;
 err1:
@@ -235,11 +237,10 @@ int ibv_close_device(struct ibv_context 
 	context->cmd_if->CancelOverlappedRequests();
 
 	for (i = 0; i < vcontext->device.phys_port_cnt; i++) {
-		CloseHandle(vcontext->event[i]);
+		CompChannelRemoveEntry(&context->channel, &vcontext->port[i].comp_entry);
 	}
 
 	context->cmd_if->Release();
-	delete vcontext->event;
 	delete vcontext->port;
 	delete vcontext;
 	return 0;
@@ -250,7 +251,7 @@ static enum ibv_event_type ibv_get_port_
 	WV_PORT_ATTRIBUTES attr;
 	HRESULT hr;
 
-	hr = vcontext->context.cmd_if->QueryPort(vcontext->event_port_index + 1, &attr);
+	hr = vcontext->context.cmd_if->QueryPort(vcontext->event_port->port_num, &attr);
 	if (FAILED(hr)) {
 		return IBV_EVENT_PORT_ERR;
 	}
@@ -265,8 +266,8 @@ static int ibv_report_port_event(struct 
 	struct verbs_port *port;
 	int ret = 0;
 
-	port = &vcontext->port[vcontext->event_port_index];
-	event->element.port_num = vcontext->event_port_index + 1;
+	port = vcontext->event_port;
+	event->element.port_num = port->port_num;
 
 	if (port->event_flag & WV_EVENT_ERROR) {
 		event->event_type = IBV_EVENT_DEVICE_FATAL;
@@ -289,9 +290,10 @@ static int ibv_report_port_event(struct 
 	}
 	
 	if (port->event_flag == 0) {
-		vcontext->context.cmd_if->Notify(vcontext->event_port_index + 1,
-										 &port->overlap, &port->event_flag);
-		vcontext->event_port_index = EVENT_PORT_NONE;
+		vcontext->context.cmd_if->Notify(vcontext->event_port->port_num,
+										 &port->comp_entry.Overlap,
+										 &port->event_flag);
+		vcontext->event_port = NULL;
 	}
 	return ret;
 }
@@ -301,26 +303,23 @@ int ibv_get_async_event(struct ibv_conte
 						struct ibv_async_event *event)
 {
 	struct verbs_context *vcontext;
-	HRESULT hr;
-	int i;
+	COMP_ENTRY *entry;
+	int ret;
 
 	vcontext = CONTAINING_RECORD(context, struct verbs_context, context);
-	if (vcontext->event_port_index != EVENT_PORT_NONE) {
+	if (vcontext->event_port) {
 		if (ibv_report_port_event(vcontext, event) == 0) {
 			return 0;
 		}
 	}
 
-	hr = WaitForMultipleObjects(vcontext->device.phys_port_cnt,
-								vcontext->event, FALSE, context->timeout);
-	if (hr == WAIT_TIMEOUT) {
-		return hr;
-	} else if (hr == WAIT_FAILED) {
-		return HRESULT_FROM_WIN32(GetLastError());
+	ret = CompChannelPoll(&context->channel, &entry);
+	if (!ret) {
+		vcontext->event_port = CONTAINING_RECORD(entry, struct verbs_port, comp_entry);
+		ret = ibv_report_port_event(vcontext, event);
 	}
 
-	vcontext->event_port_index = (UINT8) hr;
-	return ibv_report_port_event(vcontext, event);
+	return ret;
 }
 
 __declspec(dllexport)
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\libibverbs/src/ibv_exports.src
branches\winverbs\ulp\libibverbs/src/ibv_exports.src
--- trunk\ulp\libibverbs/src/ibv_exports.src	2008-05-22 12:34:07.456724100 -0700
+++ branches\winverbs\ulp\libibverbs/src/ibv_exports.src	2009-03-10 15:49:59.647875000 -0700
@@ -51,4 +51,6 @@ ibv_detach_mcast
 ibv_node_type_str
 ibv_port_state_str
 ibv_event_type_str
+ibv_get_windata
+ibv_release_windata
 #endif
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\libibverbs/src/ibverbs.h branches\winverbs\ulp\libibverbs/src/ibverbs.h
--- trunk\ulp\libibverbs/src/ibverbs.h	2008-06-28 23:01:40.265625000 -0700
+++ branches\winverbs\ulp\libibverbs/src/ibverbs.h	2009-03-10 00:17:06.959125000 -0700
@@ -1,6 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2007 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2009 Intel Corp, Inc.  All rights reserved.
  *
  * This software is available to you under the OpenIB.org BSD license
  * below:
@@ -31,6 +30,8 @@
 #ifndef IB_VERBS_H
 #define IB_VERBS_H
 
+extern COMP_MANAGER comp_mgr;
+
 __inline void* __cdecl operator new(size_t size)
 {
 	return HeapAlloc(GetProcessHeap(), 0, size);
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\libibverbs/src/verbs.cpp branches\winverbs\ulp\libibverbs/src/verbs.cpp
--- trunk\ulp\libibverbs/src/verbs.cpp	2009-03-02 13:21:49.814750000 -0800
+++ branches\winverbs\ulp\libibverbs/src/verbs.cpp	2009-03-10 00:51:37.552875000 -0700
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
- * Copyright (c) 2008 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under the OpenIB.org BSD license
  * below:
@@ -34,23 +34,9 @@
 #include <stdio.h>
 
 #include <infiniband/verbs.h>
+#include <comp_channel.h>
 #include "ibverbs.h"
 
-struct verbs_cq
-{
-	struct ibv_cq				cq;
-	int							channel_index;
-};
-
-struct verbs_comp_channel
-{
-	struct ibv_comp_channel		channel;
-	CRITICAL_SECTION			lock;
-	struct verbs_cq				*cq[MAXIMUM_WAIT_OBJECTS];
-	HANDLE						event[MAXIMUM_WAIT_OBJECTS];
-	int							count;
-};
-
 __declspec(dllexport)
 int ibv_rate_to_mult(enum ibv_rate rate)
 {
@@ -64,7 +50,7 @@ int ibv_rate_to_mult(enum ibv_rate rate)
 	case IBV_RATE_60_GBPS:  return 24;
 	case IBV_RATE_80_GBPS:  return 32;
 	case IBV_RATE_120_GBPS: return 48;
-	default:           return -1;
+	default:				return -1;
 	}
 }
 
@@ -322,124 +308,60 @@ int ibv_dereg_mr(struct ibv_mr *mr)
 __declspec(dllexport)
 struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context)
 {
-	struct verbs_comp_channel *vchan;
+	struct ibv_comp_channel *channel;
 
-	vchan = new struct verbs_comp_channel;
-	if (vchan == NULL) {
+	channel = new struct ibv_comp_channel;
+	if (channel == NULL) {
 		return NULL;
 	}
 
-	InitializeCriticalSection(&vchan->lock);
-	vchan->count = 0;
-	vchan->channel.context = context;
-	vchan->channel.timeout = INFINITE;
-
-	return &vchan->channel;
+	CompChannelInit(&comp_mgr, &channel->comp_channel, INFINITE);
+	channel->context = context;
+	return channel;
 }
 
 __declspec(dllexport)
 int ibv_destroy_comp_channel(struct ibv_comp_channel *channel)
 {
-	struct verbs_comp_channel *vchan;
-
-	vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel);
-	if (vchan->count > 0) {
-		return -1;
-	}
-
-	DeleteCriticalSection(&vchan->lock);	
-	delete vchan;
+	CompChannelCleanup(&channel->comp_channel);
+	delete channel;
 	return 0;
 }
 
-static int ibv_comp_channel_insert_cq(struct ibv_comp_channel *channel,
-									  struct verbs_cq *cq)
-{
-	struct verbs_comp_channel *vchan;
-	int ret = 0;
-
-	vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel);
-
-	EnterCriticalSection(&vchan->lock);
-	if (vchan->count == MAXIMUM_WAIT_OBJECTS) {
-		ret = -1;
-		goto out;
-	}
-
-	vchan->cq[vchan->count] = cq;
-	vchan->event[vchan->count] = cq->cq.overlap.hEvent;
-	cq->channel_index = vchan->count++;
-out:
-	LeaveCriticalSection(&vchan->lock);
-	return ret;
-}
-
-/*
- * TODO: we cannot call ibv_comp_channel_remove_cq() while another
- * thread is calling ibv_get_cq_event().  If this is needed, then we
- * need to halt the ibv_get_cq_event() thread, modify the event list,
- * then restart the ibv_get_cq_event() thread.
- */
-static void ibv_comp_channel_remove_cq(struct ibv_comp_channel *channel,
-									   struct verbs_cq *cq)
-{
-	struct verbs_comp_channel *vchan;
-
-	vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel);
-
-	EnterCriticalSection(&vchan->lock);
-	vchan->count--;
-	vchan->cq[cq->channel_index] = vchan->cq[vchan->count];
-	vchan->event[cq->channel_index] = vchan->event[vchan->count];
-	vchan->cq[cq->channel_index]->channel_index = cq->channel_index;
-	LeaveCriticalSection(&vchan->lock);
-}
-
 __declspec(dllexport)
 struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context,
 							 struct ibv_comp_channel *channel, int comp_vector)
 {
-	struct verbs_cq *vcq;
+	struct ibv_cq *cq;
 	HRESULT hr;
 	SIZE_T entries;
 
-	vcq = new struct verbs_cq;
-	if (vcq == NULL) {
+	cq = new struct ibv_cq;
+	if (cq == NULL) {
 		return NULL;
 	}
 
-	vcq->cq.overlap.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
-	if (vcq->cq.overlap.hEvent == NULL) {
-		goto err1;
-	}
-
-	vcq->cq.context = context;
-	vcq->cq.channel = channel;
-	vcq->cq.cq_context = cq_context;
-	vcq->cq.comp_events_completed = 0;
+	cq->context = context;
+	cq->channel = channel;
+	cq->cq_context = cq_context;
 
 	entries = cqe;
-	hr = context->cmd_if->CreateCompletionQueue(&entries, &vcq->cq.handle);
+	hr = context->cmd_if->CreateCompletionQueue(&entries, &cq->handle);
 	if (FAILED(hr)) {
-		goto err2;
+		goto err;
 	}
 
 	if (channel != NULL) {
-		hr = ibv_comp_channel_insert_cq(channel, vcq);
-		if (FAILED(hr)) {
-			goto err3;
-		}
+		CompEntryInit(&channel->comp_channel, &cq->comp_entry);
+	} else {
+		memset(&cq->comp_entry, 0, sizeof cq->comp_entry);
 	}
 
-	vcq->cq.cqe = (uint32_t) entries;
-	return &vcq->cq;
+	cq->cqe = (uint32_t) entries;
+	return cq;
 
-err3:
-	vcq->cq.handle->Release();
-err2:
-	CloseHandle(vcq->cq.overlap.hEvent);
-err1:
-	delete vcq;
+err:
+	delete cq;
 	return NULL;
 }
 
@@ -460,8 +382,9 @@ __declspec(dllexport)
 int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only)
 {
 	HRESULT hr;
+
 	hr = cq->handle->Notify(solicited_only ? WvCqSolicited : WvCqNextCompletion,
-							&cq->overlap);
+							&cq->comp_entry.Overlap);
 	if (SUCCEEDED(hr) || hr == WV_IO_PENDING) {
 		return 0;
 	} else {
@@ -486,19 +409,12 @@ int ibv_poll_cq(struct ibv_cq *cq, int n
 __declspec(dllexport)
 int ibv_destroy_cq(struct ibv_cq *cq)
 {
-	struct verbs_cq *vcq;
-
-	vcq = CONTAINING_RECORD(cq, struct verbs_cq, cq);
-
 	cq->handle->CancelOverlappedRequests();
 
 	if (cq->channel != NULL) {
-		ibv_comp_channel_remove_cq(cq->channel, vcq);
+		CompChannelRemoveEntry(&cq->channel->comp_channel, &cq->comp_entry);
 	}
 
-	while (cq->comp_events_completed > 0)
-		; /* twiddle thumbs */
-
 	cq->handle->Release();
 	delete cq;
 	return 0;
@@ -508,42 +424,21 @@ __declspec(dllexport)
 int ibv_get_cq_event(struct ibv_comp_channel *channel,
 					 struct ibv_cq **cq, void **cq_context)
 {
-	struct verbs_comp_channel *vchan;
-	struct verbs_cq *vcq;
-	HRESULT hr;
+	COMP_ENTRY *entry;
+	DWORD ret;
 
-	vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel);
-	hr = WaitForMultipleObjects(vchan->count, vchan->event, FALSE,
-								vchan->channel.timeout);
-	if (hr == WAIT_TIMEOUT) {
-		return hr;
-	} else if (hr == WAIT_FAILED) {
-		return HRESULT_FROM_WIN32(GetLastError());
+	ret = CompChannelPoll(&channel->comp_channel, &entry);
+	if (!ret) {
+		*cq = CONTAINING_RECORD(entry, struct ibv_cq, comp_entry);
+		*cq_context = (*cq)->cq_context;
 	}
 
-	EnterCriticalSection(&vchan->lock);
-	vcq = vchan->cq[hr];
-	vcq->cq.comp_events_completed++;
-	LeaveCriticalSection(&vchan->lock);
-
-	*cq = &vcq->cq;
-	*cq_context = vcq->cq.cq_context;
-	return 0;
+	return ret;
 }
 
 __declspec(dllexport)
 void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents)
 {
-	struct verbs_comp_channel *vchan;
-
-	if (cq->channel == NULL) {
-		return;
-	}
-
-	vchan = CONTAINING_RECORD(cq->channel, struct verbs_comp_channel, channel);
-	EnterCriticalSection(&vchan->lock);
-	cq->comp_events_completed -= nevents;
-	LeaveCriticalSection(&vchan->lock);
 }
 
 __declspec(dllexport)





More information about the ofw mailing list