[ofw] [PATCH] librdmacm: fix event reporting

Sean Hefty sean.hefty at intel.com
Fri Apr 17 11:34:15 PDT 2009


Use the latest comp_channel changes to fix event reporting and avoid
hangs when destroying resources.  We need to track when devices are
being closed, so that events are canceled and no new wait calls are
issued.

Also make a minor correction to the cmatose test app to avoid busy polling
the CQ, which can prevent other threads from running.  Busy polling leads to
connection failures when running more clients than there are CPUs in the system.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\librdmacm/examples/cmatose/cmatose.c
branches\winverbs\ulp\librdmacm/examples/cmatose/cmatose.c
--- trunk\ulp\librdmacm/examples/cmatose/cmatose.c	2009-03-10 12:38:47.383414700 -0700
+++ branches\winverbs\ulp\librdmacm/examples/cmatose/cmatose.c	2009-04-01 11:03:05.691645900 -0700
@@ -417,6 +417,7 @@ static int poll_cqs(enum CQ_INDEX index)
 				printf("cmatose: failed polling CQ: 0x%x\n", ret);
 				return ret;
 			}
+			Sleep(0);
 		}
 	}
 	return 0;
diff -up -r -X \mshefty\scm\winof\trunk\docs\dontdiff.txt -I '\$Id:' trunk\ulp\librdmacm/src/cma.cpp
branches\winverbs\ulp\librdmacm/src/cma.cpp
--- trunk\ulp\librdmacm/src/cma.cpp	2009-03-10 15:51:13.085375000 -0700
+++ branches\winverbs\ulp\librdmacm/src/cma.cpp	2009-04-09 23:24:31.101375000 -0700
@@ -38,14 +38,16 @@
 #include <iba/ibat.h>
 #include "cma.h"
 #include "..\..\..\etc\user\comp_channel.cpp"
+#include "..\..\..\etc\user\dlist.c"
 
-static struct ibv_windata windata;
+static struct ibvw_windata windata;
 
 enum cma_state
 {
 	cma_idle,
 	cma_listening,
 	cma_get_request,
+	cma_addr_bind,
 	cma_addr_resolve,
 	cma_route_resolve,
 	cma_passive_connect,
@@ -98,7 +100,7 @@ static void ucma_cleanup(void)
 		cma_dev_cnt = 0;
 	}
 	if (windata.prov != NULL) {
-		ibv_release_windata(&windata, IBV_WINDATA_VERSION);
+		ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
 		windata.prov = NULL;
 	}
 }
@@ -115,7 +117,7 @@ static int ucma_init(void)
 		goto out;
 	}
 
-	ret = ibv_get_windata(&windata, IBV_WINDATA_VERSION);
+	ret = ibvw_get_windata(&windata, IBVW_WINDATA_VERSION);
 	if (ret) {
 		goto err;
 	}
@@ -286,7 +288,7 @@ int rdma_destroy_id(struct rdma_cm_id *i
 		id->ep.datagram->CancelOverlappedRequests();
 	}
 
-	CompChannelRemoveEntry(&id->channel->channel, &id->comp_entry);
+	CompEntryCancel(&id->comp_entry);
 
 	if (id_priv->backlog > 0) {
 		ucma_destroy_listen(id_priv);
@@ -407,6 +409,7 @@ static int ucma_query_datagram(struct rd
 __declspec(dllexport)
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
+	struct cma_id_private *id_priv;
 	HRESULT hr;
 
 	if (id->ps == RDMA_PS_TCP) {
@@ -421,6 +424,10 @@ int rdma_bind_addr(struct rdma_cm_id *id
 		}
 	}
 
+	if (SUCCEEDED(hr)) {
+		id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
+		id_priv->state = cma_addr_bind;
+	}
 	return hr;
 }
 
@@ -434,32 +441,34 @@ int rdma_resolve_addr(struct rdma_cm_id 
 	DWORD size;
 	HRESULT hr;
 
-	if (src_addr == NULL) {
-		if (id->ps == RDMA_PS_TCP) {
-			s = socket(dst_addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
-		} else {
-			s = socket(dst_addr->sa_family, SOCK_DGRAM, IPPROTO_UDP);
-		}
-		if (s == INVALID_SOCKET) {
-			return WSAGetLastError();
+	id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
+	if (id_priv->state == cma_idle) {
+		if (src_addr == NULL) {
+			if (id->ps == RDMA_PS_TCP) {
+				s = socket(dst_addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+			} else {
+				s = socket(dst_addr->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+			}
+			if (s == INVALID_SOCKET) {
+				return WSAGetLastError();
+			}
+
+			hr = WSAIoctl(s, SIO_ROUTING_INTERFACE_QUERY, dst_addr, ucma_addrlen(dst_addr),
+						  &addr, sizeof addr, &size, NULL, NULL);
+			closesocket(s);
+			if (FAILED(hr)) {
+				return WSAGetLastError();
+			}
+			src_addr = &addr.Sa;
 		}
 
-		hr = WSAIoctl(s, SIO_ROUTING_INTERFACE_QUERY, dst_addr, ucma_addrlen(dst_addr),
-					  &addr, sizeof addr, &size, NULL, NULL);
-		closesocket(s);
+		hr = rdma_bind_addr(id, src_addr);
 		if (FAILED(hr)) {
-			return WSAGetLastError();
+			return hr;
 		}
-		src_addr = &addr.Sa;
-	}
-
-	hr = rdma_bind_addr(id, src_addr);
-	if (FAILED(hr)) {
-		return hr;
 	}
 
 	RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
-	id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
 	id_priv->state = cma_addr_resolve;
 
 	CompEntryPost(&id->comp_entry);
@@ -613,9 +622,11 @@ int rdma_connect(struct rdma_cm_id *id, 
 	}
 
 	id_priv->state = cma_active_connect;
+	id->comp_entry.Busy = 1;
 	hr = id->ep.connect->Connect(id->qp->conn_handle, &id->route.addr.dst_addr,
 								 &attr, &id->comp_entry.Overlap);
 	if (FAILED(hr) && hr != WV_IO_PENDING) {
+		id->comp_entry.Busy = 0;
 		id_priv->state = cma_route_resolve;
 		return hr;
 	}
@@ -638,6 +649,7 @@ static int ucma_get_request(struct cma_i
 	id_priv->index = index;
 	id_priv->state = cma_get_request;
 
+	id_priv->id.comp_entry.Busy = 1;
 	if (listen->id.ps == RDMA_PS_TCP) {
 		hr = listen->id.ep.connect->GetRequest(id_priv->id.ep.connect,
 											   &id_priv->id.comp_entry.Overlap);
@@ -646,6 +658,7 @@ static int ucma_get_request(struct cma_i
 												&id_priv->id.comp_entry.Overlap);
 	}
 	if (FAILED(hr) && hr != WV_IO_PENDING) {
+		id_priv->id.comp_entry.Busy = 0;
 		return hr;
 	}
 
@@ -712,9 +725,11 @@ int rdma_accept(struct rdma_cm_id *id, s
 	}
 
 	id_priv->state = cma_accepting;
+	id->comp_entry.Busy = 1;
 	hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,
 								&id->comp_entry.Overlap);
 	if (FAILED(hr) && hr != WV_IO_PENDING) {
+		id->comp_entry.Busy = 0;
 		id_priv->state = cma_disconnected;
 		return hr;
 	}
@@ -790,6 +805,7 @@ static int ucma_process_conn_req(struct 
 	if (SUCCEEDED(event->event.status)) {
 		event->event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 		event->id_priv->state = cma_passive_connect;
+		event->event.listen_id = &listen->id;
 	} else {
 		rdma_destroy_id(&event->id_priv->id);
 	}
@@ -811,9 +827,11 @@ static int ucma_process_conn_resp(struct
 	event->id_priv->state = cma_accepting;
 
 	id = &event->id_priv->id;
+	id->comp_entry.Busy = 1;
 	hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,
 								&id->comp_entry.Overlap);
 	if (FAILED(hr) && hr != WV_IO_PENDING) {
+		id->comp_entry.Busy = 0;
 		event->event.status = hr;
 		goto err;
 	}
@@ -841,6 +859,7 @@ static void ucma_process_establish(struc
 		event->event.event = RDMA_CM_EVENT_ESTABLISHED;
 
 		id_priv->state = cma_connected;
+		id_priv->id.comp_entry.Busy = 1;
 		id_priv->id.ep.connect->NotifyDisconnect(&id_priv->id.comp_entry.Overlap);
 	} else {
 		event->event.event = RDMA_CM_EVENT_CONNECT_ERROR;




More information about the ofw mailing list