[Openib-windows] WSD: Behavior when the other side has not yet called accept, is different from TCP/IP (Ethernet)

Fabian Tillier ftillier at silverstorm.com
Mon Aug 7 14:35:56 PDT 2006


Hi Tzachi,

On 8/7/06, Tzachi Dar <tzachid at mellanox.co.il> wrote:
> Hi Fab,
>
> If we want to be as close as possible to Ethernet, then if no one is
> listening we should return WSAECONNREFUSED. This is also the case if the
> backlog is exceeded. There is one limitation to this, that I have tried
> to describe in the bug, and I'll try to explain it now. On Ethernet in
> both cases, there is a retry (3 times). That is, practically, for a
> backlog of 5, one can start 10 clients simultaneously, and they will all
> succeed. In the current implementation of WSD, this will fail. More than
> that due to the nature of the limited number of threads, in the case
> that the server is not standing the load, the clients wait for about a
> second, thus giving the server time to act correctly.
>
> By the way, here is something that I just thought of right now: If we
> change our behavior to simply drop REQ packets for which the backlog
> has been exceeded, we might differ from the spec, but we should handle
> the stress situations better.
> (This is a very small change in the code.)
> What do you think?

Here's a patch that does this.  I've defined a new reject reason,
IB_REJ_DROP_REQ, having a value of zero (not defined in the IB spec).
The CM will not send a REJ packet when this reason is given in the call
to ib_cm_rej.  Note that this is a valid thing to do on the wire:
REQ delivery is unreliable, so the REQ could have been dropped anyway, and
this should not cause any interoperability issues with any IB-compliant CM.

Note that I think I found a bug - in the reject path for the passive
side, the new CEP is not freed.  It will be in the idle state, but
never destroyed until the AL instance is closed.  The patch fixes this
- see the change to al_cm_qp.  Let me know if this looks correct (it
never hurts to have a code review!) and I'll check that part in.

I've also modified how the WSD provider handles the IB_REJ_INVALID_SID
case - if the user has prevented fall back to IPoIB, it will return
WSAECONNRESET, otherwise it will return WSAETIMEDOUT to let the
connection request try over NDIS.  This is needed to allow Windows
2003 systems to establish sockets to Windows XP systems.  Note that I
am using WSAECONNRESET, not WSAECONNREFUSED, to match the WSD docs
(see the WSPConnect documentation).

Beyond this, I've changed the reject handling for exceeded backlog to
drop the REQ as you suggested.  Let me know if it behaves as you
expect.

Thanks,

- Fab

Index: core/al/kernel/al_cm_cep.c
===================================================================
--- core/al/kernel/al_cm_cep.c	(revision 440)
+++ core/al/kernel/al_cm_cep.c	(working copy)
@@ -3631,6 +3631,9 @@
 	cep_agent_t			*p_port_cep;
 	ib_mad_element_t	*p_mad;

+	if( rej_status == IB_REJ_DROP_REQ )
+		return IB_SUCCESS;
+
 	p_port_cep = __get_cep_agent( p_cep );
 	if( !p_port_cep )
 		return IB_INSUFFICIENT_RESOURCES;
Index: core/al/al_cm_qp.c
===================================================================
--- core/al/al_cm_qp.c	(revision 440)
+++ core/al/al_cm_qp.c	(working copy)
@@ -1597,6 +1597,10 @@
 				deref_al_obj( &h_cm.h_qp->obj );
 		}
 	}
+	else
+	{
+		al_destroy_cep( h_cm.h_al, h_cm.cid, NULL );
+	}

 	AL_EXIT( AL_DBG_CM );
 	return status;
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h	(revision 440)
+++ inc/iba/ib_types.h	(working copy)
@@ -6988,6 +6988,7 @@
 *
 * SOURCE
  */
+#define IB_REJ_DROP_REQ						CL_HTON16(0)	/* Access Layer specific */
 #define IB_REJ_INSUF_QP						CL_HTON16(1)
 #define IB_REJ_INSUF_EEC					CL_HTON16(2)
 #define IB_REJ_INSUF_RESOURCES				CL_HTON16(3)
Index: ulp/wsd/user/ib_cm.c
===================================================================
--- ulp/wsd/user/ib_cm.c	(revision 440)
+++ ulp/wsd/user/ib_cm.c	(working copy)
@@ -127,7 +127,7 @@
 			/* Already too many connection requests are queued */
 			IBSP_TRACE1( IBSP_DBG_CM,
 				("already too many incoming connections, rejecting\n") );
-			ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_USER_DEFINED );
+			ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_DROP_REQ );
 			break;
 		}

@@ -433,10 +433,22 @@
 		ibsp_conn_remove( socket_info );

 		IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_BIND );
-		if( p_cm_rej_rec->rej_status == IB_REJ_TIMEOUT )
+		switch( p_cm_rej_rec->rej_status )
+		{
+		case IB_REJ_INVALID_SID:
+			if( g_connect_err == WSAEHOSTUNREACH )
+			{
+				ibsp_post_select_event( socket_info, FD_CONNECT, WSAECONNRESET );
+				break;
+			}
+			/* Fall through. */
+		case IB_REJ_TIMEOUT:
 			ibsp_post_select_event( socket_info, FD_CONNECT, WSAETIMEDOUT );
-		else
+			break;
+
+		default:
 			ibsp_post_select_event( socket_info, FD_CONNECT, WSAECONNREFUSED );
+		}
 		break;

 	case IBSP_CONNECTED:
-------------- next part --------------
Index: core/al/kernel/al_cm_cep.c
===================================================================
--- core/al/kernel/al_cm_cep.c	(revision 440)
+++ core/al/kernel/al_cm_cep.c	(working copy)
@@ -3631,6 +3631,9 @@
 	cep_agent_t			*p_port_cep;
 	ib_mad_element_t	*p_mad;
 
+	if( rej_status == IB_REJ_DROP_REQ )
+		return IB_SUCCESS;
+
 	p_port_cep = __get_cep_agent( p_cep );
 	if( !p_port_cep )
 		return IB_INSUFFICIENT_RESOURCES;
Index: core/al/al_cm_qp.c
===================================================================
--- core/al/al_cm_qp.c	(revision 440)
+++ core/al/al_cm_qp.c	(working copy)
@@ -1597,6 +1597,10 @@
 				deref_al_obj( &h_cm.h_qp->obj );
 		}
 	}
+	else
+	{
+		al_destroy_cep( h_cm.h_al, h_cm.cid, NULL );
+	}
 
 	AL_EXIT( AL_DBG_CM );
 	return status;
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h	(revision 440)
+++ inc/iba/ib_types.h	(working copy)
@@ -6988,6 +6988,7 @@
 *
 * SOURCE
  */
+#define IB_REJ_DROP_REQ						CL_HTON16(0)	/* Access Layer specific */
 #define IB_REJ_INSUF_QP						CL_HTON16(1)
 #define IB_REJ_INSUF_EEC					CL_HTON16(2)
 #define IB_REJ_INSUF_RESOURCES				CL_HTON16(3)
Index: ulp/wsd/user/ib_cm.c
===================================================================
--- ulp/wsd/user/ib_cm.c	(revision 440)
+++ ulp/wsd/user/ib_cm.c	(working copy)
@@ -127,7 +127,7 @@
 			/* Already too many connection requests are queued */
 			IBSP_TRACE1( IBSP_DBG_CM,
 				("already too many incoming connections, rejecting\n") );
-			ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_USER_DEFINED );
+			ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_DROP_REQ );
 			break;
 		}
 
@@ -433,10 +433,22 @@
 		ibsp_conn_remove( socket_info );
 
 		IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_BIND );
-		if( p_cm_rej_rec->rej_status == IB_REJ_TIMEOUT )
+		switch( p_cm_rej_rec->rej_status )
+		{
+		case IB_REJ_INVALID_SID:
+			if( g_connect_err == WSAEHOSTUNREACH )
+			{
+				ibsp_post_select_event( socket_info, FD_CONNECT, WSAECONNRESET );
+				break;
+			}
+			/* Fall through. */
+		case IB_REJ_TIMEOUT:
 			ibsp_post_select_event( socket_info, FD_CONNECT, WSAETIMEDOUT );
-		else
+			break;
+
+		default:
 			ibsp_post_select_event( socket_info, FD_CONNECT, WSAECONNREFUSED );
+		}
 		break;
 
 	case IBSP_CONNECTED:


More information about the ofw mailing list