[Openib-windows] Connection rate of WSD

Tzachi Dar tzachid at mellanox.co.il
Tue May 30 09:10:31 PDT 2006


Hi Fab,
 
While testing connecting to and disconnecting from WSD, I found that
the connection rate is low.
By low, I mean that it sometimes took 2 seconds to create a
connection. After investigating the problem, I found that the main
cause is that an RNR NAK is being sent.
 
It seems that the current flow is that one side sends CM_REQ. The
receiver notifies the switch of a new accepted socket. After that, when
the switch calls accept, a CM_REP message is sent. No receives are
posted at that point; they are only posted when the switch decides to,
which might be too late. Once the CM_REP has been sent, the remote side
sends RTU and might start to send data. If that data arrives before any
receive has been posted, the receiving side answers with an RNR NAK and
the sender has to wait before retrying.
 
To my understanding, the best way to solve this problem is to delay the
sending of the CM_REP until the first receive is posted. This makes sure
that once data arrives, things work fine.
 
I have created an experimental patch that implements this idea and it
seems that things are working well with it.
 
What is your opinion?
 
Do you want me to prepare a "real" patch for this?
 
Thanks
Tzachi
 
Index: ib_cm.c
===================================================================
--- ib_cm.c (revision 1372)
+++ ib_cm.c (working copy)
@@ -92,6 +92,7 @@
   IBSP_TRACE2( IBSP_DBG_NEV,
    ("Signaling eventHandle %p at time %I64d.\n",
    h_event, cl_get_time_stamp() ) );
+  IBSP_ERROR(("Setting the event\n"));
   SetEvent( h_event );
  }
 
@@ -208,6 +209,7 @@
  ib_api_status_t status;
 
  IBSP_ENTER( IBSP_DBG_CM );
+ IBSP_ERROR(("cm_rep_callback called\n"));
 
  memset( &cm_rtu, 0, sizeof(cm_rtu) );
 
@@ -290,6 +292,8 @@
 
  IBSP_ENTER( IBSP_DBG_CM );
 
+ IBSP_ERROR(("cm_rtu_callback called\n"));
+
  cl_spinlock_acquire( &socket_info->mutex );
 
  if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE )
@@ -877,9 +881,11 @@
  IN    struct ibsp_socket_info  *socket_info,
  IN    ib_cm_req_rec_t    *cm_req_received )
 {
- ib_cm_rep_t cm_rep;
- ib_api_status_t status;
+// ib_cm_rep_t cm_rep;
+// ib_api_status_t status;
 
+ UNREFERENCED_PARAMETER(cm_req_received);
+
  IBSP_ENTER( IBSP_DBG_CM );
 
  /* Insert into the connection map. */
@@ -888,7 +894,60 @@
   IBSP_EXIT( IBSP_DBG_CM );
   return WSAEADDRINUSE;
  }
+#if 0
+ memset( &cm_rep, 0, sizeof(cm_rep) );
 
+ cm_rep.qp_type = IB_QPT_RELIABLE_CONN;
+ cm_rep.h_qp = socket_info->qp;
+ cm_rep.access_ctrl = IB_AC_RDMA_READ | IB_AC_RDMA_WRITE |
IB_AC_LOCAL_WRITE;
+#if 0
+ // Bug in TAVOR
+ cm_rep.sq_depth = QP_ATTRIB_SQ_DEPTH;
+ cm_rep.rq_depth = QP_ATTRIB_RQ_DEPTH;
+#endif
+ cm_rep.init_depth = QP_ATTRIB_INITIATOR_DEPTH;
+ cm_rep.target_ack_delay = 10;
+ cm_rep.failover_accepted = IB_FAILOVER_ACCEPT_UNSUPPORTED;
+ cm_rep.flow_ctrl = cm_req_received->flow_ctrl;
+ cm_rep.rnr_nak_timeout = QP_ATTRIB_RNR_NAK_TIMEOUT;
+ cm_rep.rnr_retry_cnt = cm_req_received->rnr_retry_cnt;
+ cm_rep.pfn_cm_mra_cb = cm_mra_callback;
+ cm_rep.pfn_cm_rej_cb = cm_rej_callback;
+ cm_rep.pfn_cm_rtu_cb = cm_rtu_callback;
+ cm_rep.pfn_cm_lap_cb = cm_lap_callback;
+ cm_rep.pfn_cm_dreq_cb = cm_dreq_callback;
+
+ fzprint(("%s():%d:0x%x:0x%x: flow_ctrl=%d rnr_retry_cnt=%d\n",
__FUNCTION__,
+    __LINE__, GetCurrentProcessId(),
+    GetCurrentThreadId(), cm_rep.flow_ctrl, cm_rep.rnr_retry_cnt));
+
+ status = ib_cm_rep( cm_req_received->h_cm_req, &cm_rep );
+ if( status != IB_SUCCESS )
+ {
+  /* Remove from connection map. */
+  ibsp_conn_remove( socket_info );
+
+  IBSP_ERROR_EXIT(
+   ("ib_cm_rep failed (0x%d) at time %I64d\n",
+   ib_get_err_str( status ), cl_get_time_stamp()) );
+  return WSAEACCES;
+ }
+#endif
+ IBSP_EXIT( IBSP_DBG_CM );
+ return 0;
+}
+
+
+int
+ib_accept1(
+ IN    struct ibsp_socket_info  *socket_info,
+ IN    ib_cm_req_rec_t    *cm_req_received )
+{
+ ib_cm_rep_t cm_rep;
+ ib_api_status_t status;
+
+ IBSP_ENTER( IBSP_DBG_CM );
+
  memset( &cm_rep, 0, sizeof(cm_rep) );
 
  cm_rep.qp_type = IB_QPT_RELIABLE_CONN;
Index: ibspdll.c
===================================================================
--- ibspdll.c (revision 1372)
+++ ibspdll.c (working copy)
@@ -325,6 +325,8 @@
  /* Update the state of the socket context */
  IBSP_CHANGE_SOCKET_STATE( new_socket_info, IBSP_CONNECTED );
 
+ new_socket_info->cm_req_received = p_incoming->cm_req_received;
+
  *lpErrno = ib_accept( new_socket_info, &p_incoming->cm_req_received );
  if( *lpErrno )
  {
@@ -346,6 +348,8 @@
   deref_socket_info( new_socket_info );
   return INVALID_SOCKET;
  }
+ CL_ASSERT(new_socket_info->cm_rep_waiting == FALSE);
+ new_socket_info->cm_rep_waiting = TRUE;
 
  cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
  cl_qlist_insert_tail(
@@ -369,6 +373,7 @@
  *    of the user supplied callback so you can trigger that once your
  *    substituted function is triggered).
  */
+
 static SOCKET WSPAPI
 IBSPAccept(
  IN    SOCKET      s,
@@ -724,6 +729,8 @@
 
  IBSP_ENTER( IBSP_DBG_CONN );
 
+ IBSP_ERROR(("IBSPConnect called\n"));
+
  UNUSED_PARAM( lpCalleeData );
  UNUSED_PARAM( lpSQOS );
  UNUSED_PARAM( lpGQOS );
@@ -855,6 +862,7 @@
 done:
  cl_spinlock_release( &socket_info->mutex );
  IBSP_EXIT( IBSP_DBG_CONN );
+ IBSP_ERROR(("IBSPConnect returning\n"));
  return SOCKET_ERROR;
 }
 
@@ -1455,6 +1463,12 @@
  *   handle and then make the receive call. If called with overlap,
post the operation
  *   to our IOCP or completion routine.
 */
+
+int
+ib_accept1(
+ IN    struct ibsp_socket_info  *socket_info,
+ IN    ib_cm_req_rec_t    *cm_req_received );
+
 static int WSPAPI
 IBSPRecv(
      SOCKET      s,
@@ -1496,6 +1510,12 @@
  }
 
  cl_spinlock_acquire( &socket_info->mutex );
+
+ if (socket_info->cm_rep_waiting == TRUE) {
+  ib_accept1(socket_info,&socket_info->cm_req_received);
+  socket_info->cm_rep_waiting = FALSE;
+ }
+
  switch( socket_info->socket_state )
  {
  case IBSP_CONNECTED:
Index: ibspstruct.h
===================================================================
--- ibspstruct.h (revision 1372)
+++ ibspstruct.h (working copy)
@@ -326,6 +326,9 @@
  long   recv_log_idx;
  long   send_log_idx;
 #endif
+
+ boolean_t  cm_rep_waiting;
+ ib_cm_req_rec_t cm_req_received ;
 };
 
 

 
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060530/708d587d/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: wsd_connection.patch
Type: application/octet-stream
Size: 5163 bytes
Desc: wsd_connection.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060530/708d587d/attachment.obj>


More information about the ofw mailing list