[ofw] [PATCH] post 80% of SRQ buffers, code cleanup

Alex Naslednikov xalex at mellanox.co.il
Sun Feb 27 00:00:43 PST 2011


Looks good to me.
Just one minor comment — why should we use cl_dbg_out() here?
		IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
 			("No available WQEs.\n") );
-		cl_dbg_out("HW is full\n");
+		cl_dbg_out("[IPoIB] HW send_q is full\n");

We can just print 		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ALL,
 			("HW SQ is full. No available WQEs.\n") );

-----Original Message-----
From: Smith, Stan [mailto:stan.smith at intel.com] 
Sent: Thursday, February 24, 2011 8:04 PM
To: Alex Naslednikov
Cc: ofw at lists.openfabrics.org
Subject: [PATCH] post 80% of SRQ buffers, code cleanup

Post 80% of SRQ buffers initially such that the SRQ buffer pool will not be required to grow as often, reducing RNR errors under stress.

CM code cleanup
 migrate/rename cm_buf_mgr_t.recv_pool_depth to ib_mgr_t.srq_depth; clearer meaning in var naming.
 add ib_mgr_t.srq_max_depth

output NDIS error code in hex so it's easier to find in NDIS.h

ipoib_port.cpp
Set up ib_mgr_t.srq vars (max & 80% of max). Make the intent of cl_dbg_out() output clear.

ipoib_cm.cpp:
Use new names for recv_pool_depth
RNR_retry @ 2 instead of 1 for correct operation.
Correct mislabeled CQ AsyncEvent debug print; was mislabeled as SRQ AsyncEvent.


Signed-off-by: stan smith <stan.smith at intel.com>

--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h	Thu Feb 24 09:23:14 2011
+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h	Thu Feb 24 09:06:51 2011
@@ -53,7 +53,6 @@
 	cl_spinlock_t		lock;
 	cl_qlist_t			oop_list;
 	long				posted;
-	int32_t				recv_pool_depth;
 	boolean_t			pool_init;
 } cm_buf_mgr_t;
 /*
--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h	Thu Feb 24 09:22:30 2011
+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h	Thu Feb 24 09:04:38 2011
@@ -107,6 +107,8 @@
 	ib_query_handle_t		h_query;
 	ib_srq_handle_t			h_srq;
 	atomic32_t				srq_qp_cnt;
+	int32_t					srq_max_depth;
+	int32_t					srq_depth;
 	net32_t					qpn;
 
 	ib_mr_handle_t			h_mr;
@@ -1123,7 +1125,7 @@
 
 	if (NET_BUFFER_LIST_STATUS(NetBufferLists) != NDIS_STATUS_SUCCESS) {
 		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ALL,
-			("NBL completed with error %d to NDIS\n",
+			("NBL completed with error %#x to NDIS\n",
 				NET_BUFFER_LIST_STATUS(NetBufferLists)));
 	}
 	IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_SEND,


--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp	Thu Feb 24 09:34:29 2011
+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp	Thu Feb 24 09:05:46 2011
@@ -1442,6 +1442,12 @@
 			(p_port->p_ca_attrs->max_srq_wrs/2)) );
 
 	p_port->ib_mgr.srq_qp_cnt = 0; 
+	p_port->ib_mgr.srq_max_depth = srq_attr.max_wr;
+
+	/* setup SRQ depth based on rq_depth property++: setup such that 80% of
+	 * allocated SRQ (srq_max_depth) buffers are posted. See cm_buf_mgr_init().
+	 */
+	p_port->ib_mgr.srq_depth = (srq_attr.max_wr / 5) * 4;
 
 	ib_status = p_port->p_adapter->p_ifc->create_srq( p_port->ib_mgr.h_pd, 
 													  &srq_attr,
@@ -6118,7 +6124,7 @@
 	{
 		IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
 			("No available WQEs.\n") );
-		cl_dbg_out("HW is full\n");
+		cl_dbg_out("[IPoIB] HW send_q is full\n");
 		return NDIS_STATUS_PENDING;
 	}
 
--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp	Thu Feb 24 09:45:24 2011
+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp	Thu Feb 24 09:20:29 2011
@@ -402,7 +402,7 @@
 	if( send_qp == FALSE && !p_endpt->conn.h_recv_cq )
 	{
 		memset( &create_cq, 0, sizeof( ib_cq_create_t ) );
-		create_cq.size = p_port->p_adapter->params.rq_depth;
+		create_cq.size = p_port->ib_mgr.srq_depth;
 		create_cq.pfn_comp_cb = __cm_recv_cb;
 
 		ib_status = p_endpt->p_ifc->create_cq( p_port->ib_mgr.h_ca,
@@ -633,7 +633,7 @@
 	creq.flow_ctrl			 = FALSE;	// srq attached qp does not support FC
 	creq.local_resp_timeout  = ib_path_rec_pkt_life(&path_rec) + 1;
 	creq.rnr_nak_timeout	 = 7;
-	creq.rnr_retry_cnt		 = 1; /* IPoIB CM RFC draft warns against retries */
+	creq.rnr_retry_cnt		 = 2;
 	creq.retry_cnt			 = 1; /* IPoIB CM RFC draft warns against retries */
 
 	//creq.pfn_cm_req_cb	= (ib_pfn_cm_req_cb_t)NULL; no peer connections
@@ -1631,6 +1631,7 @@
 	PERF_DECLARE( CMSendCb );
 	PERF_DECLARE( CMPollSend );
 	PERF_DECLARE( CMFreeSendBuf );
+	PERF_DECLARE( PortResume );
 
 	IPOIB_ENTER( IPOIB_DBG_SEND );
 
@@ -1747,9 +1748,20 @@
 		p_endpt->conn.h_send_qp = NULL;	// prevent Tx on invalid QP
 		__queue_tx_resource_free( p_port, p_endpt );
 		endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECT_CLEANUP );
+		/* Resume any sends awaiting resources. */
+		cl_perf_start( PortResume );
+		ipoib_port_resume( p_port, TRUE, &complete_list ); 
+		cl_perf_stop( &p_port->p_adapter->perf, PortResume );
 	}
 	else
 	{
+		if (p_port->send_mgr.pending_list.count > 0)
+		{
+			/* Resume any sends awaiting resources. */
+			cl_perf_start( PortResume );
+			ipoib_port_resume( p_port, TRUE, &complete_list ); 
+			cl_perf_stop( &p_port->p_adapter->perf, PortResume );
+		}
 		/* Rearm the CQ. */
 		ib_status = p_ibal->rearm_cq( h_cq, FALSE );
 		CL_ASSERT( ib_status == IB_SUCCESS );
@@ -2073,18 +2085,18 @@
 	IPOIB_ENTER( IPOIB_DBG_RECV );
 
 	posted = p_port->cm_buf_mgr.posted;
-	wanted = p_port->p_adapter->params.rq_depth - posted;
+	wanted = p_port->ib_mgr.srq_depth - posted;
 
 #if DBG
-	IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
-		("Port[%d] posting %d RC bufs of limit(rq_depth %d) posted %d max %d\n",
-			p_port->port_num, wanted, p_port->p_adapter->params.rq_depth,
-			posted, p_port->cm_buf_mgr.recv_pool_depth) );
+	IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM/*XXX RECV*/,
+		("Port[%d] posting RC bufs: wanted %d srq_depth %d posted %d srq max %d\n",
+			p_port->port_num, wanted, p_port->ib_mgr.srq_depth,
+			posted, p_port->ib_mgr.srq_max_depth) );
 #endif
 
 	cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
 
-	for( rx_cnt=posted; rx_cnt < p_port->p_adapter->params.rq_depth; rx_cnt++)
+	for( rx_cnt=posted; rx_cnt < p_port->ib_mgr.srq_depth; rx_cnt++)
 	{
 		/* Pull receives out of the pool to chain them up. */
 		p_next_desc = __cm_buf_mgr_get_recv_locked( p_port, p_endpt );
@@ -2174,9 +2186,9 @@
 	cl_obj_unlock( &p_port->obj );
 
 	cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
-	rx_wanted = p_port->p_adapter->params.rq_depth - p_port->cm_buf_mgr.posted;
+	rx_wanted = p_port->ib_mgr.srq_depth - p_port->cm_buf_mgr.posted;
 
-	while( p_port->cm_buf_mgr.posted < p_port->p_adapter->params.rq_depth )
+	while( p_port->cm_buf_mgr.posted < p_port->ib_mgr.srq_depth )
 	{
 		/* Pull receives out of the pool and chain them up. */
 		cl_perf_start( GetRecv );
@@ -2729,7 +2741,7 @@
 	p_port = ipoib_endpt_parent( p_endpt );
 
 	IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-		("SRQ CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
+		("CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
 			p_endpt->tag, ib_get_async_event_str(p_event_rec->code),
 			p_event_rec->vendor_specific) );
 }
@@ -2904,15 +2916,11 @@
 
 	__cm_buf_mgr_construct( &p_port->cm_buf_mgr );
 
-	p_port->cm_buf_mgr.recv_pool_depth =
-		min( (uint32_t) p_port->p_adapter->params.rq_depth * 8,
-				p_port->p_ca_attrs->max_srq_wrs/2 );
-
-	DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
-			("Port[%d] cm_recv_mgr.recv_pool_depth %d max_srq_wrs/2 %d\n",
+	DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM,
+		("Port[%d] srq_max_depth %d srq_depth %d\n",
 			p_port->port_num,
-			p_port->cm_buf_mgr.recv_pool_depth,
-			p_port->p_ca_attrs->max_srq_wrs/2 ) );
+			p_port->ib_mgr.srq_max_depth,
+			p_port->ib_mgr.srq_depth) );
 
 	cl_qlist_init( &p_port->cm_buf_mgr.oop_list );
 	cl_status = cl_spinlock_init( &p_port->cm_buf_mgr.lock );
@@ -2954,7 +2962,7 @@
 
 	/* Allocate the receive descriptors pool */
 	cl_status = cl_qpool_init( &p_port->cm_buf_mgr.recv_pool,
-							   p_port->cm_buf_mgr.recv_pool_depth,
+							   p_port->ib_mgr.srq_max_depth,
 							   0,
 							   0,
 							   sizeof( ipoib_cm_recv_desc_t ),




More information about the ofw mailing list