[ofw] [PATCH] post 80% of SRQ buffers, code cleanup

Smith, Stan stan.smith at intel.com
Sun Feb 27 16:29:20 PST 2011


>-----Original Message-----
>From: Alex Naslednikov [mailto:xalex at mellanox.co.il]
>Sent: Sunday, February 27, 2011 12:01 AM
>To: Smith, Stan
>Cc: ofw at lists.openfabrics.org
>Subject: RE: [PATCH] post 80% of SRQ buffers, code cleanup
>
>Looks good to me.
>Just one minor comment - why should we use cl_dbg_out() here?
>		IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
> 			("No available WQEs.\n") );
>-		cl_dbg_out("HW is full\n");
>+		cl_dbg_out("[IPoIB] HW send_q is full\n");
>
>We can just print 		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ALL,
> 			("HW SQ is full No available WQEs.\n") );

cl_dbg_out() was in the original code; I only added a tag so that others would not run into the confusion I encountered.
PRINT_EXIT() would also add the IPoIB tag.
Will modify the patch.

Thanks,

Stan.

>
>-----Original Message-----
>From: Smith, Stan [mailto:stan.smith at intel.com]
>Sent: Thursday, February 24, 2011 8:04 PM
>To: Alex Naslednikov
>Cc: ofw at lists.openfabrics.org
>Subject: [PATCH] post 80% of SRQ buffers, code cleanup
>
>Post 80% of SRQ buffers initially such that the SRQ buffer pool will not be required to grow as often,
>reduce RNR errors under stress.
>
>CM code cleanup
> migrate/rename cm_buf_mgr_t.recv_pool_depth to ib_mgr_t.srq_depth; clearer meaning in var naming.
> add ib_mgr_t.srq_max_depth
>
>output NDIS error code in hex so it's easier to find in NDIS.h
>
>ipoib_port.cpp
>Setup ib_mgr_t.srq vars max & 80% of max.
>Make the intent of cl_dbg_msg() output clear.
>
>ipoib_cm.cpp:
>Use new names for recv_pool_depth
>RNR_retry @ 2 instead of 1 for correct operation.
>Correct mislabeled CQ AsyncEvent debug print; was mislabeled as SRQ AsyncEvent.
>
>
>Signed-off-by: stan smith <stan.smith at intel.com>
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h	Thu Feb 24 09:23:14 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h	Thu Feb 24 09:06:51 2011
>@@ -53,7 +53,6 @@
> 	cl_spinlock_t		lock;
> 	cl_qlist_t			oop_list;
> 	long				posted;
>-	int32_t				recv_pool_depth;
> 	boolean_t			pool_init;
> } cm_buf_mgr_t;
> /*
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h	Thu Feb 24 09:22:30 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h	Thu Feb 24 09:04:38 2011
>@@ -107,6 +107,8 @@
> 	ib_query_handle_t		h_query;
> 	ib_srq_handle_t			h_srq;
> 	atomic32_t				srq_qp_cnt;
>+	int32_t					srq_max_depth;
>+	int32_t					srq_depth;
> 	net32_t					qpn;
>
> 	ib_mr_handle_t			h_mr;
>@@ -1123,7 +1125,7 @@
>
> 	if (NET_BUFFER_LIST_STATUS(NetBufferLists) != NDIS_STATUS_SUCCESS) {
> 		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ALL,
>-			("NBL completed with error %d to NDIS\n",
>+			("NBL completed with error %#x to NDIS\n",
> 				NET_BUFFER_LIST_STATUS(NetBufferLists)));
> 	}
> 	IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_SEND,
>
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp	Thu Feb 24 09:34:29 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp	Thu Feb 24 09:05:46 2011
>@@ -1442,6 +1442,12 @@
> 			(p_port->p_ca_attrs->max_srq_wrs/2)) );
>
> 	p_port->ib_mgr.srq_qp_cnt = 0;
>+	p_port->ib_mgr.srq_max_depth = srq_attr.max_wr;
>+
>+	/* setup SRQ depth based on rq_depth property++: setup such that 80% of
>+	 * allocated SRQ (srq_max_depth) buffers are posted. See cm_buf_mgr_init().
>+	 */
>+	p_port->ib_mgr.srq_depth = (srq_attr.max_wr / 5) * 4;
>
> 	ib_status = p_port->p_adapter->p_ifc->create_srq( p_port->ib_mgr.h_pd,
> 													  &srq_attr,
>@@ -6118,7 +6124,7 @@
> 	{
> 		IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
> 			("No available WQEs.\n") );
>-		cl_dbg_out("HW is full\n");
>+		cl_dbg_out("[IPoIB] HW send_q is full\n");
> 		return NDIS_STATUS_PENDING;
> 	}
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp	Thu Feb 24 09:45:24 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp	Thu Feb 24 09:20:29 2011
>@@ -402,7 +402,7 @@
> 	if( send_qp == FALSE && !p_endpt->conn.h_recv_cq )
> 	{
> 		memset( &create_cq, 0, sizeof( ib_cq_create_t ) );
>-		create_cq.size = p_port->p_adapter->params.rq_depth;
>+		create_cq.size = p_port->ib_mgr.srq_depth;
> 		create_cq.pfn_comp_cb = __cm_recv_cb;
>
> 		ib_status = p_endpt->p_ifc->create_cq( p_port->ib_mgr.h_ca,
>@@ -633,7 +633,7 @@
> 	creq.flow_ctrl			 = FALSE;	// srq attached qp does not support FC
> 	creq.local_resp_timeout  = ib_path_rec_pkt_life(&path_rec) + 1;
> 	creq.rnr_nak_timeout	 = 7;
>-	creq.rnr_retry_cnt		 = 1; /* IPoIB CM RFC draft warns against retries */
>+	creq.rnr_retry_cnt		 = 2;
> 	creq.retry_cnt			 = 1; /* IPoIB CM RFC draft warns against retries */
>
> 	//creq.pfn_cm_req_cb	= (ib_pfn_cm_req_cb_t)NULL; no peer connections
>@@ -1631,6 +1631,7 @@
> 	PERF_DECLARE( CMSendCb );
> 	PERF_DECLARE( CMPollSend );
> 	PERF_DECLARE( CMFreeSendBuf );
>+	PERF_DECLARE( PortResume );
>
> 	IPOIB_ENTER( IPOIB_DBG_SEND );
>
>@@ -1747,9 +1748,20 @@
> 		p_endpt->conn.h_send_qp = NULL;	// prevent Tx on invalid QP
> 		__queue_tx_resource_free( p_port, p_endpt );
> 		endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECT_CLEANUP );
>+		/* Resume any sends awaiting resources. */
>+		cl_perf_start( PortResume );
>+		ipoib_port_resume( p_port, TRUE, &complete_list );
>+		cl_perf_stop( &p_port->p_adapter->perf, PortResume );
> 	}
> 	else
> 	{
>+		if (p_port->send_mgr.pending_list.count > 0)
>+		{
>+			/* Resume any sends awaiting resources. */
>+			cl_perf_start( PortResume );
>+			ipoib_port_resume( p_port, TRUE, &complete_list );
>+			cl_perf_stop( &p_port->p_adapter->perf, PortResume );
>+		}
> 		/* Rearm the CQ. */
> 		ib_status = p_ibal->rearm_cq( h_cq, FALSE );
> 		CL_ASSERT( ib_status == IB_SUCCESS );
>@@ -2073,18 +2085,18 @@
> 	IPOIB_ENTER( IPOIB_DBG_RECV );
>
> 	posted = p_port->cm_buf_mgr.posted;
>-	wanted = p_port->p_adapter->params.rq_depth - posted;
>+	wanted = p_port->ib_mgr.srq_depth - posted;
>
> #if DBG
>-	IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
>-		("Port[%d] posting %d RC bufs of limit(rq_depth %d) posted %d max %d\n",
>-			p_port->port_num, wanted, p_port->p_adapter->params.rq_depth,
>-			posted, p_port->cm_buf_mgr.recv_pool_depth) );
>+	IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM/*XXX RECV*/,
>+		("Port[%d] posting RC bufs: wanted %d srq_depth %d posted %d srq max %d\n",
>+			p_port->port_num, wanted, p_port->ib_mgr.srq_depth,
>+			posted, p_port->ib_mgr.srq_max_depth) );
> #endif
>
> 	cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
>
>-	for( rx_cnt=posted; rx_cnt < p_port->p_adapter->params.rq_depth; rx_cnt++)
>+	for( rx_cnt=posted; rx_cnt < p_port->ib_mgr.srq_depth; rx_cnt++)
> 	{
> 		/* Pull receives out of the pool to chain them up. */
> 		p_next_desc = __cm_buf_mgr_get_recv_locked( p_port, p_endpt );
>@@ -2174,9 +2186,9 @@
> 	cl_obj_unlock( &p_port->obj );
>
> 	cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
>-	rx_wanted = p_port->p_adapter->params.rq_depth - p_port->cm_buf_mgr.posted;
>+	rx_wanted = p_port->ib_mgr.srq_depth - p_port->cm_buf_mgr.posted;
>
>-	while( p_port->cm_buf_mgr.posted < p_port->p_adapter->params.rq_depth )
>+	while( p_port->cm_buf_mgr.posted < p_port->ib_mgr.srq_depth )
> 	{
> 		/* Pull receives out of the pool and chain them up. */
> 		cl_perf_start( GetRecv );
>@@ -2729,7 +2741,7 @@
> 	p_port = ipoib_endpt_parent( p_endpt );
>
> 	IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>-		("SRQ CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
>+		("CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
> 			p_endpt->tag, ib_get_async_event_str(p_event_rec->code),
> 			p_event_rec->vendor_specific) );
> }
>@@ -2904,15 +2916,11 @@
>
> 	__cm_buf_mgr_construct( &p_port->cm_buf_mgr );
>
>-	p_port->cm_buf_mgr.recv_pool_depth =
>-		min( (uint32_t) p_port->p_adapter->params.rq_depth * 8,
>-				p_port->p_ca_attrs->max_srq_wrs/2 );
>-
>-	DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
>-			("Port[%d] cm_recv_mgr.recv_pool_depth %d max_srq_wrs/2 %d\n",
>+	DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM,
>+		("Port[%d] srq_max_depth %d srq_depth %d\n",
> 			p_port->port_num,
>-			p_port->cm_buf_mgr.recv_pool_depth,
>-			p_port->p_ca_attrs->max_srq_wrs/2 ) );
>+			p_port->ib_mgr.srq_max_depth,
>+			p_port->ib_mgr.srq_depth) );
>
> 	cl_qlist_init( &p_port->cm_buf_mgr.oop_list );
> 	cl_status = cl_spinlock_init( &p_port->cm_buf_mgr.lock );
>@@ -2954,7 +2962,7 @@
>
> 	/* Allocate the receive descriptors pool */
> 	cl_status = cl_qpool_init( &p_port->cm_buf_mgr.recv_pool,
>-							   p_port->cm_buf_mgr.recv_pool_depth,
>+							   p_port->ib_mgr.srq_max_depth,
> 							   0,
> 							   0,
> 							   sizeof( ipoib_cm_recv_desc_t ),




More information about the ofw mailing list