[ofw] [PATCH] post 80% of SRQ buffers, code cleanup
Smith, Stan
stan.smith at intel.com
Sun Feb 27 16:29:20 PST 2011
>-----Original Message-----
>From: Alex Naslednikov [mailto:xalex at mellanox.co.il]
>Sent: Sunday, February 27, 2011 12:01 AM
>To: Smith, Stan
>Cc: ofw at lists.openfabrics.org
>Subject: RE: [PATCH] post 80% of SRQ buffers, code cleanup
>
>Looks good to me.
>Just one minor comment - why should we use cl_dbg_out() here?
> IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
> ("No available WQEs.\n") );
>- cl_dbg_out("HW is full\n");
>+ cl_dbg_out("[IPoIB] HW send_q is full\n");
>
>We can just print IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ALL,
> ("HW SQ is full, no available WQEs.\n") );
cl_dbg_out() was the original code; I added the tag so others would not run into the confusion I encountered.
PRINT_EXIT() would also add the IPoIB tag.
Will modify the patch.
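Roughly, the modified code would then read (a sketch; the final trace level and
debug flag may differ):

    /* One trace call; IPOIB_PRINT_EXIT() already prefixes the IPoIB tag,
     * so the extra cl_dbg_out() can be dropped.
     */
    IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ALL,
        ("HW SQ is full, no available WQEs.\n") );
    return NDIS_STATUS_PENDING;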
Thanks,
Stan.
>
>-----Original Message-----
>From: Smith, Stan [mailto:stan.smith at intel.com]
>Sent: Thursday, February 24, 2011 8:04 PM
>To: Alex Naslednikov
>Cc: ofw at lists.openfabrics.org
>Subject: [PATCH] post 80% of SRQ buffers, code cleanup
>
>Post 80% of the SRQ buffers initially so that the SRQ buffer pool is not required to grow as often,
>reducing RNR errors under stress (a standalone sketch of the sizing rule follows the patch).
>
>CM code cleanup:
> migrate/rename cm_buf_mgr_t.recv_pool_depth to ib_mgr_t.srq_depth for a clearer variable name.
> add ib_mgr_t.srq_max_depth.
>
>Output the NDIS error code in hex so it is easier to find in NDIS.h.
>
>ipoib_port.cpp:
>Set up the ib_mgr_t SRQ variables: srq_max_depth and srq_depth (80% of max).
>Make the intent of the cl_dbg_out() output clear.
>
>ipoib_cm.cpp:
>Use the new srq_depth/srq_max_depth names in place of recv_pool_depth.
>Set rnr_retry_cnt to 2 instead of 1 for correct operation.
>Correct a mislabeled CQ AsyncEvent debug print; it was mislabeled as an SRQ AsyncEvent.
>
>
>Signed-off-by: stan smith <stan.smith at intel.com>
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h Thu Feb 24 09:23:14 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_endpoint.h Thu Feb 24 09:06:51 2011
>@@ -53,7 +53,6 @@
> cl_spinlock_t lock;
> cl_qlist_t oop_list;
> long posted;
>- int32_t recv_pool_depth;
> boolean_t pool_init;
> } cm_buf_mgr_t;
> /*
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h Thu Feb 24 09:22:30 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h Thu Feb 24 09:04:38 2011
>@@ -107,6 +107,8 @@
> ib_query_handle_t h_query;
> ib_srq_handle_t h_srq;
> atomic32_t srq_qp_cnt;
>+ int32_t srq_max_depth;
>+ int32_t srq_depth;
> net32_t qpn;
>
> ib_mr_handle_t h_mr;
>@@ -1123,7 +1125,7 @@
>
> if (NET_BUFFER_LIST_STATUS(NetBufferLists) != NDIS_STATUS_SUCCESS) {
> IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ALL,
>- ("NBL completed with error %d to NDIS\n",
>+ ("NBL completed with error %#x to NDIS\n",
> NET_BUFFER_LIST_STATUS(NetBufferLists)));
> }
> IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_SEND,
>
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp Thu Feb 24 09:34:29 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp Thu Feb 24 09:05:46 2011
>@@ -1442,6 +1442,12 @@
> (p_port->p_ca_attrs->max_srq_wrs/2)) );
>
> p_port->ib_mgr.srq_qp_cnt = 0;
>+ p_port->ib_mgr.srq_max_depth = srq_attr.max_wr;
>+
>+ /* Set up the SRQ depth such that 80% of the allocated SRQ
>+ * (srq_max_depth) buffers are posted initially. See cm_buf_mgr_init().
>+ */
>+ p_port->ib_mgr.srq_depth = (srq_attr.max_wr / 5) * 4;
>
> ib_status = p_port->p_adapter->p_ifc->create_srq( p_port->ib_mgr.h_pd,
> &srq_attr,
>@@ -6118,7 +6124,7 @@
> {
> IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
> ("No available WQEs.\n") );
>- cl_dbg_out("HW is full\n");
>+ cl_dbg_out("[IPoIB] HW send_q is full\n");
> return NDIS_STATUS_PENDING;
> }
>
>--- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp Thu Feb 24 09:45:24 2011
>+++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_cm.cpp Thu Feb 24 09:20:29 2011
>@@ -402,7 +402,7 @@
> if( send_qp == FALSE && !p_endpt->conn.h_recv_cq )
> {
> memset( &create_cq, 0, sizeof( ib_cq_create_t ) );
>- create_cq.size = p_port->p_adapter->params.rq_depth;
>+ create_cq.size = p_port->ib_mgr.srq_depth;
> create_cq.pfn_comp_cb = __cm_recv_cb;
>
> ib_status = p_endpt->p_ifc->create_cq( p_port->ib_mgr.h_ca,
>@@ -633,7 +633,7 @@
> creq.flow_ctrl = FALSE; // srq attached qp does not support FC
> creq.local_resp_timeout = ib_path_rec_pkt_life(&path_rec) + 1;
> creq.rnr_nak_timeout = 7;
>- creq.rnr_retry_cnt = 1; /* IPoIB CM RFC draft warns against retries */
>+ creq.rnr_retry_cnt = 2;
> creq.retry_cnt = 1; /* IPoIB CM RFC draft warns against retries */
>
> //creq.pfn_cm_req_cb = (ib_pfn_cm_req_cb_t)NULL; no peer connections
>@@ -1631,6 +1631,7 @@
> PERF_DECLARE( CMSendCb );
> PERF_DECLARE( CMPollSend );
> PERF_DECLARE( CMFreeSendBuf );
>+ PERF_DECLARE( PortResume );
>
> IPOIB_ENTER( IPOIB_DBG_SEND );
>
>@@ -1747,9 +1748,20 @@
> p_endpt->conn.h_send_qp = NULL; // prevent Tx on invalid QP
> __queue_tx_resource_free( p_port, p_endpt );
> endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECT_CLEANUP );
>+ /* Resume any sends awaiting resources. */
>+ cl_perf_start( PortResume );
>+ ipoib_port_resume( p_port, TRUE, &complete_list );
>+ cl_perf_stop( &p_port->p_adapter->perf, PortResume );
> }
> else
> {
>+ if (p_port->send_mgr.pending_list.count > 0)
>+ {
>+ /* Resume any sends awaiting resources. */
>+ cl_perf_start( PortResume );
>+ ipoib_port_resume( p_port, TRUE, &complete_list );
>+ cl_perf_stop( &p_port->p_adapter->perf, PortResume );
>+ }
> /* Rearm the CQ. */
> ib_status = p_ibal->rearm_cq( h_cq, FALSE );
> CL_ASSERT( ib_status == IB_SUCCESS );
>@@ -2073,18 +2085,18 @@
> IPOIB_ENTER( IPOIB_DBG_RECV );
>
> posted = p_port->cm_buf_mgr.posted;
>- wanted = p_port->p_adapter->params.rq_depth - posted;
>+ wanted = p_port->ib_mgr.srq_depth - posted;
>
> #if DBG
>- IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
>- ("Port[%d] posting %d RC bufs of limit(rq_depth %d) posted %d max %d\n",
>- p_port->port_num, wanted, p_port->p_adapter->params.rq_depth,
>- posted, p_port->cm_buf_mgr.recv_pool_depth) );
>+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM/*XXX RECV*/,
>+ ("Port[%d] posting RC bufs: wanted %d srq_depth %d posted %d srq max %d\n",
>+ p_port->port_num, wanted, p_port->ib_mgr.srq_depth,
>+ posted, p_port->ib_mgr.srq_max_depth) );
> #endif
>
> cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
>
>- for( rx_cnt=posted; rx_cnt < p_port->p_adapter->params.rq_depth; rx_cnt++)
>+ for( rx_cnt=posted; rx_cnt < p_port->ib_mgr.srq_depth; rx_cnt++)
> {
> /* Pull receives out of the pool to chain them up. */
> p_next_desc = __cm_buf_mgr_get_recv_locked( p_port, p_endpt );
>@@ -2174,9 +2186,9 @@
> cl_obj_unlock( &p_port->obj );
>
> cl_spinlock_acquire( &p_port->cm_buf_mgr.lock);
>- rx_wanted = p_port->p_adapter->params.rq_depth - p_port->cm_buf_mgr.posted;
>+ rx_wanted = p_port->ib_mgr.srq_depth - p_port->cm_buf_mgr.posted;
>
>- while( p_port->cm_buf_mgr.posted < p_port->p_adapter->params.rq_depth )
>+ while( p_port->cm_buf_mgr.posted < p_port->ib_mgr.srq_depth )
> {
> /* Pull receives out of the pool and chain them up. */
> cl_perf_start( GetRecv );
>@@ -2729,7 +2741,7 @@
> p_port = ipoib_endpt_parent( p_endpt );
>
> IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>- ("SRQ CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
>+ ("CQ AsyncEvent EP %s event '%s' vendor code %#I64d\n",
> p_endpt->tag, ib_get_async_event_str(p_event_rec->code),
> p_event_rec->vendor_specific) );
> }
>@@ -2904,15 +2916,11 @@
>
> __cm_buf_mgr_construct( &p_port->cm_buf_mgr );
>
>- p_port->cm_buf_mgr.recv_pool_depth =
>- min( (uint32_t) p_port->p_adapter->params.rq_depth * 8,
>- p_port->p_ca_attrs->max_srq_wrs/2 );
>-
>- DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
>- ("Port[%d] cm_recv_mgr.recv_pool_depth %d max_srq_wrs/2 %d\n",
>+ DIPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_CM,
>+ ("Port[%d] srq_max_depth %d srq_depth %d\n",
> p_port->port_num,
>- p_port->cm_buf_mgr.recv_pool_depth,
>- p_port->p_ca_attrs->max_srq_wrs/2 ) );
>+ p_port->ib_mgr.srq_max_depth,
>+ p_port->ib_mgr.srq_depth) );
>
> cl_qlist_init( &p_port->cm_buf_mgr.oop_list );
> cl_status = cl_spinlock_init( &p_port->cm_buf_mgr.lock );
>@@ -2954,7 +2962,7 @@
>
> /* Allocate the receive descriptors pool */
> cl_status = cl_qpool_init( &p_port->cm_buf_mgr.recv_pool,
>- p_port->cm_buf_mgr.recv_pool_depth,
>+ p_port->ib_mgr.srq_max_depth,
> 0,
> 0,
> sizeof( ipoib_cm_recv_desc_t ),
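
For reference, a minimal standalone sketch of the SRQ sizing rule the patch applies
(the helper name set_srq_depths is hypothetical; srq_max_depth, srq_depth, and the
80% computation come from the patch above):

    #include <stdint.h>

    /* Allocate the full SRQ (srq_max_depth) but post only 80% of the
     * buffers up front, so the buffer pool grows less often and RNR
     * errors are less likely under stress.
     */
    static void set_srq_depths( int32_t max_wr,
                                int32_t *srq_max_depth,
                                int32_t *srq_depth )
    {
        *srq_max_depth = max_wr;          /* full allocation */
        *srq_depth = (max_wr / 5) * 4;    /* post 80% initially */
    }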