[ofw] [PATCH] IPOIB_NDIS6_CM remove NBL/MDL alloc & dealloc from UD recv data path
Fab Tillier
ftillier at microsoft.com
Tue Nov 9 10:48:04 PST 2010
Hi Stan,
Smith, Stan wrote on Tue, 9 Nov 2010 at 10:10:18
> Hello,
> Enclosed you will find a technology preview patch implementing Fab's
> suggestions about removing NETWORK_BUFFER_LIST and MDL allocation and
> deallocation for every UD receive operation; now allocated/deallocated
> in recv_pool constructor & destructor routines.
> The results is approximately a 3 Mbyte/sec transfer rate increase; less
> than I had envisioned although good nonetheless.
What does it do for small message latencies?
> There are bigger fish
> to fry in the send/recv data paths.
> The patch is done with #if UD_NBL_IN_DESC to highlight the changes and
> make comparison testing easier; I would envision the final patch
> without the #if.
>
> Details:
> Add NBL and MDL pointers to ipoib_recv_desc_t.
> Allocate NBL(h_packet_pool element) & MDL for recv_pool in
> recv_ctor()
> (allocate NBL pool before recv_pool) in buf_mgr_init().
> Added recv_dtor()for RECV_INLINE as NBL and MDL need to be
> deallocated.
> Destroy recv_pool before NBL pool as recv_dtor() needs to free NBL
> before h_packet_pool destroyed.
>
> Let me know what you think.
Looks good to me, some comments below.
>
> thanks,
>
> stan.
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h Tue Nov 09 09:47:18
> 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h Mon Nov 08 13:33:52
> 2010
> @@ -365,6 +365,10 @@
> ipoib_pkt_type_t type;
> ib_recv_wr_t wr;
> ib_local_ds_t local_ds[2];
> +#if UD_NBL_IN_DESC
> + NET_BUFFER_LIST *p_NBL;
> + MDL *p_mdl;
> +#endif
> NDIS_TCP_IP_CHECKSUM_PACKET_INFO ndis_csum;
> #if IPOIB_INLINE_RECV
> recv_buf_t buf;
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/SOURCES Tue Nov 09 09:46:50 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/SOURCES Tue Nov 09 09:45:59 2010
> @@ -29,7 +29,8 @@
> INCLUDES=..;..\..\..\inc;..\..\..\inc\kernel;
>
> C_DEFINES=$(C_DEFINES) -DNDIS_MINIPORT_DRIVER -DNDIS_WDM=1 \
> - -DDEPRECATE_DDK_FUNCTIONS -DNDIS61_MINIPORT=1 -DNEED_CL_OBJ -
> DBINARY_COMPATIBLE=0
> + -DDEPRECATE_DDK_FUNCTIONS -DNDIS61_MINIPORT=1 -DNEED_CL_OBJ \
> + -DBINARY_COMPATIBLE=0 -DUD_NBL_IN_DESC=1
>
> TARGETLIBS= \
> $(TARGETPATH)\*\complib.lib \
>
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp Tue Nov 09 09:47:45
> 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp Tue Nov 09 09:47:18
> 2010
> @@ -192,7 +192,7 @@
> IN void*
> context,
> OUT cl_pool_item_t** const
> pp_pool_item );
>
> -#if !IPOIB_INLINE_RECV
> +#if !IPOIB_INLINE_RECV || UD_NBL_IN_DESC
> static void
> __recv_dtor(
> IN const cl_pool_item_t* const
> p_pool_item,
> @@ -1410,6 +1410,9 @@
> IN ipoib_port_t* const
> p_port )
> {
> cl_status_t cl_status;
> +#if UD_NBL_IN_DESC
> + ib_api_status_t ib_status;
> +#endif
> ipoib_params_t *p_params;
> NET_BUFFER_LIST_POOL_PARAMETERS pool_parameters;
>
> @@ -1420,6 +1423,55 @@
>
> p_params = &p_port->p_adapter->params;
>
> +#if UD_NBL_IN_DESC
> +
> + /* Allocate the NET BUFFER list pools for receive indication.
> */
> + NdisZeroMemory(&pool_parameters,
> sizeof(NET_BUFFER_LIST_POOL_PARAMETERS));
> + pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
> + pool_parameters.Header.Revision =
> NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
> + pool_parameters.Header.Size = sizeof(pool_parameters);
> + pool_parameters.ProtocolId = NDIS_PROTOCOL_ID_DEFAULT;
> + pool_parameters.ContextSize = 0;
> + pool_parameters.fAllocateNetBuffer = TRUE;
> + pool_parameters.PoolTag = 'CRPI';
> + pool_parameters.DataSize = 0;
> +
> + p_port->buf_mgr.h_packet_pool = NdisAllocateNetBufferListPool(
> +
> p_port->p_adapter->h_adapter,
> +
> &pool_parameters );
> +
> + if( !p_port->buf_mgr.h_packet_pool )
> + {
> + NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
> + EVENT_IPOIB_RECV_PKT_POOL, 1,
> NDIS_STATUS_RESOURCES );
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("NdisAllocatePacketPool returned %08X\n",
> (UINT)NDIS_STATUS_RESOURCES) );
> + return IB_INSUFFICIENT_RESOURCES;
> + }
> +
> + /* Allocate the receive descriptor pool */
> + cl_status = cl_qpool_init( &p_port->buf_mgr.recv_pool,
> + p_params-
> >rq_depth * p_params->recv_pool_ratio,
> + 0,
> + 0,
> +
> sizeof(ipoib_recv_desc_t),
> + __recv_ctor,
> + __recv_dtor,
> + p_port );
> +
> + if( cl_status != CL_SUCCESS )
> + {
> + NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
> + EVENT_IPOIB_RECV_POOL, 1, cl_status );
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("cl_qpool_init for recvs returned %#x\n",
> + cl_status) );
> + ib_status = IB_INSUFFICIENT_MEMORY;
> + goto pkt_pool_failed;
> + }
> +
> +#else !UD_NBL_IN_DESC
> +
> /* Allocate the receive descriptor pool */
> cl_status = cl_qpool_init( &p_port->buf_mgr.recv_pool,
> p_params-
> >rq_depth * p_params->recv_pool_ratio,
> @@ -1449,7 +1501,7 @@
> pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
> pool_parameters.Header.Revision =
> NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
> pool_parameters.Header.Size = sizeof(pool_parameters);
> - pool_parameters.ProtocolId = 0;
> + pool_parameters.ProtocolId = NDIS_PROTOCOL_ID_DEFAULT;
> pool_parameters.ContextSize = 0;
> pool_parameters.fAllocateNetBuffer = TRUE;
> pool_parameters.PoolTag = 'CRPI';
> @@ -1467,6 +1519,7 @@
> ("NdisAllocatePacketPool returned %08X\n",
> (UINT)NDIS_STATUS_RESOURCES) );
> return IB_INSUFFICIENT_RESOURCES;
> }
> +#endif
>
> /* Allocate the NET buffer list pool for send formatting. */
> pool_parameters.PoolTag = 'XTPI';
> @@ -1481,11 +1534,31 @@
> IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> ("NdisAllocatePacketPool returned %08X\n",
> (UINT)NDIS_STATUS_RESOURCES) );
> +#if UD_NBL_IN_DESC
> + ib_status = IB_INSUFFICIENT_RESOURCES;
> + goto pkt_pool_failed;
> +#else
> return IB_INSUFFICIENT_RESOURCES;
> +#endif
> }
>
> IPOIB_EXIT( IPOIB_DBG_INIT );
> return IB_SUCCESS;
> +
> +#if UD_NBL_IN_DESC
> +pkt_pool_failed:
> + NdisFreeNetBufferListPool( p_port->buf_mgr.h_packet_pool );
> + p_port->buf_mgr.h_packet_pool = NULL;
> + cl_qpool_destroy( &p_port->buf_mgr.recv_pool );
> + if( p_port->buf_mgr.h_send_pkt_pool)
> + {
> + NdisFreeNetBufferListPool( p_port-
> >buf_mgr.h_send_pkt_pool );
> + p_port->buf_mgr.h_send_pkt_pool = NULL;
> + }
> +
> + IPOIB_EXIT( IPOIB_DBG_INIT );
> + return ib_status;
> +#endif
> }
>
>
> @@ -1506,11 +1579,19 @@
> /* Destroy the receive packet and buffer pools.
> if( p_port->buf_mgr.h_buffer_pool )
> NdisFreeBufferPool( p_port->buf_mgr.h_buffer_pool );*/
> +#if UD_NBL_IN_DESC
> + /* Free the receive and send descriptors. */
> + cl_qpool_destroy( &p_port->buf_mgr.recv_pool );
> +
> + if( p_port->buf_mgr.h_packet_pool )
> + NdisFreeNetBufferListPool ( p_port-
> >buf_mgr.h_packet_pool );
> +#else
> if( p_port->buf_mgr.h_packet_pool )
> NdisFreeNetBufferListPool ( p_port-
> >buf_mgr.h_packet_pool );
>
> /* Free the receive and send descriptors. */
> cl_qpool_destroy( &p_port->buf_mgr.recv_pool );
Would it be invalid to free the recv_pool before the packet_pool in the current code flow? If not, no need to #if UD_NBL_IN_DESC this part, as the change would be valid regardless. As it stands, this looks like a bigger change than it is at first glance.
> +#endif
>
> /* Free the lookaside list of scratch buffers. */
> NdisDeleteNPagedLookasideList( &p_port->buf_mgr.send_buf_list
> );
> @@ -1592,14 +1673,65 @@
> p_desc->wr.num_ds = 1;
> #endif /* IPOIB_INLINE_RECV */
>
> +#if UD_NBL_IN_DESC
> + /* setup NDIS NetworkBufferList and MemoryDescriptorList for
> this Recv desc */
> + p_desc->p_mdl = NdisAllocateMdl(p_port->p_adapter->h_adapter,
> +#if IPOIB_INLINE_RECV
> +
> &p_desc->buf.eth.pkt,
> +#else
> +
> p_desc->p_buf,
> +#endif
> +
> sizeof(ipoib_pkt_t) + sizeof(ib_grh_t) );
> + if( !p_desc->p_mdl )
> + {
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("Failed to allocate MDL\n") );
> + goto err1;
> + }
> +
> + p_desc->p_NBL = NdisAllocateNetBufferAndNetBufferList(
> + p_port-
> >buf_mgr.h_packet_pool,
> + 0,
> + 0,
> + p_desc->p_mdl,
> + 0,
> + 0);
> +
> + if( !p_desc->p_NBL )
> + {
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("Failed to allocate NET_BUFFER_LIST\n") );
> + goto err2;
> + }
> +
> + NET_BUFFER_LIST_NEXT_NBL(p_desc->p_NBL) = NULL;
> + IPOIB_PORT_FROM_NBL( p_desc->p_NBL ) = p_port;
> + IPOIB_RECV_FROM_NBL( p_desc->p_NBL ) = p_desc;
> + p_desc->p_NBL->SourceHandle = p_port->p_adapter->h_adapter;
> +#endif
> +
> *pp_pool_item = &p_desc->item;
>
> IPOIB_EXIT( IPOIB_DBG_ALLOC );
> return CL_SUCCESS;
> -}
>
> +#if UD_NBL_IN_DESC
> +err2:
> + NdisFreeMdl( p_desc->p_mdl );
> + p_desc->p_mdl = NULL;
>
> +err1:
> #if !IPOIB_INLINE_RECV
> + cl_free( p_desc->p_buf );
> + p_desc->p_buf = NULL;
> +#endif
> +
> + return CL_INSUFFICIENT_MEMORY;
> +#endif
> +}
> +
> +
> +#if !IPOIB_INLINE_RECV || UD_NBL_IN_DESC
> static void
> __recv_dtor(
> IN const cl_pool_item_t* const
> p_pool_item,
> @@ -1607,16 +1739,27 @@
> {
> ipoib_recv_desc_t *p_desc;
>
> - IPOIB_ENTER( IPOIB_DBG_ALLOC );
> -
Why?
> UNUSED_PARAM( context );
>
> p_desc = PARENT_STRUCT( p_pool_item, ipoib_recv_desc_t, item );
>
> +#if UD_NBL_IN_DESC
> + if( p_desc->p_mdl )
> + {
> + NdisFreeMdl( p_desc->p_mdl );
> + p_desc->p_mdl = NULL;
Do you need to set to NULL if you're about to free the object?
> + }
> + if( p_desc->p_NBL)
> + {
> + NdisFreeNetBufferList(p_desc->p_NBL);
> + p_desc->p_NBL = NULL;
Same here.
> + }
> +#endif
> +
> +#if !IPOIB_INLINE_RECV
> if( p_desc->p_buf )
> cl_free( p_desc->p_buf );
> -
> - IPOIB_EXIT( IPOIB_DBG_ALLOC );
> +#endif
> }
> #endif
>
> @@ -1657,12 +1800,29 @@
> IN ipoib_recv_desc_t* const
> p_desc,
> IN NET_BUFFER_LIST* const
> p_net_buffer_list OPTIONAL )
> {
> - NET_BUFFER *p_buf = NULL;
> - MDL *p_mdl = NULL;
> IPOIB_ENTER(IPOIB_DBG_RECV );
>
> if( p_net_buffer_list )
> {
> + NET_BUFFER *p_buf = NULL;
> + MDL *p_mdl = NULL;
> +#if UD_NBL_IN_DESC
> + ASSERT( p_desc->p_NBL );
> + ASSERT( p_desc->p_mdl );
> + ASSERT( p_net_buffer_list == p_desc->p_NBL );
> +
> + NET_BUFFER_LIST_NEXT_NBL(p_net_buffer_list) = NULL;
This code seems to assume that the NBL passed in is not chained to any others. Might be worth asserting that NET_BUFFER_LIST_NEXT_NBL is NULL when you enter, rather than setting it?
> +
> + p_buf = NET_BUFFER_LIST_FIRST_NB( p_net_buffer_list );
You also assume there's only one buffer in the NBL. Worth an assert, IMO.
> + p_mdl = NET_BUFFER_FIRST_MDL( p_buf );
Same here with the MDLs.
> +
> + ASSERT( p_mdl == p_desc->p_mdl );
> + ASSERT( NET_BUFFER_CURRENT_MDL( p_buf ) == p_mdl );
> +
> + /* reset Avail buffer lengths to full Rx size */
Why do you need to reset these here? Don't the length get set when the receive completes? Do you use these fields when pre-posting the receive buffers?
> + NET_BUFFER_DATA_LENGTH( p_buf ) = sizeof(ipoib_pkt_t) +
> sizeof(ib_grh_t);
> + NdisAdjustMdlLength( p_mdl, sizeof(ipoib_pkt_t) +
> sizeof(ib_grh_t) );
> +#else
> NET_BUFFER_LIST_NEXT_NBL(p_net_buffer_list) = NULL;
> p_buf = NET_BUFFER_LIST_FIRST_NB(p_net_buffer_list);
> CL_ASSERT( p_buf );
> @@ -1670,6 +1830,7 @@
> CL_ASSERT( p_mdl );
> NdisFreeMdl(p_mdl);
> NdisFreeNetBufferList(p_net_buffer_list);
> +#endif
> }
>
> /* Return the descriptor to its pools. */
> @@ -1701,7 +1862,25 @@
> MDL *p_mdl;
>
> IPOIB_ENTER( IPOIB_DBG_RECV );
> +#if UD_NBL_IN_DESC
> + PNET_BUFFER NetBuffer;
> + UNREFERENCED_PARAMETER(p_port);
> +
> + IPOIB_ENTER( IPOIB_DBG_BUF );
> +
> + ASSERT( p_desc->p_NBL );
> + ASSERT( p_desc->p_mdl );
> + p_net_buffer_list = p_desc->p_NBL;
> +
> + NetBuffer = NET_BUFFER_LIST_FIRST_NB( p_net_buffer_list );
> + p_mdl = NET_BUFFER_FIRST_MDL( NetBuffer );
> +
> + ASSERT( p_mdl == p_desc->p_mdl );
> + ASSERT( NET_BUFFER_CURRENT_MDL( NetBuffer ) == p_mdl );
>
> + NET_BUFFER_DATA_LENGTH( NetBuffer ) = p_desc->len;
> + NdisAdjustMdlLength( p_mdl, p_desc->len );
> +#else // !UD_NBL_IN_DESC
> p_mdl = NdisAllocateMdl(p_port->p_adapter->h_adapter,
> &p_desc-
> >buf.eth.pkt,
> p_desc->len );
> @@ -1732,6 +1911,7 @@
> IPOIB_PORT_FROM_NBL( p_net_buffer_list ) = p_port;
> IPOIB_RECV_FROM_NBL( p_net_buffer_list ) = p_desc;
> p_net_buffer_list->SourceHandle = p_port->p_adapter->h_adapter;
> +#endif // !UD_NBL_IN_DESC
>
> IPOIB_EXIT( IPOIB_DBG_RECV );
> return p_net_buffer_list;
More information about the ofw
mailing list