[ofw] [PATCH] IPOIB_NDIS6_CM remove NBL/MDL alloc & dealloc from UD recv data path

Fab Tillier ftillier at microsoft.com
Tue Nov 9 10:48:04 PST 2010


Hi Stan,

Smith, Stan wrote on Tue, 9 Nov 2010 at 10:10:18

> Hello,
>   Enclosed you will find a technology preview patch implementing Fab's
> suggestions about removing NETWORK_BUFFER_LIST and MDL allocation and
> deallocation for every UD receive operation; now allocated/deallocated
> in recv_pool constructor & destructor routines.
> The results is approximately a 3 Mbyte/sec transfer rate increase; less
> than I had envisioned although good nonetheless.

What does it do for small message latencies?

> There are bigger fish
> to fry in the send/recv data paths.
> The patch is done with #if UD_NBL_IN_DESC to highlight the changes and
> make comparison testing easier; I would envision the final patch
> without the #if.
>
> Details:
>   Add NBL and MDL pointers to ipoib_recv_desc_t.
>   Allocate NBL(h_packet_pool element) & MDL for recv_pool in
> recv_ctor()
>      (allocate NBL pool before recv_pool) in buf_mgr_init().
>   Added recv_dtor()for RECV_INLINE as NBL and MDL need to be
> deallocated.
>   Destroy recv_pool before NBL pool as recv_dtor() needs to free NBL
> before h_packet_pool destroyed.
>
> Let me know what you think.

Looks good to me, some comments below.

>
> thanks,
>
> stan.
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h    Tue Nov 09 09:47:18
> 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.h    Mon Nov 08 13:33:52
> 2010
> @@ -365,6 +365,10 @@
>         ipoib_pkt_type_t        type;
>         ib_recv_wr_t            wr;
>         ib_local_ds_t           local_ds[2];
> +#if UD_NBL_IN_DESC
> +       NET_BUFFER_LIST         *p_NBL;
> +       MDL                             *p_mdl;
> +#endif
>         NDIS_TCP_IP_CHECKSUM_PACKET_INFO        ndis_csum;
>  #if IPOIB_INLINE_RECV
>         recv_buf_t                      buf;
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/SOURCES Tue Nov 09 09:46:50 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/SOURCES Tue Nov 09 09:45:59 2010
> @@ -29,7 +29,8 @@
>  INCLUDES=..;..\..\..\inc;..\..\..\inc\kernel;
>
>  C_DEFINES=$(C_DEFINES) -DNDIS_MINIPORT_DRIVER -DNDIS_WDM=1 \
> -       -DDEPRECATE_DDK_FUNCTIONS -DNDIS61_MINIPORT=1 -DNEED_CL_OBJ -
> DBINARY_COMPATIBLE=0
> +       -DDEPRECATE_DDK_FUNCTIONS -DNDIS61_MINIPORT=1 -DNEED_CL_OBJ \
> +       -DBINARY_COMPATIBLE=0 -DUD_NBL_IN_DESC=1
>
>  TARGETLIBS= \
>         $(TARGETPATH)\*\complib.lib \
>
>
> --- A/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp  Tue Nov 09 09:47:45
> 2010
> +++ B/ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp  Tue Nov 09 09:47:18
> 2010
> @@ -192,7 +192,7 @@
>         IN                              void*
> context,
>                 OUT                     cl_pool_item_t** const
> pp_pool_item );
>
> -#if !IPOIB_INLINE_RECV
> +#if !IPOIB_INLINE_RECV || UD_NBL_IN_DESC
>  static void
>  __recv_dtor(
>         IN              const   cl_pool_item_t* const
> p_pool_item,
> @@ -1410,6 +1410,9 @@
>         IN                              ipoib_port_t* const
> p_port )
>  {
>         cl_status_t             cl_status;
> +#if UD_NBL_IN_DESC
> +       ib_api_status_t ib_status;
> +#endif
>         ipoib_params_t  *p_params;
>         NET_BUFFER_LIST_POOL_PARAMETERS pool_parameters;
>
> @@ -1420,6 +1423,55 @@
>
>         p_params = &p_port->p_adapter->params;
>
> +#if    UD_NBL_IN_DESC
> +
> +       /* Allocate the NET BUFFER list pools for receive indication.
> */
> +       NdisZeroMemory(&pool_parameters,
> sizeof(NET_BUFFER_LIST_POOL_PARAMETERS));
> +    pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
> +    pool_parameters.Header.Revision =
> NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
> +    pool_parameters.Header.Size = sizeof(pool_parameters);
> +    pool_parameters.ProtocolId = NDIS_PROTOCOL_ID_DEFAULT;
> +    pool_parameters.ContextSize = 0;
> +    pool_parameters.fAllocateNetBuffer = TRUE;
> +    pool_parameters.PoolTag = 'CRPI';
> +       pool_parameters.DataSize = 0;
> +
> +    p_port->buf_mgr.h_packet_pool = NdisAllocateNetBufferListPool(
> +
> p_port->p_adapter->h_adapter,
> +
> &pool_parameters );
> +
> +       if( !p_port->buf_mgr.h_packet_pool )
> +       {
> +               NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
> +                       EVENT_IPOIB_RECV_PKT_POOL, 1,
> NDIS_STATUS_RESOURCES  );
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("NdisAllocatePacketPool returned %08X\n",
> (UINT)NDIS_STATUS_RESOURCES) );
> +               return IB_INSUFFICIENT_RESOURCES;
> +       }
> +
> +       /* Allocate the receive descriptor pool */
> +       cl_status = cl_qpool_init( &p_port->buf_mgr.recv_pool,
> +                                                          p_params-
> >rq_depth * p_params->recv_pool_ratio,
> +                                                          0,
> +                                                          0,
> +
> sizeof(ipoib_recv_desc_t),
> +                                                          __recv_ctor,
> +                                                          __recv_dtor,
> +                                                          p_port );
> +
> +       if( cl_status != CL_SUCCESS )
> +       {
> +               NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
> +                       EVENT_IPOIB_RECV_POOL, 1, cl_status );
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("cl_qpool_init for recvs returned %#x\n",
> +                       cl_status) );
> +               ib_status = IB_INSUFFICIENT_MEMORY;
> +               goto pkt_pool_failed;
> +       }
> +
> +#else  !UD_NBL_IN_DESC
> +
>         /* Allocate the receive descriptor pool */
>         cl_status = cl_qpool_init( &p_port->buf_mgr.recv_pool,
>                                                            p_params-
> >rq_depth * p_params->recv_pool_ratio,
> @@ -1449,7 +1501,7 @@
>      pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
>      pool_parameters.Header.Revision =
> NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
>      pool_parameters.Header.Size = sizeof(pool_parameters);
> -    pool_parameters.ProtocolId = 0;
> +    pool_parameters.ProtocolId = NDIS_PROTOCOL_ID_DEFAULT;
>      pool_parameters.ContextSize = 0;
>      pool_parameters.fAllocateNetBuffer = TRUE;
>      pool_parameters.PoolTag = 'CRPI';
> @@ -1467,6 +1519,7 @@
>                         ("NdisAllocatePacketPool returned %08X\n",
> (UINT)NDIS_STATUS_RESOURCES) );
>                 return IB_INSUFFICIENT_RESOURCES;
>         }
> +#endif
>
>         /* Allocate the NET buffer list pool for send formatting. */
>      pool_parameters.PoolTag = 'XTPI';
> @@ -1481,11 +1534,31 @@
>                 IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>                         ("NdisAllocatePacketPool returned %08X\n",
>                         (UINT)NDIS_STATUS_RESOURCES) );
> +#if UD_NBL_IN_DESC
> +               ib_status = IB_INSUFFICIENT_RESOURCES;
> +               goto pkt_pool_failed;
> +#else
>                 return IB_INSUFFICIENT_RESOURCES;
> +#endif
>         }
>
>         IPOIB_EXIT( IPOIB_DBG_INIT );
>         return IB_SUCCESS;
> +
> +#if UD_NBL_IN_DESC
> +pkt_pool_failed:
> +       NdisFreeNetBufferListPool( p_port->buf_mgr.h_packet_pool );
> +       p_port->buf_mgr.h_packet_pool = NULL;
> +       cl_qpool_destroy( &p_port->buf_mgr.recv_pool );
> +       if( p_port->buf_mgr.h_send_pkt_pool)
> +       {
> +               NdisFreeNetBufferListPool( p_port-
> >buf_mgr.h_send_pkt_pool );
> +               p_port->buf_mgr.h_send_pkt_pool = NULL;
> +       }
> +
> +       IPOIB_EXIT( IPOIB_DBG_INIT );
> +       return ib_status;
> +#endif
>  }
>
>
> @@ -1506,11 +1579,19 @@
>         /* Destroy the receive packet and buffer pools.
>         if( p_port->buf_mgr.h_buffer_pool )
>                 NdisFreeBufferPool( p_port->buf_mgr.h_buffer_pool );*/
> +#if UD_NBL_IN_DESC
> +       /* Free the receive and send descriptors. */
> +       cl_qpool_destroy( &p_port->buf_mgr.recv_pool );
> +
> +       if( p_port->buf_mgr.h_packet_pool )
> +               NdisFreeNetBufferListPool ( p_port-
> >buf_mgr.h_packet_pool );
> +#else
>         if( p_port->buf_mgr.h_packet_pool )
>                 NdisFreeNetBufferListPool ( p_port-
> >buf_mgr.h_packet_pool );
>
>         /* Free the receive and send descriptors. */
>         cl_qpool_destroy( &p_port->buf_mgr.recv_pool );

Would it be invalid to free the recv_pool before the packet_pool in the current code flow?  If not, no need to #if UD_NBL_IN_DESC this part, as the change would be valid regardless.  As it stands, this looks like a bigger change than it is at first glance.

> +#endif
>
>         /* Free the lookaside list of scratch buffers. */
>         NdisDeleteNPagedLookasideList( &p_port->buf_mgr.send_buf_list
> );
> @@ -1592,14 +1673,65 @@
>         p_desc->wr.num_ds = 1;
>  #endif /* IPOIB_INLINE_RECV */
>
> +#if    UD_NBL_IN_DESC
> +       /* setup NDIS NetworkBufferList and MemoryDescriptorList for
> this Recv desc */
> +       p_desc->p_mdl = NdisAllocateMdl(p_port->p_adapter->h_adapter,
> +#if IPOIB_INLINE_RECV
> +
> &p_desc->buf.eth.pkt,
> +#else
> +
> p_desc->p_buf,
> +#endif
> +
> sizeof(ipoib_pkt_t) + sizeof(ib_grh_t) );
> +       if( !p_desc->p_mdl )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("Failed to allocate MDL\n") );
> +               goto err1;
> +       }
> +
> +       p_desc->p_NBL = NdisAllocateNetBufferAndNetBufferList(
> +                                               p_port-
> >buf_mgr.h_packet_pool,
> +                                               0,
> +                                               0,
> +                                               p_desc->p_mdl,
> +                                               0,
> +                                               0);
> +
> +       if( !p_desc->p_NBL )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("Failed to allocate NET_BUFFER_LIST\n") );
> +               goto err2;
> +       }
> +
> +       NET_BUFFER_LIST_NEXT_NBL(p_desc->p_NBL) = NULL;
> +       IPOIB_PORT_FROM_NBL( p_desc->p_NBL ) = p_port;
> +       IPOIB_RECV_FROM_NBL( p_desc->p_NBL ) = p_desc;
> +       p_desc->p_NBL->SourceHandle = p_port->p_adapter->h_adapter;
> +#endif
> +
>         *pp_pool_item = &p_desc->item;
>
>         IPOIB_EXIT( IPOIB_DBG_ALLOC );
>         return CL_SUCCESS;
> -}
>
> +#if UD_NBL_IN_DESC
> +err2:
> +       NdisFreeMdl( p_desc->p_mdl );
> +       p_desc->p_mdl = NULL;
>
> +err1:
>  #if !IPOIB_INLINE_RECV
> +       cl_free( p_desc->p_buf );
> +       p_desc->p_buf = NULL;
> +#endif
> +
> +       return CL_INSUFFICIENT_MEMORY;
> +#endif
> +}
> +
> +
> +#if !IPOIB_INLINE_RECV || UD_NBL_IN_DESC
>  static void
>  __recv_dtor(
>         IN              const   cl_pool_item_t* const
> p_pool_item,
> @@ -1607,16 +1739,27 @@
>  {
>         ipoib_recv_desc_t       *p_desc;
>
> -       IPOIB_ENTER(  IPOIB_DBG_ALLOC );
> -

Why?

>         UNUSED_PARAM( context );
>
>         p_desc = PARENT_STRUCT( p_pool_item, ipoib_recv_desc_t, item );
>
> +#if UD_NBL_IN_DESC
> +       if( p_desc->p_mdl )
> +       {
> +               NdisFreeMdl( p_desc->p_mdl );
> +               p_desc->p_mdl = NULL;

Do you need to set to NULL if you're about to free the object?

> +       }
> +       if( p_desc->p_NBL)
> +       {
> +               NdisFreeNetBufferList(p_desc->p_NBL);
> +               p_desc->p_NBL = NULL;

Same here.

> +       }
> +#endif
> +
> +#if !IPOIB_INLINE_RECV
>         if( p_desc->p_buf )
>                 cl_free( p_desc->p_buf );
> -
> -       IPOIB_EXIT( IPOIB_DBG_ALLOC );
> +#endif
>  }
>  #endif
>
> @@ -1657,12 +1800,29 @@
>         IN                              ipoib_recv_desc_t* const
> p_desc,
>         IN                              NET_BUFFER_LIST* const
> p_net_buffer_list OPTIONAL )
>  {
> -       NET_BUFFER              *p_buf = NULL;
> -       MDL                             *p_mdl = NULL;
>         IPOIB_ENTER(IPOIB_DBG_RECV );
>
>         if( p_net_buffer_list )
>         {
> +               NET_BUFFER      *p_buf = NULL;
> +               MDL                     *p_mdl = NULL;
> +#if UD_NBL_IN_DESC
> +               ASSERT( p_desc->p_NBL );
> +               ASSERT( p_desc->p_mdl );
> +               ASSERT( p_net_buffer_list == p_desc->p_NBL );
> +
> +               NET_BUFFER_LIST_NEXT_NBL(p_net_buffer_list) = NULL;

This code seems to assume that the NBL passed in is not chained to any others.  Might be worth asserting that NET_BUFFER_LIST_NEXT_NBL is NULL when you enter, rather than setting it?

> +
> +               p_buf = NET_BUFFER_LIST_FIRST_NB( p_net_buffer_list );

You also assume there's only one buffer in the NBL.  Worth an assert, IMO.

> +               p_mdl = NET_BUFFER_FIRST_MDL( p_buf );

Same here with the MDLs.

> +
> +               ASSERT( p_mdl == p_desc->p_mdl );
> +               ASSERT( NET_BUFFER_CURRENT_MDL( p_buf ) == p_mdl );
> +
> +               /* reset Avail buffer lengths to full Rx size */

Why do you need to reset these here?  Don't the length get set when the receive completes?  Do you use these fields when pre-posting the receive buffers?

> +               NET_BUFFER_DATA_LENGTH( p_buf ) = sizeof(ipoib_pkt_t) +
> sizeof(ib_grh_t);
> +               NdisAdjustMdlLength( p_mdl, sizeof(ipoib_pkt_t) +
> sizeof(ib_grh_t) );
> +#else
>                 NET_BUFFER_LIST_NEXT_NBL(p_net_buffer_list) = NULL;
>                 p_buf = NET_BUFFER_LIST_FIRST_NB(p_net_buffer_list);
>                 CL_ASSERT( p_buf );
> @@ -1670,6 +1830,7 @@
>                 CL_ASSERT( p_mdl );
>                 NdisFreeMdl(p_mdl);
>                 NdisFreeNetBufferList(p_net_buffer_list);
> +#endif
>         }
>
>         /* Return the descriptor to its pools. */
> @@ -1701,7 +1862,25 @@
>         MDL                                             *p_mdl;
>
>         IPOIB_ENTER(  IPOIB_DBG_RECV );
> +#if UD_NBL_IN_DESC
> +       PNET_BUFFER                     NetBuffer;
> +       UNREFERENCED_PARAMETER(p_port);
> +
> +       IPOIB_ENTER( IPOIB_DBG_BUF );
> +
> +       ASSERT( p_desc->p_NBL );
> +       ASSERT( p_desc->p_mdl );
> +       p_net_buffer_list = p_desc->p_NBL;
> +
> +       NetBuffer = NET_BUFFER_LIST_FIRST_NB( p_net_buffer_list );
> +       p_mdl = NET_BUFFER_FIRST_MDL( NetBuffer );
> +
> +       ASSERT( p_mdl == p_desc->p_mdl );
> +       ASSERT( NET_BUFFER_CURRENT_MDL( NetBuffer ) == p_mdl );
>
> +       NET_BUFFER_DATA_LENGTH( NetBuffer ) = p_desc->len;
> +       NdisAdjustMdlLength( p_mdl, p_desc->len );
> +#else  // !UD_NBL_IN_DESC
>         p_mdl = NdisAllocateMdl(p_port->p_adapter->h_adapter,
>                                                         &p_desc-
> >buf.eth.pkt,
>                                                         p_desc->len );
> @@ -1732,6 +1911,7 @@
>         IPOIB_PORT_FROM_NBL( p_net_buffer_list ) = p_port;
>         IPOIB_RECV_FROM_NBL( p_net_buffer_list ) = p_desc;
>         p_net_buffer_list->SourceHandle = p_port->p_adapter->h_adapter;
> +#endif // !UD_NBL_IN_DESC
>
>         IPOIB_EXIT(  IPOIB_DBG_RECV );
>         return p_net_buffer_list;




More information about the ofw mailing list