[Openib-windows] Added ability to change IPoIB mtu size

Hal Rosenstock halr at voltaire.com
Mon Nov 20 08:02:25 PST 2006


On Mon, 2006-11-20 at 05:02, Anatoly Lisenko wrote:
> Hi Yossi,
> 
> I agree with all your comments.
> In addition I want to change MTU range to 60-2048.

Shouldn't this support 256-4096 MTU (minus the 4 byte CRC)?

-- Hal

> I send the patch with this fix.
> Please apply this patch.
> 
> Thanks,
> Anatoly
> 
> 
> -----Original Message-----
> From: Yossi Leybovich [mailto:sleybo at mellanox.co.il] 
> Sent: Sunday, November 19, 2006 10:33 PM
> To: Anatoly Lisenko
> Cc: openib-windows at openib.org
> Subject: RE: Added ability to change IPoIB mtu size
> 
> Hi Anatoly
> 
> Next time it will be more convenient if you will paste the patch in the
> mail body so I can comment on it .
> I have few comments and I generate new patch , pls review it and test
> it.
> Let me know if its Ok with you and I will apply it
> 
> You can see my comments on your patch ,
> start with [Yossi Leybovich ] marker
> 
> 
> Thanks
> Yossi 
> 
> 
> 
> 
> Index: ipoib/kernel/ipoib_adapter.h
> ===================================================================
> --- ipoib/kernel/ipoib_adapter.h (revision 544)
> +++ ipoib/kernel/ipoib_adapter.h (working copy)
> @@ -70,6 +70,8 @@
>   uint32_t sa_timeout;
>   uint32_t sa_retry_cnt;
>   uint32_t recv_pool_ratio;
> + uint32_t payload_mtu;
> + uint32_t xfer_block_size;
>   mac_addr_t conf_mac;
>  
>  } ipoib_params_t;
> Index: ipoib/kernel/ipoib_driver.c
> ===================================================================
> --- ipoib/kernel/ipoib_driver.c (revision 544)
> +++ ipoib/kernel/ipoib_driver.c (working copy)
> @@ -525,6 +525,21 @@
>   }
>   p_adapter->params.recv_pool_ratio =
> p_param->ParameterData.IntegerData;
>  
> + /* AAA Required: MTU size. */
> + RtlInitUnicodeString( &keyword, L"PayloadMtu" );
> + NdisReadConfiguration(
> +  &status, &p_param, h_config, &keyword, NdisParameterInteger );
> + if( status != NDIS_STATUS_SUCCESS )
> + {
> +  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +   ("PayloadMtu parameter missing. Use the default.\n") );
> +  p_adapter->params.payload_mtu = DEFAULT_IB_UD_MTU -
> sizeof(ipoib_hdr_t);
> + }
> + else
> + {
> +  p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData -
> sizeof(ipoib_hdr_t);
> + }
> + p_adapter->params.xfer_block_size = (sizeof(eth_hdr_t) +
> p_adapter->params.payload_mtu);
>   NdisReadNetworkAddress( &status, &mac, &len, h_config );
> 
> [Yossi Leybovich]
>  
> I don't think that we should maintain backward compatibility with
> drivers that don't define the "PayloadMtu"
> We should enforce it as require parameter , and in case its missing then
> return with error like other registry keys.
> What do you think ?
> 
> 
> [Yossi Leybovich]
> 
> I don't really understand what exactly you want the user to set up.
> I would assume that the user will like to define the maximum data to
> transfer in one IPoIB packet, so the user should not be aware about the
> presence of IPoIB header (4 bytes).
> I would say that the maximum payload will be 2044 bytes and we will set
> it to be ..
> p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData
> 
> In that way the use will get exactly the amount of payload he requested
> with out knowing about the size of the ipoib header.
> 
> 
>   ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr,
> p_adapter->mac.addr );
> @@ -705,7 +720,7 @@
>  
>  #if IPOIB_USE_DMA
>   status =
> -  NdisMInitializeScatterGatherDma( h_adapter, TRUE, XFER_BLOCK_SIZE );
> +  NdisMInitializeScatterGatherDma( h_adapter, TRUE,
> p_adapter->params.xfer_block_size );
>   if( status != NDIS_STATUS_SUCCESS )
>   {
>    ipoib_destroy_adapter( p_adapter );
> @@ -889,7 +904,7 @@
>   case OID_GEN_MAXIMUM_FRAME_SIZE:
>    IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
>     ("Port %d received query for OID_GEN_MAXIMUM_FRAME_SIZE\n",
> port_num) );
> -  info = PAYLOAD_MTU;
> +  info = p_adapter->params.payload_mtu;
>    break;
>  
>   case OID_GEN_LINK_SPEED:
> @@ -927,14 +942,14 @@
>   case OID_GEN_TRANSMIT_BUFFER_SPACE:
>    IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
>     ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE\n",
> port_num) );
> -  info = p_adapter->params.sq_depth * XFER_BLOCK_SIZE;
> +  info = p_adapter->params.sq_depth *
> p_adapter->params.xfer_block_size;
>    break;
>  
>   case OID_GEN_RECEIVE_BUFFER_SPACE:
>    IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
>     ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE "
>     "or OID_GEN_RECEIVE_BUFFER_SPACE\n", port_num) );
> -  info = p_adapter->params.rq_depth * XFER_BLOCK_SIZE;
> +  info = p_adapter->params.rq_depth *
> p_adapter->params.xfer_block_size;
>    break;
>  
>   case OID_GEN_MAXIMUM_LOOKAHEAD:
> @@ -948,7 +963,7 @@
>     "OID_GEN_TRANSMIT_BLOCK_SIZE or "
>     "OID_GEN_RECEIVE_BLOCK_SIZE or "
>     "OID_GEN_MAXIMUM_TOTAL_SIZE\n", port_num) );
> -  info = XFER_BLOCK_SIZE;
> +  info = p_adapter->params.xfer_block_size;
>    break;
>  
>   case OID_GEN_VENDOR_ID:
> Index: ipoib/kernel/ipoib_driver.h
> ===================================================================
> --- ipoib/kernel/ipoib_driver.h (revision 544)
> +++ ipoib/kernel/ipoib_driver.h (working copy)
> @@ -47,19 +47,23 @@
>  #define MAX_BUNDLE_ID_LENGTH 32
>  
> 
> -#define IB_MTU   2048
> +//#define IB_MTU   2048
>  /*
>   * Header length as defined by IPoIB spec:
>   *
> http://www.ietf.org/internet-drafts/draft-ietf-ipoib-ip-over-infiniband-
> 04.txt
>   */
> -#define PAYLOAD_MTU  (IB_MTU - sizeof(ipoib_hdr_t))
> +// #define PAYLOAD_MTU  (IB_MTU - sizeof(ipoib_hdr_t))
> +#define DEFAULT_IB_UD_MTU 1500
> +#define MAX_IB_UD_MTU  2048
> +#define MAX_PAYLOAD_MTU  (MAX_IB_UD_MTU - sizeof(ipoib_hdr_t))
> +
>  /*
>   * Only the protocol type is sent as part of the UD payload
>   * since the rest of the Ethernet header is encapsulated in the
>   * various IB headers.  We report out buffer space as if we
>   * transmit the ethernet headers.
>   */
> -#define XFER_BLOCK_SIZE  (sizeof(eth_hdr_t) + PAYLOAD_MTU)
> +//#define XFER_BLOCK_SIZE  (sizeof(eth_hdr_t) + PAYLOAD_MTU)
>  
> [Yossi Leybovich]
> 
> I will still say that the IB_MTU is 2048 (this is according to the IpoIB
> spec and the multicast group mtu)
> But we can use MAX_PAYLOAD_MTU and MAX_XFER_BLOCK_SIZE
> 
>  typedef struct _ipoib_globals
> Index: ipoib/kernel/ipoib_port.c
> ===================================================================
> --- ipoib/kernel/ipoib_port.c (revision 544)
> +++ ipoib/kernel/ipoib_port.c (working copy)
> @@ -73,7 +73,8 @@
>  
> ************************************************************************
> ******/
>  static void
>  __port_construct(
> - IN    ipoib_port_t* const   p_port );
> + IN    ipoib_port_t* const   p_port,
> + IN    ipoib_adapter_t* const  p_adapter );
>  
>  static ib_api_status_t
>  __port_init(
> @@ -130,7 +131,8 @@
>  
> ************************************************************************
> ******/
>  static void
>  __buf_mgr_construct(
> - IN    ipoib_port_t* const   p_port );
> + IN    ipoib_port_t* const   p_port ,
> + IN    ipoib_adapter_t* const  p_adapter );
>  
>  static ib_api_status_t
>  __buf_mgr_init(
> @@ -517,7 +519,7 @@
>   gp_ipoib_port = p_port;
>  #endif
>  
> - __port_construct( p_port );
> + __port_construct( p_port, p_adapter );
>  
>   status = __port_init( p_port, p_adapter, p_pnp_rec );
>   if( status != IB_SUCCESS )
> @@ -554,7 +556,8 @@
>  
>  static void
>  __port_construct(
> - IN    ipoib_port_t* const   p_port )
> + IN    ipoib_port_t* const   p_port ,
> + IN    ipoib_adapter_t* const  p_adapter )
>  {
>   IPOIB_ENTER( IPOIB_DBG_INIT );
>  
> @@ -564,7 +567,7 @@
>   cl_spinlock_construct( &p_port->send_lock );
>   cl_spinlock_construct( &p_port->recv_lock );
>   __ib_mgr_construct( p_port );
> - __buf_mgr_construct( p_port );
> + __buf_mgr_construct( p_port , p_adapter );
>  
>   __recv_mgr_construct( p_port );
>   __send_mgr_construct( p_port );
> @@ -945,7 +948,8 @@
>  
> ************************************************************************
> ******/
>  static void
>  __buf_mgr_construct(
> - IN    ipoib_port_t* const   p_port )
> + IN    ipoib_port_t* const   p_port, 
> + IN    ipoib_adapter_t* const  p_adapter )
>  {
>   IPOIB_ENTER( IPOIB_DBG_INIT );
>  
> @@ -955,7 +959,7 @@
>   p_port->buf_mgr.h_buffer_pool = NULL;
>  
>   ExInitializeNPagedLookasideList( &p_port->buf_mgr.send_buf_list,
> -  NULL, NULL, 0, XFER_BLOCK_SIZE, 'bipi', 0 );
> +  NULL, NULL, 0, p_adapter->params.xfer_block_size, 'bipi', 0 );
>  
> [Yossi Leybovich]
> I would like to declare the Lookaside list with the MAX_XFER_BLOCK_SIZE
> and save the port to know the adapter in the construction step.
> What do you think?
> 
> 
>   p_port->buf_mgr.h_send_pkt_pool = NULL;
>   p_port->buf_mgr.h_send_buf_pool = NULL;
> @@ -2715,7 +2719,7 @@
>   }
>  
>   NdisAllocateBuffer( &status, &p_buf, p_port->buf_mgr.h_send_buf_pool,
> -  p_desc->p_buf, XFER_BLOCK_SIZE );
> +  p_desc->p_buf, p_port->p_adapter->params.xfer_block_size );
>   if( status != NDIS_STATUS_SUCCESS )
>   {
>    NdisFreePacket( p_packet );
> @@ -2798,7 +2802,7 @@
>   }
>  
>   CL_ASSERT( tot_len > sizeof(eth_hdr_t) );
> - CL_ASSERT( tot_len <= XFER_BLOCK_SIZE );
> + CL_ASSERT( tot_len <= p_port->p_adapter->params.xfer_block_size );
>   /*
>    * Assume that the ethernet header is always fully contained
>    * in the first page of the first MDL.  This makes for much
> @@ -2818,7 +2822,7 @@
>     CL_ASSERT( buf_len >= sizeof(eth_hdr_t) );
>     /* Skip the ethernet header. */
>     buf_len -= sizeof(eth_hdr_t);
> -   CL_ASSERT( buf_len <= PAYLOAD_MTU );
> +   CL_ASSERT( buf_len <= p_port->p_adapter->params.payload_mtu );
>     if( buf_len )
>     {
>      /* The ethernet header is a subset of this MDL. */
> Index: ipoib/kernel/ipoib_port.h
> ===================================================================
> --- ipoib/kernel/ipoib_port.h (revision 544)
> +++ ipoib/kernel/ipoib_port.h (working copy)
> @@ -181,7 +181,7 @@
>   ipoib_hdr_t  hdr;
>   union _payload
>   {
> -  uint8_t   data[PAYLOAD_MTU];
> +        uint8_t   data[MAX_PAYLOAD_MTU];
>    ipoib_arp_pkt_t arp;
>    ip_pkt_t  ip;
>  
> @@ -268,7 +268,7 @@
>  */
>  typedef union _send_buf
>  {
> - uint8_t   data[PAYLOAD_MTU];
> +    uint8_t   data[MAX_PAYLOAD_MTU];
>   ipoib_arp_pkt_t arp;
>   ip_pkt_t  ip;
>  
> Index: ipoib/kernel/netipoib.inf
> ===================================================================
> --- ipoib/kernel/netipoib.inf (revision 544)
> +++ ipoib/kernel/netipoib.inf (working copy)
> @@ -111,12 +111,11 @@
>  HKR, Ndi\Params\SaRetries,  Optional, 0, "0"
>  HKR, Ndi\Params\SaRetries,  Min,  0, "1"
>  
> -HKR, Ndi\Params\RecvRatio,  ParamDesc, 0, "Receive Pool Ratio"
> -HKR, Ndi\Params\RecvRatio,  Type,  0, "dword"
> -HKR, Ndi\Params\RecvRatio,  Default, 0, "1"
> -HKR, Ndi\Params\RecvRatio,  Optional, 0, "0"
> -HKR, Ndi\Params\RecvRatio,  Min,  0, "1"
> -HKR, Ndi\Params\RecvRatio,  Max,  0, "10"
> 
> [Yossi Leybovich]
> 
> Why do you remove the "Receive Pool Ratio" registry key ?
> The IPoIB require this parameter , How does it work for you without it
> (maybe you had left overs from former installation) ? 
> 
> +HKR, Ndi\Params\PayloadMtu,  ParamDesc, 0, "Payload Mtu size"
> +HKR, Ndi\Params\PayloadMtu,  Type,  0, "dword"
> +HKR, Ndi\Params\PayloadMtu,  Default, 0, "1500"
> +HKR, Ndi\Params\PayloadMtu,  Min,  0, "1500"
> +HKR, Ndi\Params\PayloadMtu,  Max,  0, "2048"
>  
> 
> [Yossi leybovich]
> 
> Lets change the default to be 2048 , so in the default case we will gain
> the maximum performance
> 
> 
>  [IpoibService]
>  DisplayName     = %IpoibServiceDispName%
> 
> 
> ______________________________________________________________________
> 
> _______________________________________________
> openib-windows mailing list
> openib-windows at openib.org
> http://openib.org/mailman/listinfo/openib-windows





More information about the ofw mailing list