[Openib-windows] Added ability to change IPoIB mtu size
Hal Rosenstock
halr at voltaire.com
Mon Nov 20 08:02:25 PST 2006
On Mon, 2006-11-20 at 05:02, Anatoly Lisenko wrote:
> Hi Yossi,
>
> I agree with all your comments.
> In addition I want to change MTU range to 60-2048.
Shouldn't this support 256-4096 MTU (minus the 4 byte CRC)?
-- Hal
> I send the patch with this fix.
> Please apply this patch.
>
> Thanks,
> Anatoly
>
>
> -----Original Message-----
> From: Yossi Leybovich [mailto:sleybo at mellanox.co.il]
> Sent: Sunday, November 19, 2006 10:33 PM
> To: Anatoly Lisenko
> Cc: openib-windows at openib.org
> Subject: RE: Added ability to change IPoIB mtu size
>
> Hi Anatoly
>
> Next time it will be more convenient if you will paste the patch in the
> mail body so I can comment on it .
> I have few comments and I generate new patch , pls review it and test
> it.
> Let me know if its Ok with you and I will apply it
>
> You can see my comments on your patch ,
> start with [Yossi Leybovich ] marker
>
>
> Thanks
> Yossi
>
>
>
>
> Index: ipoib/kernel/ipoib_adapter.h
> ===================================================================
> --- ipoib/kernel/ipoib_adapter.h (revision 544)
> +++ ipoib/kernel/ipoib_adapter.h (working copy)
> @@ -70,6 +70,8 @@
> uint32_t sa_timeout;
> uint32_t sa_retry_cnt;
> uint32_t recv_pool_ratio;
> + uint32_t payload_mtu;
> + uint32_t xfer_block_size;
> mac_addr_t conf_mac;
>
> } ipoib_params_t;
> Index: ipoib/kernel/ipoib_driver.c
> ===================================================================
> --- ipoib/kernel/ipoib_driver.c (revision 544)
> +++ ipoib/kernel/ipoib_driver.c (working copy)
> @@ -525,6 +525,21 @@
> }
> p_adapter->params.recv_pool_ratio =
> p_param->ParameterData.IntegerData;
>
> + /* AAA Required: MTU size. */
> + RtlInitUnicodeString( &keyword, L"PayloadMtu" );
> + NdisReadConfiguration(
> + &status, &p_param, h_config, &keyword, NdisParameterInteger );
> + if( status != NDIS_STATUS_SUCCESS )
> + {
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("PayloadMtu parameter missing. Use the default.\n") );
> + p_adapter->params.payload_mtu = DEFAULT_IB_UD_MTU -
> sizeof(ipoib_hdr_t);
> + }
> + else
> + {
> + p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData -
> sizeof(ipoib_hdr_t);
> + }
> + p_adapter->params.xfer_block_size = (sizeof(eth_hdr_t) +
> p_adapter->params.payload_mtu);
> NdisReadNetworkAddress( &status, &mac, &len, h_config );
>
> [Yossi Leybovich]
>
> I don't think that we should maintain backward compatibility with
> drivers that don't define the "PayloadMtu"
> We should enforce it as require parameter , and in case its missing then
> return with error like other registry keys.
> What do you think ?
>
>
> [Yossi Leybovich]
>
> I don't really understand what exactly you want the user to set up.
> I would assume that the user will like to define the maximum data to
> transfer in one IPoIB packet, so the user should not be aware about the
> presence of IPoIB header (4 bytes).
> I would say that the maximum payload will be 2044 bytes and we will set
> it to be ..
> p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData
>
> In that way the use will get exactly the amount of payload he requested
> with out knowing about the size of the ipoib header.
>
>
> ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr,
> p_adapter->mac.addr );
> @@ -705,7 +720,7 @@
>
> #if IPOIB_USE_DMA
> status =
> - NdisMInitializeScatterGatherDma( h_adapter, TRUE, XFER_BLOCK_SIZE );
> + NdisMInitializeScatterGatherDma( h_adapter, TRUE,
> p_adapter->params.xfer_block_size );
> if( status != NDIS_STATUS_SUCCESS )
> {
> ipoib_destroy_adapter( p_adapter );
> @@ -889,7 +904,7 @@
> case OID_GEN_MAXIMUM_FRAME_SIZE:
> IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
> ("Port %d received query for OID_GEN_MAXIMUM_FRAME_SIZE\n",
> port_num) );
> - info = PAYLOAD_MTU;
> + info = p_adapter->params.payload_mtu;
> break;
>
> case OID_GEN_LINK_SPEED:
> @@ -927,14 +942,14 @@
> case OID_GEN_TRANSMIT_BUFFER_SPACE:
> IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
> ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE\n",
> port_num) );
> - info = p_adapter->params.sq_depth * XFER_BLOCK_SIZE;
> + info = p_adapter->params.sq_depth *
> p_adapter->params.xfer_block_size;
> break;
>
> case OID_GEN_RECEIVE_BUFFER_SPACE:
> IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
> ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE "
> "or OID_GEN_RECEIVE_BUFFER_SPACE\n", port_num) );
> - info = p_adapter->params.rq_depth * XFER_BLOCK_SIZE;
> + info = p_adapter->params.rq_depth *
> p_adapter->params.xfer_block_size;
> break;
>
> case OID_GEN_MAXIMUM_LOOKAHEAD:
> @@ -948,7 +963,7 @@
> "OID_GEN_TRANSMIT_BLOCK_SIZE or "
> "OID_GEN_RECEIVE_BLOCK_SIZE or "
> "OID_GEN_MAXIMUM_TOTAL_SIZE\n", port_num) );
> - info = XFER_BLOCK_SIZE;
> + info = p_adapter->params.xfer_block_size;
> break;
>
> case OID_GEN_VENDOR_ID:
> Index: ipoib/kernel/ipoib_driver.h
> ===================================================================
> --- ipoib/kernel/ipoib_driver.h (revision 544)
> +++ ipoib/kernel/ipoib_driver.h (working copy)
> @@ -47,19 +47,23 @@
> #define MAX_BUNDLE_ID_LENGTH 32
>
>
> -#define IB_MTU 2048
> +//#define IB_MTU 2048
> /*
> * Header length as defined by IPoIB spec:
> *
> http://www.ietf.org/internet-drafts/draft-ietf-ipoib-ip-over-infiniband-
> 04.txt
> */
> -#define PAYLOAD_MTU (IB_MTU - sizeof(ipoib_hdr_t))
> +// #define PAYLOAD_MTU (IB_MTU - sizeof(ipoib_hdr_t))
> +#define DEFAULT_IB_UD_MTU 1500
> +#define MAX_IB_UD_MTU 2048
> +#define MAX_PAYLOAD_MTU (MAX_IB_UD_MTU - sizeof(ipoib_hdr_t))
> +
> /*
> * Only the protocol type is sent as part of the UD payload
> * since the rest of the Ethernet header is encapsulated in the
> * various IB headers. We report out buffer space as if we
> * transmit the ethernet headers.
> */
> -#define XFER_BLOCK_SIZE (sizeof(eth_hdr_t) + PAYLOAD_MTU)
> +//#define XFER_BLOCK_SIZE (sizeof(eth_hdr_t) + PAYLOAD_MTU)
>
> [Yossi Leybovich]
>
> I will still say that the IB_MTU is 2048 (this is according to the IpoIB
> spec and the multicast group mtu)
> But we can use MAX_PAYLOAD_MTU and MAX_XFER_BLOCK_SIZE
>
> typedef struct _ipoib_globals
> Index: ipoib/kernel/ipoib_port.c
> ===================================================================
> --- ipoib/kernel/ipoib_port.c (revision 544)
> +++ ipoib/kernel/ipoib_port.c (working copy)
> @@ -73,7 +73,8 @@
>
> ************************************************************************
> ******/
> static void
> __port_construct(
> - IN ipoib_port_t* const p_port );
> + IN ipoib_port_t* const p_port,
> + IN ipoib_adapter_t* const p_adapter );
>
> static ib_api_status_t
> __port_init(
> @@ -130,7 +131,8 @@
>
> ************************************************************************
> ******/
> static void
> __buf_mgr_construct(
> - IN ipoib_port_t* const p_port );
> + IN ipoib_port_t* const p_port ,
> + IN ipoib_adapter_t* const p_adapter );
>
> static ib_api_status_t
> __buf_mgr_init(
> @@ -517,7 +519,7 @@
> gp_ipoib_port = p_port;
> #endif
>
> - __port_construct( p_port );
> + __port_construct( p_port, p_adapter );
>
> status = __port_init( p_port, p_adapter, p_pnp_rec );
> if( status != IB_SUCCESS )
> @@ -554,7 +556,8 @@
>
> static void
> __port_construct(
> - IN ipoib_port_t* const p_port )
> + IN ipoib_port_t* const p_port ,
> + IN ipoib_adapter_t* const p_adapter )
> {
> IPOIB_ENTER( IPOIB_DBG_INIT );
>
> @@ -564,7 +567,7 @@
> cl_spinlock_construct( &p_port->send_lock );
> cl_spinlock_construct( &p_port->recv_lock );
> __ib_mgr_construct( p_port );
> - __buf_mgr_construct( p_port );
> + __buf_mgr_construct( p_port , p_adapter );
>
> __recv_mgr_construct( p_port );
> __send_mgr_construct( p_port );
> @@ -945,7 +948,8 @@
>
> ************************************************************************
> ******/
> static void
> __buf_mgr_construct(
> - IN ipoib_port_t* const p_port )
> + IN ipoib_port_t* const p_port,
> + IN ipoib_adapter_t* const p_adapter )
> {
> IPOIB_ENTER( IPOIB_DBG_INIT );
>
> @@ -955,7 +959,7 @@
> p_port->buf_mgr.h_buffer_pool = NULL;
>
> ExInitializeNPagedLookasideList( &p_port->buf_mgr.send_buf_list,
> - NULL, NULL, 0, XFER_BLOCK_SIZE, 'bipi', 0 );
> + NULL, NULL, 0, p_adapter->params.xfer_block_size, 'bipi', 0 );
>
> [Yossi Leybovich]
> I would like to declare the Lookaside list with the MAX_XFER_BLOCK_SIZE
> and save the port to know the adapter in the construction step.
> What do you think?
>
>
> p_port->buf_mgr.h_send_pkt_pool = NULL;
> p_port->buf_mgr.h_send_buf_pool = NULL;
> @@ -2715,7 +2719,7 @@
> }
>
> NdisAllocateBuffer( &status, &p_buf, p_port->buf_mgr.h_send_buf_pool,
> - p_desc->p_buf, XFER_BLOCK_SIZE );
> + p_desc->p_buf, p_port->p_adapter->params.xfer_block_size );
> if( status != NDIS_STATUS_SUCCESS )
> {
> NdisFreePacket( p_packet );
> @@ -2798,7 +2802,7 @@
> }
>
> CL_ASSERT( tot_len > sizeof(eth_hdr_t) );
> - CL_ASSERT( tot_len <= XFER_BLOCK_SIZE );
> + CL_ASSERT( tot_len <= p_port->p_adapter->params.xfer_block_size );
> /*
> * Assume that the ethernet header is always fully contained
> * in the first page of the first MDL. This makes for much
> @@ -2818,7 +2822,7 @@
> CL_ASSERT( buf_len >= sizeof(eth_hdr_t) );
> /* Skip the ethernet header. */
> buf_len -= sizeof(eth_hdr_t);
> - CL_ASSERT( buf_len <= PAYLOAD_MTU );
> + CL_ASSERT( buf_len <= p_port->p_adapter->params.payload_mtu );
> if( buf_len )
> {
> /* The ethernet header is a subset of this MDL. */
> Index: ipoib/kernel/ipoib_port.h
> ===================================================================
> --- ipoib/kernel/ipoib_port.h (revision 544)
> +++ ipoib/kernel/ipoib_port.h (working copy)
> @@ -181,7 +181,7 @@
> ipoib_hdr_t hdr;
> union _payload
> {
> - uint8_t data[PAYLOAD_MTU];
> + uint8_t data[MAX_PAYLOAD_MTU];
> ipoib_arp_pkt_t arp;
> ip_pkt_t ip;
>
> @@ -268,7 +268,7 @@
> */
> typedef union _send_buf
> {
> - uint8_t data[PAYLOAD_MTU];
> + uint8_t data[MAX_PAYLOAD_MTU];
> ipoib_arp_pkt_t arp;
> ip_pkt_t ip;
>
> Index: ipoib/kernel/netipoib.inf
> ===================================================================
> --- ipoib/kernel/netipoib.inf (revision 544)
> +++ ipoib/kernel/netipoib.inf (working copy)
> @@ -111,12 +111,11 @@
> HKR, Ndi\Params\SaRetries, Optional, 0, "0"
> HKR, Ndi\Params\SaRetries, Min, 0, "1"
>
> -HKR, Ndi\Params\RecvRatio, ParamDesc, 0, "Receive Pool Ratio"
> -HKR, Ndi\Params\RecvRatio, Type, 0, "dword"
> -HKR, Ndi\Params\RecvRatio, Default, 0, "1"
> -HKR, Ndi\Params\RecvRatio, Optional, 0, "0"
> -HKR, Ndi\Params\RecvRatio, Min, 0, "1"
> -HKR, Ndi\Params\RecvRatio, Max, 0, "10"
>
> [Yossi Leybovich]
>
> Why do you remove the "Receive Pool Ratio" registry key ?
> The IPoIB require this parameter , How does it work for you without it
> (maybe you had left overs from former installation) ?
>
> +HKR, Ndi\Params\PayloadMtu, ParamDesc, 0, "Payload Mtu size"
> +HKR, Ndi\Params\PayloadMtu, Type, 0, "dword"
> +HKR, Ndi\Params\PayloadMtu, Default, 0, "1500"
> +HKR, Ndi\Params\PayloadMtu, Min, 0, "1500"
> +HKR, Ndi\Params\PayloadMtu, Max, 0, "2048"
>
>
> [Yossi leybovich]
>
> Lets change the default to be 2048 , so in the default case we will gain
> the maximum performance
>
>
> [IpoibService]
> DisplayName = %IpoibServiceDispName%
>
>
> ______________________________________________________________________
>
> _______________________________________________
> openib-windows mailing list
> openib-windows at openib.org
> http://openib.org/mailman/listinfo/openib-windows
More information about the ofw
mailing list