[Openib-windows] Added ability to change IPoIB mtu size

Anatoly Lisenko anatolyl at voltaire.com
Mon Nov 20 02:02:22 PST 2006


Hi Yossi,

I agree with all your comments.
In addition I want to change MTU range to 60-2048.
I send the patch with this fix.
Please apply this patch.

Thanks,
Anatoly


-----Original Message-----
From: Yossi Leybovich [mailto:sleybo at mellanox.co.il] 
Sent: Sunday, November 19, 2006 10:33 PM
To: Anatoly Lisenko
Cc: openib-windows at openib.org
Subject: RE: Added ability to change IPoIB mtu size

Hi Anatoly

Next time it will be more convenient if you will paste the patch in the
mail body so I can comment on it .
I have few comments and I generate new patch , pls review it and test
it.
Let me know if its Ok with you and I will apply it

You can see my comments on your patch ,
start with [Yossi Leybovich ] marker


Thanks
Yossi 




Index: ipoib/kernel/ipoib_adapter.h
===================================================================
--- ipoib/kernel/ipoib_adapter.h (revision 544)
+++ ipoib/kernel/ipoib_adapter.h (working copy)
@@ -70,6 +70,8 @@
  uint32_t sa_timeout;
  uint32_t sa_retry_cnt;
  uint32_t recv_pool_ratio;
+ uint32_t payload_mtu;
+ uint32_t xfer_block_size;
  mac_addr_t conf_mac;
 
 } ipoib_params_t;
Index: ipoib/kernel/ipoib_driver.c
===================================================================
--- ipoib/kernel/ipoib_driver.c (revision 544)
+++ ipoib/kernel/ipoib_driver.c (working copy)
@@ -525,6 +525,21 @@
  }
  p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
 
+ /* AAA Required: MTU size. */
+ RtlInitUnicodeString( &keyword, L"PayloadMtu" );
+ NdisReadConfiguration(
+  &status, &p_param, h_config, &keyword, NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+   ("PayloadMtu parameter missing. Use the default.\n") );
+  p_adapter->params.payload_mtu = DEFAULT_IB_UD_MTU -
sizeof(ipoib_hdr_t);
+ }
+ else
+ {
+  p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData -
sizeof(ipoib_hdr_t);
+ }
+ p_adapter->params.xfer_block_size = (sizeof(eth_hdr_t) +
p_adapter->params.payload_mtu);
  NdisReadNetworkAddress( &status, &mac, &len, h_config );

[Yossi Leybovich]
 
I don't think that we should maintain backward compatibility with
drivers that don't define the "PayloadMtu"
We should enforce it as require parameter , and in case its missing then
return with error like other registry keys.
What do you think ?


[Yossi Leybovich]

I don't really understand what exactly you want the user to set up.
I would assume that the user will like to define the maximum data to
transfer in one IPoIB packet, so the user should not be aware about the
presence of IPoIB header (4 bytes).
I would say that the maximum payload will be 2044 bytes and we will set
it to be ..
p_adapter->params.payload_mtu = p_param->ParameterData.IntegerData

In that way the use will get exactly the amount of payload he requested
with out knowing about the size of the ipoib header.


  ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr,
p_adapter->mac.addr );
@@ -705,7 +720,7 @@
 
 #if IPOIB_USE_DMA
  status =
-  NdisMInitializeScatterGatherDma( h_adapter, TRUE, XFER_BLOCK_SIZE );
+  NdisMInitializeScatterGatherDma( h_adapter, TRUE,
p_adapter->params.xfer_block_size );
  if( status != NDIS_STATUS_SUCCESS )
  {
   ipoib_destroy_adapter( p_adapter );
@@ -889,7 +904,7 @@
  case OID_GEN_MAXIMUM_FRAME_SIZE:
   IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
    ("Port %d received query for OID_GEN_MAXIMUM_FRAME_SIZE\n",
port_num) );
-  info = PAYLOAD_MTU;
+  info = p_adapter->params.payload_mtu;
   break;
 
  case OID_GEN_LINK_SPEED:
@@ -927,14 +942,14 @@
  case OID_GEN_TRANSMIT_BUFFER_SPACE:
   IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
    ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE\n",
port_num) );
-  info = p_adapter->params.sq_depth * XFER_BLOCK_SIZE;
+  info = p_adapter->params.sq_depth *
p_adapter->params.xfer_block_size;
   break;
 
  case OID_GEN_RECEIVE_BUFFER_SPACE:
   IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
    ("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE "
    "or OID_GEN_RECEIVE_BUFFER_SPACE\n", port_num) );
-  info = p_adapter->params.rq_depth * XFER_BLOCK_SIZE;
+  info = p_adapter->params.rq_depth *
p_adapter->params.xfer_block_size;
   break;
 
  case OID_GEN_MAXIMUM_LOOKAHEAD:
@@ -948,7 +963,7 @@
    "OID_GEN_TRANSMIT_BLOCK_SIZE or "
    "OID_GEN_RECEIVE_BLOCK_SIZE or "
    "OID_GEN_MAXIMUM_TOTAL_SIZE\n", port_num) );
-  info = XFER_BLOCK_SIZE;
+  info = p_adapter->params.xfer_block_size;
   break;
 
  case OID_GEN_VENDOR_ID:
Index: ipoib/kernel/ipoib_driver.h
===================================================================
--- ipoib/kernel/ipoib_driver.h (revision 544)
+++ ipoib/kernel/ipoib_driver.h (working copy)
@@ -47,19 +47,23 @@
 #define MAX_BUNDLE_ID_LENGTH 32
 
 
-#define IB_MTU   2048
+//#define IB_MTU   2048
 /*
  * Header length as defined by IPoIB spec:
  *
http://www.ietf.org/internet-drafts/draft-ietf-ipoib-ip-over-infiniband-
04.txt
  */
-#define PAYLOAD_MTU  (IB_MTU - sizeof(ipoib_hdr_t))
+// #define PAYLOAD_MTU  (IB_MTU - sizeof(ipoib_hdr_t))
+#define DEFAULT_IB_UD_MTU 1500
+#define MAX_IB_UD_MTU  2048
+#define MAX_PAYLOAD_MTU  (MAX_IB_UD_MTU - sizeof(ipoib_hdr_t))
+
 /*
  * Only the protocol type is sent as part of the UD payload
  * since the rest of the Ethernet header is encapsulated in the
  * various IB headers.  We report out buffer space as if we
  * transmit the ethernet headers.
  */
-#define XFER_BLOCK_SIZE  (sizeof(eth_hdr_t) + PAYLOAD_MTU)
+//#define XFER_BLOCK_SIZE  (sizeof(eth_hdr_t) + PAYLOAD_MTU)
 
[Yossi Leybovich]

I will still say that the IB_MTU is 2048 (this is according to the IpoIB
spec and the multicast group mtu)
But we can use MAX_PAYLOAD_MTU and MAX_XFER_BLOCK_SIZE

 typedef struct _ipoib_globals
Index: ipoib/kernel/ipoib_port.c
===================================================================
--- ipoib/kernel/ipoib_port.c (revision 544)
+++ ipoib/kernel/ipoib_port.c (working copy)
@@ -73,7 +73,8 @@
 
************************************************************************
******/
 static void
 __port_construct(
- IN    ipoib_port_t* const   p_port );
+ IN    ipoib_port_t* const   p_port,
+ IN    ipoib_adapter_t* const  p_adapter );
 
 static ib_api_status_t
 __port_init(
@@ -130,7 +131,8 @@
 
************************************************************************
******/
 static void
 __buf_mgr_construct(
- IN    ipoib_port_t* const   p_port );
+ IN    ipoib_port_t* const   p_port ,
+ IN    ipoib_adapter_t* const  p_adapter );
 
 static ib_api_status_t
 __buf_mgr_init(
@@ -517,7 +519,7 @@
  gp_ipoib_port = p_port;
 #endif
 
- __port_construct( p_port );
+ __port_construct( p_port, p_adapter );
 
  status = __port_init( p_port, p_adapter, p_pnp_rec );
  if( status != IB_SUCCESS )
@@ -554,7 +556,8 @@
 
 static void
 __port_construct(
- IN    ipoib_port_t* const   p_port )
+ IN    ipoib_port_t* const   p_port ,
+ IN    ipoib_adapter_t* const  p_adapter )
 {
  IPOIB_ENTER( IPOIB_DBG_INIT );
 
@@ -564,7 +567,7 @@
  cl_spinlock_construct( &p_port->send_lock );
  cl_spinlock_construct( &p_port->recv_lock );
  __ib_mgr_construct( p_port );
- __buf_mgr_construct( p_port );
+ __buf_mgr_construct( p_port , p_adapter );
 
  __recv_mgr_construct( p_port );
  __send_mgr_construct( p_port );
@@ -945,7 +948,8 @@
 
************************************************************************
******/
 static void
 __buf_mgr_construct(
- IN    ipoib_port_t* const   p_port )
+ IN    ipoib_port_t* const   p_port, 
+ IN    ipoib_adapter_t* const  p_adapter )
 {
  IPOIB_ENTER( IPOIB_DBG_INIT );
 
@@ -955,7 +959,7 @@
  p_port->buf_mgr.h_buffer_pool = NULL;
 
  ExInitializeNPagedLookasideList( &p_port->buf_mgr.send_buf_list,
-  NULL, NULL, 0, XFER_BLOCK_SIZE, 'bipi', 0 );
+  NULL, NULL, 0, p_adapter->params.xfer_block_size, 'bipi', 0 );
 
[Yossi Leybovich]
I would like to declare the Lookaside list with the MAX_XFER_BLOCK_SIZE
and save the port to know the adapter in the construction step.
What do you think?


  p_port->buf_mgr.h_send_pkt_pool = NULL;
  p_port->buf_mgr.h_send_buf_pool = NULL;
@@ -2715,7 +2719,7 @@
  }
 
  NdisAllocateBuffer( &status, &p_buf, p_port->buf_mgr.h_send_buf_pool,
-  p_desc->p_buf, XFER_BLOCK_SIZE );
+  p_desc->p_buf, p_port->p_adapter->params.xfer_block_size );
  if( status != NDIS_STATUS_SUCCESS )
  {
   NdisFreePacket( p_packet );
@@ -2798,7 +2802,7 @@
  }
 
  CL_ASSERT( tot_len > sizeof(eth_hdr_t) );
- CL_ASSERT( tot_len <= XFER_BLOCK_SIZE );
+ CL_ASSERT( tot_len <= p_port->p_adapter->params.xfer_block_size );
  /*
   * Assume that the ethernet header is always fully contained
   * in the first page of the first MDL.  This makes for much
@@ -2818,7 +2822,7 @@
    CL_ASSERT( buf_len >= sizeof(eth_hdr_t) );
    /* Skip the ethernet header. */
    buf_len -= sizeof(eth_hdr_t);
-   CL_ASSERT( buf_len <= PAYLOAD_MTU );
+   CL_ASSERT( buf_len <= p_port->p_adapter->params.payload_mtu );
    if( buf_len )
    {
     /* The ethernet header is a subset of this MDL. */
Index: ipoib/kernel/ipoib_port.h
===================================================================
--- ipoib/kernel/ipoib_port.h (revision 544)
+++ ipoib/kernel/ipoib_port.h (working copy)
@@ -181,7 +181,7 @@
  ipoib_hdr_t  hdr;
  union _payload
  {
-  uint8_t   data[PAYLOAD_MTU];
+        uint8_t   data[MAX_PAYLOAD_MTU];
   ipoib_arp_pkt_t arp;
   ip_pkt_t  ip;
 
@@ -268,7 +268,7 @@
 */
 typedef union _send_buf
 {
- uint8_t   data[PAYLOAD_MTU];
+    uint8_t   data[MAX_PAYLOAD_MTU];
  ipoib_arp_pkt_t arp;
  ip_pkt_t  ip;
 
Index: ipoib/kernel/netipoib.inf
===================================================================
--- ipoib/kernel/netipoib.inf (revision 544)
+++ ipoib/kernel/netipoib.inf (working copy)
@@ -111,12 +111,11 @@
 HKR, Ndi\Params\SaRetries,  Optional, 0, "0"
 HKR, Ndi\Params\SaRetries,  Min,  0, "1"
 
-HKR, Ndi\Params\RecvRatio,  ParamDesc, 0, "Receive Pool Ratio"
-HKR, Ndi\Params\RecvRatio,  Type,  0, "dword"
-HKR, Ndi\Params\RecvRatio,  Default, 0, "1"
-HKR, Ndi\Params\RecvRatio,  Optional, 0, "0"
-HKR, Ndi\Params\RecvRatio,  Min,  0, "1"
-HKR, Ndi\Params\RecvRatio,  Max,  0, "10"

[Yossi Leybovich]

Why do you remove the "Receive Pool Ratio" registry key ?
The IPoIB require this parameter , How does it work for you without it
(maybe you had left overs from former installation) ? 

+HKR, Ndi\Params\PayloadMtu,  ParamDesc, 0, "Payload Mtu size"
+HKR, Ndi\Params\PayloadMtu,  Type,  0, "dword"
+HKR, Ndi\Params\PayloadMtu,  Default, 0, "1500"
+HKR, Ndi\Params\PayloadMtu,  Min,  0, "1500"
+HKR, Ndi\Params\PayloadMtu,  Max,  0, "2048"
 

[Yossi leybovich]

Lets change the default to be 2048 , so in the default case we will gain
the maximum performance


 [IpoibService]
 DisplayName     = %IpoibServiceDispName%

-------------- next part --------------
A non-text attachment was scrubbed...
Name: added_ability_to_change_mtu-3.patch
Type: application/octet-stream
Size: 10312 bytes
Desc: added_ability_to_change_mtu-3.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20061120/e5d81be5/attachment.obj>


More information about the ofw mailing list