[ofw] [Patch][IPoIB_NDIS6_CM] LSO revisited

Alex Naslednikov xalex at mellanox.co.il
Mon Nov 29 06:28:21 PST 2010


Applied at 3005

From: Alex Naslednikov
Sent: Thursday, November 25, 2010 4:10 PM
To: ofw at lists.openfabrics.org
Subject: [ofw][Patch][IPoIB_NDIS6_CM] LSO revisited

[IPoIB_NDIS6_CM]
This patch rearranges the LSO flow and makes the functions more robust and reusable.
This patch affects the regular flow as well (the same code is now reused for both the LSO and non-LSO flows).
Signed-off-by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: ipoib_port.cpp
===================================================================
--- ipoib_port.cpp            (revision 2999)
+++ ipoib_port.cpp          (working copy)
@@ -329,7 +329,6 @@
static NDIS_STATUS
__send_gen(
               IN                                                           ipoib_send_NB_SG *                                      s_buf,
-              IN                                                           INT                                                                                         lso_data_index,
               IN                                                           UINT                                                                                      lso_header_size OPTIONAL);

 static NDIS_STATUS
@@ -3591,7 +3590,7 @@

                                p_desc->send_dir = SEND_UD_QP;
                               cl_perf_start( SendGen );
-                              status = __send_gen( s_buf, 0, 0 );
+                              status = __send_gen( s_buf, 0 );
                               cl_perf_stop( &p_port->p_adapter->perf, SendGen );
                               break;
               }
@@ -3699,13 +3698,13 @@
__send_copy(
               IN                                                           ipoib_port_t* const                                        p_port,
               IN                                                           ipoib_send_NB_SG *                                      s_buf,
-              IN                                                           UINT                                                                                      lso_header_size)
+              IN                                                           UINT                                                                                      total_offset)
{
               ULONG                                 tot_len = 0;
-              uint32_t                               offset =
-                                                                                              (lso_header_size ? EthIPoIBHeaderOffset : EthHeaderOffset );
-              int seg_index = lso_header_size ? 0 : 1;

+              // first DS does not contain IPoIB header in the case of LSO, so we set it back to 0
+              int seg_index = ( total_offset == EthHeaderOffset ? 1 : 0 );
+
               IPOIB_ENTER( IPOIB_DBG_SEND );

                ipoib_send_desc_t *p_desc = p_port->p_desc;
@@ -3720,15 +3719,15 @@
                               return NDIS_STATUS_RESOURCES;
               }
               tot_len = CopyNetBuffer(s_buf->p_curr_nb, (PUCHAR) s_buf->p_send_buf);
-              if (tot_len <= lso_header_size) {
-                              ASSERT(tot_len > lso_header_size);
+              if ( !tot_len ) {
+                              ASSERT( tot_len );
                               return NDIS_STATUS_FAILURE;
               }

                /* Setup the work request. */
               p_desc->send_wr[0].local_ds[seg_index].vaddr = cl_get_physaddr(
-                              ((uint8_t*)s_buf->p_send_buf) + lso_header_size + offset );
-              p_desc->send_wr[0].local_ds[seg_index].length = tot_len - lso_header_size - offset;
+                              ((uint8_t*)s_buf->p_send_buf) + total_offset );
+              p_desc->send_wr[0].local_ds[seg_index].length = tot_len - total_offset;
               p_desc->send_wr[0].local_ds[seg_index].lkey = p_port->ib_mgr.lkey;
               p_desc->send_wr[0].wr.num_ds = seg_index+1;

@@ -4259,13 +4258,34 @@
static NDIS_STATUS
__send_gen(
               IN                                                           ipoib_send_NB_SG *                                      s_buf,
-              IN                                                           INT                                                                                                         lso_data_index,
               IN                                                           UINT                                                                                      lso_header_size OPTIONAL)
{
               NDIS_STATUS                    status;
-              uint32_t                               i, j                           = 1;
+              uint32_t                               i = 0;       //Index of SG element
+              uint32_t                               j;             //Index of DS elements;
               ULONG                                                 DataOffset          = 0;
-              uint32_t                               offset                    = EthHeaderOffset;
+              UINT                                      total_offset;
+
+              /* We calculate the amount of bytes to skip over ETH header in a case of normal send or
+                * LSO header in a case of LSO.
+                * But in the case of LSO we replace last 4 bytes of ETH header by IPoIB header
+                *
+                * Thus, the calculation should be:
+                * Normal send: offset = sizeof ETH header
+                * LSO                     : offset = sizeof ETH header+sizeof IP header+ sizeof TCP header
+                                                                                              == sizeof LSO header + (sizeof ETH header-sizeof IPoIB header)
+                */
+              if ( lso_header_size )
+              {
+                              total_offset = lso_header_size + EthIPoIBHeaderOffset;
+                              j = 0;
+              }
+              else
+              {
+                              total_offset =  EthHeaderOffset;
+                              j = 1; //Skip on the first DS, because it alredy contain IPoIB header
+              }
+
               PERF_DECLARE( SendCopy );

                IPOIB_ENTER( IPOIB_DBG_SEND );
@@ -4298,7 +4318,7 @@
                               if( !s_buf->p_port->p_adapter->params.cm_enabled )
                               {
                                               cl_perf_start( SendCopy );
-                                              status = __send_copy( s_buf->p_port, s_buf, lso_header_size );
+                                              status = __send_copy( s_buf->p_port, s_buf, total_offset );
                                               cl_perf_stop( &s_buf->p_port->p_adapter->perf, SendCopy );
                               }
                               else
@@ -4309,55 +4329,33 @@
                               return status;
               }

-              /*
-              * Skip the ethernet header.  It is either the first element,
-              * or part of it.
-              */
-              i = 0;
               DataOffset= (ULONG)(NET_BUFFER_CURRENT_MDL_OFFSET(s_buf->p_curr_nb));

-              if( lso_data_index )
-              { /* we have an LSO packet */
-                              i = lso_data_index;
-                              j = 0;
-                              ASSERT( i <= p_sgl->NumberOfElements);
-                              if (i == p_sgl->NumberOfElements) {
-
-                                              /****************************
-                                              * Handle the case when there is only one SG element !
-                                              ****************************/
-
-                                              p_desc->send_wr[0].local_ds[j].vaddr =
-                                                              p_sgl->Elements[0].Address.QuadPart + lso_header_size + DataOffset + EthIPoIBHeaderOffset;
-                                              p_desc->send_wr[0].local_ds[j].length =
-                                                              p_sgl->Elements[0].Length - lso_header_size - DataOffset - EthIPoIBHeaderOffset;
-                                              p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
-                                              /* Set the number of data segments. */
-                                              p_desc->send_wr[0].wr.num_ds = 1;
-                                              return NDIS_STATUS_SUCCESS;
-                              }
+
+              /*
+              * Skip the Ethernet or LSO header.  It is contained at N+1 first elements (N>=0),
+              * while (N+1) element may contain only part of it
+              */
+
+              while( total_offset >= p_sgl->Elements[i].Length  )
+              {
+                              // skip the current element and increment the index
+                              total_offset -= p_sgl->Elements[i++].Length;
               }
-              else while( offset )
+
+
+              if (total_offset > 0 )
               {
-                              if( p_sgl->Elements[i].Length <= offset )
-                              {
-                                              offset -= p_sgl->Elements[i++].Length;
-                              }
-                              else
-                              {
-                                              p_desc->send_wr[0].local_ds[j].vaddr =
-                                                              p_sgl->Elements[i].Address.QuadPart + offset + DataOffset;
-                                              p_desc->send_wr[0].local_ds[j].length =
-                                                              p_sgl->Elements[i].Length - offset - DataOffset;
-                                              p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
-                                              if( p_desc->send_wr[0].local_ds[j].length > 0 )
-                                              {
-                                                              j++;
-                                              }
-                                              i++;
-                                              break;
-                              }
+                              //Handle the (N+1) element that can probably contain both Header and Data
+                              p_desc->send_wr[0].local_ds[j].vaddr =
+                                              p_sgl->Elements[i].Address.QuadPart + total_offset + DataOffset;
+                              p_desc->send_wr[0].local_ds[j].length =
+                                              p_sgl->Elements[i].Length - total_offset - DataOffset;
+                              p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
+                              j++;
+                              i++;
               }
+
               /* Now fill in the rest of the local data segments. */
               while( i < p_sgl->NumberOfElements )
               {
@@ -4527,7 +4525,7 @@

 send_gen:
               cl_perf_start( SendTcp );
-              status = __send_gen(  s_buf, 0, 0 );
+              status = __send_gen(  s_buf, 0 );
               cl_perf_stop( &p_port->p_adapter->perf, SendTcp );

                IPOIB_EXIT( IPOIB_DBG_SEND );
@@ -5273,27 +5271,20 @@
                       ASSERT(p_lso_info->LsoV1Transmit.TcpHeaderOffset == p_lso_info->LsoV2Transmit.TcpHeaderOffset);
                   }
               }
-
+
               /* Format the send descriptor. */
               hdr_idx = cl_atomic_inc( &s_buf->p_port->hdr_idx );
               hdr_idx &= (s_buf->p_port->p_adapter->params.sq_depth - 1);
               ASSERT( hdr_idx < s_buf->p_port->p_adapter->params.sq_depth );

+              /* Set up IPoIB Header */
               s_buf->p_port->hdr[hdr_idx].type = p_eth_hdr->type;
               s_buf->p_port->hdr[hdr_idx].resv = 0;
+

-              //TODO why enter this block for LSO ???
-              p_desc->send_wr[0].local_ds[0].vaddr =
-                                                                                                                              cl_get_physaddr( &s_buf->p_port->hdr[hdr_idx] );
-              p_desc->send_wr[0].local_ds[0].length = sizeof(ipoib_hdr_t);
-              p_desc->send_wr[0].local_ds[0].lkey = s_buf->p_port->ib_mgr.lkey;
-              p_desc->send_wr[0].wr.send_opt = 0;
-
               //Init send buffer to 0
               s_buf->p_send_buf = NULL;

-
-
               if (mss && (p_lso_info->LsoV1Transmit.TcpHeaderOffset != 0))
               { //We have LSO packet
                               ASSERT( mss == (p_lso_info->LsoV1Transmit.MSS & p_lso_info->LsoV2Transmit.MSS));
@@ -5316,6 +5307,13 @@
                               uint32_t               i;

                                cl_perf_start( SendMgrFilter );
+
+                              /* Put first DS to be IPoIB Header */
+                              p_desc->send_wr[0].local_ds[0].vaddr =
+                                                                                                                              cl_get_physaddr( &s_buf->p_port->hdr[hdr_idx] );
+                              p_desc->send_wr[0].local_ds[0].length = sizeof(ipoib_hdr_t);
+                              p_desc->send_wr[0].local_ds[0].lkey = s_buf->p_port->ib_mgr.lkey;
+
                               status = __send_mgr_filter( p_eth_hdr, p_mdl, mdl_len, s_buf );
                               cl_perf_stop( &p_port->p_adapter->perf, SendMgrFilter );
                               if( status != NDIS_STATUS_SUCCESS )
@@ -5327,6 +5325,7 @@

                                if( p_desc->send_dir == SEND_UD_QP )
                               {
+                                              ASSERT ( p_desc->num_wrs == 1 );
                                               p_desc->send_qp = s_buf->p_port->ib_mgr.h_qp; // UD QP
                                               for( i = 0; i < p_desc->num_wrs; i++ )
                                               {
@@ -5423,14 +5422,10 @@
                               return status;
               }
               ASSERT(TheLsoData.LsoHeaderSize > 0);
+
               // Tell NDIS how much we will send.
-              //PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] = UlongToPtr(PacketLength);
-              //p_lso_info->LsoV1TransmitComplete.TcpPayload = PacketLength;
-
-              // Tell NDIS how much we will send.
               if(p_lso_info->LsoV1Transmit.Type == NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE)
     {
-                              //IPOIB_TCP_PAYLOAD_FROM_NB(p_netbuf) += PacketLength-TheLsoData.LsoHeaderSize;
                                s_buf->tcp_payload = PacketLength-TheLsoData.LsoHeaderSize;

                }
@@ -5453,7 +5448,7 @@
               p_desc->send_wr[0].wr.p_next = NULL;
               p_desc->send_qp = p_port->ib_mgr.h_qp;
               p_desc->send_dir = SEND_UD_QP;
-              status = __send_gen( s_buf, IndexOfData, TheLsoData.LsoHeaderSize );
+              status = __send_gen( s_buf, TheLsoData.LsoHeaderSize );

                IPOIB_EXIT( IPOIB_DBG_SEND );
               return status;
@@ -8218,7 +8213,7 @@
--*/

 NDIS_STATUS GetLsoHeaderSize(
-              IN                           PNET_BUFFER                    pNetBuffer,
+              IN OUT  PNET_BUFFER                    pNetBuffer,
               IN                           LsoData                                *pLsoData,
               OUT       UINT                                      *IndexOfData,
               IN                           ipoib_hdr_t *ipoib_hdr
@@ -8255,7 +8250,7 @@
               NdisQueryMdl(pMDL, &pSrc, &CurrLength, NormalPagePriority);

                if (pSrc == NULL) {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
+                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
                               return status;
               }

@@ -8265,7 +8260,8 @@

                // We start by looking for the ethernet and the IP
               if (CurrLength < ETH_OFFSET) {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+                              ASSERT ( CurrLength >= ETH_OFFSET );
+                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < ETH_OFFSET\n"));
                               return status;
               }

@@ -8280,7 +8276,7 @@
                               pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
         NdisQueryMdl(NET_BUFFER_CURRENT_MDL(pNetBuffer), &pSrc, &CurrLength, NormalPagePriority);
                               if (pSrc == NULL) {
-                                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+                                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
                                               return status;
                   }
               } else {
@@ -8288,6 +8284,7 @@
                               pLsoData->LsoBuffers[0].pData = pSrc + (ETH_OFFSET - sizeof (ipoib_hdr_t));

                                //IMPORTANT: we de-facto replace ETH header by IPoIB header here
+                              //TODO: This is not good practice to change data we got from NDIS
                               memcpy (pLsoData->LsoBuffers[0].pData, ipoib_hdr, sizeof (ipoib_hdr_t));

                                CurrLength -= ETH_OFFSET;
@@ -8296,14 +8293,14 @@
               }
               // we should now be having at least the size of ethernet data
               if (CurrLength < sizeof (ip_hdr_t)) {
-                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < sizeof (ip_hdr_t)\n"));
                               return status;
               }
               IpHdr = (ip_hdr_t UNALIGNED*)pSrc;
               IpHeaderLen = (uint16_t)IP_HEADER_LENGTH(IpHdr);
               ASSERT(IpHdr->prot == IP_PROT_TCP);
               if (CurrLength < IpHeaderLen) {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
+                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < IpHeaderLe\n"));
                               return status;
               }
               pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + IpHeaderLen;
@@ -8324,12 +8321,10 @@

                                FullBuffers++;
                               IsRegularFlow = FALSE;
-                              //NdisGetNextBuffer( CurrBuffer, &CurrBuffer);
-                              //NdisQueryBufferSafe( CurrBuffer, &pSrc, &CurrLength, NormalPagePriority );
                               pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
                               NdisQueryMdl(NET_BUFFER_CURRENT_MDL(pNetBuffer), &pSrc, &CurrLength, NormalPagePriority);
                               if (pSrc == NULL) {
-                                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+                                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
                                               return status;
                               }
               } else {
@@ -8346,17 +8341,15 @@
                               pSrc = pSrc + IpHeaderLen;
               }
               if (CurrLength < sizeof (tcp_hdr_t)) {
-                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+                              IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error processing packets\n"));
                               return status;
               }
               // We have finaly found the TCP header
               TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;
               TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr);
-
-              //ASSERT(TcpHeaderLen == 20);

                if (CurrLength < TcpHeaderLen) {
-                              //IPOIB_PRINT(TRACE_LEVEL_VERBOSE, ETH, ("Error porcessing packets\n"));
+                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
                               return status;
               }
               pLsoData->LsoHeaderSize =  pLsoData->LsoHeaderSize + TcpHeaderLen;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101129/89b8e21c/attachment.html>


More information about the ofw mailing list