[ofw] [patch][bus] LSO flow revisited

Alex Naslednikov xalex at mellanox.co.il
Mon Jan 31 05:36:33 PST 2011


LSO flow revision with important bug fixes.
Signed-off-by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: ulp/ipoib_NDIS6_CM/kernel/ipoib_driver.cpp
===================================================================
--- ulp/ipoib_NDIS6_CM/kernel/ipoib_driver.cpp             (revision 3082)
+++ ulp/ipoib_NDIS6_CM/kernel/ipoib_driver.cpp          (working copy)
@@ -1264,7 +1264,7 @@
                                  if (p_adapter->params.LsoV1IPv4)
                                  {
                                                  p_offload->LsoV1.IPv4.Encapsulation = ulEncapsulation;
-                                                p_offload->LsoV1.IPv4.MaxOffLoadSize = LARGE_SEND_OFFLOAD_SIZE;
+                                               p_offload->LsoV1.IPv4.MaxOffLoadSize = MAX_LSO_SIZE;
                                                  p_offload->LsoV1.IPv4.MinSegmentCount = LSO_MIN_SEG_COUNT;
                                                  p_offload->LsoV1.IPv4.TcpOptions = NDIS_OFFLOAD_SUPPORTED;
                                                  p_offload->LsoV1.IPv4.IpOptions = NDIS_OFFLOAD_SUPPORTED;
@@ -1273,14 +1273,14 @@
                                  if (p_adapter->params.LsoV2IPv4)
                                  {
                                                  p_offload->LsoV2.IPv4.Encapsulation = ulEncapsulation;
-                                                p_offload->LsoV2.IPv4.MaxOffLoadSize = LARGE_SEND_OFFLOAD_SIZE;
+                                               p_offload->LsoV2.IPv4.MaxOffLoadSize = MAX_LSO_SIZE;
                                                  p_offload->LsoV2.IPv4.MinSegmentCount = LSO_MIN_SEG_COUNT;
                                  }

                                  if (p_adapter->params.LsoV2IPv6)
                                  {
                                                  p_offload->LsoV2.IPv6.Encapsulation = ulEncapsulation;
-                                                p_offload->LsoV2.IPv6.MaxOffLoadSize = LARGE_SEND_OFFLOAD_SIZE;
+                                               p_offload->LsoV2.IPv6.MaxOffLoadSize = MAX_LSO_SIZE;
                                                  p_offload->LsoV2.IPv6.MinSegmentCount = LSO_MIN_SEG_COUNT;

                                                  p_offload->LsoV2.IPv6.IpExtensionHeadersSupported = NDIS_OFFLOAD_NOT_SUPPORTED;
@@ -1353,13 +1353,13 @@
                if (p_adapter->offload_cap.lso) {

                                p_offload->LsoV1.IPv4.Encapsulation = ulEncapsulation;
-                              p_offload->LsoV1.IPv4.MaxOffLoadSize = LARGE_SEND_OFFLOAD_SIZE;
+                             p_offload->LsoV1.IPv4.MaxOffLoadSize = MAX_LSO_SIZE;
                                p_offload->LsoV1.IPv4.MinSegmentCount = 2;
                                p_offload->LsoV1.IPv4.TcpOptions = NDIS_OFFLOAD_SUPPORTED;
                                p_offload->LsoV1.IPv4.IpOptions = NDIS_OFFLOAD_SUPPORTED;

                                p_offload->LsoV2.IPv4.Encapsulation = ulEncapsulation;
-                              p_offload->LsoV2.IPv4.MaxOffLoadSize = LARGE_SEND_OFFLOAD_SIZE;
+                             p_offload->LsoV2.IPv4.MaxOffLoadSize = MAX_LSO_SIZE;
                                p_offload->LsoV2.IPv4.MinSegmentCount = 2;

                } else {
@@ -1861,7 +1861,7 @@
                // Even if offload is enabled, the packet size for mapping shouldn't change
                //
                //TODO bug ?
-              DmaDescription.MaximumPhysicalMapping = LARGE_SEND_OFFLOAD_SIZE + LSO_MAX_HEADER;
+             DmaDescription.MaximumPhysicalMapping = MAX_LSO_SIZE + LSO_MAX_HEADER;

                DmaDescription.ProcessSGListHandler = ipoib_process_sg_list;
                DmaDescription.SharedMemAllocateCompleteHandler = NULL;
Index: ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp
===================================================================
--- ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp                (revision 3085)
+++ ulp/ipoib_NDIS6_CM/kernel/ipoib_port.cpp             (working copy)
@@ -422,10 +422,10 @@

 static NDIS_STATUS
 GetLsoHeaderSize(
-              IN                           PNET_BUFFER                   pNetBuffer,
+             IN OUT PNET_BUFFER                   pNetBuffer,
                IN                           LsoData                                *pLsoData,
-              OUT       UINT                                      *IndexOfData,
-              IN                           ipoib_hdr_t        *ipoib_hdr );
+             IN                           ipoib_hdr_t        *ipoib_hdr,
+             ULONG                                                                                 TcpHeaderOffset );


 static NDIS_STATUS
@@ -1148,7 +1148,7 @@
                                }
                                else if( p_port->p_adapter->params.lso )
                                {
-                                              max_phys_mapping = LARGE_SEND_OFFLOAD_SIZE;
+                                             max_phys_mapping = MAX_LSO_SIZE;
                                }
                                else
                                {
@@ -6312,7 +6312,6 @@
 {
                NDIS_STATUS                                    status;
                LsoData                                                                                                                                TheLsoData;
-              UINT                                                                                                                      IndexOfData = 0;

                IPOIB_ENTER( IPOIB_DBG_SEND );

@@ -6327,13 +6326,12 @@
                status = GetLsoHeaderSize(
                                FirstBuffer,
                                &TheLsoData,
-                              &IndexOfData,
-                              &p_port->hdr[hdr_idx] );
+                             &p_port->hdr[hdr_idx],
+                             p_lso_info->LsoV1Transmit.TcpHeaderOffset );

-              if ((status != NDIS_STATUS_SUCCESS ) ||
-                              (TheLsoData.FullBuffers != TheLsoData.UsedBuffers))
+             if ( (status != NDIS_STATUS_SUCCESS ) )
                {
-                              ASSERT(FALSE);
+                             ASSERT( status == NDIS_STATUS_SUCCESS );

                                IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("<-- Throwing this packet\n"));

@@ -6343,17 +6341,17 @@
                                }
                                return status;
                }
-              ASSERT(TheLsoData.LsoHeaderSize > 0);
+             ASSERT( TheLsoData.LsoHeaderSize > 0 );

                // Tell NDIS how much we will send.
-              if(p_lso_info->LsoV1Transmit.Type == NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE)
-    {
-              s_buf->tcp_payload = PacketLength-TheLsoData.LsoHeaderSize;
+             if( p_lso_info->LsoV1Transmit.Type == NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE )
+             {
+                                             s_buf->tcp_payload = PacketLength-TheLsoData.LsoHeaderSize;
                }

                p_desc->send_wr[0].wr.dgrm.ud.mss = mss;
-              p_desc->send_wr[0].wr.dgrm.ud.header = TheLsoData.LsoBuffers[0].pData;
-              p_desc->send_wr[0].wr.dgrm.ud.hlen = TheLsoData.LsoHeaderSize ;//lso_header_size;
+             p_desc->send_wr[0].wr.dgrm.ud.header = TheLsoData.LsoBuffers.pData;
+             p_desc->send_wr[0].wr.dgrm.ud.hlen = TheLsoData.LsoHeaderSize ;
                p_desc->send_wr[0].wr.dgrm.ud.remote_qp = s_buf->p_endpt->qpn;
                p_desc->send_wr[0].wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
                p_desc->send_wr[0].wr.dgrm.ud.h_av = s_buf->p_endpt->h_av;
@@ -9054,12 +9052,11 @@
     The routine process the packet and returns LSO information

 Arguments:
-    pNetBuffer - a pointer to the first net buffer object of the packet
-    TcpHeaderOffset - offset to the begining of the TCP header in the packet
-    pLsoData - pointer to LsoData object in which the routine returns the LSO information
-    pHeaderSize - pointer to ULONG object in which the header size is returned
-    IndexOfData -
-
+             pNetBuffer - a pointer to the first net buffer object of the packet
+             pLsoData - pointer to LsoData object in which the routine returns the LSO information
+             ipoib_hdr - pointer to preallocated IPoIB hdr
+             TcpHeaderOffset - offset to the beginning of the TCP header in the packet
+
 Return Value:
      NDIS_STATUS

@@ -9067,184 +9064,125 @@
     called at DISPATCH level
 --*/

-NDIS_STATUS GetLsoHeaderSize(
+
+NDIS_STATUS
+GetLsoHeaderSize(
                IN OUT PNET_BUFFER                   pNetBuffer,
                IN                           LsoData                                *pLsoData,
-              OUT       UINT                                      *IndexOfData,
-              IN                           ipoib_hdr_t        *ipoib_hdr )
+             IN                           ipoib_hdr_t        *ipoib_hdr,
+             ULONG                                                                                 TcpHeaderOffset
+             )
 {
-              UINT                      CurrLength;
-              PUCHAR                               pSrc;
-              PUCHAR                               pCopiedData = pLsoData->coppied_data;
-              ip_hdr_t              UNALIGNED  *IpHdr;
-              tcp_hdr_t            UNALIGNED *TcpHdr;
-              uint16_t               TcpHeaderLen;
-              uint16_t               IpHeaderLen;
-              uint16_t               IpOffset;
-              INT                                         FullBuffers = 0;
-              PMDL                    pMDL;
-              NDIS_STATUS    status = NDIS_STATUS_INVALID_PACKET;
-
-
-#define IP_OFFSET 14;
-              //
-              // This Flag indicates the way we gets the headers
-              // RegularFlow = we get the headers (ETH+IP+TCP) in the same Buffer
-              // in sequence.
-              //
-              boolean_t                                           IsRegularFlow = TRUE;
+             static const uint16_t coEthHeaderSize(14);

-              const uint16_t                   ETH_OFFSET = IP_OFFSET;
-
-              pLsoData->LsoHeaderSize = 0;
-              IpOffset = IP_OFFSET; //(uint16_t)pPort->EncapsulationFormat.EncapsulationHeaderSize;
-              *IndexOfData = 0;
+             IPOIB_ENTER( IPOIB_DBG_SEND );

-              pMDL = NET_BUFFER_CURRENT_MDL(pNetBuffer);
-              NdisQueryMdl(pMDL, &pSrc, &CurrLength, NormalPagePriority);
-
-              if (pSrc == NULL) {
-                              IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-                                              ("Error processing packets\n") );
-                              return status;
+             ASSERT(TcpHeaderOffset > 0);
+             ASSERT(pNetBuffer != NULL);
+
+             PUCHAR pSrc = NULL;
+             bool fAlreadyCopied = false;
+             PUCHAR pCopiedData = pLsoData->coppied_data;
+             UINT CurrLength = 0;
+             ULONG DataOffset = NET_BUFFER_CURRENT_MDL_OFFSET( pNetBuffer );
+
+             PMDL pMDL = NET_BUFFER_CURRENT_MDL( pNetBuffer );
+             NdisQueryMdl( pMDL, &pSrc, &CurrLength, NormalPagePriority );
+             if (pSrc == NULL)
+             {
+                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n") );
+                             return NDIS_STATUS_INVALID_PACKET;
                }
-
-              ULONG DataOffset = NET_BUFFER_CURRENT_MDL_OFFSET(pNetBuffer);
+
                pSrc += DataOffset;
                CurrLength -= DataOffset;

-              // We start by looking for the ethernet and the IP
-              if (CurrLength < ETH_OFFSET) {
-                              ASSERT ( CurrLength >= ETH_OFFSET );
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < ETH_OFFSET\n"));
-                              return status;
+             if( CurrLength < TcpHeaderOffset )
+             {
+                             //
+                             // We assume that the ETH and IP header exist in first segment
+                             //
+                             ASSERT( CurrLength >= TcpHeaderOffset );
+                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                             ("Error processing packets."
+                                             "The ETH & IP headers are divided into multiple segments\n") );
+                             return NDIS_STATUS_INVALID_PACKET;
                }
-
-              //pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + ETH_OFFSET;
-              if (CurrLength == ETH_OFFSET) {
-                              ASSERT(FALSE);
-                              IsRegularFlow = FALSE;
-                              memcpy(pCopiedData, pSrc, ETH_OFFSET);
-                              pCopiedData += ETH_OFFSET;
-                              FullBuffers++;
-                              // First buffer was only ethernet
-                              pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
-        NdisQueryMdl( NET_BUFFER_CURRENT_MDL(pNetBuffer),
-                                                                                &pSrc,
-                                                                                &CurrLength,
-                                                                                NormalPagePriority );
-                              if( pSrc == NULL )
+
+             if ( CurrLength == TcpHeaderOffset )
+             {
+                             ASSERT( CurrLength > TcpHeaderOffset );
+                             memcpy( pCopiedData, pSrc , TcpHeaderOffset );
+                             pCopiedData += TcpHeaderOffset;
+
+                             fAlreadyCopied = true;
+                             pNetBuffer = NET_BUFFER_NEXT_NB( pNetBuffer );
+                             NdisQueryMdl( NET_BUFFER_CURRENT_MDL(pNetBuffer), &pSrc, &CurrLength, NormalPagePriority);
+
+                             if ( pSrc == NULL )
                                {
-                                              IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-                                                              ("NdisQueryMdl failed\n") );
-                                              return status;
+                                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n") );
+                                             return NDIS_STATUS_INVALID_PACKET;
                    }
+                             // BUGBUG: If we do reach here, make sure that  pLsoData->LsoBuffers.pData will be set correctly.
                }
                else
                {
-                              // This is ETH + IP together (at least)
-                              pLsoData->LsoBuffers[0].pData = pSrc + (ETH_OFFSET - sizeof (ipoib_hdr_t));
-
                                //IMPORTANT: we de-facto replace ETH header by IPoIB header here
                                //TODO: This is not good practice to change data we got from NDIS
-                              memcpy (pLsoData->LsoBuffers[0].pData, ipoib_hdr, sizeof (ipoib_hdr_t));
+                             pLsoData->LsoBuffers.pData = pSrc + (coEthHeaderSize - sizeof (ipoib_hdr_t));
+                             memcpy ( pLsoData->LsoBuffers.pData, ipoib_hdr, sizeof (ipoib_hdr_t) );
+                             pLsoData->LsoHeaderSize = TcpHeaderOffset - (coEthHeaderSize - sizeof (ipoib_hdr_t));

-                              CurrLength -= ETH_OFFSET;
-                              pSrc = pSrc + ETH_OFFSET;
-                              pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + sizeof (ipoib_hdr_t);
+                             CurrLength -= TcpHeaderOffset;
+                             pSrc += TcpHeaderOffset;
                }
-              // we should now be having at least the size of ethernet data
-              if (CurrLength < sizeof (ip_hdr_t)) {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-                                              ("CurrLength < sizeof (ip_hdr_t)\n"));
-                              return status;
+
+             if ( CurrLength < sizeof(tcp_hdr_t) )
+             {
+                             ASSERT( CurrLength >= sizeof(tcp_hdr_t) );
+                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                             ("Error processing packets"
+                                             "The buffer is too small to contain TCP header\n") );
+                             return NDIS_STATUS_INVALID_PACKET;
                }
-              IpHdr = (ip_hdr_t UNALIGNED*)pSrc;
-              IpHeaderLen = (uint16_t)IP_HEADER_LENGTH(IpHdr);
-              ASSERT(IpHdr->prot == IP_PROT_TCP);
-              if( CurrLength < IpHeaderLen )
+
+             //
+             // We found the TCP header
+             //
+             tcp_hdr_t UNALIGNED* TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;
+             uint16_t TcpHeaderLen = TCP_HEADER_LENGTH( TcpHdr );
+             if (CurrLength < TcpHeaderLen)
                {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-                                              ("Error processing packets\n") );
-                              return status;
+                             ASSERT( CurrLength >= sizeof(tcp_hdr_t) );
+                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                             ("Error processing packets"
+                                             "The buffer is too small to contain TCP data\n") );
+                             return NDIS_STATUS_INVALID_PACKET;
                }
-              pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + IpHeaderLen;
-              // We now start to find where the TCP header starts
-              if (CurrLength == IpHeaderLen) {
-                              ASSERT(FALSE);
-                              // two options :
-                              // if(IsRegularFlow = FALSE) ==> ETH and IP seperated in two buffers
-                              // if(IsRegularFlow = TRUE ) ==> ETH and IP in the same buffer
-                              // TCP will start at next buffer
-                              if(IsRegularFlow){
-                                              memcpy(pCopiedData, pSrc-ETH_OFFSET ,ETH_OFFSET+IpHeaderLen);
-                                              pCopiedData += (ETH_OFFSET + IpHeaderLen);
-                              } else {
-                                              memcpy(pCopiedData, pSrc,IpHeaderLen);
-                                              pCopiedData += IpHeaderLen;
-                              }

-                              FullBuffers++;
-                              IsRegularFlow = FALSE;
-                              pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
-                              NdisQueryMdl( NET_BUFFER_CURRENT_MDL(pNetBuffer),
-                                                                                &pSrc,
-                                                                                &CurrLength,
-                                                                                NormalPagePriority );
-                              if (pSrc == NULL) {
-                                              IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR,
-                                                              ("NdisQueryMdl failed\n") );
-                                              return status;
-                              }
-              } else {
-                              // if(IsRegularFlow = TRUE ) ==> the ETH and IP and TCP in the same buffer.
-                              // if(IsRegularFlow = FALSE ) ==> ETH in one buffer , IP+TCP together
-                              // in the same buffer
-                              if (IsRegularFlow) {
-                                              pLsoData->LsoBuffers[0].Len += IpHeaderLen;
-                              } else {
-                                              memcpy(pCopiedData, pSrc, IpHeaderLen);
-                                              pCopiedData += IpHeaderLen;
-                              }
-
-                              CurrLength -= IpHeaderLen;
-                              pSrc = pSrc + IpHeaderLen;
+             pLsoData->LsoHeaderSize += TcpHeaderLen;
+             if ( pLsoData->LsoHeaderSize > LSO_MAX_HEADER )
+             {
+                             ASSERT(pLsoData->LsoBuffers.Len <= LSO_MAX_HEADER);
+                                             IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                             ("Error processing packets.LsoHeaderSize > LSO_MAX_HEADER\n") );
+                             return NDIS_STATUS_DEVICE_FAILED;
                }
-              if (CurrLength < sizeof (tcp_hdr_t)) {
-                              IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR,
-                                              ("Error porcessing packets\n") );
-                              return status;
-              }
-              // We have finaly found the TCP header
-              TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;
-              TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr);
-
-              if (CurrLength < TcpHeaderLen) {
-                              IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
-                              return status;
-              }
-              pLsoData->LsoHeaderSize =  pLsoData->LsoHeaderSize + TcpHeaderLen;
-              if(IsRegularFlow){
-                              pLsoData->LsoBuffers[0].Len += TcpHeaderLen;
-              }
-              else{
+
+             if( fAlreadyCopied )
+             {
                                memcpy(pCopiedData, pSrc, TcpHeaderLen);
                                pCopiedData += TcpHeaderLen;
+                             pLsoData->LsoBuffers.pData = pLsoData->coppied_data;
                }
-              if (CurrLength == TcpHeaderLen) {
-                              FullBuffers++;
-                              pLsoData->UsedBuffers = FullBuffers;
-                              *IndexOfData = FullBuffers ;
-              } else {
-                              pLsoData->UsedBuffers = FullBuffers + 1;
-                              *IndexOfData = FullBuffers - 1;
-              }
-              pLsoData->FullBuffers = FullBuffers;
-              if (!IsRegularFlow){
-                              pLsoData->LsoBuffers[0].pData = pLsoData->coppied_data;
-                              pLsoData->LsoBuffers[0].Len = ETH_OFFSET + IpHeaderLen + TcpHeaderLen;
-                              ASSERT(pLsoData->LsoBuffers[0].Len <= LSO_MAX_HEADER);
-              }
+#if DBG
+             pLsoData->LsoBuffers.Len =
+                             TcpHeaderOffset + TcpHeaderLen - (coEthHeaderSize - sizeof (ipoib_hdr_t));
+             ASSERT( pLsoData->LsoBuffers.Len <= MAX_LSO_SIZE );
+#endif
+
+             IPOIB_EXIT( IPOIB_DBG_SEND );
                return NDIS_STATUS_SUCCESS;
 }

Index: ulp/ipoib_NDIS6_CM/kernel/offload.h
===================================================================
--- ulp/ipoib_NDIS6_CM/kernel/offload.h           (revision 3080)
+++ ulp/ipoib_NDIS6_CM/kernel/offload.h        (working copy)
@@ -26,22 +26,20 @@
 //

 #define LSO_MAX_HEADER 136
-#define LARGE_SEND_OFFLOAD_SIZE 60000
+#define MAX_LSO_SIZE 60000

 // This struct is being used in order to pass data about the GSO buffers if they
 // are present
-typedef struct LsoBuffer_ {
+struct LsoBuffer {
     PUCHAR pData;
     UINT Len;
-} LsoBuffer;
+};

-typedef struct LsoData_ {
-    LsoBuffer LsoBuffers[1];
-    UINT UsedBuffers;
-    UINT FullBuffers;
+struct LsoData {
+    LsoBuffer LsoBuffers;
     UINT LsoHeaderSize;
-    UINT IndexOfData;
     UCHAR coppied_data[LSO_MAX_HEADER];
-} LsoData;
+};


+
Alexander (XaleX) Naslednikov
SW Networking Team
Mellanox Technologies

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20110131/fc428237/attachment.html>


More information about the ofw mailing list