[ofw] [Patch][IPoIB_NDIS6_CM] LSO revisited
Alex Naslednikov
xalex at mellanox.co.il
Thu Nov 25 06:09:44 PST 2010
[IPoIB_NDIS6_CM]
This patch rearranges the LSO flow and makes the affected functions more robust and reusable.
It affects the regular (non-LSO) flow as well, because the same code is now reused for both LSO and non-LSO sends.
Signed-off-by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: ipoib_port.cpp
===================================================================
--- ipoib_port.cpp (revision 2999)
+++ ipoib_port.cpp (working copy)
@@ -329,7 +329,6 @@
static NDIS_STATUS
__send_gen(
IN ipoib_send_NB_SG * s_buf,
- IN INT lso_data_index,
IN UINT lso_header_size OPTIONAL);
static NDIS_STATUS
@@ -3591,7 +3590,7 @@
p_desc->send_dir = SEND_UD_QP;
cl_perf_start( SendGen );
- status = __send_gen( s_buf, 0, 0 );
+ status = __send_gen( s_buf, 0 );
cl_perf_stop( &p_port->p_adapter->perf, SendGen );
break;
}
@@ -3699,13 +3698,13 @@
__send_copy(
IN ipoib_port_t* const p_port,
IN ipoib_send_NB_SG * s_buf,
- IN UINT lso_header_size)
+ IN UINT total_offset)
{
ULONG tot_len = 0;
- uint32_t offset =
- (lso_header_size ? EthIPoIBHeaderOffset : EthHeaderOffset );
- int seg_index = lso_header_size ? 0 : 1;
+ // first DS does not contain IPoIB header in the case of LSO, so we set it back to 0
+ int seg_index = ( total_offset == EthHeaderOffset ? 1 : 0 );
+
IPOIB_ENTER( IPOIB_DBG_SEND );
ipoib_send_desc_t *p_desc = p_port->p_desc;
@@ -3720,15 +3719,15 @@
return NDIS_STATUS_RESOURCES;
}
tot_len = CopyNetBuffer(s_buf->p_curr_nb, (PUCHAR) s_buf->p_send_buf);
- if (tot_len <= lso_header_size) {
- ASSERT(tot_len > lso_header_size);
+ if ( !tot_len ) {
+ ASSERT( tot_len );
return NDIS_STATUS_FAILURE;
}
/* Setup the work request. */
p_desc->send_wr[0].local_ds[seg_index].vaddr = cl_get_physaddr(
- ((uint8_t*)s_buf->p_send_buf) + lso_header_size + offset );
- p_desc->send_wr[0].local_ds[seg_index].length = tot_len - lso_header_size - offset;
+ ((uint8_t*)s_buf->p_send_buf) + total_offset );
+ p_desc->send_wr[0].local_ds[seg_index].length = tot_len - total_offset;
p_desc->send_wr[0].local_ds[seg_index].lkey = p_port->ib_mgr.lkey;
p_desc->send_wr[0].wr.num_ds = seg_index+1;
@@ -4259,13 +4258,34 @@
static NDIS_STATUS
__send_gen(
IN ipoib_send_NB_SG * s_buf,
- IN INT lso_data_index,
IN UINT lso_header_size OPTIONAL)
{
NDIS_STATUS status;
- uint32_t i, j = 1;
+ uint32_t i = 0; //Index of SG element
+ uint32_t j; //Index of DS elements;
ULONG DataOffset = 0;
- uint32_t offset = EthHeaderOffset;
+ UINT total_offset;
+
+ /* We calculate the amount of bytes to skip over ETH header in a case of normal send or
+ * LSO header in a case of LSO.
+ * But in the case of LSO we replace last 4 bytes of ETH header by IPoIB header
+ *
+ * Thus, the calculation should be:
+ * Normal send: offset = sizeof ETH header
+ * LSO : offset = sizeof ETH header+sizeof IP header+ sizeof TCP header
+ == sizeof LSO header + (sizeof ETH header-sizeof IPoIB header)
+ */
+ if ( lso_header_size )
+ {
+ total_offset = lso_header_size + EthIPoIBHeaderOffset;
+ j = 0;
+ }
+ else
+ {
+ total_offset = EthHeaderOffset;
+ j = 1; //Skip on the first DS, because it alredy contain IPoIB header
+ }
+
PERF_DECLARE( SendCopy );
IPOIB_ENTER( IPOIB_DBG_SEND );
@@ -4298,7 +4318,7 @@
if( !s_buf->p_port->p_adapter->params.cm_enabled )
{
cl_perf_start( SendCopy );
- status = __send_copy( s_buf->p_port, s_buf, lso_header_size );
+ status = __send_copy( s_buf->p_port, s_buf, total_offset );
cl_perf_stop( &s_buf->p_port->p_adapter->perf, SendCopy );
}
else
@@ -4309,55 +4329,33 @@
return status;
}
- /*
- * Skip the ethernet header. It is either the first element,
- * or part of it.
- */
- i = 0;
DataOffset= (ULONG)(NET_BUFFER_CURRENT_MDL_OFFSET(s_buf->p_curr_nb));
- if( lso_data_index )
- { /* we have an LSO packet */
- i = lso_data_index;
- j = 0;
- ASSERT( i <= p_sgl->NumberOfElements);
- if (i == p_sgl->NumberOfElements) {
-
- /****************************
- * Handle the case when there is only one SG element !
- ****************************/
-
- p_desc->send_wr[0].local_ds[j].vaddr =
- p_sgl->Elements[0].Address.QuadPart + lso_header_size + DataOffset + EthIPoIBHeaderOffset;
- p_desc->send_wr[0].local_ds[j].length =
- p_sgl->Elements[0].Length - lso_header_size - DataOffset - EthIPoIBHeaderOffset;
- p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
- /* Set the number of data segments. */
- p_desc->send_wr[0].wr.num_ds = 1;
- return NDIS_STATUS_SUCCESS;
- }
+
+ /*
+ * Skip the Ethernet or LSO header. It is contained at N+1 first elements (N>=0),
+ * while (N+1) element may contain only part of it
+ */
+
+ while( total_offset >= p_sgl->Elements[i].Length )
+ {
+ // skip the current element and increment the index
+ total_offset -= p_sgl->Elements[i++].Length;
}
- else while( offset )
+
+
+ if (total_offset > 0 )
{
- if( p_sgl->Elements[i].Length <= offset )
- {
- offset -= p_sgl->Elements[i++].Length;
- }
- else
- {
- p_desc->send_wr[0].local_ds[j].vaddr =
- p_sgl->Elements[i].Address.QuadPart + offset + DataOffset;
- p_desc->send_wr[0].local_ds[j].length =
- p_sgl->Elements[i].Length - offset - DataOffset;
- p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
- if( p_desc->send_wr[0].local_ds[j].length > 0 )
- {
- j++;
- }
- i++;
- break;
- }
+ //Handle the (N+1) element that can probably contain both Header and Data
+ p_desc->send_wr[0].local_ds[j].vaddr =
+ p_sgl->Elements[i].Address.QuadPart + total_offset + DataOffset;
+ p_desc->send_wr[0].local_ds[j].length =
+ p_sgl->Elements[i].Length - total_offset - DataOffset;
+ p_desc->send_wr[0].local_ds[j].lkey = s_buf->p_port->ib_mgr.lkey;
+ j++;
+ i++;
}
+
/* Now fill in the rest of the local data segments. */
while( i < p_sgl->NumberOfElements )
{
@@ -4527,7 +4525,7 @@
send_gen:
cl_perf_start( SendTcp );
- status = __send_gen( s_buf, 0, 0 );
+ status = __send_gen( s_buf, 0 );
cl_perf_stop( &p_port->p_adapter->perf, SendTcp );
IPOIB_EXIT( IPOIB_DBG_SEND );
@@ -5273,27 +5271,20 @@
ASSERT(p_lso_info->LsoV1Transmit.TcpHeaderOffset == p_lso_info->LsoV2Transmit.TcpHeaderOffset);
}
}
-
+
/* Format the send descriptor. */
hdr_idx = cl_atomic_inc( &s_buf->p_port->hdr_idx );
hdr_idx &= (s_buf->p_port->p_adapter->params.sq_depth - 1);
ASSERT( hdr_idx < s_buf->p_port->p_adapter->params.sq_depth );
+ /* Set up IPoIB Header */
s_buf->p_port->hdr[hdr_idx].type = p_eth_hdr->type;
s_buf->p_port->hdr[hdr_idx].resv = 0;
+
- //TODO why enter this block for LSO ???
- p_desc->send_wr[0].local_ds[0].vaddr =
- cl_get_physaddr( &s_buf->p_port->hdr[hdr_idx] );
- p_desc->send_wr[0].local_ds[0].length = sizeof(ipoib_hdr_t);
- p_desc->send_wr[0].local_ds[0].lkey = s_buf->p_port->ib_mgr.lkey;
- p_desc->send_wr[0].wr.send_opt = 0;
-
//Init send buffer to 0
s_buf->p_send_buf = NULL;
-
-
if (mss && (p_lso_info->LsoV1Transmit.TcpHeaderOffset != 0))
{ //We have LSO packet
ASSERT( mss == (p_lso_info->LsoV1Transmit.MSS & p_lso_info->LsoV2Transmit.MSS));
@@ -5316,6 +5307,13 @@
uint32_t i;
cl_perf_start( SendMgrFilter );
+
+ /* Put first DS to be IPoIB Header */
+ p_desc->send_wr[0].local_ds[0].vaddr =
+ cl_get_physaddr( &s_buf->p_port->hdr[hdr_idx] );
+ p_desc->send_wr[0].local_ds[0].length = sizeof(ipoib_hdr_t);
+ p_desc->send_wr[0].local_ds[0].lkey = s_buf->p_port->ib_mgr.lkey;
+
status = __send_mgr_filter( p_eth_hdr, p_mdl, mdl_len, s_buf );
cl_perf_stop( &p_port->p_adapter->perf, SendMgrFilter );
if( status != NDIS_STATUS_SUCCESS )
@@ -5327,6 +5325,7 @@
if( p_desc->send_dir == SEND_UD_QP )
{
+ ASSERT ( p_desc->num_wrs == 1 );
p_desc->send_qp = s_buf->p_port->ib_mgr.h_qp; // UD QP
for( i = 0; i < p_desc->num_wrs; i++ )
{
@@ -5423,14 +5422,10 @@
return status;
}
ASSERT(TheLsoData.LsoHeaderSize > 0);
+
// Tell NDIS how much we will send.
- //PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] = UlongToPtr(PacketLength);
- //p_lso_info->LsoV1TransmitComplete.TcpPayload = PacketLength;
-
- // Tell NDIS how much we will send.
if(p_lso_info->LsoV1Transmit.Type == NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE)
{
- //IPOIB_TCP_PAYLOAD_FROM_NB(p_netbuf) += PacketLength-TheLsoData.LsoHeaderSize;
s_buf->tcp_payload = PacketLength-TheLsoData.LsoHeaderSize;
}
@@ -5453,7 +5448,7 @@
p_desc->send_wr[0].wr.p_next = NULL;
p_desc->send_qp = p_port->ib_mgr.h_qp;
p_desc->send_dir = SEND_UD_QP;
- status = __send_gen( s_buf, IndexOfData, TheLsoData.LsoHeaderSize );
+ status = __send_gen( s_buf, TheLsoData.LsoHeaderSize );
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
@@ -8218,7 +8213,7 @@
--*/
NDIS_STATUS GetLsoHeaderSize(
- IN PNET_BUFFER pNetBuffer,
+ IN OUT PNET_BUFFER pNetBuffer,
IN LsoData *pLsoData,
OUT UINT *IndexOfData,
IN ipoib_hdr_t *ipoib_hdr
@@ -8255,7 +8250,7 @@
NdisQueryMdl(pMDL, &pSrc, &CurrLength, NormalPagePriority);
if (pSrc == NULL) {
- IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
return status;
}
@@ -8265,7 +8260,8 @@
// We start by looking for the ethernet and the IP
if (CurrLength < ETH_OFFSET) {
- IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+ ASSERT ( CurrLength >= ETH_OFFSET );
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < ETH_OFFSET\n"));
return status;
}
@@ -8280,7 +8276,7 @@
pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
NdisQueryMdl(NET_BUFFER_CURRENT_MDL(pNetBuffer), &pSrc, &CurrLength, NormalPagePriority);
if (pSrc == NULL) {
- IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
return status;
}
} else {
@@ -8288,6 +8284,7 @@
pLsoData->LsoBuffers[0].pData = pSrc + (ETH_OFFSET - sizeof (ipoib_hdr_t));
//IMPORTANT: we de-facto replace ETH header by IPoIB header here
+ //TODO: This is not good practice to change data we got from NDIS
memcpy (pLsoData->LsoBuffers[0].pData, ipoib_hdr, sizeof (ipoib_hdr_t));
CurrLength -= ETH_OFFSET;
@@ -8296,14 +8293,14 @@
}
// we should now be having at least the size of ethernet data
if (CurrLength < sizeof (ip_hdr_t)) {
- IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < sizeof (ip_hdr_t)\n"));
return status;
}
IpHdr = (ip_hdr_t UNALIGNED*)pSrc;
IpHeaderLen = (uint16_t)IP_HEADER_LENGTH(IpHdr);
ASSERT(IpHdr->prot == IP_PROT_TCP);
if (CurrLength < IpHeaderLen) {
- IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("CurrLength < IpHeaderLe\n"));
return status;
}
pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + IpHeaderLen;
@@ -8324,12 +8321,10 @@
FullBuffers++;
IsRegularFlow = FALSE;
- //NdisGetNextBuffer( CurrBuffer, &CurrBuffer);
- //NdisQueryBufferSafe( CurrBuffer, &pSrc, &CurrLength, NormalPagePriority );
pNetBuffer = NET_BUFFER_NEXT_NB(pNetBuffer);
NdisQueryMdl(NET_BUFFER_CURRENT_MDL(pNetBuffer), &pSrc, &CurrLength, NormalPagePriority);
if (pSrc == NULL) {
- IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("NdisQueryMdl failed\n"));
return status;
}
} else {
@@ -8346,17 +8341,15 @@
pSrc = pSrc + IpHeaderLen;
}
if (CurrLength < sizeof (tcp_hdr_t)) {
- IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error porcessing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error processing packets\n"));
return status;
}
// We have finaly found the TCP header
TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;
TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr);
-
- //ASSERT(TcpHeaderLen == 20);
if (CurrLength < TcpHeaderLen) {
- //IPOIB_PRINT(TRACE_LEVEL_VERBOSE, ETH, ("Error porcessing packets\n"));
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("Error processing packets\n"));
return status;
}
pLsoData->LsoHeaderSize = pLsoData->LsoHeaderSize + TcpHeaderLen;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101125/12d2bb25/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: LSO_revisited_ofw.patch
Type: application/octet-stream
Size: 12646 bytes
Desc: LSO_revisited_ofw.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101125/12d2bb25/attachment.obj>
More information about the ofw
mailing list