[ofw] RE: [IPoIB CM] fix for large IP packets fragmentation failure.
Alex Estrin
alex.estrin at qlogic.com
Fri Jan 23 14:27:46 PST 2009
Updated patch with some changes:
- The send descriptor pool can now grow.
- If we run out of descriptors anyway, the remaining send packets are queued,
in the hope that some descriptors will have been returned to the pool by the next attempt.
Thanks,
Alex.
>
> Hello,
>
> This patch introduces the use of send descriptors from a
> preallocated pool and
> fixes the failure to fragment large (> 30k) IP packets.
> Please review.
>
> Thanks,
> Alex.
>
> ---
Index: kernel/ipoib_cm.c
===================================================================
--- kernel/ipoib_cm.c (revision 1856)
+++ kernel/ipoib_cm.c (working copy)
@@ -796,6 +796,7 @@
uint32_t total_length = 0;
ipoib_endpt_t *p_endpt;
send_buf_t *p_send_buf;
+ ipoib_send_desc_t *p_desc;
size_t i;
BOOLEAN send_failed = FALSE;
@@ -831,7 +832,9 @@
CL_ASSERT( p_packet );
CL_ASSERT( IPOIB_PORT_FROM_PACKET( p_packet ) == p_port );
- p_send_buf = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_desc = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_send_buf = p_desc->p_buf;
+ ipoib_send_desc_mgr_put( p_port, p_desc );
switch( p_wc->status )
{
Index: kernel/ipoib_driver.h
===================================================================
--- kernel/ipoib_driver.h (revision 1856)
+++ kernel/ipoib_driver.h (working copy)
@@ -58,7 +58,7 @@
#define MAX_UD_PAYLOAD_MTU (MAX_IB_MTU - sizeof(ipoib_hdr_t))
#define DEFAULT_PAYLOAD_MTU (DEFAULT_MTU - sizeof(ipoib_hdr_t))
#define MAX_CM_PAYLOAD_MTU (65520)
-#define MAX_WRS_PER_MSG (MAX_CM_PAYLOAD_MTU/MAX_UD_PAYLOAD_MTU)
+#define MAX_WRS_PER_MSG ((MAX_CM_PAYLOAD_MTU/DEFAULT_PAYLOAD_MTU)+1)
/*
* Only the protocol type is sent as part of the UD payload
* since the rest of the Ethernet header is encapsulated in the
Index: kernel/ipoib_log.mc
===================================================================
--- kernel/ipoib_log.mc (revision 1856)
+++ kernel/ipoib_log.mc (working copy)
@@ -332,3 +332,10 @@
%2: Connected Mode initialized and operational.
.
+MessageId=0x005E
+Facility=IPoIB
+Severity=Error
+SymbolicName=EVENT_IPOIB_SEND_DESC_POOL
+Language=English
+%2: Failed to create send descriptors pool.
+.
Index: kernel/ipoib_port.c
===================================================================
--- kernel/ipoib_port.c (revision 1856)
+++ kernel/ipoib_port.c (working copy)
@@ -380,6 +380,24 @@
IN uint8_t* p_options,
IN uint32_t options_len,
IN BOOLEAN copy_all );
+
+/*****************************************************************************
+ Send Descriptors management
+*****************************************************************************/
+static ib_api_status_t
+__send_desc_mgr_init(
+ IN ipoib_port_t* const p_port );
+
+static void
+__send_desc_mgr_destroy(
+ IN ipoib_port_t* const p_port );
+
+static cl_status_t
+__send_desc_ctor(
+ IN void* const p_object,
+ IN void* context,
+ OUT cl_pool_item_t** const pp_pool_item );
+
/******************************************************************************
*
* Endpoint manager operations
@@ -671,6 +689,16 @@
("cl_spinlock_init returned %#x\n", cl_status) );
return IB_ERROR;
}
+
+ /* initialize send descriptors manager. */
+ status = __send_desc_mgr_init( p_port );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("__send_desc_mgr_init returned %s\n",
+ p_adapter->p_ifc->get_err_str( status )) );
+ return status;
+ }
/* Initialize the IB resource manager. */
status = __ib_mgr_init( p_port );
@@ -801,7 +829,8 @@
/* Wait for all sends and receives to get flushed. */
while( p_port->send_mgr.depth || p_port->recv_mgr.depth )
cl_thread_suspend( 0 );
-
+
+ __send_desc_mgr_destroy( p_port );
/* Destroy the send and receive managers before closing the CA. */
__ib_mgr_destroy( p_port );
@@ -4151,7 +4180,8 @@
if( p_port->p_adapter->params.cm_enabled )
{
IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
- (" ARP SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ (" ARP %s SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ( p_ib_arp->op == ARP_OP_REP ? "REP": "REQ"),
p_desc->p_endpt,
endpt_cm_get_state( p_desc->p_endpt ),
p_desc->p_endpt->cm_flag,
@@ -4391,7 +4421,7 @@
/* Store context in our reserved area of the packet. */
IPOIB_PORT_FROM_PACKET( p_desc->p_pkt ) = p_port;
IPOIB_ENDPT_FROM_PACKET( p_desc->p_pkt ) = p_desc->p_endpt;
- IPOIB_SEND_FROM_PACKET( p_desc->p_pkt ) = p_desc->p_buf;
+ IPOIB_SEND_FROM_PACKET( p_desc->p_pkt ) = p_desc;
IPOIB_EXIT( IPOIB_DBG_SEND );
return NDIS_STATUS_SUCCESS;
@@ -4495,7 +4525,7 @@
ExFreeToNPagedLookasideList(
&p_port->buf_mgr.send_buf_list, p_desc->p_buf );
}
-
+ ipoib_send_desc_mgr_put( p_port, p_desc );
IPOIB_EXIT( IPOIB_DBG_SEND );
}
@@ -4508,7 +4538,7 @@
{
NDIS_STATUS status;
ib_api_status_t ib_status;
- ipoib_send_desc_t desc;
+ ipoib_send_desc_t *p_desc;
uint32_t i;
eth_hdr_t *p_eth_hdr;
NDIS_BUFFER *p_buf;
@@ -4548,12 +4578,25 @@
cl_spinlock_acquire( &p_port->send_lock );
for( i = 0; i < num_packets; i++ )
{
- desc.p_pkt = p_packet_array[i];
- desc.p_endpt = NULL;
- desc.p_buf = NULL;
- desc.send_qp = NULL;
- desc.num_wrs = 1;
+ p_desc = ipoib_send_desc_mgr_get( p_port );
+ if( !p_desc )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
+ ("No available Send Descriptors.\n") );
+ while( i < num_packets )
+ {
+ cl_qlist_insert_tail( &p_port->send_mgr.pending_list,
+ IPOIB_LIST_ITEM_FROM_PACKET( p_packet_array[i++] ) );
+ }
+ break;
+ }
+ p_desc->p_pkt = p_packet_array[i];
+ p_desc->p_endpt = NULL;
+ p_desc->p_buf = NULL;
+ p_desc->send_qp = NULL;
+ p_desc->num_wrs = 1;
+
/* Get the ethernet header so we can find the endpoint. */
cl_perf_start( GetEthHdr );
status = __send_mgr_get_eth_hdr(
@@ -4562,7 +4605,7 @@
if( status != NDIS_STATUS_SUCCESS )
{
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, status );
+ __process_failed_send( p_port, p_desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
@@ -4614,10 +4657,11 @@
p_eth_hdr->dst.addr[3] = ((unsigned char*)&p_ip_hdr->dst_ip)[1];
}
h_end:
- status = __send_mgr_queue( p_port, p_eth_hdr, &desc.p_endpt );
+ status = __send_mgr_queue( p_port, p_eth_hdr, &p_desc->p_endpt );
cl_perf_stop( &p_port->p_adapter->perf, SendMgrQueue );
if( status == NDIS_STATUS_PENDING )
{
+ ipoib_send_desc_mgr_put( p_port, p_desc );
/* Queue all remaining packets. */
cl_perf_start( QueuePacket );
while( i < num_packets )
@@ -4636,30 +4680,31 @@
* sends to fail.
*/
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, NDIS_STATUS_SUCCESS );
+ __process_failed_send( p_port, p_desc, NDIS_STATUS_SUCCESS );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
cl_perf_start( BuildSendDesc );
- status = __build_send_desc( p_port, p_eth_hdr, p_buf, buf_len, &desc );
+ status = __build_send_desc( p_port, p_eth_hdr, p_buf, buf_len, p_desc );
cl_perf_stop( &p_port->p_adapter->perf, BuildSendDesc );
if( status != NDIS_STATUS_SUCCESS )
{
if( status == NDIS_STATUS_PENDING )
{
- ipoib_endpt_deref( desc.p_endpt );
+ ipoib_endpt_deref( p_desc->p_endpt );
+ ipoib_send_desc_mgr_put( p_port, p_desc );
break;
}
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, status );
+ __process_failed_send( p_port, p_desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
/* Post the WR. */
cl_perf_start( PostSend );
- ib_status = p_port->p_adapter->p_ifc->post_send( desc.send_qp, &desc.send_wr[0].wr, &p_wr_failed );
+ ib_status = p_port->p_adapter->p_ifc->post_send( p_desc->send_qp, &p_desc->send_wr[0].wr, &p_wr_failed );
cl_perf_stop( &p_port->p_adapter->perf, PostSend );
if( ib_status != IB_SUCCESS )
{
@@ -4667,7 +4712,7 @@
("ib_post_send returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( ib_status )) );
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, NDIS_STATUS_FAILURE );
+ __process_failed_send( p_port, p_desc, NDIS_STATUS_FAILURE );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
/* Flag the adapter as hung since posting is busted. */
p_port->p_adapter->hung = TRUE;
@@ -4689,7 +4734,7 @@
NDIS_STATUS status;
ib_api_status_t ib_status;
cl_list_item_t *p_item;
- ipoib_send_desc_t desc;
+ ipoib_send_desc_t *p_desc;
eth_hdr_t *p_eth_hdr;
NDIS_BUFFER *p_buf;
UINT buf_len;
@@ -4726,39 +4771,46 @@
("No available WQEs.\n") );
break;
}
-
- desc.p_pkt = IPOIB_PACKET_FROM_LIST_ITEM(
+ p_desc = ipoib_send_desc_mgr_get( p_port );
+ if( !p_desc )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
+ ("No available Send Descriptors.\n") );
+ break;
+ }
+ p_desc->p_pkt = IPOIB_PACKET_FROM_LIST_ITEM(
cl_qlist_remove_head( &p_port->send_mgr.pending_list ) );
- desc.p_endpt = NULL;
- desc.p_buf = NULL;
- desc.send_qp = NULL;
- desc.num_wrs = 1;
+ p_desc->p_endpt = NULL;
+ p_desc->p_buf = NULL;
+ p_desc->send_qp = NULL;
+ p_desc->num_wrs = 1;
/* Get the ethernet header so we can find the endpoint. */
status = __send_mgr_get_eth_hdr(
- desc.p_pkt, &p_buf, &p_eth_hdr, &buf_len );
+ p_desc->p_pkt, &p_buf, &p_eth_hdr, &buf_len );
if( status != NDIS_STATUS_SUCCESS )
{
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, status );
+ __process_failed_send( p_port, p_desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
cl_perf_start( GetEndpt );
- status = __endpt_mgr_ref( p_port, p_eth_hdr->dst, &desc.p_endpt );
+ status = __endpt_mgr_ref( p_port, p_eth_hdr->dst, &p_desc->p_endpt );
cl_perf_stop( &p_port->p_adapter->perf, GetEndpt );
if( status == NDIS_STATUS_PENDING )
{
- CL_ASSERT(desc.p_endpt == NULL);
+ CL_ASSERT( p_desc->p_endpt == NULL );
cl_qlist_insert_head( &p_port->send_mgr.pending_list,
- IPOIB_LIST_ITEM_FROM_PACKET( desc.p_pkt ) );
+ IPOIB_LIST_ITEM_FROM_PACKET( p_desc->p_pkt ) );
+ ipoib_send_desc_mgr_put( p_port, p_desc );
break;
}
else if( status != NDIS_STATUS_SUCCESS )
{
ASSERT( status == NDIS_STATUS_NO_ROUTE_TO_DESTINATION );
- CL_ASSERT(desc.p_endpt == NULL);
+ CL_ASSERT( p_desc->p_endpt == NULL );
if( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )
{
@@ -4768,7 +4820,8 @@
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
("Multicast Mac - trying to join.\n") );
cl_qlist_insert_head( &p_port->send_mgr.pending_list,
- IPOIB_LIST_ITEM_FROM_PACKET( desc.p_pkt ) );
+ IPOIB_LIST_ITEM_FROM_PACKET( p_desc->p_pkt ) );
+ ipoib_send_desc_mgr_put( p_port, p_desc );
break;
}
}
@@ -4778,31 +4831,32 @@
* sends to fail.
*/
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, NDIS_STATUS_SUCCESS );
+ __process_failed_send( p_port, p_desc, NDIS_STATUS_SUCCESS );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
cl_perf_start( BuildSendDesc );
- status = __build_send_desc( p_port, p_eth_hdr, p_buf, buf_len, &desc );
+ status = __build_send_desc( p_port, p_eth_hdr, p_buf, buf_len, p_desc );
cl_perf_stop( &p_port->p_adapter->perf, BuildSendDesc );
if( status != NDIS_STATUS_SUCCESS )
{
if( status == NDIS_STATUS_PENDING )
{
/* ARP REPLY packet queued */
- ipoib_endpt_deref( desc.p_endpt );
+ ipoib_endpt_deref( p_desc->p_endpt );
+ ipoib_send_desc_mgr_put( p_port, p_desc );
break;
}
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, status );
+ __process_failed_send( p_port, p_desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
continue;
}
/* Post the WR. */
cl_perf_start( PostSend );
- ib_status = p_port->p_adapter->p_ifc->post_send( desc.send_qp, &desc.send_wr[0].wr, &p_wr_failed );
+ ib_status = p_port->p_adapter->p_ifc->post_send( p_desc->send_qp, &p_desc->send_wr[0].wr, &p_wr_failed );
cl_perf_stop( &p_port->p_adapter->perf, PostSend );
if( ib_status != IB_SUCCESS )
{
@@ -4810,7 +4864,7 @@
("ib_post_send returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( ib_status )) );
cl_perf_start( ProcessFailedSends );
- __process_failed_send( p_port, &desc, NDIS_STATUS_FAILURE );
+ __process_failed_send( p_port, p_desc, NDIS_STATUS_FAILURE );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
/* Flag the adapter as hung since posting is busted. */
p_port->p_adapter->hung = TRUE;
@@ -4837,6 +4891,7 @@
uint32_t length;
ipoib_endpt_t *p_endpt;
send_buf_t *p_send_buf = NULL;
+ ipoib_send_desc_t *p_desc;
ip_stat_sel_t type;
size_t i;
PERF_DECLARE( SendCompBundle );
@@ -4886,8 +4941,9 @@
CL_ASSERT( p_packet );
CL_ASSERT( IPOIB_PORT_FROM_PACKET( p_packet ) == p_port );
p_endpt = IPOIB_ENDPT_FROM_PACKET( p_packet );
- p_send_buf = IPOIB_SEND_FROM_PACKET( p_packet );
-
+ p_desc = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_send_buf = p_desc->p_buf;
+ ipoib_send_desc_mgr_put( p_port, p_desc );
if( p_endpt->h_mcast )
{
if( p_endpt->dgid.multicast.raw_group_id[11] == 0xFF &&
@@ -4921,7 +4977,9 @@
if( p_packet )
{
p_endpt = IPOIB_ENDPT_FROM_PACKET( p_packet );
- p_send_buf = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_desc = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_send_buf = p_desc->p_buf;
+ ipoib_send_desc_mgr_put( p_port, p_desc );
ipoib_inc_send_stat( p_port->p_adapter, IP_STAT_DROPPED, 0 );
NdisMSendCompleteX( p_port->p_adapter->h_adapter,
p_packet, NDIS_STATUS_RESET_IN_PROGRESS );
@@ -4938,7 +4996,9 @@
if( p_packet )
{
p_endpt = IPOIB_ENDPT_FROM_PACKET( p_packet );
- p_send_buf = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_desc = IPOIB_SEND_FROM_PACKET( p_packet );
+ p_send_buf = p_desc->p_buf;
+ ipoib_send_desc_mgr_put( p_port, p_desc );
ipoib_inc_send_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
NdisMSendCompleteX( p_port->p_adapter->h_adapter,
p_packet, NDIS_STATUS_FAILURE );
@@ -7510,3 +7570,115 @@
}
return;
}
+
+
+/***************************************************
+ Send descriptors pool management
+**************************************************/
+
+static ib_api_status_t
+__send_desc_mgr_init(
+ IN ipoib_port_t* const p_port )
+{
+ cl_status_t cl_status;
+
+ IPOIB_ENTER( IPOIB_DBG_INIT );
+
+ cl_qpool_construct( &p_port->send_mgr.desc_pool );
+ cl_spinlock_init( &p_port->send_mgr.desc_lock );
+
+ /* Allocate send descriptors pool */
+ cl_status = cl_qpool_init( &p_port->send_mgr.desc_pool,
+ p_port->p_adapter->params.sq_depth,
+ 0,
+ 10,
+ sizeof( ipoib_send_desc_t ),
+ __send_desc_ctor,
+ NULL,
+ p_port );
+
+ if( cl_status != CL_SUCCESS )
+ {
+ NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+ EVENT_IPOIB_SEND_DESC_POOL, 1, cl_status );
+
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_qpool_init for send descriptors pool returned %#x\n", cl_status) );
+
+ return IB_INSUFFICIENT_MEMORY;
+ }
+
+ IPOIB_EXIT( IPOIB_DBG_INIT );
+ return IB_SUCCESS;
+}
+
+static void
+__send_desc_mgr_destroy(
+ IN ipoib_port_t* const p_port )
+{
+
+ IPOIB_ENTER(IPOIB_DBG_INIT );
+
+ CL_ASSERT( p_port );
+ cl_spinlock_destroy( &p_port->send_mgr.desc_lock );
+ cl_qpool_destroy( &p_port->send_mgr.desc_pool );
+
+ IPOIB_EXIT( IPOIB_DBG_INIT );
+}
+
+static cl_status_t
+__send_desc_ctor(
+ IN void* const p_object,
+ IN void* context,
+ OUT cl_pool_item_t** const pp_pool_item )
+{
+ ipoib_send_desc_t* p_desc;
+ ipoib_port_t* p_port;
+
+ CL_ASSERT( p_object );
+ CL_ASSERT( context );
+
+ p_desc = (ipoib_send_desc_t*)p_object;
+ p_port = (ipoib_port_t*)context;
+ p_desc->p_endpt = NULL;
+ p_desc->p_buf = NULL;
+ p_desc->send_qp = NULL;
+ p_desc->num_wrs = 1;
+
+ *pp_pool_item = &p_desc->pool_item;
+ return CL_SUCCESS;
+}
+
+inline ipoib_send_desc_t*
+ipoib_send_desc_mgr_get(
+ IN ipoib_port_t* const p_port )
+{
+ ipoib_send_desc_t *p_desc;
+ cl_spinlock_acquire( &p_port->send_mgr.desc_lock );
+ p_desc = (ipoib_send_desc_t*)cl_qpool_get( &p_port->send_mgr.desc_pool );
+ cl_spinlock_release( &p_port->send_mgr.desc_lock );
+
+ return p_desc;
+}
+
+void
+ipoib_send_desc_mgr_put(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_send_desc_t* const p_desc )
+{
+ /* Return the descriptor to its pool. */
+ cl_spinlock_acquire( &p_port->send_mgr.desc_lock );
+ cl_qpool_put( &p_port->send_mgr.desc_pool, &p_desc->pool_item );
+ cl_spinlock_release( &p_port->send_mgr.desc_lock );
+
+}
+
+void
+ipoib_send_desc_mgr_put_list(
+ IN ipoib_port_t* const p_port,
+ IN cl_qlist_t* const p_list )
+{
+ cl_spinlock_acquire( &p_port->send_mgr.desc_lock );
+ cl_qpool_put_list( &p_port->send_mgr.desc_pool, p_list );
+ cl_spinlock_release( &p_port->send_mgr.desc_lock );
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: send_desc_pool.patch
Type: application/octet-stream
Size: 17377 bytes
Desc: send_desc_pool.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090123/b1abbf9f/attachment.obj>
More information about the ofw
mailing list