[ofw] [PATCH 3/3] Mlx4 (checksum): improving the mechanism ofcsum offload

Alex Estrin alex.estrin at qlogic.com
Mon Sep 8 05:28:31 PDT 2008


Hi Xalex, Tzachi,
Would you please provide a bit more details for send SGE calculations:

> +#define UD_QP_USED_SGE 3
> +	qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ? 
> MAX_SEND_SGE  : (p_ca_attr->max_sges - UD_QP_USED_SGE);

What these 3 SGEs are used for?
Thanks,
Alex.

> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org 
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Tzachi Dar
> Sent: Monday, September 08, 2008 3:42 AM
> To: Alex Naslednikov; ofw at lists.openfabrics.org
> Subject: RE: [ofw] [PATCH 3/3] Mlx4 (checksum): improving the 
> mechanism ofcsum offload
> 
> Applied to trunk and branch.
> 
> Thanks
> Tzachi 
> 
> > -----Original Message-----
> > From: Alex Naslednikov 
> > Sent: Thursday, September 04, 2008 7:40 PM
> > To: Alex Naslednikov; Tzachi Dar; ofw at lists.openfabrics.org
> > Cc: Ishai Rabinovitz
> > Subject: [ofw] [PATCH 3/3] Mlx4 (checksum): improving the 
> > mechanism of csum offload
> > 
> > This is cumulative patch that fixes all open issues shown the 
> > last week:
> > SGE wrong calculation
> > LSO messages in log files
> > Fixes in ipoib csum offload mechanism
> > Adding 3 states for CSUM offload : disabled, enabled if 
> > supported by HW and bypass (see inline)
> > 
> > Signed-off by: xalex (Alexander Naslednikov)
> > Index: 
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
> > ===================================================================
> > --- 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib
> > _port.c	(revision 3106)
> > +++ 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib
> > _port.c	(revision 3107)
> > @@ -825,6 +825,8 @@
> >  	uint64_t			vaddr;
> >  	net32_t				rkey;
> >  	ib_qp_attr_t		qp_attr;
> > +	ib_ca_attr_t *		p_ca_attr;
> > +	uint32_t			attr_size;
> >  
> >  	IPOIB_ENTER( IPOIB_DBG_INIT );
> >  
> > @@ -889,7 +891,7 @@
> >  			p_port->p_adapter->p_ifc->get_err_str( 
> > status )) );
> >  		return status;
> >  	}
> > -
> > +	
> >  	/* Allocate the QP. */
> >  	cl_memclr( &qp_create, sizeof(qp_create) );
> >  	qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM; @@ -897,8 +899,47 @@
> >  	qp_create.rq_sge = 2;	/* To support buffers spanning pages. */
> >  	qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq;
> >  	qp_create.sq_depth = p_port->p_adapter->params.sq_depth;
> > -	//TODO: Figure out the right number of SGE entries for sends.
> > -	qp_create.sq_sge = MAX_SEND_SGE;
> > +	
> > +	//Figure out the right number of SGE entries for sends.
> > +	/* Get the size of the CA attribute structure. */
> > +	status = p_port->p_adapter->p_ifc->query_ca( 
> > p_port->ib_mgr.h_ca, NULL, &attr_size );
> > +	if( status != IB_INSUFFICIENT_MEMORY )
> > +	{
> > +		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> > +			("ib_query_ca failed with status 
> > %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
> > +		return status;
> > +	}
> > +
> > +	/* Allocate enough space to store the attribute structure. */
> > +	p_ca_attr = cl_malloc( attr_size );
> > +	if( !p_ca_attr )
> > +	{
> > +		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> > +			("cl_malloc failed to allocate p_ca_attr!\n") );
> > +		return IB_INSUFFICIENT_RESOURCES;
> > +	}
> > +
> > +	/* Query the CA attributes. */
> > +	status = 
> > p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, 
> > p_ca_attr, &attr_size );
> > +	if( status != IB_SUCCESS )
> > +	{
> > +		cl_free( p_ca_attr );
> > +
> > +		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> > +			("ib_query_ca failed with status 
> > %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
> > +		return status;
> > +	}
> > +#define UD_QP_USED_SGE 3
> > +	qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ? 
> > MAX_SEND_SGE  : (p_ca_attr->max_sges - UD_QP_USED_SGE);
> > +	if (!p_ca_attr->ipoib_csum) { 
> > +		//checksum is not supported by device
> > +		//user must specify BYPASS to explicitly cancel 
> > checksum calculation
> > +		if 
> > (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED)
> > +			
> > p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED;
> > +		if 
> > (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED)
> > +			
> > p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED;
> > +	}
> > +	
> >  	qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq;
> >  	qp_create.sq_signaled = TRUE;
> >  	status = p_port->p_adapter->p_ifc->create_qp(
> > @@ -1937,7 +1978,7 @@
> >  			
> >  		}
> >  		/* Successful completion.  Get the receive 
> > information. */
> > -		p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok;
> > +		p_desc->ndis_csum.Value = (ULONG) p_wc->recv.ud.csum_ok;
> >  		cl_perf_start( GetRecvEndpts );
> >  		__recv_get_endpts( p_port, p_desc, p_wc, 
> > &p_src, &p_dst );
> >  		cl_perf_stop( &p_port->p_adapter->perf, 
> > GetRecvEndpts ); @@ -2580,10 +2621,22 @@
> >  			("__buf_mgr_get_ndis_pkt failed\n") );
> >  		return IB_INSUFFICIENT_RESOURCES;
> >  	}
> > -
> > +	if (p_port->p_adapter->params.recv_chksum_offload != 
> > CSUM_BYPASS) {
> >  	/* Get the checksums directly from packet information. */
> > -	NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, 
> > TcpIpChecksumPacketInfo ) = 
> > -		(PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> > +	/* In this case, no one of cheksum's cat get false value */
> > +	/* If hardware checksum failed or wasn't calculated, 
> > NDIS will recalculate it again */
> > +		NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, 
> > TcpIpChecksumPacketInfo ) = 
> > +			(PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> > +	} else {
> > +		NDIS_TCP_IP_CHECKSUM_PACKET_INFO	chksum;
> > +		/* Flag the checksums as having been calculated. */
> > +		chksum.Value = 0;
> > +		chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;
> > +		chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;
> > +		chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE;
> > +		NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, 
> > TcpIpChecksumPacketInfo ) =
> > +		(void*)(uintn_t)chksum.Value;
> > +	}
> >  	ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );
> >  
> >  	IPOIB_EXIT( IPOIB_DBG_RECV );
> > @@ -3833,9 +3886,9 @@
> >  		p_desc->wr.dgrm.ud.hlen = lso_header_size; 
> >  		// Tell NDIS how much we will send.
> >  		PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] 
> > = UlongToPtr(PacketLength);
> > -		p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM 
> > | IB_SEND_OPT_TX_TCP_UDP_CSUM);
> > +		p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM | 
> > +IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED;
> >  		__send_gen(p_port, p_desc, IndexOfData);
> > -		p_desc->wr.wr_type = ( WR_LSO | IB_SEND_OPT_SIGNALED);
> > +		p_desc->wr.wr_type = WR_LSO;
> >  	} else {
> >  
> >  		/* Setup the first local data segment (used for 
> > the IPoIB header). */
> > Index: 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
> > ===================================================================
> > --- 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad
> > apter.h	(revision 3106)
> > +++ 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad
> > apter.h	(revision 3107)
> > @@ -60,15 +60,15 @@
> >  /*
> >   * Macros
> >   */
> > +typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS} 
> > +tCsumTypeEn;
> >  
> > -
> >  typedef struct _ipoib_params
> >  {
> >  	int32_t		rq_depth;
> >  	int32_t		rq_low_watermark;
> >  	int32_t		sq_depth;
> > -	boolean_t	send_chksum_offload;
> > -	boolean_t	recv_chksum_offload;
> > +	int32_t		send_chksum_offload;  //is actually of 
> > type tCsumTypeEn
> > +	int32_t		recv_chksum_offload; //is actually of 
> > type tCsumTypeEn
> >  	uint32_t	sa_timeout;
> >  	uint32_t	sa_retry_cnt;
> >  	uint32_t	recv_pool_ratio;
> > @@ -91,12 +91,11 @@
> >  *		Number of send WQEs to allocate.
> >  *
> >  *	send_chksum_offload
> > -*		Flag to indicate whether to offload send 
> > checksums.  This will make it
> > -*		so that IPoIB packets should never be forwarded 
> > out of the IB subnet
> > -*		without recalculating the checksum.
> > -*
> >  *	recv_chksum_offload
> > -*		Flag to indicate whether to offload recv checksums.
> > +*		Flags to indicate whether to offload send/recv 
> > checksums.
> > +*		0 - No hardware cheksum
> > +*		1 - Try to offload if the device support it
> > +*		2 - Always report success (checksum bypass)
> >  *
> >  *	wsdp_enabled
> >  *		Flag to indicate whether WSDP is enabled for an 
> > adapter adapter.
> > Index: 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
> > ===================================================================
> > --- 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d
> > river.c	(revision 3106)
> > +++ 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d
> > river.c	(revision 3107)
> > @@ -150,8 +150,8 @@
> >  	{NDIS_STRING_CONST("RqDepth"),          1, 
> > IPOIB_OFFSET(rq_depth),              IPOIB_SIZE(rq_depth),    
> >        512,        128,    1024},
> >  	{NDIS_STRING_CONST("RqLowWatermark"),   0, 
> > IPOIB_OFFSET(rq_low_watermark),      
> > IPOIB_SIZE(rq_low_watermark),   4,          2,      8},
> >  	{NDIS_STRING_CONST("SqDepth"),          1, 
> > IPOIB_OFFSET(sq_depth),              IPOIB_SIZE(sq_depth),    
> >        512,        128,    1024},
> > -	{NDIS_STRING_CONST("SendChksum"),       1, 
> > IPOIB_OFFSET(send_chksum_offload),   
> > IPOIB_SIZE(send_chksum_offload),0,          0,      1},
> > -	{NDIS_STRING_CONST("RecvChksum"),       1, 
> > IPOIB_OFFSET(recv_chksum_offload),   
> > IPOIB_SIZE(recv_chksum_offload),0,          0,      1},
> > +	{NDIS_STRING_CONST("SendChksum"),       1, 
> > IPOIB_OFFSET(send_chksum_offload),   
> > IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU
> > M_BYPASS},
> > +	{NDIS_STRING_CONST("RecvChksum"),       1, 
> > IPOIB_OFFSET(recv_chksum_offload),   
> > IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU
> > M_BYPASS},
> >  	{NDIS_STRING_CONST("SaTimeout"),        1, 
> > IPOIB_OFFSET(sa_timeout),            IPOIB_SIZE(sa_timeout),  
> >        1000,       250,    UINT_MAX},
> >  	{NDIS_STRING_CONST("SaRetries"),        1, 
> > IPOIB_OFFSET(sa_retry_cnt),          
> > IPOIB_SIZE(sa_retry_cnt),       10,         1,      UINT_MAX},
> >  	{NDIS_STRING_CONST("RecvRatio"),        1, 
> > IPOIB_OFFSET(recv_pool_ratio),       
> > IPOIB_SIZE(recv_pool_ratio),    1,          1,      10},
> > @@ -1450,28 +1450,20 @@
> >  	p_offload_task->TaskBufferLength = 
> > sizeof(NDIS_TASK_TCP_IP_CHECKSUM);
> >  	p_offload_chksum =
> >  		(NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer;
> > -
> > +	
> >  	p_offload_chksum->V4Transmit.IpOptionsSupported =
> > -		p_adapter->params.send_chksum_offload;
> >  	p_offload_chksum->V4Transmit.TcpOptionsSupported =
> > -		p_adapter->params.send_chksum_offload;
> >  	p_offload_chksum->V4Transmit.TcpChecksum =
> > -		p_adapter->params.send_chksum_offload;
> >  	p_offload_chksum->V4Transmit.UdpChecksum =
> > -		p_adapter->params.send_chksum_offload;
> >  	p_offload_chksum->V4Transmit.IpChecksum =
> > -		p_adapter->params.send_chksum_offload;
> > +		!!(p_adapter->params.send_chksum_offload);
> >  
> >  	p_offload_chksum->V4Receive.IpOptionsSupported =
> > -		p_adapter->params.recv_chksum_offload;
> >  	p_offload_chksum->V4Receive.TcpOptionsSupported =
> > -		p_adapter->params.recv_chksum_offload;
> >  	p_offload_chksum->V4Receive.TcpChecksum =
> > -			p_adapter->params.recv_chksum_offload;
> >  	p_offload_chksum->V4Receive.UdpChecksum =
> > -		p_adapter->params.recv_chksum_offload;
> >  	p_offload_chksum->V4Receive.IpChecksum =
> > -		p_adapter->params.recv_chksum_offload;
> > +		!!(p_adapter->params.recv_chksum_offload);
> >  
> >  	p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE;
> >  	p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE;
> > Index: 
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
> > ===================================================================
> > --- 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip
> > oib.inx	(revision 3106)
> > +++ 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip
> > oib.inx	(revision 3107)
> > @@ -95,18 +95,28 @@
> >  
> >  HKR, Ndi\Params\SendChksum,		ParamDesc,	0, 
> > "Send Checksum Offload"
> >  HKR, Ndi\Params\SendChksum,		Type,		
> 0, "enum"
> > -HKR, Ndi\Params\SendChksum,		Default,	0, "0"
> > +HKR, Ndi\Params\SendChksum,		Default,	0, "1"
> >  HKR, Ndi\Params\SendChksum,		Optional,	0, "0"
> >  HKR, Ndi\Params\SendChksum\enum,	"0",	0, "Disabled"
> > -HKR, Ndi\Params\SendChksum\enum,	"1",	0, "Enabled"
> > +HKR, Ndi\Params\SendChksum\enum,	"1",	0, "Enabled (if 
> > supported by HW)"
> > +HKR, Ndi\Params\SendChksum\enum,	"2",	0, "Bypass"
> >  
> >  HKR, Ndi\Params\RecvChksum,		ParamDesc,	0, 
> > "Recv Checksum Offload"
> >  HKR, Ndi\Params\RecvChksum,		Type,		
> 0, "enum"
> > -HKR, Ndi\Params\RecvChksum,		Default,	0, "0"
> > +HKR, Ndi\Params\RecvChksum,		Default,	0, "1"
> >  HKR, Ndi\Params\RecvChksum,		Optional,	0, "0"
> >  HKR, Ndi\Params\RecvChksum\enum,	"0",	0, "Disabled"
> > -HKR, Ndi\Params\RecvChksum\enum,	"1",	0, "Enabled"
> > +HKR, Ndi\Params\RecvChksum\enum,	"1",	0, "Enabled (if 
> > supported by HW)"
> > +HKR, Ndi\Params\RecvChksum\enum,	"2",	0, "Bypass"
> >  
> > +HKR, Ndi\Params\lso,		ParamDesc,	0, 
> "Large Send Offload"
> > +HKR, Ndi\Params\lso,		Type,		0, "enum"
> > +HKR, Ndi\Params\lso,		Default,	0, "0"
> > +HKR, Ndi\Params\lso,		Optional,	0, "0"
> > +HKR, Ndi\Params\lso\enum,	"0",	0, "Disabled"
> > +HKR, Ndi\Params\lso\enum,	"1",	0, "Enabled"
> > +
> > +
> >  HKR, Ndi\Params\SaTimeout,		ParamDesc,	0, "SA 
> > Query Timeout (ms)"
> >  HKR, Ndi\Params\SaTimeout,		Type,		0, "dword"
> >  HKR, Ndi\Params\SaTimeout,		Default,	0, "1000"
> > Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> > ===================================================================
> > --- 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c	
> > (revision 3106)
> > +++ 
> > D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c	
> > (revision 3107)
> > @@ -498,7 +498,7 @@
> >  		wc->recv.ud.path_bits		= 
> > (u8)(cqe->g_mlpath_rqpn & 0x7f);
> >  		wc->recv.ud.recv_opt		|= 
> > cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;
> >  		wc->recv.ud.pkey_index	= 
> > (u16)(be32_to_cpu(cqe->immed_rss_invalid)  & 0x7f);
> > -		wc->csum_ok = 
> > mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
> > +		wc->recv.ud.csum_ok = 
> > +mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
> >  	}
> >  	if (!is_send && cqe->rlid == 0){
> >  		
> > MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid 
> > == 0 \n "));
> _______________________________________________
> ofw mailing list
> ofw at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
> 



More information about the ofw mailing list