[ofw] [PATCH 3/3] Mlx4 (checksum): improving the mechanism of csum offload

Alex Naslednikov xalex at mellanox.co.il
Thu Sep 4 09:40:15 PDT 2008


This is cumulative patch that fixes all open issues shown the last week:
SGE wrong calculation
LSO messages in log files
Fixes in ipoib csum offload mechanism
Adding 3 states for CSUM offload : disabled, enabled if supported by HW
and bypass (see inline)

Signed-off by: xalex (Alexander Naslednikov)
Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
(revision 3106)
+++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
(revision 3107)
@@ -825,6 +825,8 @@
 	uint64_t			vaddr;
 	net32_t				rkey;
 	ib_qp_attr_t		qp_attr;
+	ib_ca_attr_t *		p_ca_attr;
+	uint32_t			attr_size;
 
 	IPOIB_ENTER( IPOIB_DBG_INIT );
 
@@ -889,7 +891,7 @@
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
 		return status;
 	}
-
+	
 	/* Allocate the QP. */
 	cl_memclr( &qp_create, sizeof(qp_create) );
 	qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
@@ -897,8 +899,47 @@
 	qp_create.rq_sge = 2;	/* To support buffers spanning pages. */
 	qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq;
 	qp_create.sq_depth = p_port->p_adapter->params.sq_depth;
-	//TODO: Figure out the right number of SGE entries for sends.
-	qp_create.sq_sge = MAX_SEND_SGE;
+	
+	//Figure out the right number of SGE entries for sends.
+	/* Get the size of the CA attribute structure. */
+	status = p_port->p_adapter->p_ifc->query_ca(
p_port->ib_mgr.h_ca, NULL, &attr_size );
+	if( status != IB_INSUFFICIENT_MEMORY )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("ib_query_ca failed with status %s.\n",
p_port->p_adapter->p_ifc->get_err_str(status)) );
+		return status;
+	}
+
+	/* Allocate enough space to store the attribute structure. */
+	p_ca_attr = cl_malloc( attr_size );
+	if( !p_ca_attr )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("cl_malloc failed to allocate p_ca_attr!\n") );
+		return IB_INSUFFICIENT_RESOURCES;
+	}
+
+	/* Query the CA attributes. */
+	status = p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca,
p_ca_attr, &attr_size );
+	if( status != IB_SUCCESS )
+	{
+		cl_free( p_ca_attr );
+
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("ib_query_ca failed with status %s.\n",
p_port->p_adapter->p_ifc->get_err_str(status)) );
+		return status;
+	}
+#define UD_QP_USED_SGE 3
+	qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ?
MAX_SEND_SGE  : (p_ca_attr->max_sges - UD_QP_USED_SGE);
+	if (!p_ca_attr->ipoib_csum) { 
+		//checksum is not supported by device
+		//user must specify BYPASS to explicitly cancel checksum
calculation
+		if (p_port->p_adapter->params.send_chksum_offload ==
CSUM_ENABLED)
+			p_port->p_adapter->params.send_chksum_offload =
CSUM_DISABLED;
+		if (p_port->p_adapter->params.recv_chksum_offload ==
CSUM_ENABLED)
+			p_port->p_adapter->params.recv_chksum_offload =
CSUM_DISABLED;
+	}
+	
 	qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq;
 	qp_create.sq_signaled = TRUE;
 	status = p_port->p_adapter->p_ifc->create_qp(
@@ -1937,7 +1978,7 @@
 			
 		}
 		/* Successful completion.  Get the receive information.
*/
-		p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok;
+		p_desc->ndis_csum.Value = (ULONG) p_wc->recv.ud.csum_ok;
 		cl_perf_start( GetRecvEndpts );
 		__recv_get_endpts( p_port, p_desc, p_wc, &p_src, &p_dst
);
 		cl_perf_stop( &p_port->p_adapter->perf, GetRecvEndpts );
@@ -2580,10 +2621,22 @@
 			("__buf_mgr_get_ndis_pkt failed\n") );
 		return IB_INSUFFICIENT_RESOURCES;
 	}
-
+	if (p_port->p_adapter->params.recv_chksum_offload !=
CSUM_BYPASS) {
 	/* Get the checksums directly from packet information. */
-	NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
TcpIpChecksumPacketInfo ) = 
-		(PVOID) (uintn_t) (p_desc->ndis_csum.Value);
+	/* In this case, no one of cheksum's cat get false value */
+	/* If hardware checksum failed or wasn't calculated, NDIS will
recalculate it again */
+		NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
TcpIpChecksumPacketInfo ) = 
+			(PVOID) (uintn_t) (p_desc->ndis_csum.Value);
+	} else {
+		NDIS_TCP_IP_CHECKSUM_PACKET_INFO	chksum;
+		/* Flag the checksums as having been calculated. */
+		chksum.Value = 0;
+		chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;
+		chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;
+		chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE;
+		NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
TcpIpChecksumPacketInfo ) =
+		(void*)(uintn_t)chksum.Value;
+	}
 	ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );
 
 	IPOIB_EXIT( IPOIB_DBG_RECV );
@@ -3833,9 +3886,9 @@
 		p_desc->wr.dgrm.ud.hlen = lso_header_size; 
 		// Tell NDIS how much we will send.
 		PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] =
UlongToPtr(PacketLength);
-		p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM |
IB_SEND_OPT_TX_TCP_UDP_CSUM);
+		p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM |
IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED;
 		__send_gen(p_port, p_desc, IndexOfData);
-		p_desc->wr.wr_type = ( WR_LSO | IB_SEND_OPT_SIGNALED);
+		p_desc->wr.wr_type = WR_LSO;
 	} else {
 
 		/* Setup the first local data segment (used for the
IPoIB header). */
Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
(revision 3106)
+++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
(revision 3107)
@@ -60,15 +60,15 @@
 /*
  * Macros
  */
+typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS}
tCsumTypeEn;
 
-
 typedef struct _ipoib_params
 {
 	int32_t		rq_depth;
 	int32_t		rq_low_watermark;
 	int32_t		sq_depth;
-	boolean_t	send_chksum_offload;
-	boolean_t	recv_chksum_offload;
+	int32_t		send_chksum_offload;  //is actually of type
tCsumTypeEn
+	int32_t		recv_chksum_offload; //is actually of type
tCsumTypeEn
 	uint32_t	sa_timeout;
 	uint32_t	sa_retry_cnt;
 	uint32_t	recv_pool_ratio;
@@ -91,12 +91,11 @@
 *		Number of send WQEs to allocate.
 *
 *	send_chksum_offload
-*		Flag to indicate whether to offload send checksums.
This will make it
-*		so that IPoIB packets should never be forwarded out of
the IB subnet
-*		without recalculating the checksum.
-*
 *	recv_chksum_offload
-*		Flag to indicate whether to offload recv checksums.
+*		Flags to indicate whether to offload send/recv
checksums.
+*		0 - No hardware cheksum
+*		1 - Try to offload if the device support it
+*		2 - Always report success (checksum bypass)
 *
 *	wsdp_enabled
 *		Flag to indicate whether WSDP is enabled for an adapter
adapter.
Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
(revision 3106)
+++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
(revision 3107)
@@ -150,8 +150,8 @@
 	{NDIS_STRING_CONST("RqDepth"),          1,
IPOIB_OFFSET(rq_depth),              IPOIB_SIZE(rq_depth),
512,        128,    1024},
 	{NDIS_STRING_CONST("RqLowWatermark"),   0,
IPOIB_OFFSET(rq_low_watermark),      IPOIB_SIZE(rq_low_watermark),   4,
2,      8},
 	{NDIS_STRING_CONST("SqDepth"),          1,
IPOIB_OFFSET(sq_depth),              IPOIB_SIZE(sq_depth),
512,        128,    1024},
-	{NDIS_STRING_CONST("SendChksum"),       1,
IPOIB_OFFSET(send_chksum_offload),   IPOIB_SIZE(send_chksum_offload),0,
0,      1},
-	{NDIS_STRING_CONST("RecvChksum"),       1,
IPOIB_OFFSET(recv_chksum_offload),   IPOIB_SIZE(recv_chksum_offload),0,
0,      1},
+	{NDIS_STRING_CONST("SendChksum"),       1,
IPOIB_OFFSET(send_chksum_offload),
IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS},
+	{NDIS_STRING_CONST("RecvChksum"),       1,
IPOIB_OFFSET(recv_chksum_offload),
IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS},
 	{NDIS_STRING_CONST("SaTimeout"),        1,
IPOIB_OFFSET(sa_timeout),            IPOIB_SIZE(sa_timeout),
1000,       250,    UINT_MAX},
 	{NDIS_STRING_CONST("SaRetries"),        1,
IPOIB_OFFSET(sa_retry_cnt),          IPOIB_SIZE(sa_retry_cnt),       10,
1,      UINT_MAX},
 	{NDIS_STRING_CONST("RecvRatio"),        1,
IPOIB_OFFSET(recv_pool_ratio),       IPOIB_SIZE(recv_pool_ratio),    1,
1,      10},
@@ -1450,28 +1450,20 @@
 	p_offload_task->TaskBufferLength =
sizeof(NDIS_TASK_TCP_IP_CHECKSUM);
 	p_offload_chksum =
 		(NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer;
-
+	
 	p_offload_chksum->V4Transmit.IpOptionsSupported =
-		p_adapter->params.send_chksum_offload;
 	p_offload_chksum->V4Transmit.TcpOptionsSupported =
-		p_adapter->params.send_chksum_offload;
 	p_offload_chksum->V4Transmit.TcpChecksum =
-		p_adapter->params.send_chksum_offload;
 	p_offload_chksum->V4Transmit.UdpChecksum =
-		p_adapter->params.send_chksum_offload;
 	p_offload_chksum->V4Transmit.IpChecksum =
-		p_adapter->params.send_chksum_offload;
+		!!(p_adapter->params.send_chksum_offload);
 
 	p_offload_chksum->V4Receive.IpOptionsSupported =
-		p_adapter->params.recv_chksum_offload;
 	p_offload_chksum->V4Receive.TcpOptionsSupported =
-		p_adapter->params.recv_chksum_offload;
 	p_offload_chksum->V4Receive.TcpChecksum =
-			p_adapter->params.recv_chksum_offload;
 	p_offload_chksum->V4Receive.UdpChecksum =
-		p_adapter->params.recv_chksum_offload;
 	p_offload_chksum->V4Receive.IpChecksum =
-		p_adapter->params.recv_chksum_offload;
+		!!(p_adapter->params.recv_chksum_offload);
 
 	p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE;
 	p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE;
Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
===================================================================
--- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
(revision 3106)
+++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
(revision 3107)
@@ -95,18 +95,28 @@
 
 HKR, Ndi\Params\SendChksum,		ParamDesc,	0, "Send
Checksum Offload"
 HKR, Ndi\Params\SendChksum,		Type,		0, "enum"
-HKR, Ndi\Params\SendChksum,		Default,	0, "0"
+HKR, Ndi\Params\SendChksum,		Default,	0, "1"
 HKR, Ndi\Params\SendChksum,		Optional,	0, "0"
 HKR, Ndi\Params\SendChksum\enum,	"0",	0, "Disabled"
-HKR, Ndi\Params\SendChksum\enum,	"1",	0, "Enabled"
+HKR, Ndi\Params\SendChksum\enum,	"1",	0, "Enabled (if
supported by HW)"
+HKR, Ndi\Params\SendChksum\enum,	"2",	0, "Bypass"
 
 HKR, Ndi\Params\RecvChksum,		ParamDesc,	0, "Recv
Checksum Offload"
 HKR, Ndi\Params\RecvChksum,		Type,		0, "enum"
-HKR, Ndi\Params\RecvChksum,		Default,	0, "0"
+HKR, Ndi\Params\RecvChksum,		Default,	0, "1"
 HKR, Ndi\Params\RecvChksum,		Optional,	0, "0"
 HKR, Ndi\Params\RecvChksum\enum,	"0",	0, "Disabled"
-HKR, Ndi\Params\RecvChksum\enum,	"1",	0, "Enabled"
+HKR, Ndi\Params\RecvChksum\enum,	"1",	0, "Enabled (if
supported by HW)"
+HKR, Ndi\Params\RecvChksum\enum,	"2",	0, "Bypass"
 
+HKR, Ndi\Params\lso,		ParamDesc,	0, "Large Send Offload"
+HKR, Ndi\Params\lso,		Type,		0, "enum"
+HKR, Ndi\Params\lso,		Default,	0, "0"
+HKR, Ndi\Params\lso,		Optional,	0, "0"
+HKR, Ndi\Params\lso\enum,	"0",	0, "Disabled"
+HKR, Ndi\Params\lso\enum,	"1",	0, "Enabled"
+
+
 HKR, Ndi\Params\SaTimeout,		ParamDesc,	0, "SA Query
Timeout (ms)"
 HKR, Ndi\Params\SaTimeout,		Type,		0, "dword"
 HKR, Ndi\Params\SaTimeout,		Default,	0, "1000"
Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
===================================================================
--- D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
(revision 3106)
+++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
(revision 3107)
@@ -498,7 +498,7 @@
 		wc->recv.ud.path_bits		=
(u8)(cqe->g_mlpath_rqpn & 0x7f);
 		wc->recv.ud.recv_opt		|= cqe->g_mlpath_rqpn &
0x080 ? IB_RECV_OPT_GRH_VALID : 0;
 		wc->recv.ud.pkey_index	=
(u16)(be32_to_cpu(cqe->immed_rss_invalid)  & 0x7f);
-		wc->csum_ok =
mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
+		wc->recv.ud.csum_ok =
mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
 	}
 	if (!is_send && cqe->rlid == 0){
 		MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found
rlid == 0 \n "));



More information about the ofw mailing list