[ofw] [PATCH 3/3] Mlx4 (checksum): improving the mechanism of csum offload
Tzachi Dar
tzachid at mellanox.co.il
Mon Sep 8 00:42:17 PDT 2008
Applied to trunk and branch.
Thanks
Tzachi
> -----Original Message-----
> From: Alex Naslednikov
> Sent: Thursday, September 04, 2008 7:40 PM
> To: Alex Naslednikov; Tzachi Dar; ofw at lists.openfabrics.org
> Cc: Ishai Rabinovitz
> Subject: [ofw] [PATCH 3/3] Mlx4 (checksum): improving the
> mechanism of csum offload
>
> This is cumulative patch that fixes all open issues shown the
> last week:
> SGE wrong calculation
> LSO messages in log files
> Fixes in ipoib csum offload mechanism
> Adding 3 states for CSUM offload : disabled, enabled if
> supported by HW and bypass (see inline)
>
> Signed-off by: xalex (Alexander Naslednikov)
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
> ===================================================================
> ---
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib
> _port.c (revision 3106)
> +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib
> _port.c (revision 3107)
> @@ -825,6 +825,8 @@
> uint64_t vaddr;
> net32_t rkey;
> ib_qp_attr_t qp_attr;
> + ib_ca_attr_t * p_ca_attr;
> + uint32_t attr_size;
>
> IPOIB_ENTER( IPOIB_DBG_INIT );
>
> @@ -889,7 +891,7 @@
> p_port->p_adapter->p_ifc->get_err_str(
> status )) );
> return status;
> }
> -
> +
> /* Allocate the QP. */
> cl_memclr( &qp_create, sizeof(qp_create) );
> qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM; @@ -897,8 +899,47 @@
> qp_create.rq_sge = 2; /* To support buffers spanning pages. */
> qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq;
> qp_create.sq_depth = p_port->p_adapter->params.sq_depth;
> - //TODO: Figure out the right number of SGE entries for sends.
> - qp_create.sq_sge = MAX_SEND_SGE;
> +
> + //Figure out the right number of SGE entries for sends.
> + /* Get the size of the CA attribute structure. */
> + status = p_port->p_adapter->p_ifc->query_ca(
> p_port->ib_mgr.h_ca, NULL, &attr_size );
> + if( status != IB_INSUFFICIENT_MEMORY )
> + {
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("ib_query_ca failed with status
> %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
> + return status;
> + }
> +
> + /* Allocate enough space to store the attribute structure. */
> + p_ca_attr = cl_malloc( attr_size );
> + if( !p_ca_attr )
> + {
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("cl_malloc failed to allocate p_ca_attr!\n") );
> + return IB_INSUFFICIENT_RESOURCES;
> + }
> +
> + /* Query the CA attributes. */
> + status =
> p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca,
> p_ca_attr, &attr_size );
> + if( status != IB_SUCCESS )
> + {
> + cl_free( p_ca_attr );
> +
> + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> + ("ib_query_ca failed with status
> %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
> + return status;
> + }
> +#define UD_QP_USED_SGE 3
> + qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ?
> MAX_SEND_SGE : (p_ca_attr->max_sges - UD_QP_USED_SGE);
> + if (!p_ca_attr->ipoib_csum) {
> + //checksum is not supported by device
> + //user must specify BYPASS to explicitly cancel
> checksum calculation
> + if
> (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED)
> +
> p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED;
> + if
> (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED)
> +
> p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED;
> + }
> +
> qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq;
> qp_create.sq_signaled = TRUE;
> status = p_port->p_adapter->p_ifc->create_qp(
> @@ -1937,7 +1978,7 @@
>
> }
> /* Successful completion. Get the receive
> information. */
> - p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok;
> + p_desc->ndis_csum.Value = (ULONG) p_wc->recv.ud.csum_ok;
> cl_perf_start( GetRecvEndpts );
> __recv_get_endpts( p_port, p_desc, p_wc,
> &p_src, &p_dst );
> cl_perf_stop( &p_port->p_adapter->perf,
> GetRecvEndpts ); @@ -2580,10 +2621,22 @@
> ("__buf_mgr_get_ndis_pkt failed\n") );
> return IB_INSUFFICIENT_RESOURCES;
> }
> -
> + if (p_port->p_adapter->params.recv_chksum_offload !=
> CSUM_BYPASS) {
> /* Get the checksums directly from packet information. */
> - NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> - (PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> + /* In this case, no one of cheksum's cat get false value */
> + /* If hardware checksum failed or wasn't calculated,
> NDIS will recalculate it again */
> + NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> + (PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> + } else {
> + NDIS_TCP_IP_CHECKSUM_PACKET_INFO chksum;
> + /* Flag the checksums as having been calculated. */
> + chksum.Value = 0;
> + chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;
> + chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;
> + chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE;
> + NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> + (void*)(uintn_t)chksum.Value;
> + }
> ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );
>
> IPOIB_EXIT( IPOIB_DBG_RECV );
> @@ -3833,9 +3886,9 @@
> p_desc->wr.dgrm.ud.hlen = lso_header_size;
> // Tell NDIS how much we will send.
> PktExt->NdisPacketInfo[TcpLargeSendPacketInfo]
> = UlongToPtr(PacketLength);
> - p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM
> | IB_SEND_OPT_TX_TCP_UDP_CSUM);
> + p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM |
> +IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED;
> __send_gen(p_port, p_desc, IndexOfData);
> - p_desc->wr.wr_type = ( WR_LSO | IB_SEND_OPT_SIGNALED);
> + p_desc->wr.wr_type = WR_LSO;
> } else {
>
> /* Setup the first local data segment (used for
> the IPoIB header). */
> Index:
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
> ===================================================================
> ---
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad
> apter.h (revision 3106)
> +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad
> apter.h (revision 3107)
> @@ -60,15 +60,15 @@
> /*
> * Macros
> */
> +typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS}
> +tCsumTypeEn;
>
> -
> typedef struct _ipoib_params
> {
> int32_t rq_depth;
> int32_t rq_low_watermark;
> int32_t sq_depth;
> - boolean_t send_chksum_offload;
> - boolean_t recv_chksum_offload;
> + int32_t send_chksum_offload; //is actually of
> type tCsumTypeEn
> + int32_t recv_chksum_offload; //is actually of
> type tCsumTypeEn
> uint32_t sa_timeout;
> uint32_t sa_retry_cnt;
> uint32_t recv_pool_ratio;
> @@ -91,12 +91,11 @@
> * Number of send WQEs to allocate.
> *
> * send_chksum_offload
> -* Flag to indicate whether to offload send
> checksums. This will make it
> -* so that IPoIB packets should never be forwarded
> out of the IB subnet
> -* without recalculating the checksum.
> -*
> * recv_chksum_offload
> -* Flag to indicate whether to offload recv checksums.
> +* Flags to indicate whether to offload send/recv
> checksums.
> +* 0 - No hardware cheksum
> +* 1 - Try to offload if the device support it
> +* 2 - Always report success (checksum bypass)
> *
> * wsdp_enabled
> * Flag to indicate whether WSDP is enabled for an
> adapter adapter.
> Index:
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
> ===================================================================
> ---
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d
> river.c (revision 3106)
> +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d
> river.c (revision 3107)
> @@ -150,8 +150,8 @@
> {NDIS_STRING_CONST("RqDepth"), 1,
> IPOIB_OFFSET(rq_depth), IPOIB_SIZE(rq_depth),
> 512, 128, 1024},
> {NDIS_STRING_CONST("RqLowWatermark"), 0,
> IPOIB_OFFSET(rq_low_watermark),
> IPOIB_SIZE(rq_low_watermark), 4, 2, 8},
> {NDIS_STRING_CONST("SqDepth"), 1,
> IPOIB_OFFSET(sq_depth), IPOIB_SIZE(sq_depth),
> 512, 128, 1024},
> - {NDIS_STRING_CONST("SendChksum"), 1,
> IPOIB_OFFSET(send_chksum_offload),
> IPOIB_SIZE(send_chksum_offload),0, 0, 1},
> - {NDIS_STRING_CONST("RecvChksum"), 1,
> IPOIB_OFFSET(recv_chksum_offload),
> IPOIB_SIZE(recv_chksum_offload),0, 0, 1},
> + {NDIS_STRING_CONST("SendChksum"), 1,
> IPOIB_OFFSET(send_chksum_offload),
> IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU
> M_BYPASS},
> + {NDIS_STRING_CONST("RecvChksum"), 1,
> IPOIB_OFFSET(recv_chksum_offload),
> IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU
> M_BYPASS},
> {NDIS_STRING_CONST("SaTimeout"), 1,
> IPOIB_OFFSET(sa_timeout), IPOIB_SIZE(sa_timeout),
> 1000, 250, UINT_MAX},
> {NDIS_STRING_CONST("SaRetries"), 1,
> IPOIB_OFFSET(sa_retry_cnt),
> IPOIB_SIZE(sa_retry_cnt), 10, 1, UINT_MAX},
> {NDIS_STRING_CONST("RecvRatio"), 1,
> IPOIB_OFFSET(recv_pool_ratio),
> IPOIB_SIZE(recv_pool_ratio), 1, 1, 10},
> @@ -1450,28 +1450,20 @@
> p_offload_task->TaskBufferLength =
> sizeof(NDIS_TASK_TCP_IP_CHECKSUM);
> p_offload_chksum =
> (NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer;
> -
> +
> p_offload_chksum->V4Transmit.IpOptionsSupported =
> - p_adapter->params.send_chksum_offload;
> p_offload_chksum->V4Transmit.TcpOptionsSupported =
> - p_adapter->params.send_chksum_offload;
> p_offload_chksum->V4Transmit.TcpChecksum =
> - p_adapter->params.send_chksum_offload;
> p_offload_chksum->V4Transmit.UdpChecksum =
> - p_adapter->params.send_chksum_offload;
> p_offload_chksum->V4Transmit.IpChecksum =
> - p_adapter->params.send_chksum_offload;
> + !!(p_adapter->params.send_chksum_offload);
>
> p_offload_chksum->V4Receive.IpOptionsSupported =
> - p_adapter->params.recv_chksum_offload;
> p_offload_chksum->V4Receive.TcpOptionsSupported =
> - p_adapter->params.recv_chksum_offload;
> p_offload_chksum->V4Receive.TcpChecksum =
> - p_adapter->params.recv_chksum_offload;
> p_offload_chksum->V4Receive.UdpChecksum =
> - p_adapter->params.recv_chksum_offload;
> p_offload_chksum->V4Receive.IpChecksum =
> - p_adapter->params.recv_chksum_offload;
> + !!(p_adapter->params.recv_chksum_offload);
>
> p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE;
> p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE;
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
> ===================================================================
> ---
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip
> oib.inx (revision 3106)
> +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip
> oib.inx (revision 3107)
> @@ -95,18 +95,28 @@
>
> HKR, Ndi\Params\SendChksum, ParamDesc, 0,
> "Send Checksum Offload"
> HKR, Ndi\Params\SendChksum, Type, 0, "enum"
> -HKR, Ndi\Params\SendChksum, Default, 0, "0"
> +HKR, Ndi\Params\SendChksum, Default, 0, "1"
> HKR, Ndi\Params\SendChksum, Optional, 0, "0"
> HKR, Ndi\Params\SendChksum\enum, "0", 0, "Disabled"
> -HKR, Ndi\Params\SendChksum\enum, "1", 0, "Enabled"
> +HKR, Ndi\Params\SendChksum\enum, "1", 0, "Enabled (if
> supported by HW)"
> +HKR, Ndi\Params\SendChksum\enum, "2", 0, "Bypass"
>
> HKR, Ndi\Params\RecvChksum, ParamDesc, 0,
> "Recv Checksum Offload"
> HKR, Ndi\Params\RecvChksum, Type, 0, "enum"
> -HKR, Ndi\Params\RecvChksum, Default, 0, "0"
> +HKR, Ndi\Params\RecvChksum, Default, 0, "1"
> HKR, Ndi\Params\RecvChksum, Optional, 0, "0"
> HKR, Ndi\Params\RecvChksum\enum, "0", 0, "Disabled"
> -HKR, Ndi\Params\RecvChksum\enum, "1", 0, "Enabled"
> +HKR, Ndi\Params\RecvChksum\enum, "1", 0, "Enabled (if
> supported by HW)"
> +HKR, Ndi\Params\RecvChksum\enum, "2", 0, "Bypass"
>
> +HKR, Ndi\Params\lso, ParamDesc, 0, "Large Send Offload"
> +HKR, Ndi\Params\lso, Type, 0, "enum"
> +HKR, Ndi\Params\lso, Default, 0, "0"
> +HKR, Ndi\Params\lso, Optional, 0, "0"
> +HKR, Ndi\Params\lso\enum, "0", 0, "Disabled"
> +HKR, Ndi\Params\lso\enum, "1", 0, "Enabled"
> +
> +
> HKR, Ndi\Params\SaTimeout, ParamDesc, 0, "SA
> Query Timeout (ms)"
> HKR, Ndi\Params\SaTimeout, Type, 0, "dword"
> HKR, Ndi\Params\SaTimeout, Default, 0, "1000"
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> ===================================================================
> ---
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> (revision 3106)
> +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> (revision 3107)
> @@ -498,7 +498,7 @@
> wc->recv.ud.path_bits =
> (u8)(cqe->g_mlpath_rqpn & 0x7f);
> wc->recv.ud.recv_opt |=
> cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;
> wc->recv.ud.pkey_index =
> (u16)(be32_to_cpu(cqe->immed_rss_invalid) & 0x7f);
> - wc->csum_ok =
> mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
> + wc->recv.ud.csum_ok =
> +mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
> }
> if (!is_send && cqe->rlid == 0){
>
> MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid
> == 0 \n "));
More information about the ofw
mailing list