[ofw] [PATCH 3/3] Mlx4 (checksum): improving the mechanism of csum offload

Smith, Stan stan.smith at intel.com
Thu Sep 4 12:16:15 PDT 2008


Hello Alex,

Some questions:

1) Am I correct in understanding that ipoib offload is turned 'on' by default?

2) For debug/testing purposes, how does one turn ipoib offload 'off' or 'on'?

3) In the patch for ipoib_port.c, when making the call
    query_ca(p_port->ib_mgr.h_ca, NULL, &attr_size );
   I do not see attr_size initialized, hence it could be a very large integer
   (stack garbage). The garbage value could pass the 1st call to get the attr size,
   then the following cl_malloc(attr_size) call could fail due to the very large integer.
   Do you agree?

Please notify the list when the patches are applied and committed to svn.

Thanks,

Stan.


Alex Naslednikov wrote:
> This is cumulative patch that fixes all open issues shown the last
> week: SGE wrong calculation
> LSO messages in log files
> Fixes in ipoib csum offload mechanism
> Adding 3 states for CSUM offload : disabled, enabled if supported by
> HW and bypass (see inline)
>
> Signed-off by: xalex (Alexander Naslednikov)
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
> ===================================================================
> --- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
> (revision 3106)
> +++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c
> (revision 3107)
> @@ -825,6 +825,8 @@
>         uint64_t                        vaddr;
>         net32_t                         rkey;
>         ib_qp_attr_t            qp_attr;
> +       ib_ca_attr_t *          p_ca_attr;
> +       uint32_t                        attr_size;
>
>         IPOIB_ENTER( IPOIB_DBG_INIT );
>
> @@ -889,7 +891,7 @@
>                         p_port->p_adapter->p_ifc->get_err_str( status
> )) );
>                 return status;
>         }
> -
> +
>         /* Allocate the QP. */
>         cl_memclr( &qp_create, sizeof(qp_create) );
>         qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
> @@ -897,8 +899,47 @@
>         qp_create.rq_sge = 2;   /* To support buffers spanning pages.
>         */ qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq;
>         qp_create.sq_depth = p_port->p_adapter->params.sq_depth;
> -       //TODO: Figure out the right number of SGE entries for sends.
> -       qp_create.sq_sge = MAX_SEND_SGE;
> +
> +       //Figure out the right number of SGE entries for sends.
> +       /* Get the size of the CA attribute structure. */
> +       status = p_port->p_adapter->p_ifc->query_ca(
> p_port->ib_mgr.h_ca, NULL, &attr_size );
> +       if( status != IB_INSUFFICIENT_MEMORY )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("ib_query_ca failed with status %s.\n",
> p_port->p_adapter->p_ifc->get_err_str(status)) );
> +               return status;
> +       }
> +
> +       /* Allocate enough space to store the attribute structure. */
> +       p_ca_attr = cl_malloc( attr_size );
> +       if( !p_ca_attr )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("cl_malloc failed to allocate p_ca_attr!\n")
> ); +               return IB_INSUFFICIENT_RESOURCES;
> +       }
> +
> +       /* Query the CA attributes. */
> +       status =
> p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, p_ca_attr,
> &attr_size ); +       if( status != IB_SUCCESS )
> +       {
> +               cl_free( p_ca_attr );
> +
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("ib_query_ca failed with status %s.\n",
> p_port->p_adapter->p_ifc->get_err_str(status)) );
> +               return status;
> +       }
> +#define UD_QP_USED_SGE 3
> +       qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ?
> MAX_SEND_SGE  : (p_ca_attr->max_sges - UD_QP_USED_SGE);
> +       if (!p_ca_attr->ipoib_csum) {
> +               //checksum is not supported by device
> +               //user must specify BYPASS to explicitly cancel
> checksum calculation
> +               if (p_port->p_adapter->params.send_chksum_offload ==
> CSUM_ENABLED)
> +                       p_port->p_adapter->params.send_chksum_offload
> = CSUM_DISABLED;
> +               if (p_port->p_adapter->params.recv_chksum_offload ==
> CSUM_ENABLED)
> +                       p_port->p_adapter->params.recv_chksum_offload
> = CSUM_DISABLED;
> +       }
> +
>         qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq;
>         qp_create.sq_signaled = TRUE;
>         status = p_port->p_adapter->p_ifc->create_qp(
> @@ -1937,7 +1978,7 @@
>
>                 }
>                 /* Successful completion.  Get the receive
> information. */
> -               p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok;
> +               p_desc->ndis_csum.Value = (ULONG)
>                 p_wc->recv.ud.csum_ok; cl_perf_start( GetRecvEndpts );
>                 __recv_get_endpts( p_port, p_desc, p_wc, &p_src,
> &p_dst );
>                 cl_perf_stop( &p_port->p_adapter->perf, GetRecvEndpts
> ); @@ -2580,10 +2621,22 @@
>                         ("__buf_mgr_get_ndis_pkt failed\n") );
>                 return IB_INSUFFICIENT_RESOURCES;
>         }
> -
> +       if (p_port->p_adapter->params.recv_chksum_offload !=
> CSUM_BYPASS) {
>         /* Get the checksums directly from packet information. */
> -       NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> -               (PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> +       /* In this case, no one of cheksum's cat get false value */
> +       /* If hardware checksum failed or wasn't calculated, NDIS will
> recalculate it again */
> +               NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> +                       (PVOID) (uintn_t) (p_desc->ndis_csum.Value);
> +       } else {
> +               NDIS_TCP_IP_CHECKSUM_PACKET_INFO        chksum;
> +               /* Flag the checksums as having been calculated. */
> +               chksum.Value = 0;
> +               chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;
> +               chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;
> +               chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE;
> +               NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet,
> TcpIpChecksumPacketInfo ) =
> +               (void*)(uintn_t)chksum.Value;
> +       }
>         ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );
>
>         IPOIB_EXIT( IPOIB_DBG_RECV );
> @@ -3833,9 +3886,9 @@
>                 p_desc->wr.dgrm.ud.hlen = lso_header_size;
>                 // Tell NDIS how much we will send.
>                 PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] =
> UlongToPtr(PacketLength);
> -               p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM |
> IB_SEND_OPT_TX_TCP_UDP_CSUM);
> +               p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM |
> IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED;
>                 __send_gen(p_port, p_desc, IndexOfData);
> -               p_desc->wr.wr_type = ( WR_LSO | IB_SEND_OPT_SIGNALED);
> +               p_desc->wr.wr_type = WR_LSO;
>         } else {
>
>                 /* Setup the first local data segment (used for the
> IPoIB header). */
> Index:
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
> ===================================================================
> --- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
> (revision 3106) +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h
> (revision 3107) @@ -60,15 +60,15 @@
>  /*
>   * Macros
>   */
> +typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS}
> tCsumTypeEn;
>
> -
>  typedef struct _ipoib_params
>  {
>         int32_t         rq_depth;
>         int32_t         rq_low_watermark;
>         int32_t         sq_depth;
> -       boolean_t       send_chksum_offload;
> -       boolean_t       recv_chksum_offload;
> +       int32_t         send_chksum_offload;  //is actually of type
> tCsumTypeEn
> +       int32_t         recv_chksum_offload; //is actually of type
> tCsumTypeEn
>         uint32_t        sa_timeout;
>         uint32_t        sa_retry_cnt;
>         uint32_t        recv_pool_ratio;
> @@ -91,12 +91,11 @@
>  *              Number of send WQEs to allocate.
>  *
>  *      send_chksum_offload
> -*              Flag to indicate whether to offload send checksums.
> This will make it
> -*              so that IPoIB packets should never be forwarded out of
> the IB subnet
> -*              without recalculating the checksum.
> -*
>  *      recv_chksum_offload
> -*              Flag to indicate whether to offload recv checksums.
> +*              Flags to indicate whether to offload send/recv
> checksums.
> +*              0 - No hardware cheksum
> +*              1 - Try to offload if the device support it
> +*              2 - Always report success (checksum bypass)
>  *
>  *      wsdp_enabled
>  *              Flag to indicate whether WSDP is enabled for an
> adapter adapter.
> Index:
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
> ===================================================================
> --- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
> (revision 3106) +++
> D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c
> (revision 3107) @@ -150,8 +150,8 @@
>         {NDIS_STRING_CONST("RqDepth"),          1,
> IPOIB_OFFSET(rq_depth),              IPOIB_SIZE(rq_depth),
> 512,        128,    1024},
>         {NDIS_STRING_CONST("RqLowWatermark"),   0,
> IPOIB_OFFSET(rq_low_watermark),      IPOIB_SIZE(rq_low_watermark),
> 4, 2,      8},
>         {NDIS_STRING_CONST("SqDepth"),          1,
> IPOIB_OFFSET(sq_depth),              IPOIB_SIZE(sq_depth),
> 512,        128,    1024},
> -       {NDIS_STRING_CONST("SendChksum"),       1,
> IPOIB_OFFSET(send_chksum_offload),
> IPOIB_SIZE(send_chksum_offload),0, 0,      1},
> -       {NDIS_STRING_CONST("RecvChksum"),       1,
> IPOIB_OFFSET(recv_chksum_offload),
> IPOIB_SIZE(recv_chksum_offload),0, 0,      1},
> +       {NDIS_STRING_CONST("SendChksum"),       1,
> IPOIB_OFFSET(send_chksum_offload),
> IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS},
> +       {NDIS_STRING_CONST("RecvChksum"),       1,
> IPOIB_OFFSET(recv_chksum_offload),
> IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS},
>         {NDIS_STRING_CONST("SaTimeout"),        1,
> IPOIB_OFFSET(sa_timeout),            IPOIB_SIZE(sa_timeout),
> 1000,       250,    UINT_MAX},
>         {NDIS_STRING_CONST("SaRetries"),        1,
> IPOIB_OFFSET(sa_retry_cnt),          IPOIB_SIZE(sa_retry_cnt),
> 10, 1,      UINT_MAX},
>         {NDIS_STRING_CONST("RecvRatio"),        1,
> IPOIB_OFFSET(recv_pool_ratio),       IPOIB_SIZE(recv_pool_ratio),
> 1, 1,      10},
> @@ -1450,28 +1450,20 @@
>         p_offload_task->TaskBufferLength =
> sizeof(NDIS_TASK_TCP_IP_CHECKSUM);
>         p_offload_chksum =
>
> (NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer; -
> +
>         p_offload_chksum->V4Transmit.IpOptionsSupported =
> -               p_adapter->params.send_chksum_offload;
>         p_offload_chksum->V4Transmit.TcpOptionsSupported =
> -               p_adapter->params.send_chksum_offload;
>         p_offload_chksum->V4Transmit.TcpChecksum =
> -               p_adapter->params.send_chksum_offload;
>         p_offload_chksum->V4Transmit.UdpChecksum =
> -               p_adapter->params.send_chksum_offload;
>         p_offload_chksum->V4Transmit.IpChecksum =
> -               p_adapter->params.send_chksum_offload;
> +               !!(p_adapter->params.send_chksum_offload);
>
>         p_offload_chksum->V4Receive.IpOptionsSupported =
> -               p_adapter->params.recv_chksum_offload;
>         p_offload_chksum->V4Receive.TcpOptionsSupported =
> -               p_adapter->params.recv_chksum_offload;
>         p_offload_chksum->V4Receive.TcpChecksum =
> -                       p_adapter->params.recv_chksum_offload;
>         p_offload_chksum->V4Receive.UdpChecksum =
> -               p_adapter->params.recv_chksum_offload;
>         p_offload_chksum->V4Receive.IpChecksum =
> -               p_adapter->params.recv_chksum_offload;
> +               !!(p_adapter->params.recv_chksum_offload);
>
>         p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE;
>         p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE;
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
> ===================================================================
> --- D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
> (revision 3106)
> +++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx
> (revision 3107)
> @@ -95,18 +95,28 @@
>
>  HKR, Ndi\Params\SendChksum,            ParamDesc,      0, "Send
> Checksum Offload"
>  HKR, Ndi\Params\SendChksum,            Type,           0, "enum"
> -HKR, Ndi\Params\SendChksum,            Default,        0, "0"
> +HKR, Ndi\Params\SendChksum,            Default,        0, "1"
>  HKR, Ndi\Params\SendChksum,            Optional,       0, "0"
>  HKR, Ndi\Params\SendChksum\enum,       "0",    0, "Disabled"
> -HKR, Ndi\Params\SendChksum\enum,       "1",    0, "Enabled"
> +HKR, Ndi\Params\SendChksum\enum,       "1",    0, "Enabled (if
> supported by HW)"
> +HKR, Ndi\Params\SendChksum\enum,       "2",    0, "Bypass"
>
>  HKR, Ndi\Params\RecvChksum,            ParamDesc,      0, "Recv
> Checksum Offload"
>  HKR, Ndi\Params\RecvChksum,            Type,           0, "enum"
> -HKR, Ndi\Params\RecvChksum,            Default,        0, "0"
> +HKR, Ndi\Params\RecvChksum,            Default,        0, "1"
>  HKR, Ndi\Params\RecvChksum,            Optional,       0, "0"
>  HKR, Ndi\Params\RecvChksum\enum,       "0",    0, "Disabled"
> -HKR, Ndi\Params\RecvChksum\enum,       "1",    0, "Enabled"
> +HKR, Ndi\Params\RecvChksum\enum,       "1",    0, "Enabled (if
> supported by HW)"
> +HKR, Ndi\Params\RecvChksum\enum,       "2",    0, "Bypass"
>
> +HKR, Ndi\Params\lso,           ParamDesc,      0, "Large Send
> Offload" +HKR, Ndi\Params\lso,           Type,           0, "enum"
> +HKR, Ndi\Params\lso,           Default,        0, "0"
> +HKR, Ndi\Params\lso,           Optional,       0, "0"
> +HKR, Ndi\Params\lso\enum,      "0",    0, "Disabled"
> +HKR, Ndi\Params\lso\enum,      "1",    0, "Enabled"
> +
> +
>  HKR, Ndi\Params\SaTimeout,             ParamDesc,      0, "SA Query
> Timeout (ms)"
>  HKR, Ndi\Params\SaTimeout,             Type,           0, "dword"
>  HKR, Ndi\Params\SaTimeout,             Default,        0, "1000"
> Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> ===================================================================
> --- D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> (revision 3106)
> +++ D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c
> (revision 3107)
> @@ -498,7 +498,7 @@
>                 wc->recv.ud.path_bits           =
> (u8)(cqe->g_mlpath_rqpn & 0x7f);
>                 wc->recv.ud.recv_opt            |= cqe->g_mlpath_rqpn
> & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;
>                 wc->recv.ud.pkey_index  =
> (u16)(be32_to_cpu(cqe->immed_rss_invalid)  & 0x7f);
> -               wc->csum_ok =
> mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
> +               wc->recv.ud.csum_ok =
> mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
>         }
>         if (!is_send && cqe->rlid == 0){
>                 MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found
> rlid == 0 \n "));
> _______________________________________________
> ofw mailing list
> ofw at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw




More information about the ofw mailing list