[Openib-windows] IBAL fixes for memfree driver

Fabian Tillier ftillier at silverstorm.com
Tue Mar 7 11:09:19 PST 2006


Hi Leonid,

On 3/7/06, Leonid Keller <leonid at mellanox.co.il> wrote:
>
> Hi Fab,
> Find below the patches that I've made while working on the memfree driver.

This patch is severely whitespace-damaged.  Please preserve tabs;
don't convert them to spaces.  If you wouldn't mind resending it, that
will save me a lot of work.

More comments below.

- Fab

> Index: ib_types.h
> ===================================================================
> --- ib_types.h (revision 230)
> +++ ib_types.h (working copy)
> @@ -4169,41 +4169,27 @@
>  *
>  * SYNOPSIS
>  */
> +
>  AL_INLINE uint8_t AL_API
>  ib_port_info_compute_rate(
>   IN  const ib_port_info_t* const  p_pi )
>  {
> - switch( p_pi->link_width_active * p_pi->link_width_active *
> -  ib_port_info_get_link_speed_active( p_pi ) )
> + switch(p_pi->link_width_active)
>   {
> - case 1:
> -  return IB_PATH_RECORD_RATE_2_5_GBS;
> +  case IB_LINK_WIDTH_ACTIVE_1X:
> +    return IB_PATH_RECORD_RATE_2_5_GBS;
>
> - case 2:
> -  return IB_PATH_RECORD_RATE_5_GBS;
> +  case IB_LINK_WIDTH_ACTIVE_4X:
> +   return IB_PATH_RECORD_RATE_10_GBS;
>
> - case 4:
> -  return IB_PATH_RECORD_RATE_10_GBS;
> +  case IB_LINK_WIDTH_ACTIVE_12X:
> +   return IB_PATH_RECORD_RATE_30_GBS;
>
> - case 8:
> -  return IB_PATH_RECORD_RATE_20_GBS;
> -
> - case 16:
> -  return IB_PATH_RECORD_RATE_40_GBS;
> -
> - case 64:
> -  return IB_PATH_RECORD_RATE_30_GBS;
> -
> - case 128:
> -  return IB_PATH_RECORD_RATE_60_GBS;
> -
> - case 256:
> -  return IB_PATH_RECORD_RATE_120_GBS;
> -
> - default:
> -  return IB_PATH_RECORD_RATE_2_5_GBS;
> +  default:
> +   return IB_PATH_RECORD_RATE_2_5_GBS;
>   }
>  }
> +
>  /*
>  * PARAMETERS
>  * p_pi

This is a regression, removing the latest update I did to support DDR
and QDR link speeds.  You probably need to do a merge into your
repository of the OpenIB stuff.

> @@ -8764,6 +8750,17 @@
>   IB_WCS_RNR_RETRY_ERR,
>   IB_WCS_TIMEOUT_RETRY_ERR,
>   IB_WCS_REM_INVALID_REQ_ERR,
> + IB_WCS_LOCAL_EEC_OP_ERR,
> + IB_WCS_BAD_RESP_ERR,
> + IB_WCS_LOCAL_ACCESS_ERR,
> + IB_WCS_REM_INV_REQ_ERR,
> + IB_WCS_LOCAL_RDD_VIOL_ERR,
> + IB_WCS_REM_ABORT_ERR,
> + IB_WCS_INV_EECN_ERR,
> + IB_WCS_INV_EEC_STATE_ERR,
> + IB_WCS_FATAL_ERR,
> + IB_WCS_RESP_TIMEOUT_ERR,
> + IB_WCS_GENERAL_ERR,
>   IB_WCS_UNMATCHED_RESPONSE,   /* InfiniBand Access Layer
> */
>   IB_WCS_CANCELED,     /* InfiniBand Access Layer */
>   IB_WCS_UNKNOWN      /* Must be last. */

Can we skip all EEC and RD related errors, since there is no EEC/RD
support in the rest of the header?  Once there's a user of RD we can
add everything related to RD/EEC as needed.

> @@ -8827,6 +8824,50 @@
>  *   - There was insufficient buffers to receive a new atomic operation.
>  *   - An RDMA request was larger than 2^31 bytes.
>  *
> +* IB_WCS_LOCAL_EEC_OP_ERR,
> +*  An internal EE Context consistency error was detected
> +*  while processing this Work Request.
> +*
> +* IB_WCS_BAD_RESP_ERR,
> +*  An unexpected transport layer opcode was returned
> +*  by the responder.
> +*
> +*  IB_WCS_LOCAL_ACCESS_ERR,
> +*  A protection error occurred on a local data buffer
> +*  during the processing of a RDMA Write with Immediate Data
> +*  operation sent from the remote node.
> +*
> +*  IB_WCS_REM_INV_REQ_ERR,
> +*  The responder detected an invalid message on the channel.
> +*  Possible causes include the operation is:
> +*   - not supported by this receive queue;
> +*   - insufficient buffering to receive a new RDMA or Atomic Operation
> request;
> +*   - the length specified in an RDMA request is greater than 2^31 bytes.
> +*
> +*  IB_WCS_LOCAL_RDD_VIOL_ERR,
> +*  The RDD associated with the QP does not match the RDD
> +*  associated with the EE Context.
> +*
> +*  IB_WCS_REM_ABORT_ERR,
> +*  The operation was aborted:
> +*   - For RD, the requester aborted the operation. One possible cause is
> +*    the requester suspended the operation and will retry it later using
> +*    a new Receive WQE. The other possible cause is the requester
> +*    abandoned the operation and placed the requester QP in
> +*    the SQEr state.
> +*   - For UD QPs associated with an SRQ, the responder aborted the
> operation.
> +*
> +*  IB_WCS_INV_EECN_ERR,
> +*  An invalid EE Context number was detected.
> +*
> +*  IB_WCS_INV_EEC_STATE_ERR,
> +*  Operation is not legal for the specified EE Context state.
> +*
> +*  IB_WCS_FATAL_ERR,
> +*
> +* IB_WCS_RESP_TIMEOUT_ERR,
> +*
> +*
>  * IB_WCS_UNMATCHED_RESPONSE
>  *  A response MAD was received for which there was no matching send.  The
>  *  send operation may have been canceled by the user or may have timed
> @@ -8834,6 +8875,10 @@
>  *
>  * IB_WCS_CANCELED
>  *  The completed work request was canceled by the user.
> +*
> +*  IB_WCS_GENERAL_ERR,
> +*  Any other error
> +*
>  *****/
>
>
> @@ -8968,21 +9013,23 @@
>   ib_wc_status_t   status;
>   uint64_t    vendor_specific;
>
> + uint32_t     qp_num;
> +

Can this be the user's QP context, rather than the QP number?  The QP
number doesn't do much for the user unless they keep a lookup table of
QPN to whatever context they set when they created the QP.

A QP context would be a very cool thing to have, IMO.

>   union _wc_recv
>   {
>    struct _wc_conn
>    {
> -   ib_recv_opt_t recv_opt;
> +   ib_recv_opt_t  recv_opt;
>     ib_net32_t  immediate_data;
>
>    } conn;
>
>    struct _wc_ud
>    {
> -   ib_recv_opt_t recv_opt;
> +   ib_recv_opt_t  recv_opt;
>     ib_net32_t  immediate_data;
>     ib_net32_t  remote_qp;
> -   uint16_t  pkey_index;
> +   uint16_t   pkey_index;
>     ib_net16_t  remote_lid;
>     uint8_t   remote_sl;
>     uint8_t   path_bits;
>
>
> Index: ib_statustext.c
> ===================================================================
> --- ib_statustext.c     (revision 230)
> +++ ib_statustext.c     (working copy)
> @@ -163,6 +163,17 @@
>         "IB_WCS_RNR_RETRY_ERR",
>         "IB_WCS_TIMEOUT_RETRY_ERR",
>         "IB_WCS_REM_INVALID_REQ_ERR",
> +       "IB_WCS_LOCAL_EEC_OP_ERR",
> +       "IB_WCS_BAD_RESP_ERR",
> +       "IB_WCS_LOCAL_ACCESS_ERR",
> +       "IB_WCS_REM_INV_REQ_ERR",
> +       "IB_WCS_LOCAL_RDD_VIOL_ERR",
> +       "IB_WCS_REM_ABORT_ERR",
> +       "IB_WCS_INV_EECN_ERR",
> +       "IB_WCS_INV_EEC_STATE_ERR",
> +       "IB_WCS_FATAL_ERR",
> +       "IB_WCS_RESP_TIMEOUT_ERR",
> +       "IB_WCS_GENERAL_ERR",
>         "IB_WCS_UNMATCHED_RESPONSE",                    /*
> InfiniBand Access Layer */
>         "IB_WCS_CANCELED",
>             /* InfiniBand Access Layer */
>         "IB_WCS_UNKNOWN"
>
>
> _______________________________________________
> openib-windows mailing list
> openib-windows at openib.org
> http://openib.org/mailman/listinfo/openib-windows
>
>
>



More information about the ofw mailing list