[Openib-windows] [RFC] IRP-based verbs

Fab Tillier ftillier at silverstorm.com
Wed Sep 7 17:40:24 PDT 2005


Hi Folks,

I've put together a couple of definitions for the input and output buffers of
the IOCTLs that perform the following verbs:
- OpenCa
- CloseCa
- QueryCa
- QueryPort

These IOCTLs would be used by both kernel and user-mode clients.  Calling mode
of the request can be retrieved from the RequestorMode of the IRP.

There are several behavioral changes compared to the existing verbs, as follows:

- All requests can be issued at DISPATCH_LEVEL, and are expected to return
STATUS_PENDING if they can't complete immediately.  This will allow elimination
of the AL object abstraction that managed async destruction, and all the
complexity that brought with it.  It also gets rid of the destroy callbacks from
destruction - the IOCTL won't complete until the operation is complete.

- The HCA will see every client's OpenCa request.  Currently, it only sees
user-mode requests (via the um_open_ca handler).  This change allows each kernel
client to have their own UAR when using Mellanox HCAs.

- CloseCa will return STATUS_DEVICE_BUSY if there are unreleased resources.

- QueryCa will return as much or as little information as the user desires.
This allows users to query for the items at the beginning of the structure
without getting everything if that is what they want.  Further, the CA and port
attribute structures have been changed to use offsets rather than embedded
pointers.  This will make support for user-mode a lot simpler as it will avoid
having to do pointer fixups.

- QueryPort is new, similar to what the Linux stack does.

- The CA attributes structure now uses a flag variable rather than a series of
Booleans to report capabilities, similar to the Linux stack.

- The CA and port attribute structures have version fields to allow us to do
versioning.  The versions for related structures are expected to stay in sync,
and the port attributes have a version in them to enable the QueryPort
functionality.

- Syntax generally follows what Microsoft does in terms of naming, types, etc.

Note that I put together two new files for this so as to not disturb existing
development.  We can either keep this development separate and keep backward
compatibility for a while, or we can evolve the main API instead.  I think
separating the files makes sense as kernel and user-mode APIs are going to
diverge.  It also allows us to think fresh, rather than try to make things fit
within the potentially constrictive existing design.

I'll be continuing to fill in the IOCTL definitions for the rest of the verbs,
and once those are in place, will start migrating the code base to implement
this.  Overall, I expect these changes will allow us to eliminate quite a bit of
code in the access layer and ULPs, as well as enable new capabilities currently
unavailable due to IRQL limitations.

Thoughts and comments welcome.

- Fab

Index: inc\kernel\iba\ib_defs.h
===================================================================
/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

/** @file */


/**
 * Handle types for verbs resources.
 *
 * Each line declares two pointer typedefs for the same structure tag: a
 * native-width handle and a "64" variant qualified with the
 * Microsoft-specific __ptr64 modifier.  The structure tags are not defined
 * in this header, so the handles are opaque to code that only includes it.
 *
 * NOTE(review): the 64 variants are presumably meant for IOCTL buffers so
 * that a handle occupies 8 bytes regardless of the client's architecture --
 * confirm.
 */
typedef struct _IB_CA   *IB_CA_HANDLE, * __ptr64 IB_CA_HANDLE64;
typedef struct _IB_PD   *IB_PD_HANDLE, * __ptr64 IB_PD_HANDLE64;
typedef struct _IB_CQ   *IB_CQ_HANDLE, * __ptr64 IB_CQ_HANDLE64;
typedef struct _IB_QP   *IB_QP_HANDLE, * __ptr64 IB_QP_HANDLE64;
typedef struct _IB_AH   *IB_ADDR_HANDLE, * __ptr64 IB_ADDR_HANDLE64;
typedef struct _IB_MR   *IB_MR_HANDLE, * __ptr64 IB_MR_HANDLE64;
typedef struct _IB_MW   *IB_MW_HANDLE, * __ptr64 IB_MW_HANDLE64;


/**
 * 16-byte GID, viewable three ways through one union: as raw bytes, as a
 * unicast prefix/interface-ID pair, or as a multicast header followed by a
 * raw group ID.  Every view covers the same 16 bytes.
 *
 * Declared under 1-byte packing (#pragma pack(push, 1) ... pop).
 *
 * NOTE(review): the byte order of the UINT64 members is not stated here;
 * presumably network byte order -- confirm.
 */
#pragma pack(push, 1)
typedef union _IB_GID
{
    /** The GID as an opaque 16-byte array. */
    UCHAR                   Raw[16];

    /** Unicast view: two 8-byte halves. */
    struct _IB_GID_UNICAST
    {
        UINT64              Prefix;
        UINT64              InterfaceId;

    } Unicast;

    /** Multicast view: 2-byte header followed by a 14-byte group ID. */
    struct _IB_GID_MULTICAST
    {
        UCHAR               Header[2];
        UCHAR               RawGroupId[14];

    } Multicast;

}   IB_GID;
#pragma pack(pop)


/**
 * Encoded MTU values.  Each enumerator names the MTU in bytes; with the
 * values below the byte count equals (128 << value).
 */
typedef enum _IB_MTU
{
    IB_MTU_256  = 1,    /* 128 << 1 */
    IB_MTU_512  = 2,    /* 128 << 2 */
    IB_MTU_1024 = 3,    /* 128 << 3 */
    IB_MTU_2048 = 4,    /* 128 << 4 */
    IB_MTU_4096 = 5     /* 128 << 5 */
}   IB_MTU;


/**
 * Link state of a port, as reported in IB_PORT_ATTR::LinkState.
 * Values are explicit because they are part of the binary interface.
 */
typedef enum _IB_LINK_STATE
{
    IB_LINK_NO_CHANGE   = 0,    /* no state change */
    IB_LINK_DOWN        = 1,    /* link is down */
    IB_LINK_INIT        = 2,    /* link is initializing */
    IB_LINK_ARMED       = 3,    /* link is armed */
    IB_LINK_ACTIVE      = 4,    /* link is active */
    IB_LINK_ACT_DEFER   = 5     /* link is in the active-defer state */
}   IB_LINK_STATE;


/**
 * Link width of a port, as reported in IB_PORT_ATTR::LinkWidth.
 * Each width occupies its own bit.
 * NOTE(review): presumably so that several widths can be OR'ed into a
 * "supported widths" mask -- confirm.
 */
typedef enum _IB_LINK_WIDTH
{
    IB_LINK_WIDTH_1X    = (1<<0),
    IB_LINK_WIDTH_4X    = (1<<1),
    IB_LINK_WIDTH_8X    = (1<<2),
    IB_LINK_WIDTH_12X   = (1<<3)
}   IB_LINK_WIDTH;


/**
 * Link speed of a port, as reported in IB_PORT_ATTR::LinkSpeed.
 * Each speed occupies its own bit.
 * NOTE(review): presumably so that several speeds can be OR'ed into a
 * "supported speeds" mask -- confirm.
 */
typedef enum _IB_LINK_SPEED
{
    IB_LINK_SPEED_2_5_GBS   = (1<<0),
    IB_LINK_SPEED_5_GBS     = (1<<1),
    IB_LINK_SPEED_10_GBS    = (1<<2)
}   IB_LINK_SPEED;


/**
 * Port capability bits, written as 32-bit masks.  OR'ed together to form
 * IB_PORT_ATTR::CapabilitiesFlags.  Bit 0 and bits 13-15 are not assigned.
 */
enum IB_PORT_CAP_FLAGS
{
    IB_PORT_FLAG_IS_SM                      = 0x00000002,
    IB_PORT_FLAG_NOTICE_SUP                 = 0x00000004,
    IB_PORT_FLAG_TRAP_SUP                   = 0x00000008,
    IB_PORT_FLAG_OPT_IPD_SUP                = 0x00000010,
    IB_PORT_FLAG_APM_SUP                    = 0x00000020,
    IB_PORT_FLAG_SL_MAP_SUP                 = 0x00000040,
    IB_PORT_FLAG_MKEY_NVRAM                 = 0x00000080,
    IB_PORT_FLAG_PKEY_NVRAM                 = 0x00000100,
    IB_PORT_FLAG_LED_INFO_SUP               = 0x00000200,
    IB_PORT_FLAG_IS_SM_DISABLED             = 0x00000400,
    IB_PORT_FLAG_SYS_IMAGE_GUID_SUP         = 0x00000800,
    IB_PORT_FLAG_PKEY_SW_EXT_PORT_TRAP_SUP  = 0x00001000,
    /* bits 13-15 unassigned */
    IB_PORT_FLAG_CM_SUP                     = 0x00010000,
    IB_PORT_FLAG_SNMP_TUNNEL_SUP            = 0x00020000,
    IB_PORT_FLAG_REINIT_SUP                 = 0x00040000,
    IB_PORT_FLAG_DEVICE_MGMT_SUP            = 0x00080000,
    IB_PORT_FLAG_VENDOR_CLASS_SUP           = 0x00100000,
    IB_PORT_FLAG_DR_NOTICE_SUP              = 0x00200000,
    IB_PORT_FLAG_CAP_MASK_NOTICE_SUP        = 0x00400000,
    IB_PORT_FLAG_BOOT_MGMT_SUP              = 0x00800000,
    IB_PORT_FLAG_LINK_LATENCY_SUP           = 0x01000000,
    IB_PORT_FLAG_CLIENT_REG_SUP             = 0x02000000
};


/**
 * Init-type bits; every value is a single bit, so they can be OR'ed.
 * NOTE(review): presumably these are the values carried in
 * IB_PORT_ATTR::InitTypeReply -- confirm.
 */
enum _IB_INIT_TYPE
{
    IB_INIT_TYPE_NO_LOAD                = (1<<0),
    IB_INIT_TYPE_PRESERVE_CONTENT       = (1<<1),
    IB_INIT_TYPE_PRESERVE_PRESENCE      = (1<<2),
    IB_INIT_TYPE_DO_NOT_RESUSCITATE     = (1<<3)
};


/**
 * Attributes of a single port.
 *
 * The structure is declared with 8-byte alignment.  Variable-length data
 * (the GID and PKEY tables) and the following port's attributes are located
 * through byte offsets measured from the start of this structure rather
 * than through pointers.  The leading Version and Size fields have the same
 * types and order as the fields of IB_QUERY_PORT_OUT.
 */
typedef __declspec(align(8)) struct _IB_PORT_ATTR
{
    /** Version of the port attributes. */
    ULONG                   Version;
    /** Size required for the full port attributes. */
    ULONG                   Size;
    /** GUIDs are in network byte order. */
    UINT64                  PortGuid;

    IB_LINK_STATE           LinkState;
    IB_LINK_WIDTH           LinkWidth;
    IB_LINK_SPEED           LinkSpeed;
    IB_MTU                  Mtu;

    UINT64                  MaxMsgSize;
    ULONG                   InitTypeReply;

    /** Port capabilities, formed by OR'ing IB_PORT_CAP_FLAGS values. */
    ULONG                   CapabilitiesFlags;

    /* NOTE(review): presumably the entry counts of the tables located at
     * GidTblOffset and PkeyTblOffset -- confirm. */
    USHORT                  NumGids;
    USHORT                  NumPkeys;

    /** Offsets are from beginning of structure. */
    ULONG                   NextPortAttrOffset;
    ULONG                   GidTblOffset;
    ULONG                   PkeyTblOffset;

    USHORT                  PkeyViolations;
    USHORT                  QkeyViolations;

    USHORT                  BaseLid;
    USHORT                  SmLid;
    USHORT                  MaxVls;
    UCHAR                   Lmc;
    UCHAR                   PortNum;

    /** The maximum expected subnet propagation delay to reach any port on
     * the subnet.  This value also determines the rate at which traps can
     * be generated from this node.  The value is expressed as
     *  timeout = 4.096 microseconds * 2^subnet_timeout
     */
    UCHAR                   SubnetTimeout;
    UCHAR                   SmSvcLvl;

}   IB_PORT_ATTR;


/**
 * HCA capability bits.  OR'ed together to form
 * IB_CA_ATTR::CapabilitiesFlags.  Each enumerator is a single bit, assigned
 * contiguously from bit 0 through bit 13.
 */
enum IB_CA_CAP_FLAGS
{
    IB_CA_FLAG_RESIZE_MAX_WR        = (1<<0),
    IB_CA_FLAG_BAD_PKEY_CNTR        = (1<<1),
    IB_CA_FLAG_BAD_QKEY_CNTR        = (1<<2),
    IB_CA_FLAG_AUTO_PATH_MIG        = (1<<3),
    IB_CA_FLAG_CHANGE_PHY_PORT      = (1<<4),
    IB_CA_FLAG_AH_PORT_ENFORCE      = (1<<5),
    IB_CA_FLAG_CURR_QP_STATE_MOD    = (1<<6),
    IB_CA_FLAG_SHUTDOWN_PORT        = (1<<7),
    IB_CA_FLAG_INIT_TYPE            = (1<<8),
    IB_CA_FLAG_PORT_ACTIVE_EVENT    = (1<<9),
    IB_CA_FLAG_SYS_IMAGE_GUID       = (1<<10),
    IB_CA_FLAG_RC_RNR_NAK_GEN       = (1<<11),
    IB_CA_FLAG_N_NOTIFY_CQ          = (1<<12),
    IB_CA_FLAG_SRQ_RESIZE           = (1<<13)
};


/**
 * Atomic operation capability of an HCA, as reported in
 * IB_CA_ATTR::AtomicCap.  Values are spelled out because the enum is part
 * of a structure exchanged through IOCTL buffers.
 */
typedef enum _IB_ATOMIC_CAP
{
    IB_ATOMIC_NONE      = 0,
    IB_ATOMIC_LOCAL     = 1,
    IB_ATOMIC_GLOBAL    = 2
}   IB_ATOMIC_CAP;


/**
 * Attributes of a channel adapter (CA).
 *
 * The structure is declared with 8-byte alignment.  Per-port attributes are
 * not embedded; the first IB_PORT_ATTR is located through PortAttrOffset, a
 * byte offset from the start of this structure.  The leading Version and
 * Size fields have the same types and order as the fields of
 * IB_QUERY_CA_OUT.
 *
 * NOTE(review): the structure mixes ULONG/UINT32 and UINT8/UINT16 spellings
 * for fixed-width fields -- consider settling on one family.
 */
typedef __declspec(align(8)) struct _IB_CA_ATTR
{
    /** Version of the CA attributes. */
    ULONG                   Version;
    /** Size required for the full CA attributes. */
    ULONG                   Size;
    /** GUIDs are in network byte order. */
    UINT64                  CaGuid;
    UINT64                  SystemImageGuid;

    UINT32                  VendorId;
    UINT16                  DeviceId;
    UINT16                  DeviceRev;
    UINT32                  FwVersion;

    /** Byte offset from the beginning of this structure to the first
    * IB_PORT_ATTR structure for the HCA.
    */
    ULONG                   PortAttrOffset;
    UINT8                   NumPorts;

    UINT8                   MaxQpReadAtomic;
    UINT8                   MaxReadAtomic;
    UINT8                   MaxQpInitReadAtomic;

    /** Each bit in PageSizeSupport indicates a 2^n page size supported
    * by the HCA, where bit locations are 1-based.  The minimum page
    * size that can be supported is thus 2 bytes, and the largest 2^64.
    */
    UINT64                  PageSizeSupport;

    /** HCA capabilities, formed by OR'ing IB_CA_CAP_FLAGS values. */
    ULONG                   CapabilitiesFlags;

    IB_ATOMIC_CAP           AtomicCap;

    /* Resource limits follow. */
    ULONG                   MaxQp;
    ULONG                   MaxQpWr;
    ULONG                   MaxQpSge;
    ULONG                   MaxCq;
    ULONG                   MaxCqe;
    ULONG                   MaxPd;
    ULONG                   MaxAh;
    ULONG                   MaxMr;
    UINT64                  MaxMrSize;
    ULONG                   MaxMw;
    ULONG                   MaxMcastGrp;
    ULONG                   MaxMcastQp;
    ULONG                   MaxMcastQpPerGrp;
    ULONG                   MaxSrq;
    ULONG                   MaxSrqWr;
    ULONG                   MaxSrqSge;

    UINT16                  MaxPKeys;

    UINT8                   LocalAckDelay;

}   IB_CA_ATTR;

Index: inc\kernel\iba\ib_verbs.h
===================================================================
/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

/** @file */

#include <ntddk.h>
#include <ib_defs.h>


/**
 * Build the I/O control code for an InfiniBand verb.
 *
 * code
 *   Function index (an _IB_IOCTL_FUNC value).  It is offset by 0x800, the
 *   start of the function-code range not reserved by Microsoft.
 *
 * access
 *   Required-access value passed through to CTL_CODE.
 *
 * All codes use the FILE_DEVICE_INFINIBAND device type and METHOD_BUFFERED.
 * Both arguments are parenthesized so that expression arguments expand
 * correctly.
 */
#define IB_IOCTL( code, access )    CTL_CODE( \
    FILE_DEVICE_INFINIBAND, (0x800 + (code)), METHOD_BUFFERED, (access) )


/**
 * Function indices for the IB IOCTLs.  IB_IOCTL adds 0x800 to these to form
 * the function field of the control code, so the values are fixed
 * explicitly.
 */
enum _IB_IOCTL_FUNC
{
    IB_OPEN_CA      = 0,
    IB_CLOSE_CA     = 1,
    IB_QUERY_CA     = 2,
    IB_QUERY_PORT   = 3
};


/** IB_OPEN_CA
* Open a CA for use by a client.  Allocates all per-client information
* needed.
*
* The IRP input and output buffers overlap.
*
* <UNKNOWN>
*   [in] The input buffer is hardware vendor specific.  For kernel
*   clients it is always NULL.  For user-mode clients it is NULL if
*   there is no user-mode verb provider or the provider failed to
*   load.  Otherwise, the structure is defined by the user-mode verb
*   provider.
*
* IB_CA_HANDLE64
*   [out] The output buffer points to the location at which to store
*   the CA handle.  This location is 8 bytes long on all architectures.
*
* STATUS_SUCCESS
*   The operation completed successfully.
*
* STATUS_PENDING
*   The operation will be completed at a later time.
*
* The caller must have both read and write access.  The required-access
* field of an I/O control code is only two bits wide, so it is expressed
* as FILE_READ_DATA | FILE_WRITE_DATA; FILE_ALL_ACCESS is a full file
* access mask and does not fit in that field.
*/
#define IOCTL_IB_OPEN_CA    \
    IB_IOCTL( IB_OPEN_CA, (FILE_READ_DATA | FILE_WRITE_DATA) )


/** IB_CLOSE_CA
* Close a CA after use by a client.  Frees all per-client information.
*
* IB_CA_HANDLE64
*   [in] Handle returned by a previous call to IB_OPEN_CA.
*
* There are no output parameters.
*
* STATUS_SUCCESS
*   The operation completed successfully.
*
* STATUS_PENDING
*   The operation will be completed at a later time.
*
* STATUS_DEVICE_BUSY
*   The operation could not complete because the CA instance has
*   outstanding resources that have not been freed.
*
* The caller must have both read and write access.  The required-access
* field of an I/O control code is only two bits wide, so it is expressed
* as FILE_READ_DATA | FILE_WRITE_DATA; FILE_ALL_ACCESS is a full file
* access mask and does not fit in that field.
*/
#define IOCTL_IB_CLOSE_CA   \
    IB_IOCTL( IB_CLOSE_CA, (FILE_READ_DATA | FILE_WRITE_DATA) )


/** IB_QUERY_CA
* Query the attributes of a channel adapter.
*
* The IRP input and output buffers overlap, so the input is read from, and
* the result written to, the same memory.
*
* IB_QUERY_CA_IN
*   [in] Provides the version of the CA attributes requested.
*
* IB_QUERY_CA_OUT/IB_CA_ATTR
*   [out] Returns the CA attributes.
*
* STATUS_SUCCESS
*   The full CA attributes, including port attributes and their GID
*   and PKEY tables, were successfully copied to the output buffer.
*   The Information member of the IO_STATUS_BLOCK is updated to
*   indicate the number of bytes returned.
*
* STATUS_PENDING
*   The operation will be completed at a later time.
*
* STATUS_BUFFER_OVERFLOW
*   Partial attributes were returned; the full attributes did not fit
*   in the space provided.  The size required to get the full attributes
*   is returned in IB_QUERY_CA_OUT::Size (or IB_CA_ATTR::Size).  The
*   Information member of the IO_STATUS_BLOCK is updated to indicate
*   the number of bytes returned.
*
* STATUS_BUFFER_TOO_SMALL
*   The buffer was too small to return any information.  At a minimum,
*   the buffer must be the size of the IB_QUERY_CA_OUT structure to
*   allow the required size to be returned.
*
* STATUS_INVALID_PARAMETER
*   An input or output buffer was not provided.
*
* The expected usage is for clients to use an IB_CA_ATTR buffer as both
* input and output parameters.  On input, only the IB_CA_ATTR::Version
* field must be specified.  Clients can request only a subset of the CA
* attributes by setting the size of the input buffer to their desired
* needs.
*
* Only read access (FILE_READ_DATA) is required.
*/
#define IOCTL_IB_QUERY_CA   IB_IOCTL( IB_QUERY_CA, FILE_READ_DATA )

/** IOCTL_IB_QUERY_CA Input Parameters
* Has the same type and position as the first field of the IB_CA_ATTR
* structure, so an IB_CA_ATTR buffer can serve as the input buffer.
*/
typedef struct _IB_QUERY_CA_IN
{
    /** Version of CA attributes requested. */
    ULONG       Version;

}   IB_QUERY_CA_IN;

/** IOCTL_IB_QUERY_CA Minimal Output Parameters
* Matches the first two fields of the IB_CA_ATTR structure.  This is the
* smallest output buffer the query accepts; it is just large enough to
* report the size required for the full attributes.
*/
typedef struct _IB_QUERY_CA_OUT
{
    /** Version of CA attributes returned. */
    ULONG       Version;
    /** Size required for the CA attributes. */
    ULONG       Size;

}   IB_QUERY_CA_OUT;


/** IB_QUERY_PORT
* Query the attributes of a channel adapter's port.
*
* The IRP input and output buffers overlap, so the input is read from, and
* the result written to, the same memory.
*
* IB_QUERY_PORT_IN
*   [in] Provides the version of the port attributes requested as
*   well as the port number whose attributes to return.
*
* IB_QUERY_PORT_OUT/IB_PORT_ATTR
*   [out] Returns the port attributes.
*
* STATUS_SUCCESS
*   The full port attributes, including GID and PKEY tables, were
*   successfully copied to the output buffer.  The Information member
*   of the IO_STATUS_BLOCK is updated to indicate the number of bytes
*   returned.
*
* STATUS_PENDING
*   The operation will be completed at a later time.
*
* STATUS_BUFFER_OVERFLOW
*   Partial attributes were returned; the full attributes did not fit
*   in the space provided.  The size required to get the full attributes
*   is returned in IB_QUERY_PORT_OUT::Size (or IB_PORT_ATTR::Size).  The
*   Information member of the IO_STATUS_BLOCK is updated to indicate the
*   number of bytes returned.
*
* STATUS_BUFFER_TOO_SMALL
*   The buffer was too small to return any information.  At a minimum,
*   the buffer must be the size of the IB_QUERY_PORT_OUT structure to
*   allow the required size to be returned.
*
* STATUS_INVALID_PARAMETER
*   An input or output buffer was not provided.
*
* The expected usage is for clients to use an IB_PORT_ATTR buffer as both
* input and output parameters.  On input, the IB_PORT_ATTR::Version
* field must be specified, as well as setting the desired port number in
* IB_PORT_ATTR::Size.  (IB_QUERY_PORT_IN::PortNum is the second ULONG of
* the input, which is where IB_PORT_ATTR::Size sits; on output that field
* holds a size.)  Clients can request only a subset of the port
* attributes by setting the size of the input buffer to their desired
* needs.
*
* Only read access (FILE_READ_DATA) is required.
*/
#define IOCTL_IB_QUERY_PORT IB_IOCTL( IB_QUERY_PORT, FILE_READ_DATA )

/** IOCTL_IB_QUERY_PORT Input Parameters
* Two ULONGs, in the same positions as the Version and Size fields of the
* IB_PORT_ATTR structure.
*/
typedef struct _IB_QUERY_PORT_IN
{
    /** Version of port attributes requested. */
    ULONG       Version;
    /** 1-based port number to query. */
    ULONG       PortNum;

}   IB_QUERY_PORT_IN;

/** IOCTL_IB_QUERY_PORT Minimal Output Parameters
* Matches the first two fields of the IB_PORT_ATTR structure.  This is the
* smallest output buffer the query accepts; it is just large enough to
* report the size required for the full attributes.
*/
typedef struct _IB_QUERY_PORT_OUT
{
    /** Version of port attributes returned. */
    ULONG       Version;
    /** Size required for the port attributes. */
    ULONG       Size;

}   IB_QUERY_PORT_OUT;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_defs.h
Type: application/octet-stream
Size: 6659 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050907/c30f3f65/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_verbs.h
Type: application/octet-stream
Size: 6783 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050907/c30f3f65/attachment-0001.obj>


More information about the ofw mailing list