[Openib-windows] [RFC] IRP-based verbs
Fab Tillier
ftillier at silverstorm.com
Wed Sep 7 17:40:24 PDT 2005
Hi Folks,
I've put together a couple of definitions for the input and output buffers of the
IOCTLs that perform the following verbs:
- OpenCa
- CloseCa
- QueryCa
- QueryPort
These IOCTLs would be used by both kernel and user-mode clients. The calling mode
of a request can be retrieved from the RequestorMode of the IRP.
A couple of behavioral changes compared to the existing verbs are as follows:
- All requests can be issued at DISPATCH_LEVEL, and are expected to return
STATUS_PENDING if they can't complete immediately. This will allow elimination
of the AL object abstraction that managed async destruction, and all the
complexity that it brought with it. It also gets rid of the destroy callbacks -
the IOCTL won't complete until the operation is complete.
- The HCA will see every client's OpenCa request. Currently, it only sees
user-mode requests (via the um_open_ca handler). This change allows each kernel
client to have its own UAR when using Mellanox HCAs.
- CloseCa will return STATUS_DEVICE_BUSY if there are unreleased resources.
- QueryCa will return as much or as little information as the user desires.
This allows users to query for the items at the beginning of the structure
without getting everything, if that is what they want. Further, the CA and port
attribute structures have been changed to use offsets rather than embedded
pointers. This will make user-mode support a lot simpler, as it avoids
having to do pointer fixups.
- QueryPort is new, similar to what the Linux stack does.
- The CA attributes structure now uses a flag variable rather than a series of
Booleans to report capabilities, similar to the Linux stack.
- The CA and port attribute structures have version fields to allow us to do
versioning. The versions for related structures are expected to stay in sync,
and the port attributes have a version in them to enable the QueryPort
functionality.
- Syntax generally follows what Microsoft does in terms of naming, types, etc.
Note that I put together two new files for this so as to not disturb existing
development. We can either keep this development separate and keep backward
compatibility for a while, or we can evolve the main API instead. I think
separating the files makes sense as kernel and user-mode APIs are going to
diverge. It also allows us to think fresh, rather than try to make things fit
within the potentially constrictive existing design.
I'll be continuing to fill in the IOCTL definitions for the rest of the verbs,
and once those are in place, will start migrating the code base to implement
this. Overall, I expect these changes will allow us to eliminate quite a bit of
code in the access layer and ULPs, as well as enable new capabilities currently
unavailable due to IRQL limitations.
Thoughts and comments welcome.
- Fab
Index: inc\kernel\iba\ib_defs.h
===================================================================
/*
* Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
*
* This software is available to you under the OpenIB.org BSD license
* below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id$
*/
/** @file */
/** Opaque handle types for the verbs resources.
 * Each typedef declares two names: a pointer to an incomplete structure
 * type, and a second pointer type carrying the Microsoft-specific __ptr64
 * qualifier (the *_HANDLE64 names).
 * NOTE(review): the *_HANDLE64 variants are presumably meant to be 8 bytes
 * on every architecture so they can be stored in IOCTL buffers shared by
 * 32-bit and 64-bit clients - confirm against the compiler documentation.
 */
typedef struct _IB_CA *IB_CA_HANDLE, * __ptr64 IB_CA_HANDLE64;
typedef struct _IB_PD *IB_PD_HANDLE, * __ptr64 IB_PD_HANDLE64;
typedef struct _IB_CQ *IB_CQ_HANDLE, * __ptr64 IB_CQ_HANDLE64;
typedef struct _IB_QP *IB_QP_HANDLE, * __ptr64 IB_QP_HANDLE64;
typedef struct _IB_AH *IB_ADDR_HANDLE, * __ptr64 IB_ADDR_HANDLE64;
typedef struct _IB_MR *IB_MR_HANDLE, * __ptr64 IB_MR_HANDLE64;
typedef struct _IB_MW *IB_MW_HANDLE, * __ptr64 IB_MW_HANDLE64;
/** 16-byte GID, accessible as raw bytes or through a unicast or multicast
 * view. All three views overlay the same storage. The union is declared
 * under 1-byte packing, which is restored by the matching pop below.
 * NOTE(review): byte order of the Unicast fields is not stated here;
 * presumably network byte order like the GUIDs - confirm.
 */
#pragma pack(push, 1)
typedef union _IB_GID
{
/** Raw byte view of the whole GID. */
UCHAR Raw[16];
/** Unicast view: two 64-bit halves. */
struct _IB_GID_UNICAST
{
UINT64 Prefix;
UINT64 InterfaceId;
} Unicast;
/** Multicast view: 2 header bytes followed by 14 group ID bytes. */
struct _IB_GID_MULTICAST
{
UCHAR Header[2];
UCHAR RawGroupId[14];
} Multicast;
} IB_GID;
#pragma pack(pop)
/** MTU codes. The number in each enumerator name is the MTU size it stands
 * for; the enumerator value is the compact code (1 through 5).
 */
typedef enum _IB_MTU
{
    IB_MTU_256 = 1,
    IB_MTU_512,     /* 2 */
    IB_MTU_1024,    /* 3 */
    IB_MTU_2048,    /* 4 */
    IB_MTU_4096     /* 5 */
} IB_MTU;
/** Port link state codes, numbered consecutively from 0. */
typedef enum _IB_LINK_STATE
{
    IB_LINK_NO_CHANGE = 0,
    IB_LINK_DOWN,       /* 1 */
    IB_LINK_INIT,       /* 2 */
    IB_LINK_ARMED,      /* 3 */
    IB_LINK_ACTIVE,     /* 4 */
    IB_LINK_ACT_DEFER   /* 5 */
} IB_LINK_STATE;
/** Link width codes. Each width occupies its own bit position. */
typedef enum _IB_LINK_WIDTH
{
    IB_LINK_WIDTH_1X  = (1 << 0),
    IB_LINK_WIDTH_4X  = (1 << 1),
    IB_LINK_WIDTH_8X  = (1 << 2),
    IB_LINK_WIDTH_12X = (1 << 3)
} IB_LINK_WIDTH;
/** Link speed codes. Each speed occupies its own bit position. */
typedef enum _IB_LINK_SPEED
{
    IB_LINK_SPEED_2_5_GBS = (1 << 0),
    IB_LINK_SPEED_5_GBS   = (1 << 1),
    IB_LINK_SPEED_10_GBS  = (1 << 2)
} IB_LINK_SPEED;
/** Port capability bits, OR'ed together in IB_PORT_ATTR::CapabilitiesFlags.
 * Bit 0 and bits 13-15 are not assigned by this enumeration.
 */
enum IB_PORT_CAP_FLAGS
{
    IB_PORT_FLAG_IS_SM                     = 0x00000002,    /* bit 1 */
    IB_PORT_FLAG_NOTICE_SUP                = 0x00000004,    /* bit 2 */
    IB_PORT_FLAG_TRAP_SUP                  = 0x00000008,    /* bit 3 */
    IB_PORT_FLAG_OPT_IPD_SUP               = 0x00000010,    /* bit 4 */
    IB_PORT_FLAG_APM_SUP                   = 0x00000020,    /* bit 5 */
    IB_PORT_FLAG_SL_MAP_SUP                = 0x00000040,    /* bit 6 */
    IB_PORT_FLAG_MKEY_NVRAM                = 0x00000080,    /* bit 7 */
    IB_PORT_FLAG_PKEY_NVRAM                = 0x00000100,    /* bit 8 */
    IB_PORT_FLAG_LED_INFO_SUP              = 0x00000200,    /* bit 9 */
    IB_PORT_FLAG_IS_SM_DISABLED            = 0x00000400,    /* bit 10 */
    IB_PORT_FLAG_SYS_IMAGE_GUID_SUP        = 0x00000800,    /* bit 11 */
    IB_PORT_FLAG_PKEY_SW_EXT_PORT_TRAP_SUP = 0x00001000,    /* bit 12 */
    IB_PORT_FLAG_CM_SUP                    = 0x00010000,    /* bit 16 */
    IB_PORT_FLAG_SNMP_TUNNEL_SUP           = 0x00020000,    /* bit 17 */
    IB_PORT_FLAG_REINIT_SUP                = 0x00040000,    /* bit 18 */
    IB_PORT_FLAG_DEVICE_MGMT_SUP           = 0x00080000,    /* bit 19 */
    IB_PORT_FLAG_VENDOR_CLASS_SUP          = 0x00100000,    /* bit 20 */
    IB_PORT_FLAG_DR_NOTICE_SUP             = 0x00200000,    /* bit 21 */
    IB_PORT_FLAG_CAP_MASK_NOTICE_SUP       = 0x00400000,    /* bit 22 */
    IB_PORT_FLAG_BOOT_MGMT_SUP             = 0x00800000,    /* bit 23 */
    IB_PORT_FLAG_LINK_LATENCY_SUP          = 0x01000000,    /* bit 24 */
    IB_PORT_FLAG_CLIENT_REG_SUP            = 0x02000000     /* bit 25 */
};
/** Init type bits; each enumerator is a distinct single-bit mask.
 * NOTE(review): presumably the values reported in
 * IB_PORT_ATTR::InitTypeReply - confirm.
 */
enum _IB_INIT_TYPE
{
    IB_INIT_TYPE_NO_LOAD            = 0x1,
    IB_INIT_TYPE_PRESERVE_CONTENT   = 0x2,
    IB_INIT_TYPE_PRESERVE_PRESENCE  = 0x4,
    IB_INIT_TYPE_DO_NOT_RESUSCITATE = 0x8
};
/** Attributes of a single port, declared with 8-byte alignment.
 * The first two fields (Version, Size) match IB_QUERY_PORT_OUT, so a
 * buffer of this type can be used directly with IOCTL_IB_QUERY_PORT.
 * Variable-length tables are located through byte offsets rather than
 * embedded pointers.
 */
typedef __declspec(align(8)) struct _IB_PORT_ATTR
{
/** Version of the port attributes. */
ULONG Version;
/** Size required to hold the full port attributes. On input to
 * IOCTL_IB_QUERY_PORT this position is read as IB_QUERY_PORT_IN::PortNum.
 */
ULONG Size;
/** GUIDs are in network byte order. */
UINT64 PortGuid;
IB_LINK_STATE LinkState;
IB_LINK_WIDTH LinkWidth;
IB_LINK_SPEED LinkSpeed;
IB_MTU Mtu;
UINT64 MaxMsgSize;
/** NOTE(review): presumably formed from _IB_INIT_TYPE values - confirm. */
ULONG InitTypeReply;
/** Port capabilities, formed by OR'ing IB_PORT_CAP_FLAGS values. */
ULONG CapabilitiesFlags;
/** NOTE(review): presumably the entry counts of the GID and PKEY tables
 * located by GidTblOffset and PkeyTblOffset - confirm.
 */
USHORT NumGids;
USHORT NumPkeys;
/** Offsets are from beginning of structure. */
ULONG NextPortAttrOffset;
ULONG GidTblOffset;
ULONG PkeyTblOffset;
USHORT PkeyViolations;
USHORT QkeyViolations;
USHORT BaseLid;
USHORT SmLid;
USHORT MaxVls;
UCHAR Lmc;
UCHAR PortNum;
/** The maximum expected subnet propagation delay to reach any port on
 * the subnet. This value also determines the rate at which traps can
 * be generated from this node. The value is expressed as
 * timeout = 4.096 microseconds * 2^subnet_timeout
 */
UCHAR SubnetTimeout;
UCHAR SmSvcLvl;
} IB_PORT_ATTR;
/** CA capability bits, OR'ed together in IB_CA_ATTR::CapabilitiesFlags.
 * Bits 0 through 13 are assigned consecutively.
 */
enum IB_CA_CAP_FLAGS
{
    IB_CA_FLAG_RESIZE_MAX_WR = (1<<0),  /* separator was missing here */
    IB_CA_FLAG_BAD_PKEY_CNTR = (1<<1),
    IB_CA_FLAG_BAD_QKEY_CNTR = (1<<2),
    IB_CA_FLAG_AUTO_PATH_MIG = (1<<3),
    IB_CA_FLAG_CHANGE_PHY_PORT = (1<<4),
    IB_CA_FLAG_AH_PORT_ENFORCE = (1<<5),
    IB_CA_FLAG_CURR_QP_STATE_MOD = (1<<6),
    IB_CA_FLAG_SHUTDOWN_PORT = (1<<7),
    IB_CA_FLAG_INIT_TYPE = (1<<8),
    IB_CA_FLAG_PORT_ACTIVE_EVENT = (1<<9),
    IB_CA_FLAG_SYS_IMAGE_GUID = (1<<10),
    IB_CA_FLAG_RC_RNR_NAK_GEN = (1<<11),
    IB_CA_FLAG_N_NOTIFY_CQ = (1<<12),
    IB_CA_FLAG_SRQ_RESIZE = (1<<13)
};
/** Atomic operation capability level, reported in IB_CA_ATTR::AtomicCap. */
typedef enum _IB_ATOMIC_CAP
{
    IB_ATOMIC_NONE   = 0,
    IB_ATOMIC_LOCAL  = 1,
    IB_ATOMIC_GLOBAL = 2
} IB_ATOMIC_CAP;
/** Attributes of a channel adapter, declared with 8-byte alignment.
 * The first two fields (Version, Size) match IB_QUERY_CA_OUT, so a buffer
 * of this type can be used directly with IOCTL_IB_QUERY_CA. Port
 * attributes are located through a byte offset rather than an embedded
 * pointer.
 */
typedef __declspec(align(8)) struct _IB_CA_ATTR
{
/** Version of the CA attributes. */
ULONG Version;
/** Size required to hold the full CA attributes. */
ULONG Size;
/** GUIDs are in network byte order. */
UINT64 CaGuid;
UINT64 SystemImageGuid;
UINT32 VendorId;
UINT16 DeviceId;
UINT16 DeviceRev;
UINT32 FwVersion;
/** Byte offset from the beginning of this structure to the first
 * IB_PORT_ATTR structure for the HCA.
 */
ULONG PortAttrOffset;
UINT8 NumPorts;
UINT8 MaxQpReadAtomic;
UINT8 MaxReadAtomic;
UINT8 MaxQpInitReadAtomic;
/** Each bit in PageSizeSupport indicates a 2^n page size supported
 * by the HCA, where bit locations are 1-based. The minimum page
 * size that can be supported is thus 2 bytes, and the largest 2^64.
 */
UINT64 PageSizeSupport;
/** HCA capabilities, formed by OR'ing IB_CA_CAP_FLAGS values. */
ULONG CapabilitiesFlags;
IB_ATOMIC_CAP AtomicCap;
ULONG MaxQp;
ULONG MaxQpWr;
ULONG MaxQpSge;
ULONG MaxCq;
ULONG MaxCqe;
ULONG MaxPd;
ULONG MaxAh;
ULONG MaxMr;
UINT64 MaxMrSize;
ULONG MaxMw;
ULONG MaxMcastGrp;
ULONG MaxMcastQp;
ULONG MaxMcastQpPerGrp;
ULONG MaxSrq;
ULONG MaxSrqWr;
ULONG MaxSrqSge;
UINT16 MaxPKeys;
UINT8 LocalAckDelay;
} IB_CA_ATTR;
Index: inc\kernel\iba\ib_verbs.h
===================================================================
/*
* Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
*
* This software is available to you under the OpenIB.org BSD license
* below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id$
*/
/** @file */
#include <ntddk.h>
#include <ib_defs.h>
/** Builds the I/O control code for a verbs function.
 *
 * code
 *   Function index, one of the _IB_IOCTL_FUNC values. It is offset by
 *   0x800 because function codes below 0x800 are reserved by Microsoft.
 *
 * access
 *   Required access for the request, as accepted by CTL_CODE
 *   (FILE_ANY_ACCESS, FILE_READ_DATA, FILE_WRITE_DATA, or both data
 *   flags OR'ed together).
 *
 * All verbs IOCTLs use METHOD_BUFFERED. FILE_DEVICE_INFINIBAND must be
 * provided by the DDK headers or defined by the project. Both arguments
 * are parenthesized so that expression arguments expand safely.
 */
#define IB_IOCTL( code, access ) CTL_CODE( \
    FILE_DEVICE_INFINIBAND, (0x800 + (code)), METHOD_BUFFERED, (access) )
/** Function indices passed as the code argument of IB_IOCTL.
 * Values are consecutive, starting at 0.
 */
enum _IB_IOCTL_FUNC
{
    IB_OPEN_CA    = 0,
    IB_CLOSE_CA   = 1,
    IB_QUERY_CA   = 2,
    IB_QUERY_PORT = 3
};
/** IB_OPEN_CA
 * Open a CA for use by a client. Allocates all per-client information
 * needed.
 *
 * The IRP input and output buffers overlap.
 *
 * <vendor-defined>
 *   [in] The input buffer is hardware vendor specific. For kernel
 *   clients it is always NULL. For user-mode clients it is NULL if
 *   there is no user-mode verb provider or the provider failed to
 *   load. Otherwise, the structure is defined by the user-mode verb
 *   provider.
 *
 * IB_CA_HANDLE64
 *   [out] The output buffer points to the location at which to store
 *   the CA handle. This location is 8 bytes long on all architectures.
 *
 * STATUS_SUCCESS
 *   The operation completed successfully.
 *
 * STATUS_PENDING
 *   The operation will be completed at a later time.
 *
 * Required access is read plus write. FILE_ALL_ACCESS cannot be used
 * here: CTL_CODE reserves only two bits for the access value, and the
 * wider FILE_ALL_ACCESS mask would spill into the device type field.
 */
#define IOCTL_IB_OPEN_CA IB_IOCTL( IB_OPEN_CA, FILE_READ_DATA | FILE_WRITE_DATA )
/** IB_CLOSE_CA
 * Close a CA after use by a client. Frees all per-client information.
 *
 * IB_CA_HANDLE64
 *   [in] Handle returned by a previous call to IB_OPEN_CA.
 *
 * There are no output parameters.
 *
 * STATUS_SUCCESS
 *   The operation completed successfully.
 *
 * STATUS_PENDING
 *   The operation will be completed at a later time.
 *
 * STATUS_DEVICE_BUSY
 *   The operation could not complete because the CA instance has
 *   outstanding resources that have not been freed.
 *
 * Required access is read plus write. FILE_ALL_ACCESS cannot be used
 * here: CTL_CODE reserves only two bits for the access value, and the
 * wider FILE_ALL_ACCESS mask would spill into the device type field.
 */
#define IOCTL_IB_CLOSE_CA IB_IOCTL( IB_CLOSE_CA, FILE_READ_DATA | FILE_WRITE_DATA )
/** IB_QUERY_CA
 * Query the attributes of a channel adapter.
 *
 * The IRP input and output buffers overlap. The request is defined with
 * FILE_READ_DATA as its required access.
 *
 * IB_QUERY_CA_IN
 *   [in] Provides the version of the CA attributes requested.
 *
 * IB_QUERY_CA_OUT/IB_CA_ATTR
 *   [out] Returns the CA attributes.
 *
 * STATUS_SUCCESS
 *   The full CA attributes, including port attributes and their GID
 *   and PKEY tables, were successfully copied to the output buffer.
 *   The Information member of the IO_STATUS_BLOCK is updated to
 *   indicate the number of bytes returned.
 *
 * STATUS_PENDING
 *   The operation will be completed at a later time.
 *
 * STATUS_BUFFER_OVERFLOW
 *   Partial attributes were returned; the full attributes did not fit
 *   in the space provided. The size required to get the full attributes
 *   is returned in IB_QUERY_CA_OUT::Size (or IB_CA_ATTR::Size). The
 *   Information member of the IO_STATUS_BLOCK is updated to indicate
 *   the number of bytes returned.
 *
 * STATUS_BUFFER_TOO_SMALL
 *   The buffer was too small to return any information. At a minimum,
 *   the buffer must be the size of the IB_QUERY_CA_OUT structure to
 *   allow the required size to be returned.
 *
 * STATUS_INVALID_PARAMETER
 *   An input or output buffer was not provided.
 *
 * The expected usage is for clients to use an IB_CA_ATTR buffer as both
 * input and output parameters. On input, only the IB_CA_ATTR::Version
 * field must be specified. Clients can request only a subset of the CA
 * attributes by sizing the buffer to cover just the leading fields they
 * need.
 */
#define IOCTL_IB_QUERY_CA IB_IOCTL( IB_QUERY_CA, FILE_READ_DATA )
/** IOCTL_IB_QUERY_CA Input Parameters
 * Occupies the same position as the Version field at the start of
 * IB_CA_ATTR, so an IB_CA_ATTR buffer can serve as the input.
 */
typedef struct _IB_QUERY_CA_IN
{
/** Version of CA attributes requested. */
ULONG Version;
} IB_QUERY_CA_IN;
/** IOCTL_IB_QUERY_CA Minimal Output Parameters
 * Matches the first two fields of the IB_CA_ATTR structure. This is the
 * smallest output buffer the request accepts; it lets the caller learn
 * the size needed for the full attributes.
 */
typedef struct _IB_QUERY_CA_OUT
{
/** Version of CA attributes returned. */
ULONG Version;
/** Size required for the CA attributes. */
ULONG Size;
} IB_QUERY_CA_OUT;
/** IB_QUERY_PORT
 * Query the attributes of a channel adapter's port.
 *
 * The IRP input and output buffers overlap. The request is defined with
 * FILE_READ_DATA as its required access.
 *
 * IB_QUERY_PORT_IN
 *   [in] Provides the version of the port attributes requested as
 *   well as the port number whose attributes to return.
 *
 * IB_QUERY_PORT_OUT/IB_PORT_ATTR
 *   [out] Returns the port attributes.
 *
 * STATUS_SUCCESS
 *   The full port attributes, including GID and PKEY tables, were
 *   successfully copied to the output buffer. The Information member
 *   of the IO_STATUS_BLOCK is updated to indicate the number of bytes
 *   returned.
 *
 * STATUS_PENDING
 *   The operation will be completed at a later time.
 *
 * STATUS_BUFFER_OVERFLOW
 *   Partial attributes were returned; the full attributes did not fit
 *   in the space provided. The size required to get the full attributes
 *   is returned in IB_QUERY_PORT_OUT::Size (or IB_PORT_ATTR::Size). The
 *   Information member of the IO_STATUS_BLOCK is updated to indicate the
 *   number of bytes returned.
 *
 * STATUS_BUFFER_TOO_SMALL
 *   The buffer was too small to return any information. At a minimum,
 *   the buffer must be the size of the IB_QUERY_PORT_OUT structure to
 *   allow the required size to be returned.
 *
 * STATUS_INVALID_PARAMETER
 *   An input or output buffer was not provided.
 *
 * The expected usage is for clients to use an IB_PORT_ATTR buffer as both
 * input and output parameters. On input, the IB_PORT_ATTR::Version
 * field must be specified, and the desired port number must be written
 * to IB_PORT_ATTR::Size, which occupies the same position in the buffer
 * as IB_QUERY_PORT_IN::PortNum. On output that field is overwritten
 * with the required size. Clients can request only a subset of the port
 * attributes by sizing the buffer to cover just the leading fields they
 * need.
 */
#define IOCTL_IB_QUERY_PORT IB_IOCTL( IB_QUERY_PORT, FILE_READ_DATA )
/** IOCTL_IB_QUERY_PORT Input Parameters
 * The two fields occupy the same positions as IB_PORT_ATTR::Version and
 * IB_PORT_ATTR::Size, so an IB_PORT_ATTR buffer can serve as the input.
 */
typedef struct _IB_QUERY_PORT_IN
{
/** Version of port attributes requested. */
ULONG Version;
/** 1-based port number to query. */
ULONG PortNum;
} IB_QUERY_PORT_IN;
/** IOCTL_IB_QUERY_PORT Minimal Output Parameters
 * Matches the first two fields of the IB_PORT_ATTR structure. This is the
 * smallest output buffer the request accepts; it lets the caller learn
 * the size needed for the full attributes.
 */
typedef struct _IB_QUERY_PORT_OUT
{
/** Version of port attributes returned. */
ULONG Version;
/** Size required for the port attributes. */
ULONG Size;
} IB_QUERY_PORT_OUT;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_defs.h
Type: application/octet-stream
Size: 6659 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050907/c30f3f65/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_verbs.h
Type: application/octet-stream
Size: 6783 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050907/c30f3f65/attachment-0001.obj>
More information about the ofw
mailing list