[OFW][patch] partition - amended

Slava Strebkov slavas at voltaire.com
Tue Apr 15 01:45:52 PDT 2008


Hi Fab,

I'm thinking about writing additional application that will issue an
IOCTL to bus driver for additional PDO creation. Application will get
port number and pkey value as input parameters.

This will prevent existed IPoIB adapter from being disabled (current
scenario). "Any" number of additional PDOs per port can be created.

Slava

-----Original Message-----
From: Fab Tillier [mailto:ftillier at windows.microsoft.com] 
Sent: Monday, April 14, 2008 10:47 PM
To: Slava Strebkov; ofw at lists.openfabrics.org
Subject: RE: [OFW][patch] partition - amended

Hi Slava,

There was discussion at the OpenFabrics Workshop about adding support to
the bus driver for creating 'arbitrary' PDOs.  There are also issues
with the way the IOC PnP manager in IBAL works when the drivers are
deployed in a large fabric, where the sweep/scan can overwhelm the SM
and leave the fabric unusable for significant periods of time.

It seems that your work could align nicely with fixing the above, or at
least create a foundation that can be extended.  To me it seems odd that
IPoIB would load, and then request that the bus driver create more PDOs
for itself.  It would be cleaner for the bus driver to read the
configuration (from the registry for example), and have some kind of
trigger to cause it to re-read the configuration (an IOCTL for example).

>Hi,
>Attached patch is an implementation of partitions. It's a working
solution.
>Default installation creates two IPoIB adapters with default pkeys.

Two per HCA port?  Or two on a two-port HCA?

>New pkey (and IPoIB adapter instance) may be created by using [Device
Manager]
>IPoIB [Advanced properties], Partition Key.
>A value in hex format will define a new pkey for adapter.

So by default, you have an IPoIB instance on the default partition, and
you can provide a pkey in the advanced properties (one or more?) to have
more PDOs created?  Can you provide more pkeys in every 'derived'
instance too, or only on the 'master'?  What happens if multiple
adapters specify the same PKEY?  There doesn't seem to be any checks
that the same pkey isn't specified multiple times for the same port.

>
>
>===================================================================
>--- core/bus/kernel/bus_port_mgr.c         (revision 1049)
>+++ core/bus/kernel/bus_port_mgr.c       (working copy)
>@@ -62,7 +62,7 @@
> {
>            bus_pdo_ext_t                           pdo;
>
>-           net64_t
port_guid;
>+          port_guid_pkey                          port_guid;

Nit: name this 'port' rather than 'port_guid' so that you have port.guid
and port.pkey in the code, rather than port_guid.guid and
port_guid.pkey.

>            uint32_t                                     n_port;
>
>            /* Number of references on the upper interface. */
>@@ -657,7 +656,85 @@
>            return IB_SUCCESS;
> }
>
>+/*********************************************************************
***************
>+* name            :           port_mgr_pkey_add
>+*           creates pdo for each pkey value in pkey_array
>+* input :           p_pdo, pkey_array , pkey_count
>+* output:          none
>+* return:          NTSTATUS
>+**********************************************************************
***************/
>+NTSTATUS port_mgr_pkey_add(DEVICE_OBJECT *p_default_pdo , const
uint16_t *pkey_array, uint16_t pkey_count)
>+{
>+          NTSTATUS                    status;
>+          uint16_t i;
>+          DEVICE_OBJECT *p_pdo[MAX_NUM_PKEY];
>+          bus_port_ext_t   *p_port_ext, *default_port_ext;
>+          CL_ASSERT(pkey_array);
>+          CL_ASSERT(pkey_count <= MAX_NUM_PKEY);
>
>+          BUS_ENTER( BUS_DBG_PNP );
>+
>+          default_port_ext =
(bus_port_ext_t*)p_default_pdo->DeviceExtension;
>+
>+          for(i = 0; i < pkey_count; i++)
>+          {

You need to make sure that you don't already have an identical PDO.

>+                      /* Create the PDO for the new port device. */
>+                                  status = IoCreateDevice(
bus_globals.p_driver_obj, sizeof(bus_port_ext_t),
>+                                              NULL,
FILE_DEVICE_CONTROLLER,
>Index: inc/kernel/iba/ipoib_ifc.h
>===================================================================
>--- inc/kernel/iba/ipoib_ifc.h        (revision 1049)
>+++ inc/kernel/iba/ipoib_ifc.h     (working copy)
>@@ -62,6 +62,30 @@
> * DESCRIPTION
> *          IPoIB interface datat.
> *
>+*         The port guid combined from guid + PKEY
>+*/
>+typedef struct _port_guid_pkey
>+{
>+          net64_t             guid;
>+          ib_net16_t         pkey;
>+} port_guid_pkey;
>+
>+#define           MAX_NUM_PKEY          16
>+
>+typedef struct _pkey_array
>+{
>+          DEVICE_OBJECT *p_def_pkey_pdo;
>+          ib_net16_t           pkey_num;
>+          ib_net16_t    pkey_array[MAX_NUM_PKEY];
>+}pkey_array_t;
>+
>+/* Internal dev ctl code */
>+#define IOCTL_CREATE_PDO\
>+   CTL_CODE(ALDEV_KEY, 0x005, METHOD_BUFFERED,\
>+         FILE_READ_DATA | FILE_WRITE_DATA )

The docs for CTL_CODE state that you should pick a function code >=
0x800 as values less than 0x800 are reserved.

>+
>+
>+/*
> *          The ipoib_ifc_data_t structure
> *
> * SYNOPSIS
>@@ -69,7 +93,7 @@
> typedef struct _ipoib_ifc_data
> {
>            net64_t
ca_guid;
>-           net64_t
port_guid;
>+          port_guid_pkey
port_guid;

Nit: name this 'port' rather than 'port_guid' so that you have port.guid
and port.pkey in the code, rather than port_guid.guid and
port_guid.pkey.

>            uint8_t
port_num;
>
> }          ipoib_ifc_data_t;
>Index: ulp/ipoib/kernel/ipoib_driver.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_driver.c (revision 1049)
>+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)
>@@ -48,6 +48,7 @@
> #include <initguid.h>
> #include <iba/ipoib_ifc.h>
>
>+#include "al_dev.h"
>
> #if defined(NDIS50_MINIPORT)
> #define MAJOR_NDIS_VERSION 5
>@@ -225,6 +226,8 @@
> __ipoib_read_registry(
>            IN
UNICODE_STRING* const                     p_registry_path );
>
>+static NDIS_STATUS
>+ipoib_send_pkey_req(ipoib_adapter_t *p_adapter, pkey_array_t
*pkey_arr);
>
> //! Standard Windows Device Driver Entry Point
> /*! DriverEntry is the first routine called after a driver is loaded,
and
>@@ -397,6 +400,121 @@
> }
>
>
>+/*********************************************************************
***************
>+* name            :           __prepare_pKey_array
>+*           parses registry string and exrtacts pkey value(s) from it.
>+* input :           UNICODE_STRING *str
>+* output:          pkey_array
>+* return:          uint16_t number of pkey(s) found
>+**********************************************************************
***************/
>+static uint16_t __prepare_pKey_array(IN const UNICODE_STRING *str, OUT
uint16_t *pkey_array)
>+{
>+          uint16_t i, num_pKeys;
>+          NTSTATUS status;
>+          ANSI_STRING ansi_str;
>+
>+          IPOIB_ENTER( IPOIB_DBG_INIT );
>+
>+          CL_ASSERT(pkey_array);
>+          CL_ASSERT(str);
>+
>+          num_pKeys = 0;
>+
>+          status = RtlUnicodeStringToAnsiString(&ansi_str,str,TRUE);
>+          if(! NT_SUCCESS(status))
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
>+                                  ("RtlUnicodeStringToAnsiString
returned 0x%.8x\n", status) );
>+                      return 0;
>+          }
>+
>+          for (i = 0; (i < ansi_str.MaximumLength) && (num_pKeys <
MAX_NUM_PKEY) ; i++)
>+    {
>+                      switch(ansi_str.Buffer[i])
>+                      {
>+                      case '0':
>+                                  if (((i+1) < ansi_str.Length) && ( (
ansi_str.Buffer[i+1] == 'x') || ( ansi_str.Buffer[i+1] == 'X')))
>+                                              break;
>+                                  else
>+                                  {
>+                                              pkey_array[num_pKeys] =
\
>+                                              (pkey_array[num_pKeys]
<< 4)| 0;
>+                                              break;
>+                                  }
>+
>+                      case 'x':
>+                      case 'X':
>+                                  break;
>+
>+                      case ',':
>+                                  if(i > 3)
>+                                              num_pKeys++;
>+                                  break;
>+
>+                      case 'A':
>+                      case 'a':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xA;
>+                                  break;
>+
>+                      case 'B':
>+                      case 'b':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xB;
>+                                  break;
>+
>+                      case 'C':
>+                      case 'c':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xC;
>+                                  break;
>+
>+                      case 'D':
>+                      case 'd':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xD;
>+                                  break;
>+
>+                      case 'E':
>+                      case 'e':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xE;
>+                                  break;
>+
>+                      case 'F':
>+                      case 'f':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xF;
>+                                  break;
>+
>+                      case '1':
>+                      case '2':
>+                      case '3':
>+                      case '4':
>+                      case '5':
>+                      case '6':
>+                      case '7':
>+                      case '8':
>+                      case '9':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)|
(ansi_str.Buffer[i] - '0');
>+                                  break;
>+
>+                      case '\0':
>+                                  if(i > 3)
>+                                              num_pKeys++;
>+                                  break;
>+
>+                      default:
>+                                  break;
>+
>+                      }
>+          }
>+
>+    RtlFreeAnsiString(&ansi_str);
>+          IPOIB_EXIT( IPOIB_DBG_INIT );
>+          return num_pKeys;
>+}
> NDIS_STATUS
> ipoib_get_adapter_params(
>            IN                                             NDIS_HANDLE*
const                           wrapper_config_context,
>@@ -408,6 +526,7 @@
>            NDIS_STRING
keyword;
>            PUCHAR
mac;
>            UINT
len;
>+          pkey_array_t
pkey_arr;
>
>            IPOIB_ENTER( IPOIB_DBG_INIT );
>
>@@ -559,13 +678,91 @@
>                        }
>            }
>
>+          /* Optional: Partition Key values to be used with this port.
*/
>+          RtlZeroMemory(&pkey_arr,sizeof(pkey_arr));
>+          RtlInitUnicodeString( &keyword, L"PartitionKey" );
>+          NdisReadConfiguration(
>+                      &status, &p_param, h_config, &keyword,
NdisParameterString );
>+          if( status != NDIS_STATUS_SUCCESS )
>+          {
>+                      IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT,
>+                                  ("port %04X Failed to read Pkey
values configuration\n",p_adapter->guids.port_guid.pkey));
>+          }
>+          else
>+                      pkey_arr.pkey_num =
__prepare_pKey_array(&p_param->ParameterData.StringData,(uint16_t*)pkey_
arr.pkey_array);
>+
>+          if( pkey_arr.pkey_num)
>+                      ipoib_send_pkey_req(p_adapter, &pkey_arr);
>            NdisCloseConfiguration( h_config );
>
>            IPOIB_EXIT( IPOIB_DBG_INIT );
>            return NDIS_STATUS_SUCCESS;
> }
>
>+/*********************************************************************
*****
>+* name            :           ipoib_send_pkey_req
>+*                                 creates pdo(s) for non-default
pkey(s)
>+* input :           p_adapter, pkey_arr
>+* output:          none
>+* return:          NDIS_STATUS
>+**********************************************************************
****/
>+NDIS_STATUS
>+ipoib_send_pkey_req(ipoib_adapter_t *p_adapter, pkey_array_t
*pkey_arr)
>+{
>+          NTSTATUS                                status;
>+          DEVICE_OBJECT                     *p_pdo;
>+          IRP
*p_irp;
>+          KEVENT                                               event;
>+          IO_STATUS_BLOCK                  io_status;
>+          NDIS_STRING                           dev_name;
>+          PFILE_OBJECT                        fobject;
>
>+          IPOIB_ENTER( IPOIB_DBG_INIT );
>+
>+          if(p_adapter->guids.port_guid.pkey != IB_DEFAULT_PKEY)
>+          {
>+                      IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT,
>+                                  ("Only device with default pkey can
create partition VLAN\n"));
>+                      return STATUS_INVALID_PARAMETER;
>+          }
>+
>+          RtlInitUnicodeString( &dev_name, AL_DEVICE_NAME );
>+
>+    status = IoGetDeviceObjectPointer(& dev_name,
>+                                        FILE_GENERIC_READ,
>+                                      & fobject,
>+                                      & p_pdo);

You should be able to send this down to IPoIB's PDO (same place you send
the IRP_MN_QUERY_INTERFACE request for the IBAL interface.  You'll need
to add IOCTL handling to the PDO, but that would be cleaner.

>+          if(! NT_SUCCESS(status))
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
>+                                  ("IoGetDeviceObjectPointer returned
%08X\n",status) );
>+                      return status;
>+          }
>+
>+          NdisMGetDeviceProperty( p_adapter->h_adapter,
&pkey_arr->p_def_pkey_pdo, NULL, NULL, NULL, NULL );
>+          KeInitializeEvent( &event, NotificationEvent, FALSE );
>+
>+          p_irp = IoBuildDeviceIoControlRequest( IOCTL_CREATE_PDO,
p_pdo,
>+                      pkey_arr, sizeof(pkey_array_t), NULL,0,TRUE,
&event, &io_status );

If you used the p_def_pkey_pdo as the target of the IOCTL, you could
eliminate p_def_pkey_pdo from the pkey array since it's an input to the
IOCTL dispatch routine.

>+
>+          if( !p_irp )
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
>+                                  ("Failed to allocate
IoControlRequest IRP.\n") );
>+                      return STATUS_INSUFFICIENT_RESOURCES;
>+          }
>+
>+          status = IoCallDriver(p_pdo, p_irp);
>+          if (status == STATUS_PENDING)
>+          {
>+                      KeWaitForSingleObject( &event, Executive,
KernelMode,
>+                                  FALSE, NULL );
>+                      status = io_status.Status;
>+          }
>+          IPOIB_EXIT( IPOIB_DBG_INIT );
>+          return status;
>+}
>+
>Index: ulp/ipoib/kernel/ipoib_ibat.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_ibat.c    (revision 1049)
>+++ ulp/ipoib/kernel/ipoib_ibat.c (working copy)
>@@ -140,7 +140,7 @@
>            {
>                        pAdapter = CONTAINING_RECORD( pItem,
ipoib_adapter_t, entry );
>                        pOut->Ports[pOut->NumPorts].CaGuid =
pAdapter->guids.ca_guid;
>-                       pOut->Ports[pOut->NumPorts].PortGuid =
pAdapter->guids.port_guid;
>+                      pOut->Ports[pOut->NumPorts].PortGuid =
pAdapter->guids.port_guid.guid;
>                        pOut->Ports[pOut->NumPorts].PortNum =
pAdapter->guids.port_num;
>                        pOut->NumPorts++;

You should also return partition information via the IBAT IOCTLs or you
will break WSD/ND for any IPoIB instance that's running on a non-default
partition.  This should probably be done as a separate patch, and should
be pretty straight forward.

>
>@@ -394,7 +394,7 @@
>                                    if (!memcmp(
&pIn->Address.Address[12], pAddr->address.as_bytes, IPV4_ADDR_SIZE))
>                                    {
>                                                pOut->Port.CaGuid =
pAdapter->guids.ca_guid;
>-                                               pOut->Port.PortGuid =
pAdapter->guids.port_guid;
>+                                              pOut->Port.PortGuid =
pAdapter->guids.port_guid.guid;
>                                                pOut->Port.PortNum =
pAdapter->guids.port_num;

Same as above here for the pkey being output.

Thanks,
-Fab



More information about the ofw mailing list