[OFW][patch] partition - amended

Fab Tillier ftillier at windows.microsoft.com
Mon Apr 14 12:47:11 PDT 2008


Hi Slava,

There was discussion at the OpenFabrics Workshop about adding support to the bus driver for creating 'arbitrary' PDOs.  There are also issues with the way the IOC PnP manager in IBAL works when the drivers are deployed in a large fabric, where the sweep/scan can overwhelm the SM and leave the fabric unusable for significant periods of time.

It seems that your work could align nicely with fixing the above, or at least create a foundation that can be extended.  To me it seems odd that IPoIB would load, and then request that the bus driver create more PDOs for itself.  It would be cleaner for the bus driver to read the configuration (from the registry for example), and have some kind of trigger to cause it to re-read the configuration (an IOCTL for example).

>Hi,
>Attached patch is an implementation of partitions. It's a working solution.
>Default installation creates two IPoIB adapters with default pkeys.

Two per HCA port?  Or two on a two-port HCA?

>New pkey (and IPoIB adapter instance) may be created by using [Device Manager]
>IPoIB [Advanced properties], Partition Key.
>A value in hex format will define a new pkey for adapter.

So by default, you have an IPoIB instance on the default partition, and you can provide a pkey in the advanced properties (one or more?) to have more PDOs created?  Can you provide more pkeys in every 'derived' instance too, or only on the 'master'?  What happens if multiple adapters specify the same PKEY?  There doesn't seem to be any checks that the same pkey isn't specified multiple times for the same port.

>
>
>===================================================================
>--- core/bus/kernel/bus_port_mgr.c         (revision 1049)
>+++ core/bus/kernel/bus_port_mgr.c       (working copy)
>@@ -62,7 +62,7 @@
> {
>            bus_pdo_ext_t                           pdo;
>
>-           net64_t                                                 port_guid;
>+          port_guid_pkey                          port_guid;

Nit: name this 'port' rather than 'port_guid' so that you have port.guid and port.pkey in the code, rather than port_guid.guid and port_guid.pkey.

>            uint32_t                                     n_port;
>
>            /* Number of references on the upper interface. */
>@@ -657,7 +656,85 @@
>            return IB_SUCCESS;
> }
>
>+/************************************************************************************
>+* name            :           port_mgr_pkey_add
>+*           creates pdo for each pkey value in pkey_array
>+* input :           p_pdo, pkey_array , pkey_count
>+* output:          none
>+* return:          NTSTATUS
>+*************************************************************************************/
>+NTSTATUS port_mgr_pkey_add(DEVICE_OBJECT *p_default_pdo , const uint16_t *pkey_array, uint16_t pkey_count)
>+{
>+          NTSTATUS                    status;
>+          uint16_t i;
>+          DEVICE_OBJECT *p_pdo[MAX_NUM_PKEY];
>+          bus_port_ext_t   *p_port_ext, *default_port_ext;
>+          CL_ASSERT(pkey_array);
>+          CL_ASSERT(pkey_count <= MAX_NUM_PKEY);
>
>+          BUS_ENTER( BUS_DBG_PNP );
>+
>+          default_port_ext = (bus_port_ext_t*)p_default_pdo->DeviceExtension;
>+
>+          for(i = 0; i < pkey_count; i++)
>+          {

You need to make sure that you don't already have an identical PDO.

>+                      /* Create the PDO for the new port device. */
>+                                  status = IoCreateDevice( bus_globals.p_driver_obj, sizeof(bus_port_ext_t),
>+                                              NULL, FILE_DEVICE_CONTROLLER,
>Index: inc/kernel/iba/ipoib_ifc.h
>===================================================================
>--- inc/kernel/iba/ipoib_ifc.h        (revision 1049)
>+++ inc/kernel/iba/ipoib_ifc.h     (working copy)
>@@ -62,6 +62,30 @@
> * DESCRIPTION
> *          IPoIB interface datat.
> *
>+*         The port guid combined from guid + PKEY
>+*/
>+typedef struct _port_guid_pkey
>+{
>+          net64_t             guid;
>+          ib_net16_t         pkey;
>+} port_guid_pkey;
>+
>+#define           MAX_NUM_PKEY          16
>+
>+typedef struct _pkey_array
>+{
>+          DEVICE_OBJECT *p_def_pkey_pdo;
>+          ib_net16_t           pkey_num;
>+          ib_net16_t    pkey_array[MAX_NUM_PKEY];
>+}pkey_array_t;
>+
>+/* Internal dev ctl code */
>+#define IOCTL_CREATE_PDO\
>+   CTL_CODE(ALDEV_KEY, 0x005, METHOD_BUFFERED,\
>+         FILE_READ_DATA | FILE_WRITE_DATA )

The docs for CTL_CODE state that you should pick a function code >= 0x800 as values less than 0x800 are reserved.

>+
>+
>+/*
> *          The ipoib_ifc_data_t structure
> *
> * SYNOPSIS
>@@ -69,7 +93,7 @@
> typedef struct _ipoib_ifc_data
> {
>            net64_t                                                             ca_guid;
>-           net64_t                                                             port_guid;
>+          port_guid_pkey                                      port_guid;

Nit: name this 'port' rather than 'port_guid' so that you have port.guid and port.pkey in the code, rather than port_guid.guid and port_guid.pkey.

>            uint8_t                                                              port_num;
>
> }          ipoib_ifc_data_t;
>Index: ulp/ipoib/kernel/ipoib_driver.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_driver.c (revision 1049)
>+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)
>@@ -48,6 +48,7 @@
> #include <initguid.h>
> #include <iba/ipoib_ifc.h>
>
>+#include "al_dev.h"
>
> #if defined(NDIS50_MINIPORT)
> #define MAJOR_NDIS_VERSION 5
>@@ -225,6 +226,8 @@
> __ipoib_read_registry(
>            IN                                             UNICODE_STRING* const                     p_registry_path );
>
>+static NDIS_STATUS
>+ipoib_send_pkey_req(ipoib_adapter_t *p_adapter, pkey_array_t *pkey_arr);
>
> //! Standard Windows Device Driver Entry Point
> /*! DriverEntry is the first routine called after a driver is loaded, and
>@@ -397,6 +400,121 @@
> }
>
>
>+/************************************************************************************
>+* name            :           __prepare_pKey_array
>+*           parses registry string and exrtacts pkey value(s) from it.
>+* input :           UNICODE_STRING *str
>+* output:          pkey_array
>+* return:          uint16_t number of pkey(s) found
>+*************************************************************************************/
>+static uint16_t __prepare_pKey_array(IN const UNICODE_STRING *str, OUT uint16_t *pkey_array)
>+{
>+          uint16_t i, num_pKeys;
>+          NTSTATUS status;
>+          ANSI_STRING ansi_str;
>+
>+          IPOIB_ENTER( IPOIB_DBG_INIT );
>+
>+          CL_ASSERT(pkey_array);
>+          CL_ASSERT(str);
>+
>+          num_pKeys = 0;
>+
>+          status = RtlUnicodeStringToAnsiString(&ansi_str,str,TRUE);
>+          if(! NT_SUCCESS(status))
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                  ("RtlUnicodeStringToAnsiString returned 0x%.8x\n", status) );
>+                      return 0;
>+          }
>+
>+          for (i = 0; (i < ansi_str.MaximumLength) && (num_pKeys < MAX_NUM_PKEY) ; i++)
>+    {
>+                      switch(ansi_str.Buffer[i])
>+                      {
>+                      case '0':
>+                                  if (((i+1) < ansi_str.Length) && ( ( ansi_str.Buffer[i+1] == 'x') || ( ansi_str.Buffer[i+1] == 'X')))
>+                                              break;
>+                                  else
>+                                  {
>+                                              pkey_array[num_pKeys] = \
>+                                              (pkey_array[num_pKeys] << 4)| 0;
>+                                              break;
>+                                  }
>+
>+                      case 'x':
>+                      case 'X':
>+                                  break;
>+
>+                      case ',':
>+                                  if(i > 3)
>+                                              num_pKeys++;
>+                                  break;
>+
>+                      case 'A':
>+                      case 'a':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xA;
>+                                  break;
>+
>+                      case 'B':
>+                      case 'b':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xB;
>+                                  break;
>+
>+                      case 'C':
>+                      case 'c':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xC;
>+                                  break;
>+
>+                      case 'D':
>+                      case 'd':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xD;
>+                                  break;
>+
>+                      case 'E':
>+                      case 'e':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xE;
>+                                  break;
>+
>+                      case 'F':
>+                      case 'f':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| 0xF;
>+                                  break;
>+
>+                      case '1':
>+                      case '2':
>+                      case '3':
>+                      case '4':
>+                      case '5':
>+                      case '6':
>+                      case '7':
>+                      case '8':
>+                      case '9':
>+                                  pkey_array[num_pKeys] = \
>+                                  (pkey_array[num_pKeys] << 4)| (ansi_str.Buffer[i] - '0');
>+                                  break;
>+
>+                      case '\0':
>+                                  if(i > 3)
>+                                              num_pKeys++;
>+                                  break;
>+
>+                      default:
>+                                  break;
>+
>+                      }
>+          }
>+
>+    RtlFreeAnsiString(&ansi_str);
>+          IPOIB_EXIT( IPOIB_DBG_INIT );
>+          return num_pKeys;
>+}
> NDIS_STATUS
> ipoib_get_adapter_params(
>            IN                                             NDIS_HANDLE* const                           wrapper_config_context,
>@@ -408,6 +526,7 @@
>            NDIS_STRING                                                               keyword;
>            PUCHAR                                                                                   mac;
>            UINT                                                                             len;
>+          pkey_array_t                                                     pkey_arr;
>
>            IPOIB_ENTER( IPOIB_DBG_INIT );
>
>@@ -559,13 +678,91 @@
>                        }
>            }
>
>+          /* Optional: Partition Key values to be used with this port. */
>+          RtlZeroMemory(&pkey_arr,sizeof(pkey_arr));
>+          RtlInitUnicodeString( &keyword, L"PartitionKey" );
>+          NdisReadConfiguration(
>+                      &status, &p_param, h_config, &keyword, NdisParameterString );
>+          if( status != NDIS_STATUS_SUCCESS )
>+          {
>+                      IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT,
>+                                  ("port %04X Failed to read Pkey values configuration\n",p_adapter->guids.port_guid.pkey));
>+          }
>+          else
>+                      pkey_arr.pkey_num =  __prepare_pKey_array(&p_param->ParameterData.StringData,(uint16_t*)pkey_arr.pkey_array);
>+
>+          if( pkey_arr.pkey_num)
>+                      ipoib_send_pkey_req(p_adapter, &pkey_arr);
>            NdisCloseConfiguration( h_config );
>
>            IPOIB_EXIT( IPOIB_DBG_INIT );
>            return NDIS_STATUS_SUCCESS;
> }
>
>+/**************************************************************************
>+* name            :           ipoib_send_pkey_req
>+*                                 creates pdo(s) for non-default pkey(s)
>+* input :           p_adapter, pkey_arr
>+* output:          none
>+* return:          NDIS_STATUS
>+**************************************************************************/
>+NDIS_STATUS
>+ipoib_send_pkey_req(ipoib_adapter_t *p_adapter, pkey_array_t *pkey_arr)
>+{
>+          NTSTATUS                                status;
>+          DEVICE_OBJECT                     *p_pdo;
>+          IRP                                                       *p_irp;
>+          KEVENT                                               event;
>+          IO_STATUS_BLOCK                  io_status;
>+          NDIS_STRING                           dev_name;
>+          PFILE_OBJECT                        fobject;
>
>+          IPOIB_ENTER( IPOIB_DBG_INIT );
>+
>+          if(p_adapter->guids.port_guid.pkey != IB_DEFAULT_PKEY)
>+          {
>+                      IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT,
>+                                  ("Only device with default pkey can create partition VLAN\n"));
>+                      return STATUS_INVALID_PARAMETER;
>+          }
>+
>+          RtlInitUnicodeString( &dev_name, AL_DEVICE_NAME );
>+
>+    status = IoGetDeviceObjectPointer(& dev_name,
>+                                        FILE_GENERIC_READ,
>+                                      & fobject,
>+                                      & p_pdo);

You should be able to send this down to IPoIB's PDO (same place you send the IRP_MN_QUERY_INTERFACE request for the IBAL interface.  You'll need to add IOCTL handling to the PDO, but that would be cleaner.

>+          if(! NT_SUCCESS(status))
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                  ("IoGetDeviceObjectPointer returned %08X\n",status) );
>+                      return status;
>+          }
>+
>+          NdisMGetDeviceProperty( p_adapter->h_adapter, &pkey_arr->p_def_pkey_pdo, NULL, NULL, NULL, NULL );
>+          KeInitializeEvent( &event, NotificationEvent, FALSE );
>+
>+          p_irp = IoBuildDeviceIoControlRequest( IOCTL_CREATE_PDO, p_pdo,
>+                      pkey_arr, sizeof(pkey_array_t), NULL,0,TRUE, &event, &io_status );

If you used the p_def_pkey_pdo as the target of the IOCTL, you could eliminate p_def_pkey_pdo from the pkey array since it's an input to the IOCTL dispatch routine.

>+
>+          if( !p_irp )
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                  ("Failed to allocate IoControlRequest IRP.\n") );
>+                      return STATUS_INSUFFICIENT_RESOURCES;
>+          }
>+
>+          status = IoCallDriver(p_pdo, p_irp);
>+          if (status == STATUS_PENDING)
>+          {
>+                      KeWaitForSingleObject( &event, Executive, KernelMode,
>+                                  FALSE, NULL );
>+                      status = io_status.Status;
>+          }
>+          IPOIB_EXIT( IPOIB_DBG_INIT );
>+          return status;
>+}
>+
>Index: ulp/ipoib/kernel/ipoib_ibat.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_ibat.c    (revision 1049)
>+++ ulp/ipoib/kernel/ipoib_ibat.c (working copy)
>@@ -140,7 +140,7 @@
>            {
>                        pAdapter = CONTAINING_RECORD( pItem, ipoib_adapter_t, entry );
>                        pOut->Ports[pOut->NumPorts].CaGuid = pAdapter->guids.ca_guid;
>-                       pOut->Ports[pOut->NumPorts].PortGuid = pAdapter->guids.port_guid;
>+                      pOut->Ports[pOut->NumPorts].PortGuid = pAdapter->guids.port_guid.guid;
>                        pOut->Ports[pOut->NumPorts].PortNum = pAdapter->guids.port_num;
>                        pOut->NumPorts++;

You should also return partition information via the IBAT IOCTLs or you will break WSD/ND for any IPoIB instance that's running on a non-default partition.  This should probably be done as a separate patch, and should be pretty straight forward.

>
>@@ -394,7 +394,7 @@
>                                    if (!memcmp( &pIn->Address.Address[12], pAddr->address.as_bytes, IPV4_ADDR_SIZE))
>                                    {
>                                                pOut->Port.CaGuid = pAdapter->guids.ca_guid;
>-                                               pOut->Port.PortGuid = pAdapter->guids.port_guid;
>+                                              pOut->Port.PortGuid = pAdapter->guids.port_guid.guid;
>                                                pOut->Port.PortNum = pAdapter->guids.port_num;

Same as above here for the pkey being output.

Thanks,
-Fab



More information about the ofw mailing list