[ofw] [RFC] Generate IBAT path records in IPoIB

Fab Tillier ftillier at windows.microsoft.com
Tue Aug 5 15:34:08 PDT 2008


The following patch is being sent out for comments only.  It removes the need to query the SA for a path record for clients of IPoIB's IBAT functionality.

A new IOCTL returns a path record given a local port GUID and a destination Ethernet MAC address.

IPoIB creates the path record using information from the receive work completion of the ARP request/response (DLID/DGID), the local endpoint (SLID/SGID), and the broadcast group (SL, flow label, hop limit, traffic class, MTU, rate, packet lifetime).

The drawback of doing this in IPoIB vs. an SA cache is that the parameters from the broadcast group may not be optimal.  The advantage over a cache is that the DLID/DGID pair will be kept up to date via ARP entry aging - whenever the OS determines that an ARP needs to be sent, the endpoint in IPoIB is updated and all further path requests for that target use the updated information.

Like my previous RFC, this isn't intended to be checked in as I haven't completed testing, but I wanted to get a head start on discussing it.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

Index: ulp/ipoib/kernel/ipoib_ibat.c
===================================================================
--- ulp/ipoib/kernel/ipoib_ibat.c       (revision 1408)
+++ ulp/ipoib/kernel/ipoib_ibat.c       (working copy)
@@ -329,6 +329,80 @@ __ibat_mac_to_gid(


 static NTSTATUS
+__ibat_mac_to_path( /* IOCTL handler: writes a path record for the requested port GUID and destination MAC into the IRP's system buffer. */
+       IN                              IRP                                                     *pIrp,
+       IN                              IO_STACK_LOCATION                       *pIoStack )
+{
+       NTSTATUS                                        status = STATUS_INVALID_PARAMETER; /* returned as-is when no adapter matches or the matching adapter has no port */
+       IOCTL_IBAT_MAC_TO_PATH_IN       *pIn;
+       IOCTL_IBAT_MAC_TO_PATH_OUT      *pOut;
+       KLOCK_QUEUE_HANDLE                      hdl;
+       cl_list_item_t                          *pItem;
+       ipoib_adapter_t                         *pAdapter;
+
+       IPOIB_ENTER(IPOIB_DBG_IOCTL);
+
+       if( pIoStack->Parameters.DeviceIoControl.InputBufferLength != /* input size must match the request structure exactly */
+               sizeof(IOCTL_IBAT_MAC_TO_PATH_IN) )
+       {
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                       ("Invalid input buffer size.\n") );
+               return STATUS_INVALID_PARAMETER;
+       }
+
+       if( pIoStack->Parameters.DeviceIoControl.OutputBufferLength != /* output size must match the response structure exactly */
+               sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT) )
+       {
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                       ("Invalid output buffer size.\n") );
+               return STATUS_INVALID_PARAMETER;
+       }
+
+       pIn = pIrp->AssociatedIrp.SystemBuffer;
+       pOut = pIrp->AssociatedIrp.SystemBuffer; /* same buffer as pIn: the response is written over the request */
+
+       if( pIn->Version != IBAT_IOCTL_VERSION )
+       {
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                       ("Invalid version.\n") );
+               return STATUS_INVALID_PARAMETER;
+       }
+
+       KeAcquireInStackQueuedSpinLock( &g_ipoib.lock, &hdl ); /* held while walking g_ipoib.adapter_list; released after the loop */
+
+       for( pItem = cl_qlist_head( &g_ipoib.adapter_list );
+               pItem != cl_qlist_end( &g_ipoib.adapter_list );
+               pItem = cl_qlist_next( pItem ) )
+       {
+               pAdapter = CONTAINING_RECORD( pItem, ipoib_adapter_t, entry );
+               if( pIn->PortGuid != pAdapter->guids.port_guid.guid )
+                       continue;
+
+               /* Found the port - lookup the MAC. */
+               cl_obj_lock( &pAdapter->obj );
+               if( pAdapter->p_port )
+               {
+                       status = ipoib_mac_to_path(
+                               pAdapter->p_port, *(mac_addr_t*)pIn->DestMac, &pOut->Path ); /* the MAC is passed by value, so it is read before Path is written into the shared buffer */
+
+                       if( NT_SUCCESS( status ) )
+                       {
+                               pIrp->IoStatus.Information = /* report the output length only on success */
+                                       sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT);
+                       }
+               }
+               cl_obj_unlock( &pAdapter->obj );
+               break; /* only the first adapter with a matching port GUID is considered */
+       }
+
+       KeReleaseInStackQueuedSpinLock( &hdl );
+
+       IPOIB_EXIT( IPOIB_DBG_IOCTL );
+       return status;
+}
+
+
+static NTSTATUS
 __ibat_ip_to_port(
        IN                              IRP                                                     *pIrp,
        IN                              IO_STACK_LOCATION                       *pIoStack )
@@ -571,6 +645,12 @@ __ipoib_dispatch(
                IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
                        ("IOCTL_IBAT_IP_TO_PORT received\n" ));
                status = __ibat_ip_to_port( pIrp, pIoStack );
+               break;
+
+       case IOCTL_IBAT_MAC_TO_PATH:
+               IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
+                       ("IOCTL_IBAT_MAC_TO_PATH received\n" ));
+               status = __ibat_mac_to_path( pIrp, pIoStack );
                break;

        default:
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c       (revision 1408)
+++ ulp/ipoib/kernel/ipoib_port.c       (working copy)
@@ -4336,6 +4333,86 @@ ipoib_mac_to_gid(
 }


+NTSTATUS
+ipoib_mac_to_path( /* Fills *p_path with a path record from the local endpoint to the endpoint that has the given MAC. */
+       IN                              ipoib_port_t* const                     p_port,
+       IN              const   mac_addr_t                                      mac,
+               OUT                     ib_path_rec_t*                          p_path )
+{
+       ipoib_endpt_t*  p_endpt;
+       cl_map_item_t   *p_item;
+       uint64_t                key = 0;
+       uint8_t                 sl; /* NOTE(review): filled by ib_member_get_sl_flow_hop below but never stored in p_path - confirm whether the SL should be set */
+       net32_t                 flow_lbl;
+       uint8_t                 hop_limit;
+
+       IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+       cl_memcpy( &key, &mac, sizeof(mac_addr_t) ); /* map key = MAC bytes copied to the start of a zeroed 64-bit value */
+
+       cl_obj_lock( &p_port->obj ); /* held for the whole lookup and copy; every exit path below unlocks it */
+
+       if( p_port->p_local_endpt == NULL )
+       {
+               cl_obj_unlock( &p_port->obj );
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                       ("No local endpoint.\n") );
+               return STATUS_INVALID_PARAMETER;
+       }
+
+       if( mac.addr[0] == 0 && mac.addr[1] == 0 && mac.addr[2] == 0 && /* an all-zero MAC selects the local endpoint as the destination */
+               mac.addr[3] == 0 && mac.addr[4] == 0 && mac.addr[5] == 0 )
+       {
+               p_endpt = p_port->p_local_endpt;
+       }
+       else
+       {
+               p_item = cl_qmap_get( &p_port->endpt_mgr.mac_endpts, key );
+               if( p_item == cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) ) /* no endpoint is known for this MAC */
+               {
+                       cl_obj_unlock( &p_port->obj );
+                       IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                               ("Failed endpoint lookup.\n") );
+                       return STATUS_INVALID_PARAMETER;
+               }
+
+               p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );
+       }
+
+       p_path->resv0 = 0;
+       p_path->dgid = p_endpt->dgid; /* destination GID/LID come from the looked-up endpoint */
+       p_path->sgid = p_port->p_local_endpt->dgid; /* source GID/LID come from the local endpoint */
+       p_path->dlid = p_endpt->dlid;
+       p_path->slid = p_port->p_local_endpt->dlid;
+
+       ib_member_get_sl_flow_hop( /* unpack the broadcast group record's combined SL / flow label / hop limit field */
+               p_port->ib_mgr.bcast_rec.sl_flow_hop,
+               &sl,
+               &flow_lbl,
+               &hop_limit
+               );
+       ib_path_rec_set_hop_flow_raw( p_path, hop_limit, flow_lbl, FALSE );
+
+       p_path->tclass = p_port->ib_mgr.bcast_rec.tclass; /* tclass, mtu and rate are copied from the broadcast group record */
+       p_path->num_path = 1;
+       p_path->pkey = IB_DEFAULT_PKEY; /* NOTE(review): hard-coded default P_Key - confirm this is correct for ports on a non-default partition */
+       p_path->mtu = p_port->ib_mgr.bcast_rec.mtu;
+       p_path->rate = p_port->ib_mgr.bcast_rec.rate;
+       if( p_path->slid == p_path->dlid ) /* same LID at both ends: packet lifetime is set to 0 */
+               p_path->pkt_life = 0;
+       else
+               p_path->pkt_life = p_port->ib_mgr.bcast_rec.pkt_life;
+       p_path->preference = 0;
+       p_path->resv1 = 0;
+       p_path->resv2 = 0;
+
+       cl_obj_unlock( &p_port->obj );
+
+       IPOIB_EXIT( IPOIB_DBG_ENDPT );
+       return STATUS_SUCCESS;
+}
+
+
 static inline NDIS_STATUS
 __endpt_mgr_ref(
        IN                              ipoib_port_t* const                     p_port,
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h       (revision 1408)
+++ ulp/ipoib/kernel/ipoib_port.h       (working copy)
@@ -610,6 +610,12 @@ ipoib_mac_to_gid(
        IN              const   mac_addr_t                                      mac,
                OUT                     ib_gid_t*                                       p_gid );

+NTSTATUS
+ipoib_mac_to_path( /* Generates a path record to the endpoint that has the given MAC address. */
+       IN                              ipoib_port_t* const                     p_port, /* port whose endpoints are searched */
+       IN              const   mac_addr_t                                      mac, /* destination MAC address */
+               OUT                     ib_path_rec_t*                          p_path ); /* receives the generated path record */
+
 inline void ipoib_port_ref(
        IN                              ipoib_port_t *                          p_port,
        IN                              int                                             type);
Index: inc/iba/ib_at_ioctl.h
===================================================================
--- inc/iba/ib_at_ioctl.h       (revision 1408)
+++ inc/iba/ib_at_ioctl.h       (working copy)
@@ -146,6 +146,24 @@ typedef struct _IOCTL_IBAT_IP_TO_PORT_OU
 } IOCTL_IBAT_IP_TO_PORT_OUT;


+/** This IRP is used to convert a remote MAC address to a path record */
+#define        IOCTL_IBAT_MAC_TO_PATH IOCTL_IBAT( 5 )
+
+typedef struct _IOCTL_IBAT_MAC_TO_PATH_IN /* input buffer for IOCTL_IBAT_MAC_TO_PATH */
+{
+       ULONG                           Version; /* IBAT IOCTL interface version supplied by the caller */
+       UINT64                          PortGuid; /* GUID of the local port to send from */
+       UCHAR                           DestMac[IBAT_MAC_LEN]; /* destination Ethernet MAC address */
+
+} IOCTL_IBAT_MAC_TO_PATH_IN;
+
+typedef struct _IOCTL_IBAT_MAC_TO_PATH_OUT /* output buffer for IOCTL_IBAT_MAC_TO_PATH */
+{
+       ib_path_rec_t           Path; /* path record for the requested destination */
+
+} IOCTL_IBAT_MAC_TO_PATH_OUT;
+
+
 #define        IBAT_DEV_NAME   L"\\Device\\ibat"
 #define        IBAT_DOS_DEV_NAME L"\\DosDevices\\Global\\ibat"
 #define        IBAT_WIN32_NAME L"\\\\.\\ibat"



More information about the ofw mailing list