[ofw] [PATCH 4/4] Avoid the SM

Fab Tillier ftillier at windows.microsoft.com
Thu Aug 21 10:00:23 PDT 2008


This patch changes the IBAT library to return a path record.  I didn't want to include all the IB headers, so I just defined an IBAT_PATH_BLOB structure (a 64-byte array) to represent the path.

The WSD code is also updated, but retains the original path query logic.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\core\ibat\user\ibat.cpp trunk\core\ibat\user\ibat.cpp
--- old\core\ibat\user\ibat.cpp Tue Jul 08 10:15:55 2008
+++ trunk\core\ibat\user\ibat.cpp       Thu Aug 21 09:56:36 2008
@@ -33,12 +33,11 @@
 // IBAT: InfiniBand Address Translation
 //
 // Description:
-//  Maps remote IP addresses (IPv4 and IPv6) to the remote GID.
+//  Maps source & remote IP addresses (IPv4 and IPv6) to a path record.
 //
 //  The mapping requires two steps:
 //      1. Mapping the remote IP address to the remote Ethernet MAC address
-//      2. Retrieve the remote GID given the remote Ethernet MAC address
-//      from IPoIB
+//      2. Retrieve the path given the remote Ethernet MAC address from IPoIB
 //
 //  The first step is accomplished as follows on Windows Server 2008:
 //      1. Lookup the desired MAC from the OS using GetIpNetEntry2
@@ -53,8 +52,9 @@
 //      3. If the remote IP isn't found, resolve the remote IP address using
 //      SendARP.
 //
-//  The second step is accomplished by asking IPoIB for the remote GID
-//  given the remote MAC.
+//  The second step is accomplished by asking IPoIB for the path
+//  given the remote MAC.  IPoIB creates the path internally without going to
+//  the SA.

 #pragma warning( push, 3 )
 #include <windows.h>
@@ -65,6 +65,7 @@
 #include <iphlpapi.h>
 #include "iba/ib_at_ioctl.h"

+C_ASSERT( sizeof(IBAT_PATH_BLOB) == sizeof(ib_path_rec_t) );

 namespace IBAT
 {
@@ -89,9 +90,7 @@ HRESULT
 Resolve(
     __in const struct sockaddr* pSrcAddr,
     __in const struct sockaddr* pDestAddr,
-    __out IN6_ADDR* pSrcGid,
-    __out IN6_ADDR* pDestGid,
-    __out USHORT* pPkey
+    __out IBAT_PATH_BLOB* pPath
     )
 {
     if( pSrcAddr->sa_family != pDestAddr->sa_family )
@@ -139,101 +138,94 @@ Resolve(
         return HRESULT_FROM_WIN32( GetLastError() );

     // Check for loopback.
-    if( fLoopback )
-    {
-        *pSrcGid = x_DefaultGid;
-        RtlCopyMemory(
-            &pSrcGid->u.Byte[8],
-            &port_out.PortGuid,
-            sizeof(port_out.PortGuid) );
-        *pDestGid = *pSrcGid;
-        *pPkey = port_out.PKey;
-        return S_OK;
-    }
-
-    NET_LUID luid;
-    DWORD ret;
-    do
-    {
-        DWORD iIf;
-        ret = GetBestInterfaceEx( (struct sockaddr*)pSrcAddr, &iIf );
-        if( ret != NO_ERROR )
-            return HRESULT_FROM_WIN32( ret );
-
-        // Interface indexes are not constant, so get the LUID mapping for the
-        // returned interface for use in the rest of the function.
-        ret = ConvertInterfaceIndexToLuid( iIf, &luid );
-
-    } while( ret != NO_ERROR );
-
-    SOCKADDR_INET src;
-    MIB_IPNET_ROW2 net = {0};
-    net.InterfaceLuid = luid;
-    switch( pDestAddr->sa_family )
+    IOCTL_IBAT_MAC_TO_PATH_IN mac_in = {0};
+    mac_in.Version = IBAT_IOCTL_VERSION;
+    mac_in.PortGuid = port_out.PortGuid;
+    if( !fLoopback )
     {
-    case AF_INET:
-        net.Address.si_family = src.si_family = AF_INET;
-        net.Address.Ipv4 = *(struct sockaddr_in*)pDestAddr;
-        src.Ipv4 = *(struct sockaddr_in*)pSrcAddr;
-        break;
-
-    case AF_INET6:
-        net.Address.si_family = src.si_family = AF_INET6;
-        net.Address.Ipv6 = *(struct sockaddr_in6*)pDestAddr;
-        src.Ipv6 = *(struct sockaddr_in6*)pSrcAddr;
-        break;
+        NET_LUID luid;
+        DWORD ret;
+        do
+        {
+            DWORD iIf;
+            ret = GetBestInterfaceEx( (struct sockaddr*)pSrcAddr, &iIf );
+            if( ret != NO_ERROR )
+                return HRESULT_FROM_WIN32( ret );
+
+            // Interface indexes are not constant, so get the LUID mapping for the
+            // returned interface for use in the rest of the function.
+            ret = ConvertInterfaceIndexToLuid( iIf, &luid );
+
+        } while( ret != NO_ERROR );
+
+        SOCKADDR_INET src;
+        MIB_IPNET_ROW2 net = {0};
+        net.InterfaceLuid = luid;
+        switch( pDestAddr->sa_family )
+        {
+        case AF_INET:
+            net.Address.si_family = src.si_family = AF_INET;
+            net.Address.Ipv4 = *(struct sockaddr_in*)pDestAddr;
+            src.Ipv4 = *(struct sockaddr_in*)pSrcAddr;
+            break;

-    default:
-        return E_INVALIDARG;
-    }
+        case AF_INET6:
+            net.Address.si_family = src.si_family = AF_INET6;
+            net.Address.Ipv6 = *(struct sockaddr_in6*)pDestAddr;
+            src.Ipv6 = *(struct sockaddr_in6*)pSrcAddr;
+            break;

-    bool fRetry = true;
-retry:
-    ret = GetIpNetEntry2( &net );
-    if( ret != NO_ERROR )
-        return HRESULT_FROM_WIN32( ret );
+        default:
+            return E_INVALIDARG;
+        }

-    switch( net.State )
-    {
-    default:
-    case NlnsUnreachable:
-        ret = ResolveIpNetEntry2( &net, &src );
-        if( ret == ERROR_BAD_NET_NAME && fRetry )
+        bool fRetry = true;
+    retry:
+        ret = GetIpNetEntry2( &net );
+        if( ret == ERROR_NOT_FOUND )
         {
-            fRetry = false;
-            goto retry;
+            net.State = NlnsUnreachable;
         }
         else if( ret != NO_ERROR )
         {
             return HRESULT_FROM_WIN32( ret );
         }
-        break;

-    case NlnsReachable:
-    case NlnsPermanent:
-        break;
+        switch( net.State )
+        {
+        default:
+        case NlnsUnreachable:
+            ret = ResolveIpNetEntry2( &net, &src );
+            if( ret == ERROR_BAD_NET_NAME && fRetry )
+            {
+                fRetry = false;
+                goto retry;
+            }
+            else if( ret != NO_ERROR )
+            {
+                return HRESULT_FROM_WIN32( ret );
+            }
+            break;

-    case NlnsIncomplete:
-        return E_PENDING;
-    }
+        case NlnsReachable:
+        case NlnsPermanent:
+            break;

-    if( net.PhysicalAddressLength > 6 )
-        return E_UNEXPECTED;
+        case NlnsIncomplete:
+            return E_PENDING;
+        }

-    IOCTL_IBAT_MAC_TO_GID_IN mac_in;
-    mac_in.Version = IBAT_IOCTL_VERSION;
-    mac_in.PortGuid = port_out.PortGuid;
-    RtlCopyMemory( mac_in.DestMac, net.PhysicalAddress, IBAT_MAC_LEN );
+        if( net.PhysicalAddressLength > 6 )
+            return E_UNEXPECTED;
+
+        RtlCopyMemory( mac_in.DestMac, net.PhysicalAddress, IBAT_MAC_LEN );
+    }

-    fSuccess = DeviceIoControl( hIbatDev, IOCTL_IBAT_MAC_TO_GID,
-        &mac_in, sizeof(mac_in), pDestGid, sizeof(*pDestGid), &size, NULL );
+    fSuccess = DeviceIoControl( hIbatDev, IOCTL_IBAT_MAC_TO_PATH,
+        &mac_in, sizeof(mac_in), pPath, sizeof(*pPath), &size, NULL );
     if( !fSuccess )
         return HRESULT_FROM_WIN32( GetLastError() );

-    // Use the same subnet prefix as the destination.
-    *pSrcGid = *pDestGid;
-    RtlCopyMemory( &pSrcGid->u.Byte[8], &port_out.PortGuid, sizeof(port_out.PortGuid) );
-    *pPkey = port_out.PKey;
     return S_OK;
 }
 #else   // Back compatibility with Windows Server 2003
@@ -306,9 +298,7 @@ HRESULT
 Resolve(
     __in const struct sockaddr* pSrcAddr,
     __in const struct sockaddr* pDestAddr,
-    __out IN6_ADDR* pSrcGid,
-    __out IN6_ADDR* pDestGid,
-    __out USHORT* pPkey
+    __out IBAT_PATH_BLOB* pPath
     )
 {
     if( pDestAddr->sa_family != AF_INET )
@@ -337,45 +327,33 @@ Resolve(
         return HRESULT_FROM_WIN32( GetLastError() );

     // Check for loopback.
-    if( ((struct sockaddr_in*)pDestAddr)->sin_addr.s_addr ==
-        ((struct sockaddr_in*)pSrcAddr)->sin_addr.s_addr )
-    {
-        *pSrcGid = x_DefaultGid;
-        RtlCopyMemory(
-            &pSrcGid->u.Byte[8],
-            &port_out.PortGuid,
-            sizeof(port_out.PortGuid) );
-        *pDestGid = *pSrcGid;
-        *pPkey = port_out.PKey;
-        return S_OK;
-    }
-
-    IOCTL_IBAT_MAC_TO_GID_IN mac_in;
+    IOCTL_IBAT_MAC_TO_GID_IN mac_in = {0};
     mac_in.Version = IBAT_IOCTL_VERSION;
     mac_in.PortGuid = port_out.PortGuid;
-    HRESULT hr = GetDestMac( (struct sockaddr_in*)pDestAddr, mac_in.DestMac );
-    if( FAILED( hr ) )
+
+    if( ((struct sockaddr_in*)pDestAddr)->sin_addr.s_addr !=
+        ((struct sockaddr_in*)pSrcAddr)->sin_addr.s_addr )
     {
-        ULONG len = sizeof(mac_in.DestMac);
-        DWORD ret = SendARP(
-            ((struct sockaddr_in*)pDestAddr)->sin_addr.s_addr,
-            ((struct sockaddr_in*)pSrcAddr)->sin_addr.s_addr,
-            (ULONG*)mac_in.DestMac,
-            &len
-            );
-        if( ret != NO_ERROR )
-            return HRESULT_FROM_WIN32( ret );
+        HRESULT hr = GetDestMac( (struct sockaddr_in*)pDestAddr, mac_in.DestMac );
+        if( FAILED( hr ) )
+        {
+            ULONG len = sizeof(mac_in.DestMac);
+            DWORD ret = SendARP(
+                ((struct sockaddr_in*)pDestAddr)->sin_addr.s_addr,
+                ((struct sockaddr_in*)pSrcAddr)->sin_addr.s_addr,
+                (ULONG*)mac_in.DestMac,
+                &len
+                );
+            if( ret != NO_ERROR )
+                return HRESULT_FROM_WIN32( ret );
+        }
     }

-    fSuccess = DeviceIoControl( hIbatDev, IOCTL_IBAT_MAC_TO_GID,
-        &mac_in, sizeof(mac_in), pDestGid, sizeof(*pDestGid), &size, NULL );
+    fSuccess = DeviceIoControl( hIbatDev, IOCTL_IBAT_MAC_TO_PATH,
+        &mac_in, sizeof(mac_in), pPath, sizeof(*pPath), &size, NULL );
     if( !fSuccess )
         return HRESULT_FROM_WIN32( GetLastError() );

-    // Use the same subnet prefix as the destination.
-    *pSrcGid = *pDestGid;
-    RtlCopyMemory( &pSrcGid->u.Byte[8], &port_out.PortGuid, sizeof(port_out.PortGuid) );
-    *pPkey = port_out.PKey;
     return S_OK;
 }

@@ -389,12 +367,10 @@ HRESULT
 IbatResolve(
     __in const struct sockaddr* pSrcAddr,
     __in const struct sockaddr* pDestAddr,
-    __out IN6_ADDR* pSrcGid,
-    __out IN6_ADDR* pDestGid,
-    __out USHORT* pPkey
+    __out IBAT_PATH_BLOB* pPath
     )
 {
-    return IBAT::Resolve( pSrcAddr, pDestAddr, pSrcGid, pDestGid, pPkey );
+    return IBAT::Resolve( pSrcAddr, pDestAddr, pPath );
 }

 } /* extern "C" */
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\inc\user\iba\ibat.h trunk\inc\user\iba\ibat.h
--- old\inc\user\iba\ibat.h     Tue Jul 08 10:16:08 2008
+++ trunk\inc\user\iba\ibat.h   Thu Aug 21 09:56:36 2008
@@ -32,6 +32,12 @@
 #include <winsock2.h>
 #include <ws2tcpip.h>

+typedef struct _IBAT_PATH_BLOB
+{
+    UINT8 byte[64];
+
+} IBAT_PATH_BLOB;
+
 #ifdef __cplusplus
 namespace IBAT
 {
@@ -40,9 +46,7 @@ HRESULT
 Resolve(
     __in const struct sockaddr* pSrcAddr,
     __in const struct sockaddr* pDestAddr,
-    __out IN6_ADDR* pSrcGid,
-    __out IN6_ADDR* pDestGid,
-    __out USHORT* pPkey
+    __out IBAT_PATH_BLOB* pPath
     );

 }
@@ -52,9 +56,7 @@ HRESULT
 IbatResolve(
     __in const struct sockaddr* pSrcAddr,
     __in const struct sockaddr* pDestAddr,
-    __out IN6_ADDR* pSrcGid,
-    __out IN6_ADDR* pDestGid,
-    __out USHORT* pPkey
+    __out IBAT_PATH_BLOB* pPath
     );

 #endif /* __cplusplus */
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\tests\wherebu\user\wherebu.cpp trunk\tests\wherebu\user\wherebu.cpp
--- old\tests\wherebu\user\wherebu.cpp  Tue Jul 08 10:15:55 2008
+++ trunk\tests\wherebu\user\wherebu.cpp        Thu Aug 21 09:56:36 2008
@@ -30,9 +30,10 @@
  */


-#include "iba/ibat.h"
 #include "stdlib.h"
 #include "stdio.h"
+#include "iba/ib_types.h"
+#include "iba/ibat.h"


 inline LONGLONG GetElapsedTime()
@@ -70,16 +71,11 @@ int __cdecl main(int argc, char *argv[])
     destAddr.sin_family = AF_INET;
     destAddr.sin_addr.s_addr = inet_addr( argv[2] );

-    IN6_ADDR srcGid;
-    IN6_ADDR destGid;
-    USHORT pkey;
-
+    ib_path_rec_t path;
     HRESULT hr = IBAT::Resolve(
         (struct sockaddr*)&srcAddr,
         (struct sockaddr*)&destAddr,
-        &srcGid,
-        &destGid,
-        &pkey
+        (IBAT_PATH_BLOB*)&path
         );
     if( FAILED( hr ) )
     {
@@ -90,13 +86,17 @@ int __cdecl main(int argc, char *argv[])
     printf(
         "I B at:\n"
         "partition %x\n"
-        "source GID %x:%x:%x:%x:%x:%x:%x:%x\n"
-        "destination GID %x:%x:%x:%x:%x:%x:%x:%x\n",
-        pkey,
-        srcGid.u.Word[0], srcGid.u.Word[1], srcGid.u.Word[2], srcGid.u.Word[3],
-        srcGid.u.Word[4], srcGid.u.Word[5], srcGid.u.Word[6], srcGid.u.Word[7],
-        destGid.u.Word[0], destGid.u.Word[1], destGid.u.Word[2], destGid.u.Word[3],
-        destGid.u.Word[4], destGid.u.Word[5], destGid.u.Word[6], destGid.u.Word[7]
+        "source GID %x%x:%x%x:%x%x:%x%x:%x%x:%x%x:%x%x:%x%x\n"
+        "destination GID %x%x:%x%x:%x%x:%x%x:%x%x:%x%x:%x%x:%x%x\n",
+        path.pkey,
+        path.sgid.raw[0], path.sgid.raw[1], path.sgid.raw[2], path.sgid.raw[3],
+        path.sgid.raw[4], path.sgid.raw[5], path.sgid.raw[6], path.sgid.raw[7],
+        path.sgid.raw[8], path.sgid.raw[9], path.sgid.raw[10], path.sgid.raw[11],
+        path.sgid.raw[12], path.sgid.raw[13], path.sgid.raw[14], path.sgid.raw[15],
+        path.dgid.raw[0], path.dgid.raw[1], path.dgid.raw[2], path.dgid.raw[3],
+        path.dgid.raw[4], path.dgid.raw[5], path.dgid.raw[6], path.dgid.raw[7],
+        path.dgid.raw[8], path.dgid.raw[9], path.dgid.raw[10], path.dgid.raw[11],
+        path.dgid.raw[12], path.dgid.raw[13], path.dgid.raw[14], path.dgid.raw[15]
     );

     LONGLONG StartTime = GetElapsedTime();
@@ -105,9 +105,7 @@ int __cdecl main(int argc, char *argv[])
         HRESULT hr = IBAT::Resolve(
             (struct sockaddr*)&srcAddr,
             (struct sockaddr*)&destAddr,
-            &srcGid,
-            &destGid,
-            &pkey
+            (IBAT_PATH_BLOB*)&path
             );
         if( FAILED( hr ) )
         {
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\ulp\wsd\user\ibsp_ip.c trunk\ulp\wsd\user\ibsp_ip.c
--- old\ulp\wsd\user\ibsp_ip.c  Wed Jul 09 09:44:26 2008
+++ trunk\ulp\wsd\user\ibsp_ip.c        Thu Aug 21 09:56:36 2008
@@ -266,8 +266,7 @@ query_guid_address(
        IN                              const struct sockaddr           *p_dest_addr,
                OUT                     ib_net64_t                                      *port_guid )
 {
-       ib_gid_pair_t gids;
-       uint16_t pkey;
+       ib_path_rec_t path;
        HRESULT hr;

        IBSP_ENTER( IBSP_DBG_HW );
@@ -277,19 +276,17 @@ query_guid_address(
                hr = IbatResolve(
                        p_src_addr,
                        p_dest_addr,
-                       (IN6_ADDR*)&gids.src_gid,
-                       (IN6_ADDR*)&gids.dest_gid,
-                       &pkey
+                       (IBAT_PATH_BLOB*)&path
                        );

                if( hr != E_PENDING )
                        break;

-                       Sleep( 100 );
+               Sleep( 100 );
        }
        if( hr == S_OK )
        {
-               *port_guid = gids.dest_gid.unicast.interface_id;
+               *port_guid = path.dgid.unicast.interface_id;
        }
        else
        {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ibat_use_path.patch
Type: application/octet-stream
Size: 15044 bytes
Desc: ibat_use_path.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/918a0822/attachment.obj>


More information about the ofw mailing list