[ofw] [RFC] Generate IBAT path records in IPoIB
Fab Tillier
ftillier at windows.microsoft.com
Tue Aug 5 15:34:08 PDT 2008
The following patch is being sent out for comments only. It removes the need to query the SA for a path record for clients of IPoIB's IBAT functionality.
A new IOCTL takes a local port GUID and a destination Ethernet MAC address as input and returns the corresponding path record.
IPoIB creates the path record using information from the receive work completion of the ARP request/response (DLID/DGID), the local endpoint (SLID/SGID), and the broadcast group (SL, flow label, hop limit, traffic class, MTU, rate, packet lifetime).
The drawback of doing this in IPoIB rather than in an SA cache is that the parameters taken from the broadcast group may not be optimal. The advantage over a cache is that the DLID/DGID pair is kept up to date via ARP entry aging: whenever the OS determines that an ARP needs to be sent, the endpoint in IPoIB is updated and all further path requests for that target use the updated information.
Like my previous RFC, this isn't intended to be checked in, as I haven't completed testing, but I wanted to get a leg up on discussing it.
Signed-off-by: Fab Tillier <ftillier at microsoft.com>
Index: ulp/ipoib/kernel/ipoib_ibat.c
===================================================================
--- ulp/ipoib/kernel/ipoib_ibat.c (revision 1408)
+++ ulp/ipoib/kernel/ipoib_ibat.c (working copy)
@@ -329,6 +329,80 @@ __ibat_mac_to_gid(
static NTSTATUS
+__ibat_mac_to_path(
+ IN IRP *pIrp,
+ IN IO_STACK_LOCATION *pIoStack )
+{
+ NTSTATUS status = STATUS_INVALID_PARAMETER; /* Returned unchanged if no adapter matches or the matching adapter has no port. */
+ IOCTL_IBAT_MAC_TO_PATH_IN *pIn;
+ IOCTL_IBAT_MAC_TO_PATH_OUT *pOut;
+ KLOCK_QUEUE_HANDLE hdl;
+ cl_list_item_t *pItem;
+ ipoib_adapter_t *pAdapter;
+ /* IOCTL_IBAT_MAC_TO_PATH handler: validates the request, finds the adapter by port GUID, and fills in a path record for the destination MAC. */
+ IPOIB_ENTER(IPOIB_DBG_IOCTL);
+
+ if( pIoStack->Parameters.DeviceIoControl.InputBufferLength !=
+ sizeof(IOCTL_IBAT_MAC_TO_PATH_IN) )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Invalid input buffer size.\n") );
+ return STATUS_INVALID_PARAMETER;
+ }
+
+ if( pIoStack->Parameters.DeviceIoControl.OutputBufferLength !=
+ sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT) )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Invalid output buffer size.\n") );
+ return STATUS_INVALID_PARAMETER;
+ }
+ /* The input and output structures both point at the IRP's system buffer. */
+ pIn = pIrp->AssociatedIrp.SystemBuffer;
+ pOut = pIrp->AssociatedIrp.SystemBuffer;
+
+ if( pIn->Version != IBAT_IOCTL_VERSION )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Invalid version.\n") );
+ return STATUS_INVALID_PARAMETER;
+ }
+
+ KeAcquireInStackQueuedSpinLock( &g_ipoib.lock, &hdl ); /* Held while the global adapter list is walked. */
+
+ for( pItem = cl_qlist_head( &g_ipoib.adapter_list );
+ pItem != cl_qlist_end( &g_ipoib.adapter_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ pAdapter = CONTAINING_RECORD( pItem, ipoib_adapter_t, entry );
+ if( pIn->PortGuid != pAdapter->guids.port_guid.guid )
+ continue;
+
+ /* Found the port - lookup the MAC. */
+ cl_obj_lock( &pAdapter->obj );
+ if( pAdapter->p_port )
+ {
+ status = ipoib_mac_to_path(
+ pAdapter->p_port, *(mac_addr_t*)pIn->DestMac, &pOut->Path ); /* The MAC is passed by value, so it is copied before Path is written into the shared buffer. */
+ /* Report output bytes only when the lookup succeeded. */
+ if( NT_SUCCESS( status ) )
+ {
+ pIrp->IoStatus.Information =
+ sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT);
+ }
+ }
+ cl_obj_unlock( &pAdapter->obj );
+ break; /* Stop at the first adapter whose port GUID matches. */
+ }
+
+ KeReleaseInStackQueuedSpinLock( &hdl );
+
+ IPOIB_EXIT( IPOIB_DBG_IOCTL );
+ return status;
+}
+
+
+static NTSTATUS
__ibat_ip_to_port(
IN IRP *pIrp,
IN IO_STACK_LOCATION *pIoStack )
@@ -571,6 +645,12 @@ __ipoib_dispatch(
IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
("IOCTL_IBAT_IP_TO_PORT received\n" ));
status = __ibat_ip_to_port( pIrp, pIoStack );
+ break;
+
+ case IOCTL_IBAT_MAC_TO_PATH:
+ IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
+ ("IOCTL_IBAT_MAC_TO_PATH received\n" ));
+ status = __ibat_mac_to_path( pIrp, pIoStack );
break;
default:
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1408)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -4336,6 +4333,86 @@ ipoib_mac_to_gid(
}
+NTSTATUS
+ipoib_mac_to_path(
+ IN ipoib_port_t* const p_port,
+ IN const mac_addr_t mac,
+ OUT ib_path_rec_t* p_path )
+{
+ ipoib_endpt_t* p_endpt;
+ cl_map_item_t *p_item;
+ uint64_t key = 0; /* Map key; only sizeof(mac_addr_t) bytes are overwritten by the copy below. */
+ uint8_t sl;
+ net32_t flow_lbl;
+ uint8_t hop_limit;
+ /* Builds a path record to the endpoint with the given MAC. Returns STATUS_SUCCESS, or STATUS_INVALID_PARAMETER when there is no local endpoint or the MAC lookup fails. */
+ IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+ cl_memcpy( &key, &mac, sizeof(mac_addr_t) );
+
+ cl_obj_lock( &p_port->obj ); /* Held while the endpoints are read; released on every exit path. */
+
+ if( p_port->p_local_endpt == NULL )
+ {
+ cl_obj_unlock( &p_port->obj );
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("No local endpoint.\n") );
+ return STATUS_INVALID_PARAMETER;
+ }
+ /* An all-zero MAC selects the local endpoint; any other MAC is looked up in the MAC endpoint map. */
+ if( mac.addr[0] == 0 && mac.addr[1] == 0 && mac.addr[2] == 0 &&
+ mac.addr[3] == 0 && mac.addr[4] == 0 && mac.addr[5] == 0 )
+ {
+ p_endpt = p_port->p_local_endpt;
+ }
+ else
+ {
+ p_item = cl_qmap_get( &p_port->endpt_mgr.mac_endpts, key );
+ if( p_item == cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) )
+ {
+ cl_obj_unlock( &p_port->obj );
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed endpoint lookup.\n") );
+ return STATUS_INVALID_PARAMETER;
+ }
+
+ p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );
+ }
+ /* Destination GID/LID come from the resolved endpoint, source GID/LID from the local endpoint. */
+ p_path->resv0 = 0;
+ p_path->dgid = p_endpt->dgid;
+ p_path->sgid = p_port->p_local_endpt->dgid;
+ p_path->dlid = p_endpt->dlid;
+ p_path->slid = p_port->p_local_endpt->dlid;
+ /* NOTE(review): sl is extracted below but never written into p_path in this function - confirm whether the SL field should be set. */
+ ib_member_get_sl_flow_hop(
+ p_port->ib_mgr.bcast_rec.sl_flow_hop,
+ &sl,
+ &flow_lbl,
+ &hop_limit
+ );
+ ib_path_rec_set_hop_flow_raw( p_path, hop_limit, flow_lbl, FALSE );
+
+ p_path->tclass = p_port->ib_mgr.bcast_rec.tclass;
+ p_path->num_path = 1;
+ p_path->pkey = IB_DEFAULT_PKEY; /* NOTE(review): always the default P_Key - confirm this is correct for non-default partitions. */
+ p_path->mtu = p_port->ib_mgr.bcast_rec.mtu;
+ p_path->rate = p_port->ib_mgr.bcast_rec.rate;
+ if( p_path->slid == p_path->dlid ) /* Source and destination LID are equal: packet lifetime is set to 0. */
+ p_path->pkt_life = 0;
+ else
+ p_path->pkt_life = p_port->ib_mgr.bcast_rec.pkt_life;
+ p_path->preference = 0;
+ p_path->resv1 = 0;
+ p_path->resv2 = 0;
+
+ cl_obj_unlock( &p_port->obj );
+
+ IPOIB_EXIT( IPOIB_DBG_ENDPT );
+ return STATUS_SUCCESS;
+}
+
+
static inline NDIS_STATUS
__endpt_mgr_ref(
IN ipoib_port_t* const p_port,
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1408)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -610,6 +610,12 @@ ipoib_mac_to_gid(
IN const mac_addr_t mac,
OUT ib_gid_t* p_gid );
+NTSTATUS
+ipoib_mac_to_path( /* Builds a path record to the endpoint identified by a MAC address. */
+ IN ipoib_port_t* const p_port, /* Port whose endpoints and broadcast record are consulted. */
+ IN const mac_addr_t mac, /* Destination MAC; all zeros selects the local endpoint. */
+ OUT ib_path_rec_t* p_path ); /* Written when STATUS_SUCCESS is returned. */
+
inline void ipoib_port_ref(
IN ipoib_port_t * p_port,
IN int type);
Index: inc/iba/ib_at_ioctl.h
===================================================================
--- inc/iba/ib_at_ioctl.h (revision 1408)
+++ inc/iba/ib_at_ioctl.h (working copy)
@@ -146,6 +146,24 @@ typedef struct _IOCTL_IBAT_IP_TO_PORT_OU
} IOCTL_IBAT_IP_TO_PORT_OUT;
+/** This IRP is used to convert a remote MAC address to a path record */
+#define IOCTL_IBAT_MAC_TO_PATH IOCTL_IBAT( 5 )
+
+typedef struct _IOCTL_IBAT_MAC_TO_PATH_IN
+{
+ ULONG Version; /* Must equal IBAT_IOCTL_VERSION. */
+ UINT64 PortGuid; /* GUID of the local port used to select the adapter. */
+ UCHAR DestMac[IBAT_MAC_LEN]; /* Destination MAC; all zeros selects the local endpoint. */
+
+} IOCTL_IBAT_MAC_TO_PATH_IN;
+
+typedef struct _IOCTL_IBAT_MAC_TO_PATH_OUT
+{
+ ib_path_rec_t Path; /* Path record generated for the destination. */
+
+} IOCTL_IBAT_MAC_TO_PATH_OUT;
+
+
#define IBAT_DEV_NAME L"\\Device\\ibat"
#define IBAT_DOS_DEV_NAME L"\\DosDevices\\Global\\ibat"
#define IBAT_WIN32_NAME L"\\\\.\\ibat"
More information about the ofw
mailing list