[ofw] [RFC] Generate IBAT path records in IPoIB

Alex Estrin alex.estrin at qlogic.com
Wed Aug 6 09:03:48 PDT 2008


Hi Fab,

I think this should work fine along with your previous patch. I have a
couple of questions, though.
Why could the broadcast group record values be different from those in a
path query record?
If they can differ, maybe the path query could be left as an option for the customer?
The P_Key is set to the default - why not get it from the broadcast record as
well?

Thanks,
Alex.

> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org 
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier
> Sent: Tuesday, August 05, 2008 6:34 PM
> To: ofw at lists.openfabrics.org
> Subject: [ofw] [RFC] Generate IBAT path records in IPoIB
> 
> The following patch is being sent out for comments only.  It 
> removes the need to query the SA for a path record for 
> clients of IPoIB's IBAT functionality.
> 
> A new IOCTL returns a path record for an input local port 
> GUID and destination Ethernet MAC address.
> 
> IPoIB creates the path record using information from the 
> receive work completion of the ARP request/response 
> (DLID/DGID), the local endpoint (SLID/SGID), and the 
> broadcast group (SL, flow label, hop limit, traffic class, 
> MTU, rate, packet lifetime).
> 
> The drawbacks of doing this in IPoIB vs. an SA cache are that 
> the parameters from the broadcast group may not be optimal.  
> The advantage over a cache is that the  DLID/DGID pair will 
> be kept up to date via ARP entry aging - whenever the OS 
> determines that an ARP needs to be sent the endpoint in IPoIB 
> is updated and all further path requests for that target use 
> the updated information.
> 
> Like my previous RFC, this isn't intended to be checked in as 
> I haven't completed testing, but wanted to get a leg up on 
> discussing it.
> 
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
> 
> Index: ulp/ipoib/kernel/ipoib_ibat.c
> ===================================================================
> --- ulp/ipoib/kernel/ipoib_ibat.c       (revision 1408)
> +++ ulp/ipoib/kernel/ipoib_ibat.c       (working copy)
> @@ -329,6 +329,80 @@ __ibat_mac_to_gid(
> 
> 
>  static NTSTATUS
> +__ibat_mac_to_path(
> +       IN                              IRP                   
>                                   *pIrp,
> +       IN                              IO_STACK_LOCATION     
>                   *pIoStack )
> +{
> +       NTSTATUS                                        
> status = STATUS_INVALID_PARAMETER;
> +       IOCTL_IBAT_MAC_TO_PATH_IN       *pIn;
> +       IOCTL_IBAT_MAC_TO_PATH_OUT      *pOut;
> +       KLOCK_QUEUE_HANDLE                      hdl;
> +       cl_list_item_t                          *pItem;
> +       ipoib_adapter_t                         *pAdapter;
> +
> +       IPOIB_ENTER(IPOIB_DBG_IOCTL);
> +
> +       if( pIoStack->Parameters.DeviceIoControl.InputBufferLength !=
> +               sizeof(IOCTL_IBAT_MAC_TO_PATH_IN) )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("Invalid input buffer size.\n") );
> +               return STATUS_INVALID_PARAMETER;
> +       }
> +
> +       if( pIoStack->Parameters.DeviceIoControl.OutputBufferLength !=
> +               sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT) )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("Invalid output buffer size.\n") );
> +               return STATUS_INVALID_PARAMETER;
> +       }
> +
> +       pIn = pIrp->AssociatedIrp.SystemBuffer;
> +       pOut = pIrp->AssociatedIrp.SystemBuffer;
> +
> +       if( pIn->Version != IBAT_IOCTL_VERSION )
> +       {
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("Invalid version.\n") );
> +               return STATUS_INVALID_PARAMETER;
> +       }
> +
> +       KeAcquireInStackQueuedSpinLock( &g_ipoib.lock, &hdl );
> +
> +       for( pItem = cl_qlist_head( &g_ipoib.adapter_list );
> +               pItem != cl_qlist_end( &g_ipoib.adapter_list );
> +               pItem = cl_qlist_next( pItem ) )
> +       {
> +               pAdapter = CONTAINING_RECORD( pItem, 
> ipoib_adapter_t, entry );
> +               if( pIn->PortGuid != pAdapter->guids.port_guid.guid )
> +                       continue;
> +
> +               /* Found the port - lookup the MAC. */
> +               cl_obj_lock( &pAdapter->obj );
> +               if( pAdapter->p_port )
> +               {
> +                       status = ipoib_mac_to_path(
> +                               pAdapter->p_port, 
> *(mac_addr_t*)pIn->DestMac, &pOut->Path );
> +
> +                       if( NT_SUCCESS( status ) )
> +                       {
> +                               pIrp->IoStatus.Information =
> +                                       
> sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT);
> +                       }
> +               }
> +               cl_obj_unlock( &pAdapter->obj );
> +               break;
> +       }
> +
> +       KeReleaseInStackQueuedSpinLock( &hdl );
> +
> +       IPOIB_EXIT( IPOIB_DBG_IOCTL );
> +       return status;
> +}
> +
> +
> +static NTSTATUS
>  __ibat_ip_to_port(
>         IN                              IRP                   
>                                   *pIrp,
>         IN                              IO_STACK_LOCATION     
>                   *pIoStack )
> @@ -571,6 +645,12 @@ __ipoib_dispatch(
>                 IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
>                         ("IOCTL_IBAT_IP_TO_PORT received\n" ));
>                 status = __ibat_ip_to_port( pIrp, pIoStack );
> +               break;
> +
> +       case IOCTL_IBAT_MAC_TO_PATH:
> +               IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL,
> +                       ("IOCTL_IBAT_MAC_TO_PATH received\n" ));
> +               status = __ibat_mac_to_path( pIrp, pIoStack );
>                 break;
> 
>         default:
> Index: ulp/ipoib/kernel/ipoib_port.c
> ===================================================================
> --- ulp/ipoib/kernel/ipoib_port.c       (revision 1408)
> +++ ulp/ipoib/kernel/ipoib_port.c       (working copy)
> @@ -4336,6 +4333,86 @@ ipoib_mac_to_gid(
>  }
> 
> 
> +NTSTATUS
> +ipoib_mac_to_path(
> +       IN                              ipoib_port_t* const   
>                   p_port,
> +       IN              const   mac_addr_t                    
>                   mac,
> +               OUT                     ib_path_rec_t*        
>                   p_path )
> +{
> +       ipoib_endpt_t*  p_endpt;
> +       cl_map_item_t   *p_item;
> +       uint64_t                key = 0;
> +       uint8_t                 sl;
> +       net32_t                 flow_lbl;
> +       uint8_t                 hop_limit;
> +
> +       IPOIB_ENTER( IPOIB_DBG_ENDPT );
> +
> +       cl_memcpy( &key, &mac, sizeof(mac_addr_t) );
> +
> +       cl_obj_lock( &p_port->obj );
> +
> +       if( p_port->p_local_endpt == NULL )
> +       {
> +               cl_obj_unlock( &p_port->obj );
> +               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> +                       ("No local endpoint.\n") );
> +               return STATUS_INVALID_PARAMETER;
> +       }
> +
> +       if( mac.addr[0] == 0 && mac.addr[1] == 0 && 
> mac.addr[2] == 0 &&
> +               mac.addr[3] == 0 && mac.addr[4] == 0 && 
> mac.addr[5] == 0 )
> +       {
> +               p_endpt = p_port->p_local_endpt;
> +       }
> +       else
> +       {
> +               p_item = cl_qmap_get( 
> &p_port->endpt_mgr.mac_endpts, key );
> +               if( p_item == cl_qmap_end( 
> &p_port->endpt_mgr.mac_endpts ) )
> +               {
> +                       cl_obj_unlock( &p_port->obj );
> +                       IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, 
> IPOIB_DBG_ERROR,
> +                               ("Failed endpoint lookup.\n") );
> +                       return STATUS_INVALID_PARAMETER;
> +               }
> +
> +               p_endpt = PARENT_STRUCT( p_item, 
> ipoib_endpt_t, mac_item );
> +       }
> +
> +       p_path->resv0 = 0;
> +       p_path->dgid = p_endpt->dgid;
> +       p_path->sgid = p_port->p_local_endpt->dgid;
> +       p_path->dlid = p_endpt->dlid;
> +       p_path->slid = p_port->p_local_endpt->dlid;
> +
> +       ib_member_get_sl_flow_hop(
> +               p_port->ib_mgr.bcast_rec.sl_flow_hop,
> +               &sl,
> +               &flow_lbl,
> +               &hop_limit
> +               );
> +       ib_path_rec_set_hop_flow_raw( p_path, hop_limit, 
> flow_lbl, FALSE );
> +
> +       p_path->tclass = p_port->ib_mgr.bcast_rec.tclass;
> +       p_path->num_path = 1;
> +       p_path->pkey = IB_DEFAULT_PKEY;
> +       p_path->mtu = p_port->ib_mgr.bcast_rec.mtu;
> +       p_path->rate = p_port->ib_mgr.bcast_rec.rate;
> +       if( p_path->slid == p_path->dlid )
> +               p_path->pkt_life = 0;
> +       else
> +               p_path->pkt_life = p_port->ib_mgr.bcast_rec.pkt_life;
> +       p_path->preference = 0;
> +       p_path->resv1 = 0;
> +       p_path->resv2 = 0;
> +
> +       cl_obj_unlock( &p_port->obj );
> +
> +       IPOIB_EXIT( IPOIB_DBG_ENDPT );
> +       return STATUS_SUCCESS;
> +}
> +
> +
>  static inline NDIS_STATUS
>  __endpt_mgr_ref(
>         IN                              ipoib_port_t* const   
>                   p_port,
> Index: ulp/ipoib/kernel/ipoib_port.h
> ===================================================================
> --- ulp/ipoib/kernel/ipoib_port.h       (revision 1408)
> +++ ulp/ipoib/kernel/ipoib_port.h       (working copy)
> @@ -610,6 +610,12 @@ ipoib_mac_to_gid(
>         IN              const   mac_addr_t                    
>                   mac,
>                 OUT                     ib_gid_t*             
>                           p_gid );
> 
> +NTSTATUS
> +ipoib_mac_to_path(
> +       IN                              ipoib_port_t* const   
>                   p_port,
> +       IN              const   mac_addr_t                    
>                   mac,
> +               OUT                     ib_path_rec_t*        
>                   p_path );
> +
>  inline void ipoib_port_ref(
>         IN                              ipoib_port_t *        
>                   p_port,
>         IN                              int                   
>                           type);
> Index: inc/iba/ib_at_ioctl.h
> ===================================================================
> --- inc/iba/ib_at_ioctl.h       (revision 1408)
> +++ inc/iba/ib_at_ioctl.h       (working copy)
> @@ -146,6 +146,24 @@ typedef struct _IOCTL_IBAT_IP_TO_PORT_OU
>  } IOCTL_IBAT_IP_TO_PORT_OUT;
> 
> 
> +/** This IRP is used to convert a remote MAC address to a 
> path record */
> +#define        IOCTL_IBAT_MAC_TO_PATH IOCTL_IBAT( 5 )
> +
> +typedef struct _IOCTL_IBAT_MAC_TO_PATH_IN
> +{
> +       ULONG                           Version;
> +       UINT64                          PortGuid;
> +       UCHAR                           DestMac[IBAT_MAC_LEN];
> +
> +} IOCTL_IBAT_MAC_TO_PATH_IN;
> +
> +typedef struct _IOCTL_IBAT_MAC_TO_PATH_OUT
> +{
> +       ib_path_rec_t           Path;
> +
> +} IOCTL_IBAT_MAC_TO_PATH_OUT;
> +
> +
>  #define        IBAT_DEV_NAME   L"\\Device\\ibat"
>  #define        IBAT_DOS_DEV_NAME L"\\DosDevices\\Global\\ibat"
>  #define        IBAT_WIN32_NAME L"\\\\.\\ibat"
> _______________________________________________
> ofw mailing list
> ofw at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
> 



More information about the ofw mailing list