[ofw] [PATCH] Fix ND connection establishment races

Fab Tillier ftillier at windows.microsoft.com
Thu Mar 26 14:05:24 PDT 2009


Hi Folks,

This patch fixes issues with connection establishment for NetworkDirect.  The root cause of the issue is 'too many cooks' - CIDs exposed to user-mode should not be destroyed in the kernel code without explicit request from the user.  Otherwise, the CID can get recycled in the kernel for the same process and improperly freed when the stale CID is released by the application (multiple connection objects in the app have the same CID.)

Unfortunately, the fix is not simple.  The QP references the CEP, so QP destruction frees the CEP, even if there's a reference to that CEP left in the application.  Removing the CEP reference from the QP solves this problem, but deadlocks the app if it destroys the QP before the CEP, since the QP is used to queue connection-related IRPs, and the CEP uses the QP as its context and so holds a reference on it.

This patch does the following:
- Remove CEP reference for ND related QP.
- Remove ND connection related IRP queue from QP.
- Remove ND IRP handling from CEP manager.
- Add a function to CEP manager to reference the context associated with a CEP if the context is non-NULL.
- Move ND connection related IRP management into al_ndi_cm.c, in nd_csq_t structure.

As part of testing, I needed to add NotifyDisconnect functionality, so this is also included in the patch.

Note that the patch depends on Sean's previous patch to change kal_cep_destroy to allow silently dropping a REQ.  I did not remove Sean's previous changes from this patch, so they are duplicated here.  This allows the patch to be applied and built.

I tested an earlier version of this, before I went through and did some minor cosmetic cleanup.  I wanted to send this patch to get the review process going, and will report back when I finish retesting.

Sorry for the big patch, I couldn't find a good way to split it up into smaller chunks.

-Fab

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

Index: core/al/kernel/al_cm_cep.c
===================================================================
--- core/al/kernel/al_cm_cep.c  (revision 2061)
+++ core/al/kernel/al_cm_cep.c  (working copy)
@@ -370,9 +370,6 @@ typedef struct _al_kcep
         * NDI stuff - TODO: manage above core kernel CM code
         */

-       /* IRP list head */
-       LIST_ENTRY                                      irp_que;
-
        /* private data of REQ, REP, REJ CM requests */
        uint8_t                                         psize;
        uint8_t                                         pdata[IB_REP_PDATA_SIZE];
@@ -3382,24 +3379,6 @@ create_cep_mgr(
 * CEP manager API
 ******************************************************************************/

-static inline void
-__complete_ndi_irp(
-       IN                              PIRP                                            p_irp,
-       IN                              ib_mad_element_t*                       p_mad )
-{
-       net32_t* p_new_cid = (net32_t*)cl_ioctl_out_buf( p_irp );
-       kcep_t* p_cep = (kcep_t*)p_mad->send_context1;
-
-       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM,
-               ("Completing al_cep_get_cid IRP with CID = %d\n", p_cep->cid) );
-
-       *p_new_cid = p_cep->cid;
-       p_irp->IoStatus.Information = sizeof(uint32_t);
-       p_irp->IoStatus.Status = STATUS_SUCCESS;
-       IoCompleteRequest( p_irp, IO_NETWORK_INCREMENT );
-       ib_put_mad( p_mad );
-}
-

 /* Called with the CEP and CEP manager locks held */
 static ib_api_status_t
@@ -3422,8 +3401,6 @@ __cep_queue_mad(
        if ( p_cep->state == CEP_STATE_LISTEN &&
                (p_cep->sid & ~0x0ffffffI64) == IB_REQ_CM_RDMA_SID_PREFIX )
        { /* Try to complete pending IRP, if any */
-               PIRP p_irp;
-               PLIST_ENTRY p_list_entry;
                mad_cm_req_t* p_req = (mad_cm_req_t*)ib_get_mad_buf( p_mad );
                ib_cm_rdma_req_t *p_rdma_req = (ib_cm_rdma_req_t *)p_req->pdata;

@@ -3437,24 +3414,6 @@ __cep_queue_mad(
                                p_rdma_req->maj_min_ver, p_rdma_req->ipv ) );
                        return IB_UNSUPPORTED;
                }
-
-               /* get a pending IRP */
-               if ( !IsListEmpty( &p_cep->irp_que ) )
-               {
-                       // get IRP
-                       p_list_entry = RemoveHeadList( &p_cep->irp_que );
-                       p_irp = (PIRP)CONTAINING_RECORD( p_list_entry, IRP, Tail.Overlay.ListEntry );
-
-                       // complete GetConnectionReq IRP
-#pragma warning(push, 3)
-                       IoSetCancelRoutine( p_irp, NULL );
-#pragma warning(pop)
-
-                       __complete_ndi_irp( p_irp, p_mad );
-
-                       AL_EXIT( AL_DBG_CM );
-                       return IB_PENDING;
-               }
        }

        /* Queue this MAD for processing. */
@@ -3628,8 +3587,6 @@ __create_cep()

        p_cep->p_cid->p_cep = p_cep;

-       InitializeListHead( &p_cep->irp_que );
-
        ref_al_obj( &gp_cep_mgr->obj );

        AL_EXIT( AL_DBG_CM );
@@ -3682,7 +3639,7 @@ kal_cep_alloc(
        return IB_SUCCESS;
 }

-void
+ib_api_status_t
 kal_cep_config(
        IN                              ib_al_handle_t                          h_al,
        IN                              net32_t                                         cid,
@@ -3695,13 +3652,18 @@ kal_cep_config(

        KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
        p_cep = __lookup_cep( h_al, cid );
-       CL_ASSERT( p_cep );
+       if( p_cep == NULL )
+       {
+               KeReleaseInStackQueuedSpinLock( &hdl );
+               return IB_INVALID_HANDLE;
+       }

        p_cep->pfn_cb = pfn_cb;
        p_cep->context = context;
        p_cep->pfn_destroy_cb = pfn_destroy_cb;

        KeReleaseInStackQueuedSpinLock( &hdl );
+       return IB_SUCCESS;
 }

 static inline void
@@ -4128,7 +4090,7 @@ al_create_cep(
        status = kal_cep_alloc(h_al, p_cid);
        if ( status == IB_SUCCESS )
        {
-               kal_cep_config(h_al, *p_cid, pfn_cb, context, pfn_destroy_cb);
+               status = kal_cep_config(h_al, *p_cid, pfn_cb, context, pfn_destroy_cb);
        }

        AL_EXIT( AL_DBG_CM );
@@ -4174,28 +4136,6 @@ al_destroy_cep(
        context = p_cep->context;
        pfn_destroy_cb = p_cep->pfn_destroy_cb;

-       /* Cancel any queued IRP */
-       __cep_complete_irp( p_cep, STATUS_CANCELLED, IO_NO_INCREMENT );
-
-    while( !IsListEmpty( &p_cep->irp_que ) )
-    {
-        LIST_ENTRY* p_list_entry;
-        IRP* p_irp;
-
-        // get IRP
-        p_list_entry = RemoveHeadList( &p_cep->irp_que );
-        p_irp = (PIRP)CONTAINING_RECORD( p_list_entry, IRP, Tail.Overlay.ListEntry );
-
-        // complete GetConnectionReq IRP
-#pragma warning(push, 3)
-        IoSetCancelRoutine( p_irp, NULL );
-#pragma warning(pop)
-
-        p_irp->IoStatus.Information = 0;
-        p_irp->IoStatus.Status = STATUS_CANCELLED;
-        IoCompleteRequest( p_irp, IO_NETWORK_INCREMENT );
-    }
-
        __unbind_cep( p_cep );
        ref_cnt = __cleanup_cep( p_cep );
     if( reusable )
@@ -4218,9 +4158,35 @@ al_destroy_cep(
 void
 kal_cep_destroy(
        IN                              ib_al_handle_t                          h_al,
-       IN                              net32_t                                         cid )
+       IN                              net32_t                                         cid,
+       IN                              NTSTATUS                                        status )
 {
-       al_destroy_cep(h_al, &cid, FALSE);
+       KLOCK_QUEUE_HANDLE      hdl;
+       kcep_t                          *p_cep;
+    ib_pfn_destroy_cb_t pfn_destroy_cb;
+       void                            *context;
+       int32_t                         ref_cnt;
+
+       KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+       p_cep = __lookup_cep( h_al, cid );
+       CL_ASSERT( p_cep );
+
+       context = p_cep->context;
+       pfn_destroy_cb = p_cep->pfn_destroy_cb;
+
+       __unbind_cep( p_cep );
+       /* Drop new REQs so they can be retried when resources may be available */
+       if( status == STATUS_NO_MORE_ENTRIES &&
+               (p_cep->state == CEP_STATE_REQ_RCVD ||
+                p_cep->state == CEP_STATE_REQ_MRA_SENT) )
+       {
+               p_cep->state = CEP_STATE_IDLE;
+       }
+       ref_cnt = __cleanup_cep( p_cep );
+       KeReleaseInStackQueuedSpinLock( &hdl );
+
+       if( !ref_cnt && pfn_destroy_cb )
+               pfn_destroy_cb( context );
 }


@@ -5078,55 +5044,6 @@ out:


 ib_api_status_t
-kal_cep_config_pre_rep_copy_cid(
-       IN                              ib_al_handle_t                          h_al,
-       IN                              net32_t                                         cid,
-       IN                              al_pfn_cep_cb_t                         pfn_cb,
-       IN                              void*                                           context,
-       IN                              ib_pfn_destroy_cb_t                     pfn_destroy_cb,
-       IN              const   iba_cm_rep* const                       p_cm_rep,
-       IN                              uint8_t                                         rnr_nak_timeout,
-       IN      OUT                     net32_t* const                          p_cid,
-               OUT                     ib_qp_mod_t* const                      p_init )
-{
-       kcep_t                          *p_cep;
-       KLOCK_QUEUE_HANDLE      hdl;
-       ib_api_status_t         status;
-
-       CL_ASSERT( h_al );
-       CL_ASSERT( p_cm_rep );
-       CL_ASSERT( p_init );
-
-       KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
-       if (*p_cid != AL_INVALID_CID)
-       {
-               status = IB_RESOURCE_BUSY;
-               goto out;
-       }
-
-       p_cep = __lookup_cep( h_al, cid );
-       if (!p_cep )
-       {
-               status = IB_INVALID_HANDLE;
-               goto out;
-       }
-
-       status = __al_cep_pre_rep( p_cep, p_cm_rep, rnr_nak_timeout, p_init );
-       if ( status == IB_SUCCESS )
-       {
-               p_cep->pfn_cb = pfn_cb;
-               p_cep->context = context;
-               p_cep->pfn_destroy_cb = pfn_destroy_cb;
-               *p_cid = cid;
-       }
-
-out:
-       KeReleaseInStackQueuedSpinLock( &hdl );
-       return status;
-}
-
-
-ib_api_status_t
 kal_cep_pre_rep(
        IN                              ib_al_handle_t                          h_al,
        IN                              net32_t                                         cid,
@@ -6678,60 +6595,6 @@ __cep_cancel_ndi_irp(
        AL_EXIT( AL_DBG_CM );
 }

-NTSTATUS
-al_cep_get_cid(
-       IN              ib_al_handle_t                                          h_al,
-       IN              net32_t                 const                           cid,
-       IN              PIRP                                                            p_irp
-       )
-{
-       kcep_t *p_cep;
-       NTSTATUS nt_status;
-       KLOCK_QUEUE_HANDLE hdl;
-       ib_mad_element_t* p_mad = NULL;
-
-       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("[ CID = %d\n", cid) );
-       KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
-
-       p_cep = __lookup_cep( h_al, cid );
-       if( !p_cep )
-       {
-               p_irp->IoStatus.Information = 0;
-               p_irp->IoStatus.Status = STATUS_INVALID_PARAMETER;
-               IoCompleteRequest( p_irp, IO_NETWORK_INCREMENT );
-               nt_status = STATUS_EVENT_DONE;
-               goto exit;
-       }
-
-       if( !p_cep->p_mad_head )
-       { /* no pending MADs - queue the IRP */
-               p_cep->signalled = FALSE;
-               InsertTailList( &p_cep->irp_que, &p_irp->Tail.Overlay.ListEntry );
-               p_irp->Tail.Overlay.DriverContext[0] = (void*)(size_t)cid;
-               p_irp->Tail.Overlay.DriverContext[1] = (void*)h_al;
-#pragma warning(push, 3)
-               IoSetCancelRoutine( p_irp, __cep_cancel_ndi_irp );
-#pragma warning(pop)
-               IoMarkIrpPending( p_irp );
-               nt_status = STATUS_PENDING;
-               goto exit;
-       }
-
-       /* Set the MAD. */
-       p_mad = p_cep->p_mad_head;
-       p_cep->p_mad_head = p_mad->p_next;
-       p_mad->p_next = NULL;
-
-       /* complete the IRP */
-       __complete_ndi_irp( p_irp, p_mad );
-       nt_status = STATUS_EVENT_DONE;
-
-exit:
-       KeReleaseInStackQueuedSpinLock( &hdl );
-       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("] returned %08x\n", nt_status) );
-       return nt_status;
-}
-

 NTSTATUS
 al_cep_get_pdata(
@@ -6778,4 +6641,58 @@ al_cep_get_pdata(
        KeReleaseInStackQueuedSpinLock( &hdl );
        AL_EXIT( AL_DBG_CM );
        return STATUS_SUCCESS;
+}
+
+
+/*
+ * This function is designed to support moving the NetworkDirect IRP queue to the CEP
+ * without performing major surgery on the CEP manager.
+ *
+ * It retrieves the context associated with a CEP, using the pfn_addref function
+ * to prevent the context from being destroyed after it is returned.
+ *
+ * It returns NULL if there is no context, requiring contexts to be pointers.
+ */
+void*
+kal_cep_get_context(
+       IN                              ib_al_handle_t                          h_al,
+       IN                              net32_t                                         cid,
+       IN                              al_pfn_cep_cb_t                         pfn_cb,
+       IN                              ib_pfn_destroy_cb_t                     pfn_addref )
+{
+       kcep_t                          *p_cep;
+       void*                           context = NULL;
+       KLOCK_QUEUE_HANDLE      hdl;
+
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("[ CID = %d\n", cid) );
+
+       CL_ASSERT( h_al );
+       CL_ASSERT( pfn_addref );
+
+       KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+       p_cep = __lookup_cep( h_al, cid );
+       if( !p_cep )
+       {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       ("CEP not found for cid %d, h_al %p\n", cid, h_al ));
+               goto out;
+       }
+
+       if( p_cep->pfn_cb != pfn_cb )
+       {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       ("CEP callback mismatch for cid %d, h_al %p\n", cid, h_al ));
+               goto out;
+       }
+
+       context = p_cep->context;
+       if( context != NULL )
+       {
+               pfn_addref( context );
+       }
+
+out:
+       KeReleaseInStackQueuedSpinLock( &hdl );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("] returning %p\n", context) );
+       return context;
 }
Index: core/al/kernel/al_ndi_cm.c
===================================================================
--- core/al/kernel/al_ndi_cm.c  (revision 2061)
+++ core/al/kernel/al_ndi_cm.c  (working copy)
@@ -81,7 +81,7 @@ __ndi_send_req(

 NTSTATUS
 __ndi_send_rep(
-       IN              ib_qp_handle_t                                          h_qp,
+       IN              nd_csq_t                                                        *p_csq,
        IN              PIRP                                                            p_irp
        );

@@ -90,11 +90,18 @@ __ndi_send_dreq(
        IN              IRP*                                                            p_irp
        );

+NTSTATUS
+__ndi_get_req(
+       IN              nd_csq_t                                                        *p_csq,
+       IN              IRP*                                                            p_irp
+       );
+
 static void
 __ndi_queue_drep(
        IN                              IRP                                                     *p_irp
        );

+
 /*******************************************************************
  *
  * Helpers
@@ -123,12 +130,12 @@ static char * State2String(ndi_cm_state_

 static inline void
 __ndi_complete_irp(
-       IN      ib_qp_handle_t                                                  h_qp,
+       IN      nd_csq_t*                                                               p_csq,
        IN      PIRP                                                                    p_irp,
        IN      NTSTATUS                                                                status
        )
 {
-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("[ CID = %d\n", p_csq->cid) );

        CL_ASSERT( p_irp );
        CL_ASSERT( p_irp->Tail.Overlay.DriverContext[1] == NULL );
@@ -144,7 +151,7 @@ __ndi_complete_irp(
                p_irp->IoStatus.Information = 0;
                IoCompleteRequest( p_irp, 0 );
        }
-       deref_al_obj( &h_qp->obj ); /* Release IRP reference */
+       nd_csq_release( p_csq ); /* Release IRP reference */

        AL_EXIT( AL_DBG_NDI );
 }
@@ -159,23 +166,28 @@ __ndi_complete_irp(
  */
 static void
 __cep_timewait_qp(
-       IN              const   ib_qp_handle_t                          h_qp )
+       IN                              nd_csq_t                                        *p_csq )
 {
        uint64_t                        timewait = 0;
+       ib_qp_handle_t          h_qp;
        ib_qp_mod_t                     qp_mod;
        ib_api_status_t         status;

        AL_ENTER( AL_DBG_CM );

-       CL_ASSERT( h_qp );
+       CL_ASSERT( p_csq != NULL );

        /*
         * The CM should have set the proper timewait time-out value.  Reset
         * the QP and let it enter the timewait state.
         */
-       if( al_cep_get_timewait( h_qp->obj.h_al,
-               ((al_conn_qp_t*)h_qp)->cid, &timewait ) == IB_SUCCESS )
+       if( al_cep_get_timewait( p_csq->h_al, p_csq->cid, &timewait ) == IB_SUCCESS )
        {
+               h_qp = CONTAINING_RECORD(
+                       al_hdl_ref( p_csq->h_al, p_csq->h_qp, AL_OBJ_TYPE_H_QP ),
+                       ib_qp_t,
+                       obj );
+
                /* Special checks on the QP state for error handling - see above. */
                if( !h_qp || !AL_OBJ_IS_TYPE( h_qp, AL_OBJ_TYPE_H_QP ) ||
                        ( (h_qp->obj.state != CL_INITIALIZED) &&
@@ -192,16 +204,16 @@ __cep_timewait_qp(
                status = h_qp->pfn_modify_qp( h_qp, &qp_mod, NULL );
                if( status != IB_SUCCESS )
                {
-                       AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                                ("pfn_modify_qp to IB_QPS_ERROR returned %s\n",
                                ib_get_err_str( status )) );
-                       return;
                }
-
-#ifdef CL_KERNEL
-               /* Store the timestamp after which the QP exits timewait. */
-               h_qp->timewait = cl_get_time_stamp() + timewait;
-#endif /* CL_KERNEL */
+               else
+               {
+                       /* Store the timestamp after which the QP exits timewait. */
+                       h_qp->timewait = cl_get_time_stamp() + timewait;
+               }
+               deref_al_obj( &h_qp->obj );
        }

        AL_EXIT( AL_DBG_CM );
@@ -209,22 +221,39 @@ __cep_timewait_qp(

 static ib_api_status_t
 __ndi_qp2rts(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              nd_csq_t*                                                       p_csq,
        IN              PIRP                                                            p_irp
        )
 {
        ib_api_status_t status;
+       ib_qp_handle_t h_qp;
        ib_qp_mod_t qp_mod;

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("[ CID = %d\n", p_csq->cid) );
+
+       h_qp = CONTAINING_RECORD(
+               al_hdl_ref( p_csq->h_al, p_csq->h_qp, AL_OBJ_TYPE_H_QP ),
+               ib_qp_t,
+               obj );
+       if( h_qp == NULL )
+       {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       ("Bad QP %I64d\n", p_csq->h_qp) );
+               status = IB_INVALID_HANDLE;
+               goto err;
+       }
+
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("QP %p state %d\n", h_qp, h_qp->state) );

        /* fill required qp attributes */
-       status = al_cep_get_rtr_attr( qp_get_al( h_qp ),
-               ((al_conn_qp_t*)h_qp)->cid, &qp_mod );
+       status = al_cep_get_rtr_attr( p_csq->h_al, p_csq->cid, &qp_mod );
        if ( status != IB_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("al_cep_get_rtr_attr returned %s\n", ib_get_err_str( status )) );
+                       ("al_cep_get_rtr_attr for CID %d returned %s\n",
+                       p_csq->cid, ib_get_err_str( status )) );
                goto exit;
        }

@@ -234,17 +263,18 @@ __ndi_qp2rts(
        if ( status != IB_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("ndi_modify_qp to RTR returned %s.\n", ib_get_err_str(status) ) );
+                       ("ndi_modify_qp %p from %d to RTR returned %s.\n",
+                       h_qp, h_qp->state, ib_get_err_str(status) ) );
                goto exit;
        }

        /* fill required qp attributes */
-       status = al_cep_get_rts_attr( qp_get_al( h_qp ),
-               ((al_conn_qp_t*)h_qp)->cid, &qp_mod );
+       status = al_cep_get_rts_attr( p_csq->h_al, p_csq->cid, &qp_mod );
        if ( status != IB_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("al_cep_get_rts_attr returned %s\n", ib_get_err_str( status )) );
+                       ("al_cep_get_rts_attr for CID %d returned %s\n",
+                       p_csq->cid, ib_get_err_str( status )) );
                goto exit;
        }

@@ -254,10 +284,13 @@ __ndi_qp2rts(
        if ( status != IB_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("ndi_modify_qp to RTS returned %s.\n", ib_get_err_str(status) ) );
+                       ("ndi_modify_qp %p from %d to RTS returned %s.\n",
+                       h_qp, h_qp->state, ib_get_err_str(status) ) );
        }

 exit:
+       deref_al_obj( &h_qp->obj );
+err:
        AL_EXIT( AL_DBG_NDI );
        return status;
 }
@@ -277,11 +310,15 @@ static NTSTATUS __ndi_insert_irp_ex(
        )
 {
        NTSTATUS status;
-       ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)pCsq;
+       nd_csq_t *p_ndi_csq = CONTAINING_RECORD( pCsq, nd_csq_t, csq );

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("[ CID = %d\n", p_ndi_csq->cid) );
        switch( (ULONG_PTR)Context )
        {
+       case NDI_CM_LISTEN:
+               status = __ndi_get_req( p_ndi_csq, pIrp );
+               break;
+
        case NDI_CM_CONNECTING_QPR_SENT:
                status = __ndi_pr_query( pIrp );
                break;
@@ -290,24 +327,38 @@ static NTSTATUS __ndi_insert_irp_ex(
                status = __ndi_send_req( pIrp );
                break;

-       case NDI_CM_CONNECTING_REP_SENT:
-               status = __ndi_send_rep( p_ndi_csq->h_qp, pIrp );
-               break;
-
        case NDI_CM_DISCONNECTING:
                status = __ndi_send_dreq( pIrp );
                break;

+       case NDI_CM_CONNECTED_DREQ_RCVD:
+               if( p_ndi_csq->state == NDI_CM_LISTEN )
+               {
+                       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_NDI,
+                               ("] Invalid state (%d).\n", p_ndi_csq->state) );
+                       return STATUS_INVALID_DEVICE_REQUEST;
+               }
+
+               /*
+                * Overwrite the context so that the state change
+                * below turns into a noop.
+                */
+               Context = (VOID*)(ULONG_PTR)p_ndi_csq->state;
+               status = STATUS_PENDING;
+               break;
+
        default:
                status = STATUS_INVALID_DEVICE_REQUEST;
                ASSERT( FALSE );
        }

-       if( status == STATUS_SUCCESS )
+       ASSERT( status == STATUS_PENDING || !NT_SUCCESS( status ) );
+       if( NT_SUCCESS( status ) )
        {
+               AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("] Queueing IRP\n") );
                p_ndi_csq->state = (ndi_cm_state_t)(ULONG_PTR)Context;
                InsertTailList( &p_ndi_csq->queue, &pIrp->Tail.Overlay.ListEntry );
-               ref_al_obj( &p_ndi_csq->h_qp->obj ); /* Take IRP reference. */
+               nd_csq_ref( p_ndi_csq ); /* Take IRP reference. */
        }
        AL_EXIT( AL_DBG_NDI );
        return status;
@@ -334,7 +385,7 @@ static PIRP __ndi_peek_next_irp(
        PIRP nextIrp = NULL;
        PLIST_ENTRY nextEntry;
        PLIST_ENTRY listHead;
-       ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)Csq;
+       nd_csq_t *p_ndi_csq = (nd_csq_t*)Csq;

        AL_ENTER( AL_DBG_NDI );

@@ -380,16 +431,12 @@ static PIRP __ndi_peek_next_irp(

 static VOID __ndi_acquire_lock(
        IN      PIO_CSQ                                                                 Csq,
-       OUT     PKIRQL                                                                  Irql
+       OUT     PKIRQL                                                                  pIrql
        )
 {
-       ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)Csq;
-       ib_qp_handle_t h_qp = p_ndi_csq->h_qp;
-       UNUSED_PARAM( Irql );
+       nd_csq_t *p_ndi_csq = (nd_csq_t*)Csq;

-       AL_ENTER( AL_DBG_NDI );
-       cl_spinlock_acquire( &h_qp->obj.lock );
-       AL_EXIT( AL_DBG_NDI );
+       KeAcquireSpinLock( &p_ndi_csq->lock, pIrql );
 }

 static VOID __ndi_release_lock(
@@ -397,13 +444,9 @@ static VOID __ndi_release_lock(
        IN      KIRQL                                                                   Irql
        )
 {
-       ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)Csq;
-       ib_qp_handle_t h_qp = p_ndi_csq->h_qp;
-       UNUSED_PARAM( Irql );
+       nd_csq_t *p_ndi_csq = (nd_csq_t*)Csq;

-       AL_ENTER( AL_DBG_NDI );
-       cl_spinlock_release( &h_qp->obj.lock );
-       AL_EXIT( AL_DBG_NDI );
+       KeReleaseSpinLock( &p_ndi_csq->lock, Irql );
 }

 static VOID __ndi_complete_cancelled_irp(
@@ -411,8 +454,7 @@ static VOID __ndi_complete_cancelled_irp
        IN      PIRP                                                                    p_irp
        )
 {
-       ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)Csq;
-       ib_qp_handle_t h_qp = p_ndi_csq->h_qp;
+       nd_csq_t *p_ndi_csq = (nd_csq_t*)Csq;
        KIRQL irql;
        ib_query_handle_t h_query;

@@ -427,14 +469,11 @@ static VOID __ndi_complete_cancelled_irp
                 * SA callback by the CSQ lock.
                 */
 #pragma warning( disable:4305 )
-               h_query = InterlockedExchangePointer( &h_qp->p_irp_queue->h_query, NULL );
+               h_query = InterlockedExchangePointer( &p_ndi_csq->h_query, NULL );
 #pragma warning( default:4305 )
                if( h_query != NULL )
                        al_cancel_sa_req( &h_query->sa_req );

-               /* Always try to destroy the CEP.  The CEP manager handles invalid CIDs. */
-               al_destroy_cep( qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
-
                if( p_ndi_csq->state != NDI_CM_INVALID )
                        p_ndi_csq->state = NDI_CM_IDLE;

@@ -443,7 +482,8 @@ static VOID __ndi_complete_cancelled_irp
                __fallthrough;

        case UAL_NDI_NOTIFY_DREQ:
-               __ndi_complete_irp( h_qp, p_irp, STATUS_CANCELLED );
+       case UAL_NDI_GET_REQ_CM:
+               __ndi_complete_irp( p_ndi_csq, p_irp, STATUS_CANCELLED );
                break;

        case UAL_NDI_DREQ_CM:
@@ -457,138 +497,158 @@ static VOID __ndi_complete_cancelled_irp


 NTSTATUS
-ndi_qp_init(
-       IN                              ib_qp_handle_t                          h_qp )
+nd_csq_init(
+       IN                              ib_al_handle_t                          h_al,
+       IN                              net32_t                                         cid,
+       IN                              uint64_t                                        h_qp,
+               OUT                     nd_csq_t                                        **pp_csq )
 {
-
+       nd_csq_t *p_nd_csq;
        NTSTATUS status;
+       ib_api_status_t ib_status;

        AL_ENTER( AL_DBG_NDI );

-       if ( h_qp->type != IB_QPT_RELIABLE_CONN )
-       {
-               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                       ("Only RC QP type is supported \n"));
-               status = STATUS_SUCCESS;
-               goto exit;
-       }
-
-       h_qp->p_irp_queue = (ndi_qp_csq_t*)cl_zalloc(sizeof(ndi_qp_csq_t));
-       if (!h_qp->p_irp_queue)
+       p_nd_csq = (nd_csq_t*)cl_zalloc( sizeof(*p_nd_csq) );
+       if( p_nd_csq == NULL )
        {
                status = STATUS_NO_MEMORY;
                goto exit;
        }

-       status = IoCsqInitializeEx( &h_qp->p_irp_queue->csq,
+       KeInitializeSpinLock( &p_nd_csq->lock );
+       InitializeListHead( &p_nd_csq->queue );
+       p_nd_csq->h_al = h_al;
+       p_nd_csq->h_qp = h_qp;
+       p_nd_csq->h_query = NULL;
+       p_nd_csq->state = NDI_CM_IDLE;
+       p_nd_csq->cid = cid;
+
+       status = IoCsqInitializeEx( &p_nd_csq->csq,
                __ndi_insert_irp_ex, __ndi_remove_irp,
                __ndi_peek_next_irp, __ndi_acquire_lock,
                __ndi_release_lock, __ndi_complete_cancelled_irp );
        if ( !NT_SUCCESS( status ) )
+       {
+               cl_free( p_nd_csq );
                goto exit;
+       }

-       InitializeListHead( &h_qp->p_irp_queue->queue );
-       h_qp->p_irp_queue->h_qp = h_qp;
-       h_qp->p_irp_queue->h_query = NULL;
-       h_qp->p_irp_queue->state = NDI_CM_IDLE;
+       /*
+        * One reference for the CEP, one for the caller (so that if the CEP
+        * gets destroyed we don't blow up.)
+        */
+       p_nd_csq->ref_cnt = 2;
+
+       ib_status = kal_cep_config(
+               h_al, cid, nd_cm_handler, p_nd_csq, nd_csq_release );
+
+       if( ib_status != IB_SUCCESS )
+       {
+               status = STATUS_UNSUCCESSFUL;
+               cl_free( p_nd_csq );
+               goto exit;
+       }
+
+       *pp_csq = p_nd_csq;
        status = STATUS_SUCCESS;

-AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-       ("Creating h_qp %#I64x, uhdl %#I64x \n",
-       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl ) );
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
+               ("Creating CSQ %p, uhdl %#I64x \n", p_nd_csq, h_qp) );

 exit:
        AL_EXIT( AL_DBG_NDI );
        return status;
 }

-#pragma warning(disable:4706)
+
 void
-ndi_qp_destroy(
-       IN              ib_qp_handle_t                                  h_qp )
+ndi_cancel_cm_irps(
+       IN                              nd_csq_t                                        *p_nd_csq )
 {
-       KIRQL irql;
        PIRP Irp;

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("[ CSQ %p (CID = %d)\n",
+               p_nd_csq, p_nd_csq->cid ) );

-       if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_queue)
+       /* Cancel all pending IRPs queued on this CSQ. */
+       AL_ENTER( AL_DBG_NDI );
+       for( Irp = IoCsqRemoveNextIrp( &p_nd_csq->csq, NULL );
+               Irp != NULL;
+               Irp = IoCsqRemoveNextIrp( &p_nd_csq->csq, NULL ) )
        {
-               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                       ("Destroying h_qp %#I64x, uhdl %#I64x, cid %d\n",
-                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, ((al_conn_qp_t*)h_qp)->cid ) );
+               __ndi_complete_cancelled_irp( &p_nd_csq->csq, Irp );
+       }
+       for( Irp = IoCsqRemoveNextIrp( &p_nd_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_NOTIFY_DREQ );
+               Irp != NULL;
+               Irp = IoCsqRemoveNextIrp( &p_nd_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_NOTIFY_DREQ ) )
+       {
+               __ndi_complete_cancelled_irp( &p_nd_csq->csq, Irp );
+       }

-               /* Move the state before flushing, so that all new IRPs fail to queue. */
-               __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-               h_qp->p_irp_queue->state = NDI_CM_INVALID;
-               __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       AL_EXIT( AL_DBG_NDI );
+}

-               /* cancel pending IRPS for NDI type CQ */
-               AL_ENTER( AL_DBG_NDI );
-               while( Irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, NULL ) )
-               {
-                       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                               ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",
-                               (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );

-                       __ndi_complete_cancelled_irp( &h_qp->p_irp_queue->csq, Irp );
-               }
-               while( Irp = IoCsqRemoveNextIrp(
-                       &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_NOTIFY_DREQ ) )
-               {
-                       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                               ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",
-                               (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );
+void
+nd_csq_destroy(
+       IN                              nd_csq_t                                        *p_nd_csq )
+{
+       KIRQL irql;

-                       __ndi_complete_cancelled_irp( &h_qp->p_irp_queue->csq, Irp );
-               }
-               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                       ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",
-                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );
-       }
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("[ CSQ %p (CID = %d)\n",
+               p_nd_csq, p_nd_csq->cid ) );
+
+       /* Move the state before flushing, so that all new IRPs fail to queue. */
+       __ndi_acquire_lock( &p_nd_csq->csq, &irql );
+       p_nd_csq->state = NDI_CM_INVALID;
+       __ndi_release_lock( &p_nd_csq->csq, irql );
+
+       /* cancel pending IRPS */
+       ndi_cancel_cm_irps( p_nd_csq );
+
+       cl_free( p_nd_csq );

        AL_EXIT( AL_DBG_NDI );
 }
-#pragma warning(default:4706)


 void
-ndi_qp_free(
-       IN              ib_qp_handle_t                                  h_qp )
+nd_csq_ref( nd_csq_t* p_csq )
 {
-       AL_ENTER( AL_DBG_NDI );
+       InterlockedIncrement( &p_csq->ref_cnt );
+}

-       if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_queue)
+
+void
+nd_csq_release( nd_csq_t* p_csq )
+{
+       if( InterlockedDecrement( &p_csq->ref_cnt ) == 0 )
        {
-               /* free NDI context */
-               cl_free( h_qp->p_irp_queue );
-               h_qp->p_irp_queue = NULL;
+               nd_csq_destroy( p_csq );
        }
-
-       AL_EXIT( AL_DBG_NDI );
 }


 static inline void
-__ndi_complete_req_irp(
-       IN      ib_qp_handle_t                                                  h_qp,
-       IN      NTSTATUS                                                                code
-       )
+__ndi_timeout_req_irp(
+       __in nd_csq_t* p_csq )
 {
        PIRP Irp;
        KIRQL irql;

        AL_ENTER( AL_DBG_NDI );
-       Irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
-       if ( Irp )
+       Irp = IoCsqRemoveNextIrp( &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
+       if( Irp )
        {
-               __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-               if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                       h_qp->p_irp_queue->state = NDI_CM_IDLE;
-               __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
-               __ndi_complete_irp( h_qp, Irp, code );
+               __ndi_acquire_lock( &p_csq->csq, &irql );
+               if( p_csq->state != NDI_CM_INVALID )
+                       p_csq->state = NDI_CM_IDLE;
+               __ndi_release_lock( &p_csq->csq, irql );
+               __ndi_complete_irp( p_csq, Irp, STATUS_TIMEOUT );
        }
-       al_destroy_cep( qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
        AL_EXIT( AL_DBG_NDI );
 }

@@ -600,38 +660,41 @@ __ndi_complete_req_irp(

 static void
 __ndi_notify_dreq(
-       IN                              ib_qp_handle_t const            h_qp )
+       __in nd_csq_t* p_csq )
 {
-       IRP *p_irp = IoCsqRemoveNextIrp(
-               &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_NOTIFY_DREQ );
-
-       if( p_irp )
+       IRP *p_irp;
+       do
        {
-               __ndi_complete_irp( h_qp, p_irp, STATUS_SUCCESS );
-       }
+               p_irp = IoCsqRemoveNextIrp(
+                       &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_NOTIFY_DREQ );
+
+               if( p_irp )
+               {
+                       __ndi_complete_irp( p_csq, p_irp, STATUS_SUCCESS );
+               }
+
+       } while ( p_irp );
 }


 static void
 __ndi_proc_dreq(
-       IN                              ib_qp_handle_t const            h_qp )
+       IN                              nd_csq_t                                        *p_csq)
 {
        IRP *p_irp;
        KIRQL irql;
-       ndi_cm_state_t old_state;

-       __ndi_notify_dreq( h_qp );
+       __ndi_notify_dreq( p_csq );

-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       old_state = h_qp->p_irp_queue->state;
-       if( old_state == NDI_CM_CONNECTED )
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       if( p_csq->state == NDI_CM_CONNECTED )
        {
-               h_qp->p_irp_queue->state = NDI_CM_CONNECTED_DREQ_RCVD;
+               p_csq->state = NDI_CM_CONNECTED_DREQ_RCVD;
        }
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_release_lock( &p_csq->csq, irql );

        p_irp = IoCsqRemoveNextIrp(
-               &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_DREQ_CM );
+               &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_DREQ_CM );
        if( p_irp != NULL )
        {
                __ndi_queue_drep( p_irp );
@@ -645,7 +708,7 @@ __ndi_proc_dreq(
  */
 static void
 __ndi_proc_rej(
-       IN                              ib_qp_handle_t const            h_qp,
+       IN                              nd_csq_t*                                       p_csq,
        IN              const   mad_cm_rej_t* const                     p_rej )
 {
        KIRQL irql;
@@ -654,73 +717,135 @@ __ndi_proc_rej(
        AL_ENTER( AL_DBG_NDI );

        AL_PRINT(TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-               ("p_rej %p, h_qp %#I64x, uhdl %#I64x, connect reject, reason=%hd\n",
-               p_rej, (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, cl_ntoh16(p_rej->reason) ) );
+               ("p_rej %p, CID=%d, uhdl %#I64x, connect reject, reason=%hd\n",
+               p_rej, p_csq->cid, p_csq->h_qp, cl_ntoh16(p_rej->reason) ) );

-       p_irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, NULL );
-       __ndi_notify_dreq( h_qp );
-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
+       p_irp = IoCsqRemoveNextIrp( &p_csq->csq, NULL );
+       __ndi_notify_dreq( p_csq );
+       __ndi_acquire_lock( &p_csq->csq, &irql );
        if( p_irp != NULL )
        {
                switch( cl_ioctl_ctl_code( p_irp ) )
                {
                case UAL_NDI_REQ_CM:
-                       if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                               h_qp->p_irp_queue->state = NDI_CM_IDLE;
-                       if( p_rej->reason == IB_REJ_TIMEOUT )
-                               __ndi_complete_irp( h_qp, p_irp, STATUS_TIMEOUT );
+                       if( p_csq->state != NDI_CM_INVALID )
+                               p_csq->state = NDI_CM_IDLE;
+                       if( p_rej->reason == IB_REJ_USER_DEFINED )
+                               __ndi_complete_irp( p_csq, p_irp, STATUS_CONNECTION_REFUSED );
                        else
-                               __ndi_complete_irp( h_qp, p_irp, STATUS_CONNECTION_REFUSED );
+                               __ndi_complete_irp( p_csq, p_irp, STATUS_TIMEOUT );

-                       al_destroy_cep(
-                               qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
+                       /* We leave the CEP active so that the private data can be retrieved. */
                        break;

                case UAL_NDI_DREQ_CM:
                        __ndi_queue_drep( p_irp );
                        break;
+
+               case UAL_NDI_NOTIFY_DREQ:
+                       __ndi_complete_irp( p_csq, p_irp, STATUS_CONNECTION_ABORTED );
+                       break;
+
+               default:
+                       ASSERT( cl_ioctl_ctl_code( p_irp ) == UAL_NDI_REQ_CM ||
+                               cl_ioctl_ctl_code( p_irp ) == UAL_NDI_DREQ_CM ||
+                               cl_ioctl_ctl_code( p_irp ) == UAL_NDI_NOTIFY_DREQ );
                }
        }
-       else if( h_qp->p_irp_queue->state == NDI_CM_CONNECTED )
+       else if( p_csq->state == NDI_CM_CONNECTED || p_csq->state == NDI_CM_CONNECTING_REQ_RCVD )
        {
-               if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                       h_qp->p_irp_queue->state = NDI_CM_CONNECTED_DREQ_RCVD;
+               p_csq->state = NDI_CM_CONNECTED_DREQ_RCVD;
+       }
+       __ndi_release_lock( &p_csq->csq, irql );
+
+       AL_EXIT( AL_DBG_NDI );
+}
+
+
+static void
+__ndi_proc_req(
+       IN                              nd_csq_t*                                       p_csq,
+       IN                              net32_t                                         new_cid,
+       IN                              ib_mad_element_t                        *p_mad )
+{
+       IRP* p_irp;
+       KIRQL irql;
+       NTSTATUS status;
+       nd_csq_t* p_new_csq;
+
+       AL_ENTER( AL_DBG_NDI );
+
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI ,("CID = %d\n", p_csq->cid));
+
+       status = nd_csq_init( p_csq->h_al, new_cid, 0, &p_new_csq );
+       if( status != STATUS_SUCCESS )
+       {
+               // No CSQ could be set up for the new CEP: drop the REQ by destroying the CEP and releasing the MAD.
+               kal_cep_destroy( p_csq->h_al, new_cid, STATUS_NO_MORE_ENTRIES );
+               ib_put_mad( p_mad );
+               return;
+       }
+
+       __ndi_acquire_lock( &p_new_csq->csq, &irql );
+       p_new_csq->state = NDI_CM_CONNECTING_REQ_RCVD;
+       __ndi_release_lock( &p_new_csq->csq, irql );
+       nd_csq_release( p_new_csq );
+
+       p_irp = IoCsqRemoveNextIrp( &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_GET_REQ_CM );
+       if( p_irp == NULL )
+       {
+               p_mad->send_context1 = (VOID*)(ULONG_PTR)new_cid;
+               __ndi_acquire_lock( &p_csq->csq, &irql );
+               if( p_csq->p_mad_head == NULL )
+               {
+                       p_csq->p_mad_head = p_mad;
+               }
+               else
+               {
+                       p_csq->p_mad_tail->p_next = p_mad;
+               }
+               p_csq->p_mad_tail = p_mad;
+               __ndi_release_lock( &p_csq->csq, irql );
+       }
+       else
+       {
+               *(net32_t*)cl_ioctl_out_buf( p_irp ) = new_cid;
+               __ndi_complete_irp( p_csq, p_irp, STATUS_SUCCESS );
+               ib_put_mad( p_mad );
        }
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );

        AL_EXIT( AL_DBG_NDI );
+       return;
 }


 static void
 __ndi_proc_rep(
-       IN                              ib_qp_handle_t const            h_qp,
-       IN                              net32_t                                         cid )
+       IN                              nd_csq_t*                                       p_csq )
 {
-       ndi_qp_csq_t *p_ndi_csq = h_qp->p_irp_queue;
        IRP* p_irp;
        KIRQL irql;

        AL_ENTER( AL_DBG_NDI );

-       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI ,("h_qp = 0x%p\n", h_qp));
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI ,("CID = %d\n", p_csq->cid));

-       p_irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
-       __ndi_acquire_lock( &p_ndi_csq->csq, &irql );
+       p_irp = IoCsqRemoveNextIrp( &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
+       __ndi_acquire_lock( &p_csq->csq, &irql );
        if( p_irp == NULL )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("Not the expected state %s\n", State2String( p_ndi_csq->state )));
-               CL_ASSERT( IsListEmpty( &h_qp->p_irp_queue->queue ) );
-               al_cep_rej( qp_get_al( h_qp ), cid, IB_REJ_INVALID_COMM_INSTANCE, NULL, 0, NULL, 0 );
+                       ("Not the expected state %s\n", State2String( p_csq->state )));
+               CL_ASSERT( IsListEmpty( &p_csq->queue ) );
+               al_cep_rej( p_csq->h_al, p_csq->cid, IB_REJ_INVALID_COMM_INSTANCE, NULL, 0, NULL, 0 );
        }
        else
        {
-               p_ndi_csq->state = NDI_CM_CONNECTING_REP_RCVD;
+               p_csq->state = NDI_CM_CONNECTING_REP_RCVD;

-               __ndi_complete_irp( h_qp, p_irp, STATUS_SUCCESS );
+               __ndi_complete_irp( p_csq, p_irp, STATUS_SUCCESS );
        }
-       __ndi_release_lock( &p_ndi_csq->csq, irql );
+       __ndi_release_lock( &p_csq->csq, irql );

        AL_EXIT( AL_DBG_NDI );
        return;
@@ -732,7 +857,8 @@ __ndi_do_drep(
        IN                              DEVICE_OBJECT*                          p_dev_obj,
        IN                              PIRP                                            p_irp )
 {
-       ib_qp_handle_t h_qp = p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t* p_csq = p_irp->Tail.Overlay.DriverContext[0];
+       ib_qp_handle_t h_qp;
        ib_qp_mod_t qp_mod;
        ib_api_status_t status;
        uint64_t timewait_us;
@@ -746,11 +872,8 @@ __ndi_do_drep(
        CL_ASSERT( p_irp->Tail.Overlay.DriverContext[1] );
        IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
        p_irp->Tail.Overlay.DriverContext[1] = NULL;
-       deref_al_obj( &h_qp->obj ); /* Release work item reference. */
-
-       status = al_cep_get_timewait( qp_get_al( h_qp ),
-               ((al_conn_qp_t*)h_qp)->cid, &timewait_us );

+       status = al_cep_get_timewait( p_csq->h_al, p_csq->cid, &timewait_us );
        if (status != IB_SUCCESS)
        {
                nt_status = STATUS_CONNECTION_INVALID;
@@ -758,31 +881,44 @@ __ndi_do_drep(
        }

        /* Store the timestamp after which the QP exits timewait. */
-       h_qp->timewait = cl_get_time_stamp() + timewait_us;
+       h_qp = CONTAINING_RECORD(
+               al_hdl_ref( p_csq->h_al, p_csq->h_qp, AL_OBJ_TYPE_H_QP ),
+               ib_qp_t,
+               obj );
+       if( h_qp != NULL )
+       {
+               h_qp->timewait = cl_get_time_stamp() + timewait_us;
+       }

-       al_destroy_cep( qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
+       /* Send the DREP. */
+       al_cep_drep( p_csq->h_al, p_csq->cid, NULL, 0 );

        /* bring QP to error state */
-       cl_memclr( &qp_mod, sizeof(qp_mod) );
-       qp_mod.req_state = IB_QPS_ERROR;
-
-       status = ndi_modify_qp( h_qp, &qp_mod,
-               cl_ioctl_out_size( p_irp ), cl_ioctl_out_buf( p_irp ) );
-       if ( status != IB_SUCCESS )
+       if( h_qp != NULL )
        {
-               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("ndi_modify_qp to ERROR returned %s.\n", ib_get_err_str(status) ) );
+               cl_memclr( &qp_mod, sizeof(qp_mod) );
+               qp_mod.req_state = IB_QPS_ERROR;
+
+               status = ndi_modify_qp( h_qp, &qp_mod,
+                       cl_ioctl_out_size( p_irp ), cl_ioctl_out_buf( p_irp ) );
+               if ( status != IB_SUCCESS )
+               {
+                       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                               ("ndi_modify_qp to ERROR returned %s.\n", ib_get_err_str(status) ) );
+               }
+               deref_al_obj( &h_qp->obj );
        }

        nt_status = ib_to_ntstatus( status );

 exit:
-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-               h_qp->p_irp_queue->state = NDI_CM_IDLE;
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       if( p_csq->state != NDI_CM_INVALID )
+               p_csq->state = NDI_CM_IDLE;
+       __ndi_release_lock( &p_csq->csq, irql );

-       __ndi_complete_irp( h_qp, p_irp, nt_status );
+       __ndi_complete_irp( p_csq, p_irp, nt_status );
+       nd_csq_release( p_csq ); /* Release work item reference. */
        AL_EXIT( AL_DBG_NDI );
 }

@@ -799,16 +935,17 @@ __ndi_queue_drep(

 static void
 __ndi_proc_drep(
-       IN                              ib_qp_handle_t const            h_qp )
+       IN                              nd_csq_t*                                       p_csq )
 {
        IRP* p_irp;

        AL_ENTER( AL_DBG_NDI );

        p_irp = IoCsqRemoveNextIrp(
-               &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_DREQ_CM );
+               &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_DREQ_CM );
        if( p_irp != NULL )
        {
+               CL_ASSERT( p_irp->Tail.Overlay.DriverContext[0] == p_csq );
                __ndi_queue_drep( p_irp );
        }

@@ -816,8 +953,8 @@ __ndi_proc_drep(
 }


-static void
-__ndi_cm_handler(
+void
+nd_cm_handler(
        IN              const   ib_al_handle_t                          h_al,
        IN              const   net32_t                                         cid )
 {
@@ -829,8 +966,11 @@ __ndi_cm_handler(

        while( al_cep_poll( h_al, cid, &context, &new_cid, &p_mad_el ) == IB_SUCCESS )
        {
-               ib_mad_t*p_mad = ib_get_mad_buf( p_mad_el );
-               ib_qp_handle_t h_qp = (ib_qp_handle_t)context;
+               ib_mad_t* p_mad = ib_get_mad_buf( p_mad_el );
+               nd_csq_t* p_csq = (nd_csq_t*)context;
+
+               CL_ASSERT( p_csq != NULL );
+               CL_ASSERT( p_csq->cid == cid );

                if( p_mad_el->status != IB_SUCCESS )
                {
@@ -838,28 +978,28 @@ __ndi_cm_handler(
                        {
                        case CM_REQ_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("REQ timeouted for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               __ndi_complete_req_irp( h_qp, STATUS_TIMEOUT );
+                                       ("REQ timed out for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_timeout_req_irp( p_csq );
                                break;

                        case CM_REP_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("REP timeouted for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
+                                       ("REP timed out for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
                                break;

                        case CM_DREQ_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("DREQ timeouted for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               __ndi_proc_drep( h_qp );
+                                       ("DREQ timed out for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_drep( p_csq );
                                break;

                        default:
                                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                                        ("Unhandled failed MAD attr ID %d for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       p_mad->attr_id, cid, h_al, h_qp, new_cid ) );
+                                       p_mad->attr_id, cid, h_al, p_csq, new_cid ) );
                                break;
                        }
                }
@@ -867,50 +1007,51 @@ __ndi_cm_handler(
                {
                        switch( p_mad->attr_id )
                        {
+                       case CM_REQ_ATTR_ID:
+                               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
+                                       ("REQ received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_req( p_csq, new_cid, p_mad_el );
+                               continue;
+
                        case CM_REP_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("REP received for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid ||
-                                       ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ||
-                                       ((al_conn_qp_t*)h_qp)->cid == AL_RESERVED_CID );
-                               __ndi_proc_rep( h_qp, cid );
+                                       ("REP received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_rep( p_csq );
                                break;

                        case CM_REJ_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("REJ received for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               __ndi_proc_rej( h_qp, (mad_cm_rej_t*)p_mad );
+                                       ("REJ received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_rej( p_csq, (mad_cm_rej_t*)p_mad );
                                break;

                        case CM_DREQ_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("DREQ received for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid ||
-                               ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ||
-                               ((al_conn_qp_t*)h_qp)->cid == AL_RESERVED_CID );
-                               __ndi_proc_dreq( h_qp );
+                                       ("DREQ received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_dreq( p_csq );
                                break;

                        case CM_DREP_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("DREP received for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       cid, h_al, h_qp, new_cid ) );
-                               __ndi_proc_drep( h_qp );
+                                       ("DREP received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
+                               __ndi_proc_drep( p_csq );
                                break;

                        case CM_RTU_ATTR_ID:
                                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-                                       ("RTU received for CEP with cid %d, h_al %p, context %p, new_cid %d.\n",
-                                       cid, h_al, h_qp, new_cid ) );
+                                       ("RTU received for CEP with cid %d, h_al %p, context %p.\n",
+                                       cid, h_al, p_csq ) );
                                break;

                        default:
                                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                                        ("Unhandled MAD attr ID %d for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
-                                       p_mad->attr_id, cid, h_al, h_qp, new_cid ) );
+                                       p_mad->attr_id, cid, h_al, p_csq, new_cid ) );
                        }
                }

@@ -922,24 +1063,26 @@ __ndi_cm_handler(

 static void
 __ndi_fill_cm_req(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              net32_t                                                         qpn,
        IN              ual_ndi_req_cm_ioctl_in_t                       *p_req,
        IN              ib_path_rec_t                                           *p_path_rec,
-               OUT     ib_cm_req_t                                                     *p_cm_req)
+               OUT     iba_cm_req                                                      *p_cm_req)
 {
        AL_ENTER( AL_DBG_NDI );

-       memset( p_cm_req, 0, sizeof(ib_cm_req_t) );
-
-       p_cm_req->svc_id = IB_REQ_CM_RDMA_SID_PREFIX | (p_req->prot << 16) | p_req->dst_port;
-       p_cm_req->max_cm_retries = g_max_cm_retries;
-       p_cm_req->p_primary_path = p_path_rec;
+       memset( p_cm_req, 0, sizeof(*p_cm_req) );

-       p_cm_req->p_req_pdata = (uint8_t *)&p_req->pdata;
-       p_cm_req->req_length = sizeof(p_req->pdata);
+       p_cm_req->service_id = IB_REQ_CM_RDMA_SID_PREFIX | (p_req->prot << 16) | p_req->dst_port;
+       p_cm_req->p_primary_path = p_path_rec;

+       p_cm_req->qpn = qpn;
        p_cm_req->qp_type = IB_QPT_RELIABLE_CONN;
-       p_cm_req->h_qp = h_qp;
+       p_cm_req->starting_psn = qpn;
+
+       p_cm_req->p_pdata = (uint8_t *)&p_req->pdata;
+       p_cm_req->pdata_len = sizeof(p_req->pdata);
+
+       p_cm_req->max_cm_retries = g_max_cm_retries;
        p_cm_req->resp_res = p_req->resp_res;
        p_cm_req->init_depth = p_req->init_depth;

@@ -959,10 +1102,8 @@ __ndi_fill_cm_req(
        else if( p_cm_req->local_resp_timeout < CM_MIN_LOCAL_TIMEOUT )
                p_cm_req->local_resp_timeout = CM_MIN_LOCAL_TIMEOUT;

-       p_cm_req->rnr_nak_timeout = QP_ATTRIB_RNR_NAK_TIMEOUT;
        p_cm_req->rnr_retry_cnt = QP_ATTRIB_RNR_RETRY;
        p_cm_req->retry_cnt = g_qp_retries;
-       p_cm_req->p_alt_path = NULL;

        AL_EXIT( AL_DBG_NDI );
 }
@@ -974,58 +1115,47 @@ __ndi_send_req(
        )
 {
        ib_api_status_t status;
-       ib_qp_handle_t h_qp = (ib_qp_handle_t)p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t* p_csq = (nd_csq_t*)p_irp->Tail.Overlay.DriverContext[0];
        ib_path_rec_t *p_path_rec = p_irp->Tail.Overlay.DriverContext[1];
        ual_ndi_req_cm_ioctl_in_t *p_req =
                (ual_ndi_req_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );
        NTSTATUS nt_status;
-       ib_cm_req_t cm_req;
-       ib_qp_mod_t qp_mod;
-       al_conn_qp_t *p_qp;
+       ib_qp_handle_t h_qp;
+       iba_cm_req cm_req;

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("[ CID = %d, h_al %p, context %p\n",
+               p_req->cid, p_csq->h_al, p_csq) );

        p_irp->Tail.Overlay.DriverContext[1] = NULL;

-       if( h_qp->p_irp_queue->state != NDI_CM_CONNECTING_QPR_SENT &&
-               h_qp->p_irp_queue->state != NDI_CM_IDLE )
+       if( p_csq->state != NDI_CM_CONNECTING_QPR_SENT &&
+               p_csq->state != NDI_CM_IDLE )
        {
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("Unexpected state: %d\n", h_qp->p_irp_queue->state) );
+                       ("Unexpected state: %d\n", p_csq->state) );
                return STATUS_CONNECTION_ACTIVE;
        }

-       /* Get a CEP and bind it to the QP. */
-       p_qp = (al_conn_qp_t*)h_qp;
-       cl_spinlock_acquire( &h_qp->obj.lock );
-       if( h_qp->obj.state != CL_DESTROYING && p_qp->cid == AL_INVALID_CID )
-       {
-               status = al_create_cep( qp_get_al( h_qp ), __ndi_cm_handler,
-                                                               &h_qp->obj, deref_al_obj, &p_qp->cid );
-       }
-       else
-       {
-               status = IB_RESOURCE_BUSY;
-       }
-       cl_spinlock_release( &h_qp->obj.lock );
-       if( status != IB_SUCCESS )
+       h_qp = CONTAINING_RECORD(
+               al_hdl_ref( p_csq->h_al, p_req->h_qp, AL_OBJ_TYPE_H_QP ),
+               ib_qp_t,
+               obj );
+       if( !h_qp )
        {
-               h_qp->p_irp_queue->state = NDI_CM_IDLE;
+               /* The QP was valid when the IOCTL first came in... */
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("al_create_cep returned %s.\n", ib_get_err_str( status )) );
-               return ib_to_ntstatus( status );
+                       ("Invalid QP: %I64d\n", p_req->h_qp) );
+               return STATUS_CONNECTION_ABORTED;
        }
-       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-               ("Created Active CEP with cid %d, h_al %p, context %p\n",
-               p_qp->cid, qp_get_al( h_qp ), h_qp ) );
-
-       ref_al_obj( &h_qp->obj ); /* Take CEP reference. */

-       /* Format ib_cm_req_t structure */
+       /* Format iba_cm_req structure */
-       __ndi_fill_cm_req( h_qp, p_req, p_path_rec, &cm_req );
+       __ndi_fill_cm_req( h_qp->num, p_req, p_path_rec, &cm_req );
+       deref_al_obj( &h_qp->obj );

        /* prepare CEP for connection */
-       status = al_cep_pre_req( qp_get_al( h_qp ), p_qp->cid, &cm_req, &qp_mod );
+       status = kal_cep_pre_req(
+               p_csq->h_al, p_csq->cid, &cm_req, QP_ATTRIB_RNR_NAK_TIMEOUT, NULL );
        if( status != STATUS_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
@@ -1034,7 +1164,7 @@ __ndi_send_req(
        }

        /* send CM REQ */
-       status = al_cep_send_req( qp_get_al( h_qp ), p_qp->cid );
+       status = al_cep_send_req( p_csq->h_al, p_csq->cid );
        if( status != IB_SUCCESS )
        {
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
@@ -1044,11 +1174,9 @@ __ndi_send_req(

        /* SUCCESS ! */
        AL_EXIT( AL_DBG_NDI );
-       return STATUS_SUCCESS;
+       return STATUS_PENDING;

 error:
-       al_destroy_cep( qp_get_al( h_qp ), &p_qp->cid, TRUE );
-
        switch( status )
        {
        case IB_INVALID_HANDLE:
@@ -1063,7 +1191,7 @@ error:
                nt_status = ib_to_ntstatus( status );
        }

-       h_qp->p_irp_queue->state = NDI_CM_IDLE;
+       p_csq->state = NDI_CM_IDLE;
        AL_EXIT( AL_DBG_NDI );
        return nt_status;
 }
@@ -1076,7 +1204,7 @@ __ndi_pr_query_cb(
        cl_ioctl_handle_t p_irp;
        uint8_t pkt_life;
        ib_path_rec_t *p_path_rec;
-       ib_qp_handle_t h_qp = (ib_qp_handle_t)p_query_rec->query_context;
+       nd_csq_t* p_csq = (nd_csq_t*)p_query_rec->query_context;
        NTSTATUS status;
        KIRQL irql;

@@ -1086,22 +1214,22 @@ __ndi_pr_query_cb(
                ("status is %d, count is %d, context %p\n", p_query_rec->status,
                p_query_rec->result_cnt, p_query_rec->query_context) );

-       p_irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
+       p_irp = IoCsqRemoveNextIrp( &p_csq->csq, (VOID*)(ULONG_PTR)UAL_NDI_REQ_CM );
        if( p_irp == NULL )
        {
                goto exit;
        }

 #pragma warning( disable:4305 )
-       InterlockedExchangePointer( &h_qp->p_irp_queue->h_query, NULL );
+       InterlockedExchangePointer( &p_csq->h_query, NULL );
 #pragma warning( default:4305 )

        if( p_query_rec->status != IB_SUCCESS || p_query_rec->result_cnt == 0 )
        {
-               __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-               if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                       h_qp->p_irp_queue->state = NDI_CM_IDLE;
-               __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+               __ndi_acquire_lock( &p_csq->csq, &irql );
+               if( p_csq->state != NDI_CM_INVALID )
+                       p_csq->state = NDI_CM_IDLE;
+               __ndi_release_lock( &p_csq->csq, irql );
                switch( p_query_rec->status )
                {
                case IB_TIMEOUT:
@@ -1129,7 +1257,7 @@ __ndi_pr_query_cb(
                        status = STATUS_HOST_UNREACHABLE;
                        break;
                }
-               __ndi_complete_irp( h_qp, p_irp, status );
+               __ndi_complete_irp( p_csq, p_irp, status );
                goto exit;
        }

@@ -1148,15 +1276,15 @@ __ndi_pr_query_cb(
        p_irp->Tail.Overlay.DriverContext[1] = p_path_rec;

        status = IoCsqInsertIrpEx(
-               &h_qp->p_irp_queue->csq,
+               &p_csq->csq,
                p_irp,
                NULL,
                (VOID*)(ULONG_PTR)NDI_CM_CONNECTING_REQ_SENT
                );
-       if( status != STATUS_SUCCESS )
+       if( !NT_SUCCESS( status ) )
        {
                p_irp->Tail.Overlay.DriverContext[1] = NULL;
-               __ndi_complete_irp( h_qp, p_irp, status );
+               __ndi_complete_irp( p_csq, p_irp, status );
        }
        else
        {
@@ -1164,14 +1292,14 @@ __ndi_pr_query_cb(
                 * Release the previous reference because IoCsqInsertIrpEx
                 * took a new one.
                 */
-               deref_al_obj( &h_qp->obj ); /* Release IRP reference. */
+               nd_csq_release( p_csq ); /* Release IRP reference. */
        }

 exit:
        if( p_query_rec->p_result_mad )
                ib_put_mad( p_query_rec->p_result_mad );

-       deref_al_obj( &h_qp->obj );     /* release path query reference */
+       nd_csq_release( p_csq );        /* release path query reference */
        AL_EXIT( AL_DBG_NDI );
 }

@@ -1190,16 +1318,16 @@ __ndi_pr_query(
        ib_api_status_t status;
        ual_ndi_req_cm_ioctl_in_t *p_req =
                (ual_ndi_req_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );
-       ib_qp_handle_t h_qp = (ib_qp_handle_t)p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t* p_csq = (nd_csq_t*)p_irp->Tail.Overlay.DriverContext[0];
     ib_gid_pair_t gids;

        AL_ENTER( AL_DBG_NDI );

-       if ( h_qp->p_irp_queue->state != NDI_CM_IDLE )
+       if ( p_csq->state != NDI_CM_IDLE )
        {
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("STATUS_CONNECTION_ACTIVE: h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",
-                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );
+                       ("STATUS_CONNECTION_ACTIVE: CID=%d, uhdl %#I64x, ref_cnt %d\n",
+                       p_csq->cid, p_csq->h_qp, p_csq->ref_cnt ) );
                return STATUS_CONNECTION_ACTIVE;
        }

@@ -1212,41 +1340,50 @@ __ndi_pr_query(
        query_req.timeout_ms = g_sa_timeout;
        query_req.retry_cnt = g_sa_retries;
        query_req.flags = 0;    /* IB_FLAGS_SYNC */
-       query_req.query_context = h_qp;
+       query_req.query_context = p_csq;
        query_req.pfn_query_cb = __ndi_pr_query_cb;

        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
                ("Query for path from %I64x to %I64x\n",
                p_req->guid, ib_gid_get_guid( &p_req->path.dgid )) );

-       ref_al_obj( &h_qp->obj );               /* take path query reference */
-       status = ib_query( qp_get_al( h_qp ), &query_req, &h_qp->p_irp_queue->h_query );
+       nd_csq_ref( p_csq );            /* take path query reference */
+       status = ib_query( p_csq->h_al, &query_req, &p_csq->h_query );
        if( status != IB_SUCCESS )
        {
-               h_qp->p_irp_queue->state = NDI_CM_IDLE;
+               p_csq->state = NDI_CM_IDLE;
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("ib_query failed (%d)\n", status) );
-               deref_al_obj( &h_qp->obj );     /* release path query reference */
+               nd_csq_release( p_csq );        /* release path query reference */
                return ib_to_ntstatus( status );
        }

        AL_EXIT( AL_DBG_NDI );
-       return STATUS_SUCCESS;
+       return STATUS_PENDING;
 }


 NTSTATUS
 ndi_req_cm(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              ib_al_handle_t                                          h_al,
        IN              IRP                                                                     *p_irp
        )
 {
        NTSTATUS status;
+       nd_csq_t* p_csq;
        ual_ndi_req_cm_ioctl_in_t *p_req =
                (ual_ndi_req_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );

        AL_ENTER( AL_DBG_NDI );

-       p_irp->Tail.Overlay.DriverContext[0] = (ib_qp_t*)h_qp;
+       p_csq = kal_cep_get_context( h_al, p_req->cid, nd_cm_handler, nd_csq_ref );
+       if( p_csq == NULL )
+       {
+               status = nd_csq_init( h_al, p_req->cid, p_req->h_qp, &p_csq );
+               if( status != STATUS_SUCCESS )
+                       goto err;
+       }
+
+       p_irp->Tail.Overlay.DriverContext[0] = p_csq;

     if( p_req->path.dlid != 0 )
     {
@@ -1260,7 +1397,7 @@ ndi_req_cm(

            p_irp->Tail.Overlay.DriverContext[1] = &p_req->path;
            status = IoCsqInsertIrpEx(
-                   &h_qp->p_irp_queue->csq,
+                   &p_csq->csq,
                    p_irp,
                    NULL,
                    (VOID*)(ULONG_PTR)NDI_CM_CONNECTING_REQ_SENT
@@ -1269,15 +1406,15 @@ ndi_req_cm(
     else
     {
            status = IoCsqInsertIrpEx(
-                   &h_qp->p_irp_queue->csq,
+                   &p_csq->csq,
                    p_irp,
                    NULL,
                    (VOID*)(ULONG_PTR)NDI_CM_CONNECTING_QPR_SENT
                    );
     }
-       if( status == STATUS_SUCCESS )
-               status = STATUS_PENDING;

+       nd_csq_release( p_csq );
+err:
        AL_EXIT( AL_DBG_NDI );
        return status;
 }
@@ -1295,7 +1432,7 @@ __ndi_do_rtu(
        IN                              PIRP                                            p_irp )
 {
        ib_api_status_t status;
-       ib_qp_handle_t h_qp = p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t* p_csq = p_irp->Tail.Overlay.DriverContext[0];
        KIRQL irql;
        NTSTATUS nt_status;

@@ -1308,67 +1445,67 @@ __ndi_do_rtu(
        {
                IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
                p_irp->Tail.Overlay.DriverContext[1] = NULL;
-               deref_al_obj( &h_qp->obj ); /* Release work item reference. */
+               nd_csq_release( p_csq ); /* Release work item reference. */
        }

-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       if( h_qp->p_irp_queue->state != NDI_CM_CONNECTING_REP_RCVD )
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       if( p_csq->state != NDI_CM_CONNECTING_REP_RCVD )
        {
+               __ndi_release_lock( &p_csq->csq, irql );
                nt_status = STATUS_CONNECTION_ABORTED;
                goto exit;
        }
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_release_lock( &p_csq->csq, irql );

        /* change the QP state to RTS */
-       status = __ndi_qp2rts( h_qp, p_irp );
-
+       status = __ndi_qp2rts( p_csq, p_irp );
        if ( status != IB_SUCCESS )
        {
                goto err;
        }

        /* send RTU */
-       status = al_cep_rtu( qp_get_al( h_qp ), ((al_conn_qp_t*)h_qp)->cid, NULL, 0 );
+       status = al_cep_rtu( p_csq->h_al, p_csq->cid, NULL, 0 );
        if( status != IB_SUCCESS )
        {
 err:
-               /* Reject and abort the connection. */
-               al_cep_rej(
-                       qp_get_al( h_qp ), ((al_conn_qp_t*)h_qp)->cid,
-                       IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );
-
-               __cep_timewait_qp( h_qp );
+               /*
+                * Reject the connection.  Note that we don't free the CEP since the
+                * usermode INDConnector object references it, and the CEP will be
+                * freed when that object is freed.
+                */
+               al_cep_rej( p_csq->h_al, p_csq->cid, IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );

-               al_destroy_cep( qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
+               __cep_timewait_qp( p_csq );

                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
                        ("al_cep_rtu returned %s.\n", ib_get_err_str( status )) );

-               __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-               if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                       h_qp->p_irp_queue->state = NDI_CM_IDLE;
-               __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+               __ndi_acquire_lock( &p_csq->csq, &irql );
+               if( p_csq->state != NDI_CM_INVALID )
+                       p_csq->state = NDI_CM_IDLE;
+               __ndi_release_lock( &p_csq->csq, irql );

                nt_status = STATUS_CONNECTION_ABORTED;
                goto exit;
        }

-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       if( h_qp->p_irp_queue->state == NDI_CM_CONNECTING_REP_RCVD )
-               h_qp->p_irp_queue->state = NDI_CM_CONNECTED;
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       if( p_csq->state == NDI_CM_CONNECTING_REP_RCVD )
+               p_csq->state = NDI_CM_CONNECTED;
+       __ndi_release_lock( &p_csq->csq, irql );

        nt_status = STATUS_SUCCESS;

 exit:
-       __ndi_complete_irp( h_qp, p_irp, nt_status );
+       __ndi_complete_irp( p_csq, p_irp, nt_status );
        AL_EXIT( AL_DBG_NDI );
 }


 cl_status_t
 ndi_rtu_cm(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              nd_csq_t                                                        *p_csq,
        IN              PIRP                                                            p_irp
        )
 {
@@ -1376,16 +1513,16 @@ ndi_rtu_cm(

        AL_ENTER( AL_DBG_NDI );

-       p_irp->Tail.Overlay.DriverContext[0] = h_qp;
+       p_irp->Tail.Overlay.DriverContext[0] = p_csq;
+       nd_csq_ref( p_csq ); /* Take IRP reference. */
        p_io_stack = IoGetCurrentIrpStackLocation( p_irp );
        p_irp->Tail.Overlay.DriverContext[1] = IoAllocateWorkItem( p_io_stack->DeviceObject );
-       ref_al_obj( &h_qp->obj ); /* Take IRP reference. */

        IoMarkIrpPending( p_irp );
        if ( p_irp->Tail.Overlay.DriverContext[1] )
        { /* asyncronous performing */
-               /* take a ref to prevent QP destroy before calling work item */
+               /* take a ref to prevent CSQ destroy before calling work item */
-               ref_al_obj( &h_qp->obj ); /* Take work item reference. */
+               nd_csq_ref( p_csq ); /* Take work item reference. */
                IoQueueWorkItem( p_irp->Tail.Overlay.DriverContext[1],
                        __ndi_do_rtu, DelayedWorkQueue, p_irp );
        }
@@ -1410,73 +1547,75 @@ __ndi_do_rep(
        IN                              DEVICE_OBJECT*                          p_dev_obj,
        IN              PIRP                                                            p_irp )
 {
-       ib_qp_handle_t h_qp = p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t* p_csq = p_irp->Tail.Overlay.DriverContext[0];
        ib_api_status_t status;
-       ual_ndi_rep_cm_ioctl_in_t *p_rep;
        KIRQL irql;
        NTSTATUS nt_status;

        UNUSED_PARAM(p_dev_obj);

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("[ CID = %d\n", p_csq->cid) );

        /* free the work item if any */
        CL_ASSERT( p_irp->Tail.Overlay.DriverContext[1] != NULL );
        IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
        p_irp->Tail.Overlay.DriverContext[1] = NULL;
-       deref_al_obj( &h_qp->obj ); /* Release work item reference. */
-
-       p_rep = (ual_ndi_rep_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );

        /* change the QP state to RTS */
-       status = __ndi_qp2rts( h_qp, p_irp );
+       status = __ndi_qp2rts( p_csq, p_irp );
        if ( status != IB_SUCCESS )
        {
                goto err;
        }

        /* send REP */
-       status = al_cep_send_rep ( qp_get_al( h_qp ), p_rep->cid );
+       status = al_cep_send_rep ( p_csq->h_al, p_csq->cid );
        if( status != IB_SUCCESS )
        {
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                        ("al_cep_send_rep returned %s\n", ib_get_err_str(status)) );
 err:
-               /* Reject and abort the connection. */
-               al_cep_rej( qp_get_al( h_qp ), p_rep->cid, IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );
+               /*
+                * Reject the connection.  Note that we don't free the CEP since the
+                * usermode INDConnector object references it, and the CEP will be
+                * freed when that object is freed.
+                */
+               al_cep_rej( p_csq->h_al, p_csq->cid, IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );

                /* transit QP to error state */
-               __cep_timewait_qp( h_qp );
-
-               al_destroy_cep( qp_get_al( h_qp ), &((al_conn_qp_t*)h_qp)->cid, TRUE );
+               __cep_timewait_qp( p_csq );

                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                        ("al_cep_rtu returned %s.\n", ib_get_err_str( status )) );
-               __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-               if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
-                       h_qp->p_irp_queue->state = NDI_CM_IDLE;
-               __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
-               if (status == IB_INVALID_STATE)
+               __ndi_acquire_lock( &p_csq->csq, &irql );
+               if( p_csq->state != NDI_CM_INVALID )
+                       p_csq->state = NDI_CM_IDLE;
+               __ndi_release_lock( &p_csq->csq, irql );
+               if (status == IB_INVALID_STATE )
                        nt_status = STATUS_CONNECTION_ABORTED;
+               /* The HCA driver will return IB_INVALID_PARAMETER if the QP is in the wrong state. */
+               else if( status == IB_INVALID_HANDLE || status == IB_INVALID_PARAMETER )
+                       nt_status = STATUS_CANCELLED;
                else
-                       nt_status =STATUS_INSUFFICIENT_RESOURCES;
+                       nt_status = STATUS_INSUFFICIENT_RESOURCES;
                goto exit;
        }

-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       if( h_qp->p_irp_queue->state == NDI_CM_CONNECTING_REP_SENT )
-               h_qp->p_irp_queue->state = NDI_CM_CONNECTED;
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       if( p_csq->state == NDI_CM_CONNECTING_REP_SENT )
+               p_csq->state = NDI_CM_CONNECTED;
+       __ndi_release_lock( &p_csq->csq, irql );
        nt_status = STATUS_SUCCESS;

 exit:
-       __ndi_complete_irp( h_qp, p_irp, nt_status );
+       __ndi_complete_irp( p_csq, p_irp, nt_status );
+       nd_csq_release( p_csq ); /* Release work item reference. */
        AL_EXIT( AL_DBG_NDI );
 }

 static void
 __ndi_fill_cm_rep(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              net32_t                                                         qpn,
        IN              ual_ndi_rep_cm_ioctl_in_t                       *p_rep,
                OUT     iba_cm_rep                                                      *p_cm_rep)
 {
@@ -1487,7 +1626,7 @@ __ndi_fill_cm_rep(
        p_cm_rep->p_pdata = p_rep->pdata;
        p_cm_rep->pdata_len = sizeof(p_rep->pdata);

-       p_cm_rep->qpn = h_qp->num;
+       p_cm_rep->qpn = qpn;

        p_cm_rep->init_depth = p_rep->init_depth;
        p_cm_rep->failover_accepted = IB_FAILOVER_ACCEPT_UNSUPPORTED;
@@ -1500,20 +1639,29 @@ __ndi_fill_cm_rep(

 NTSTATUS
 __ndi_send_rep(
-       IN              ib_qp_handle_t                                          h_qp,
+       IN              nd_csq_t                                                        *p_csq,
        IN              PIRP                                                            p_irp )
 {
        IO_STACK_LOCATION       *p_io_stack;
+       ib_qp_handle_t h_qp;
        iba_cm_rep cm_rep;
-       ib_qp_mod_t qp_mod;
        ib_api_status_t status;
        ual_ndi_rep_cm_ioctl_in_t *p_rep =
                (ual_ndi_rep_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );

-       AL_ENTER( AL_DBG_NDI );
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("[ CID = %d\n", p_csq->cid) );

-       if( h_qp->p_irp_queue->state != NDI_CM_IDLE )
+       switch( p_csq->state )
        {
+       case NDI_CM_CONNECTING_REQ_RCVD:
+               break;
+
+       case NDI_CM_CONNECTED_DREQ_RCVD:
+               AL_EXIT( AL_DBG_NDI );
+               return STATUS_CONNECTION_ABORTED;
+
+       default:
                AL_EXIT( AL_DBG_NDI );
                return STATUS_CONNECTION_ACTIVE;
        }
@@ -1525,26 +1673,36 @@ __ndi_send_rep(
                AL_EXIT( AL_DBG_NDI );
                return STATUS_NO_MEMORY;
        }
-       ref_al_obj( &h_qp->obj ); /* Take work item reference. */
+       nd_csq_ref( p_csq ); /* Take work item reference. */

-       /* Format ib_cm_req_t structure */
-       __ndi_fill_cm_rep( h_qp, p_rep, &cm_rep );
+       h_qp = CONTAINING_RECORD(
+               al_hdl_ref( p_csq->h_al, p_csq->h_qp, AL_OBJ_TYPE_H_QP ),
+               ib_qp_t,
+               obj );
+       if( !h_qp )
+       {
+               /* The QP was valid when the IOCTL first came in... */
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       ("Invalid QP: %I64d\n", p_rep->h_qp) );
+               status = IB_INVALID_HANDLE;
+               goto err;
+       }

-       ref_al_obj( &h_qp->obj ); /* Take CEP reference. */
+       /* Format iba_cm_rep structure */
+       __ndi_fill_cm_rep( h_qp->num, p_rep, &cm_rep );
+       deref_al_obj( &h_qp->obj );

        /* prepare Passive CEP for connection */
-       status = kal_cep_config_pre_rep_copy_cid(
-               qp_get_al( h_qp ), p_rep->cid, __ndi_cm_handler, &h_qp->obj, deref_al_obj,
-               &cm_rep, QP_ATTRIB_RNR_NAK_TIMEOUT, &((al_conn_qp_t*)h_qp)->cid, &qp_mod);
+       status = kal_cep_pre_rep(
+               p_csq->h_al, p_csq->cid, &cm_rep, QP_ATTRIB_RNR_NAK_TIMEOUT, NULL );
        if( status != IB_SUCCESS )
        {
+               AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                       ("kal_cep_pre_rep returned %s.\n", ib_get_err_str( status )) );
+err:
                IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
                p_irp->Tail.Overlay.DriverContext[1] = NULL;
-               deref_al_obj( &h_qp->obj ); /* Release work item reference. */
-               al_destroy_cep( qp_get_al( h_qp ), &p_rep->cid, FALSE );
-               deref_al_obj( &h_qp->obj ); /* Release CEP reference. */
-               AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("kal_cep_config_pre_rep_copy_cid returned %s.\n", ib_get_err_str( status )) );
+               nd_csq_release( p_csq ); /* Release work item reference. */
                switch (status)
                {
                        case IB_INVALID_HANDLE:
@@ -1560,7 +1718,7 @@ __ndi_send_rep(

        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
                ("Prepared Passive CEP with cid %d, h_al %p, context %p\n",
-               p_rep->cid, qp_get_al( h_qp ), h_qp ) );
+               p_csq->cid, p_csq->h_al, h_qp ) );

        /*
         * transfer work to a worker thread so that QP transitions can be done
@@ -1570,39 +1728,52 @@ __ndi_send_rep(
                __ndi_do_rep, DelayedWorkQueue, p_irp );

        AL_EXIT( AL_DBG_NDI );
-       return STATUS_SUCCESS;
+       return STATUS_PENDING;
 }


 NTSTATUS
 ndi_rep_cm(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              ib_al_handle_t                                          h_al,
        IN              PIRP                                                            p_irp
        )
 {
        NTSTATUS status;
+       nd_csq_t* p_csq;
+       ual_ndi_rep_cm_ioctl_in_t *p_rep =
+               (ual_ndi_rep_cm_ioctl_in_t*)cl_ioctl_in_buf( p_irp );
        KIRQL irql;

        AL_ENTER( AL_DBG_NDI );

-       p_irp->Tail.Overlay.DriverContext[0] = h_qp;
+       p_csq = kal_cep_get_context( h_al, p_rep->cid, nd_cm_handler, nd_csq_ref );
+       if( p_csq == NULL ) /* No CSQ could be obtained for this CID. */
+       {
+               status = STATUS_CONNECTION_ABORTED;
+               goto err;
+       }

-       __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
-       status = __ndi_send_rep( h_qp, p_irp );
-       if( status == STATUS_SUCCESS )
+       p_csq->h_qp = p_rep->h_qp; /* __ndi_send_rep looks the QP up by this handle. */
+
+       p_irp->Tail.Overlay.DriverContext[0] = p_csq;
+
+       __ndi_acquire_lock( &p_csq->csq, &irql );
+       status = __ndi_send_rep( p_csq, p_irp );
+       if( status == STATUS_PENDING )
        {
                /*
                 * We're going to keep the IRP dangling for a bit - take a reference
-                * on the QP until it completes.
+                * on the CSQ until it completes.
                 */
-               ref_al_obj( &h_qp->obj ); /* Take IRP reference. */
-               h_qp->p_irp_queue->state = NDI_CM_CONNECTING_REP_SENT;
+               nd_csq_ref( p_csq ); /* Take IRP reference. */
+               p_csq->state = NDI_CM_CONNECTING_REP_SENT;
                IoMarkIrpPending( p_irp );
-               status = STATUS_PENDING;
        }
-       __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
+       __ndi_release_lock( &p_csq->csq, irql );

-       AL_EXIT( AL_DBG_NDI );
+       nd_csq_release( p_csq ); /* Release lookup reference. */
+err:
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("] returning %08x\n", status) );
        return status;
 }

@@ -1620,19 +1791,19 @@ __ndi_send_dreq(
        IN              IRP*                                                            p_irp
        )
 {
-       ib_qp_handle_t h_qp = (ib_qp_handle_t)p_irp->Tail.Overlay.DriverContext[0];
+       nd_csq_t *p_csq = (nd_csq_t*)p_irp->Tail.Overlay.DriverContext[0];
        IO_STACK_LOCATION       *p_io_stack;
        ib_api_status_t status;
        NTSTATUS nt_status;

        AL_ENTER( AL_DBG_NDI );

-       if ( h_qp->p_irp_queue->state != NDI_CM_CONNECTED &&
-               h_qp->p_irp_queue->state != NDI_CM_CONNECTED_DREQ_RCVD )
+       if ( p_csq->state != NDI_CM_CONNECTED &&
+               p_csq->state != NDI_CM_CONNECTED_DREQ_RCVD )
        {
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                       ("STATUS_CONNECTION_ACTIVE: h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",
-                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );
+                       ("STATUS_CONNECTION_ACTIVE: CID = %d, uhdl %#I64x, ref_cnt %d\n",
+                       p_csq->cid, p_csq->h_qp, p_csq->ref_cnt ) );
                return STATUS_CONNECTION_INVALID;
        }

@@ -1648,24 +1819,26 @@ __ndi_send_dreq(
                AL_EXIT( AL_DBG_NDI );
                return STATUS_NO_MEMORY;
        }
-       ref_al_obj( &h_qp->obj ); /* Take work item reference. */
+       nd_csq_ref( p_csq ); /* Take work item reference. */

-       status = al_cep_dreq( qp_get_al( h_qp ), ((al_conn_qp_t*)h_qp)->cid, NULL, 0 );
+       status = al_cep_dreq( p_csq->h_al, p_csq->cid, NULL, 0 );
        switch( status )
        {
        case IB_INVALID_STATE:
-               /* We might have just received a DREQ, so try sending a DREP. */
-               __ndi_queue_drep( p_irp );
-               IoMarkIrpPending( p_irp );
                /*
                 * We're going to keep the IRP dangling for a bit - take a reference
-                * on the QP until it completes.
+                * on the CSQ until it completes.
                 */
-               ref_al_obj( &h_qp->obj ); /* Take IRP reference. */
+               nd_csq_ref( p_csq ); /* Take IRP reference. */
+               /* We might have just received a DREQ, so try sending a DREP. */
+               IoMarkIrpPending( p_irp );
+               __ndi_queue_drep( p_irp );
+               AL_EXIT( AL_DBG_NDI );
+               return STATUS_INVALID_DEVICE_STATE;

        case IB_SUCCESS:
                AL_EXIT( AL_DBG_NDI );
-               return( ib_to_ntstatus( status ) );
+               return STATUS_PENDING;

        case IB_INVALID_HANDLE:
                nt_status = STATUS_CONNECTION_INVALID;
@@ -1675,7 +1848,7 @@ __ndi_send_dreq(
        }
        IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
        p_irp->Tail.Overlay.DriverContext[1] = NULL;
-       deref_al_obj( &h_qp->obj ); /* Release work item reference. */
+       nd_csq_release( p_csq ); /* Release work item reference. */
        AL_EXIT( AL_DBG_NDI );
        return nt_status;
 }
@@ -1683,7 +1856,7 @@ __ndi_send_dreq(

 NTSTATUS
 ndi_dreq_cm(
-       IN              ib_qp_handle_t  const                           h_qp,
+       IN              nd_csq_t*                                                       p_csq,
        IN              PIRP                                                            p_irp
        )
 {
@@ -1691,10 +1864,10 @@ ndi_dreq_cm(

        AL_ENTER( AL_DBG_NDI );

-       p_irp->Tail.Overlay.DriverContext[0] = h_qp;
+       p_irp->Tail.Overlay.DriverContext[0] = p_csq;

        status = IoCsqInsertIrpEx(
-               &h_qp->p_irp_queue->csq,
+               &p_csq->csq,
                p_irp,
                NULL,
                (VOID*)(ULONG_PTR)NDI_CM_DISCONNECTING
@@ -1706,10 +1879,136 @@ ndi_dreq_cm(
         * The IRP should never be queued if the work item is queued, so
         * we trap the special error code for INVALID_STATE.
         */
-       if( status == STATUS_SUCCESS || status == STATUS_INVALID_DEVICE_STATE )
+       if( status == STATUS_INVALID_DEVICE_STATE )
                status = STATUS_PENDING;

        AL_EXIT( AL_DBG_NDI );
        return status;
 }
+
+/* Create a CEP and its nd_csq_t, mark it NDI_CM_LISTEN and start listening; on success the CID is returned in *p_cid. */
+NTSTATUS
+ndi_listen_cm(
+       IN              ib_al_handle_t                                  h_al,
+       IN              ib_cep_listen_t                                 *p_listen,      /* svc_id may be modified in place, see below. */
+               OUT     net32_t                                                 *p_cid,         /* Written only on success. */
+               OUT     size_t                                                  *p_ret_bytes    /* Written only on success. */
+       )
+{
+       NTSTATUS status;
+       net32_t cid;
+       ib_api_status_t ib_status;
+       nd_csq_t *p_csq;
+       KIRQL irql;
+
+       AL_ENTER( AL_DBG_NDI );
+
+       ib_status = al_create_cep( h_al, NULL, NULL, NULL, &cid );      /* No callback or context is supplied at creation. */
+       if( ib_status != IB_SUCCESS )
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return ib_to_ntstatus( ib_status );
+       }
+
+       status = nd_csq_init( h_al, cid, 0, &p_csq );   /* Third argument is 0 - presumably "no QP" for a listener; TODO confirm. */
+       if( status != STATUS_SUCCESS )
+       {
+               kal_cep_destroy( h_al, cid, STATUS_SUCCESS );   /* Undo the CEP creation; the CID was never handed to the caller. */
+               AL_EXIT( AL_DBG_NDI );
+               return status;
+       }
+
+       __ndi_acquire_lock( &p_csq->csq, &irql );       /* The state change is made under the CSQ lock. */
+       p_csq->state = NDI_CM_LISTEN;
+       __ndi_release_lock( &p_csq->csq, irql );
+
+       if( (p_listen->svc_id & 0xFFFF) == 0 )  /* Low 16 bits of the service ID are unset. */
+       {
+               p_listen->svc_id |= (USHORT)cid | (USHORT)(cid >> 16);  /* Fill them by OR-ing the CID's two 16-bit halves (presumably an auto-assigned port; TODO confirm). */
+       }
+
+       ib_status = al_cep_listen( h_al, cid, p_listen );
+       if( ib_status == IB_SUCCESS )
+       {
+               *p_cid = cid;
+               *p_ret_bytes = sizeof(*p_cid);
+       }
+
+       nd_csq_release( p_csq );        /* NOTE(review): presumably drops the reference returned by nd_csq_init - confirm what keeps the CSQ alive while listening, and that a failed al_cep_listen does not leave the CEP behind. */
+       status = ib_to_ntstatus( ib_status );
+       AL_EXIT( AL_DBG_NDI );
+       return status;
+}
+
+/* Decide the fate of a get-request IRP: complete it now with a waiting CID, or return STATUS_PENDING so it is queued (see the comment in ndi_get_req_cm). */
+NTSTATUS
+__ndi_get_req(
+       IN              nd_csq_t                                                        *p_csq,
+       IN              IRP*                                                            p_irp
+       )
+{
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("[ CID = %d\n", p_csq->cid) );
+
+       if( p_csq->state != NDI_CM_LISTEN )     /* NOTE(review): no lock is taken here - presumably the caller holds the CSQ lock; confirm. */
+       {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_NDI,
+                       ("] Invalid state (%d).\n", p_csq->state) );
+               return STATUS_INVALID_DEVICE_REQUEST;   /* Only a CSQ in the listen state accepts this request. */
+       }
+
+       /* Check the MAD list. */
+       if( p_csq->p_mad_head != NULL )
+       {
+               ib_mad_element_t* p_mad = p_csq->p_mad_head;
+               net32_t cid = (net32_t)(ULONG_PTR)p_mad->send_context1; /* The CID to return is carried in the MAD's send_context1. */
+               p_csq->p_mad_head = p_mad->p_next;      /* Unlink the head. NOTE(review): p_mad_tail is not updated here - confirm the enqueue side copes with an emptied list. */
+               p_mad->p_next = NULL;
+
+               *(net32_t*)cl_ioctl_out_buf( p_irp ) = cid;
+               p_irp->IoStatus.Information = sizeof(net32_t);
+               p_irp->IoStatus.Status = STATUS_SUCCESS;
+               IoMarkIrpPending( p_irp );      /* ndi_get_req_cm reports STATUS_PENDING for this IRP, so it must be marked pending before completion. */
+               IoCompleteRequest( p_irp, IO_NETWORK_INCREMENT );
+               ib_put_mad( p_mad );    /* The MAD is released once its CID has been delivered. */
+               AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("] Returned new CID = %d\n", cid) );
+               return STATUS_INVALID_DEVICE_STATE;     /* Not a failure: signals that the IRP was completed here and must not be queued. */
+       }
+
+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI, ("] Queueing IRP\n") );
+       return STATUS_PENDING;  /* Nothing waiting yet. */
+}
+
+/* Submit a get-request IRP to the listen CSQ with insert context NDI_CM_LISTEN; returns the insert status, with the "completed immediately" code mapped to STATUS_PENDING. */
+NTSTATUS
+ndi_get_req_cm(
+       IN              nd_csq_t                                                *p_csq,
+       IN              PIRP                                                    p_irp
+       )
+{
+       NTSTATUS status;
+
+       AL_ENTER( AL_DBG_NDI );
+
+       status = IoCsqInsertIrpEx(
+               &p_csq->csq,
+               p_irp,
+               NULL,   /* No IO_CSQ_IRP_CONTEXT is supplied. */
+               (VOID*)(ULONG_PTR)NDI_CM_LISTEN /* Insert context; presumably selects __ndi_get_req in the insert callback - TODO confirm. */
+               );
+
+       /*
+        * __ndi_get_req will return STATUS_INVALID_DEVICE_STATE to prevent the IRP
+        * from being inserted into the CSQ because the IRP was immediately completed.
+        * In this case, we need to return STATUS_PENDING.
+        */
+       if( status == STATUS_INVALID_DEVICE_STATE )
+       {
+               status = STATUS_PENDING;        /* The IRP was already marked pending and completed. */
+       }
+
+       AL_EXIT( AL_DBG_NDI );
+       return status;
+}
+
+

Index: core/al/kernel/al_ndi_cm.h
===================================================================
--- core/al/kernel/al_ndi_cm.h  (revision 2061)
+++ core/al/kernel/al_ndi_cm.h  (working copy)
@@ -64,8 +64,10 @@
 typedef enum _ndi_cm_state
 {
        NDI_CM_IDLE,
+       NDI_CM_LISTEN,
        NDI_CM_CONNECTING_QPR_SENT, // QPR = Query path record
        NDI_CM_CONNECTING_REQ_SENT,
+       NDI_CM_CONNECTING_REQ_RCVD,
        NDI_CM_CONNECTING_REP_SENT,
        NDI_CM_CONNECTING_REP_RCVD,
        NDI_CM_CONNECTED,
@@ -77,16 +79,27 @@ typedef enum _ndi_cm_state

 typedef struct _ib_qp  ib_qp_t;

-typedef struct _ndi_qp_csq
+typedef struct _nd_csq
 {
        IO_CSQ                                          csq;
        LIST_ENTRY                                      queue;
-       ib_qp_t*                                        h_qp;
-       ib_query_handle_t                       h_query;
+       ib_al_handle_t                          h_al;
+       union {
+               uint64_t                                h_qp;
+               ib_mad_element_t                *p_mad_head;
+       };
+       union {
+               ib_query_handle_t               h_query;
+               ib_mad_element_t                *p_mad_tail;
+       };
+       net32_t                                         cid;
        ndi_cm_state_t                          state;
        PIO_WORKITEM                            p_workitem;
+       volatile LONG                           ref_cnt;
+       KSPIN_LOCK                                      lock;
+
+} nd_csq_t;

-} ndi_qp_csq_t;

 ib_api_status_t
 ndi_modify_qp(
@@ -95,41 +108,59 @@ ndi_modify_qp(
        IN              const   uint32_t                                        buf_size,
        IN                              uint8_t* const                          p_outbuf);

+void
+nd_csq_ref( nd_csq_t* p_csq );
+
+void
+nd_csq_release( nd_csq_t* p_csq );
+
+void
+nd_cm_handler(
+       IN              const   ib_al_handle_t                          h_al,
+       IN              const   net32_t                                         cid );
+
 NTSTATUS
 ndi_req_cm(
-       IN              ib_qp_handle_t  const                   h_qp,
-       IN              cl_ioctl_handle_t                               h_ioctl
+       IN              ib_al_handle_t                                  h_al,
+       IN              PIRP                                                    p_irp
        );

 NTSTATUS
 ndi_rep_cm(
-       IN              ib_qp_handle_t  const                   h_qp,
+       IN              ib_al_handle_t                                  h_al,
        IN              PIRP                                                    p_irp
        );

 cl_status_t
 ndi_rtu_cm(
-       IN              ib_qp_handle_t  const                   h_qp,
+       IN              nd_csq_t                                                *p_csq,
        IN              PIRP                                                    p_irp
        );

 NTSTATUS
 ndi_dreq_cm(
-       IN              ib_qp_handle_t  const                   h_qp,
+       IN              nd_csq_t                                                *p_csq,
        IN              PIRP                                                    p_irp
        );
-
-NTSTATUS
-ndi_qp_init(
-       IN              ib_qp_handle_t                                  h_qp );

 void
-ndi_qp_destroy(
-       IN              ib_qp_handle_t                                  h_qp );
+ndi_cancel_cm_irps(
+       IN              nd_csq_t                                                *p_csq
+       );

-void
-ndi_qp_free(
-       IN              ib_qp_handle_t                                  h_qp );
+NTSTATUS
+ndi_listen_cm(
+       IN              ib_al_handle_t                                  h_al,
+       IN              ib_cep_listen_t                                 *p_listen,
+               OUT     net32_t                                                 *p_cid,
+               OUT     size_t                                                  *p_ret_bytes
+       );
+
+NTSTATUS
+ndi_get_req_cm(
+       IN              nd_csq_t                                                *p_csq,
+       IN              PIRP                                                    p_irp
+       );

 #endif

Index: core/al/kernel/al_cm.c
===================================================================
--- core/al/kernel/al_cm.c      (revision 2061)
+++ core/al/kernel/al_cm.c      (working copy)
@@ -90,7 +90,7 @@ cm_cep_handler(const ib_al_handle_t h_al

                        id = cm_alloc_id(listen_id->callback, listen_id);
                        if (id == NULL) {
-                               kal_cep_destroy(h_al, new_cid);
+                               kal_cep_destroy(h_al, new_cid, STATUS_NO_MORE_ENTRIES);
                                ib_put_mad(mad);
                                continue;
                        }
@@ -103,7 +103,7 @@ cm_cep_handler(const ib_al_handle_t h_al
                status = id->callback(id, &event);
                if (!NT_SUCCESS(status)) {
                        kal_cep_config(h_al, new_cid, NULL, NULL, NULL);
-                       kal_cep_destroy(h_al, id->cid);
+                       kal_cep_destroy(h_al, id->cid, status);
                        cm_free_id(id);
                }
                ib_put_mad(mad);
@@ -139,7 +139,7 @@ cm_destroy_id(iba_cm_id *p_id)
        iba_cm_id_priv  *id;

        id = CONTAINING_RECORD(p_id, iba_cm_id_priv, id);
-       kal_cep_destroy(gh_al, p_id->cid);
+       kal_cep_destroy(gh_al, p_id->cid, STATUS_SUCCESS);
        KeWaitForSingleObject(&id->destroy_event, Executive, KernelMode, FALSE, NULL);
        cm_free_id(p_id);
 }
Index: core/al/kernel/al_proxy_cep.c
===================================================================
--- core/al/kernel/al_proxy_cep.c       (revision 2061)
+++ core/al/kernel/al_proxy_cep.c       (working copy)
@@ -53,7 +53,6 @@ proxy_create_cep(
                OUT     size_t                                  *p_ret_bytes )
 {
        al_dev_open_context_t           *p_context;
-       void*                                           user_context;
        ual_create_cep_ioctl_t          *p_ioctl;

        AL_ENTER( AL_DBG_CM );
@@ -62,19 +61,13 @@ proxy_create_cep(
        p_ioctl = (ual_create_cep_ioctl_t*)cl_ioctl_out_buf( h_ioctl );

        /* Validate user parameters. */
-       if( cl_ioctl_in_size( h_ioctl ) != sizeof(uint64_t) ||
-               cl_ioctl_out_size( h_ioctl ) != sizeof(ual_create_cep_ioctl_t) )
+       if( cl_ioctl_out_size( h_ioctl ) != sizeof(ual_create_cep_ioctl_t) )
        {
                AL_EXIT( AL_DBG_CM );
                return CL_INVALID_PARAMETER;
        }
-
-       user_context = *(void**)cl_ioctl_in_buf( h_ioctl );
-
        /* We use IRPs as notification mechanism so the callback is NULL. */
-       p_ioctl->cid = AL_INVALID_CID;
-       p_ioctl->status = al_create_cep( p_context->h_al, NULL,
-               user_context, NULL, &p_ioctl->cid );
+       p_ioctl->status = kal_cep_alloc( p_context->h_al, &p_ioctl->cid );

        *p_ret_bytes = sizeof(ual_create_cep_ioctl_t);

@@ -862,37 +855,6 @@ proxy_cep_get_event(
 }


-static cl_status_t
-proxy_cep_get_req_cid(
-       IN              void                                    *p_open_context,
-       IN              cl_ioctl_handle_t               h_ioctl,
-               OUT     size_t                                  *p_ret_bytes )
-{
-       cl_status_t cl_status;
-       al_dev_open_context_t *p_context;
-       UNUSED_PARAM(p_ret_bytes);
-
-       AL_ENTER( AL_DBG_CM );
-
-       p_context = (al_dev_open_context_t*)p_open_context;
-
-       /* Validate user parameters. */
-       if( cl_ioctl_in_size( h_ioctl ) != sizeof(uint32_t) ||
-               cl_ioctl_out_size( h_ioctl ) != sizeof(uint32_t) )
-       {
-               AL_EXIT( AL_DBG_CM );
-               return CL_INVALID_PARAMETER;
-       }
-
-       /* get CID */
-       cl_status = al_cep_get_cid( p_context->h_al,
-               *(uint32_t*)cl_ioctl_in_buf( h_ioctl ), h_ioctl );
-
-       AL_EXIT( AL_DBG_CM );
-       return cl_status;
-}
-
-

 static cl_status_t
 proxy_cep_get_pdata(
@@ -902,8 +864,8 @@ proxy_cep_get_pdata(
 {
        al_dev_open_context_t           *p_context;
        ual_cep_get_pdata_ioctl_t       *p_ioctl;
-       al_conn_qp_t                            *p_qp;
        NTSTATUS                    status;
+       net32_t                                         cid;

        AL_ENTER( AL_DBG_CM );

@@ -922,23 +884,9 @@ proxy_cep_get_pdata(
                return CL_INVALID_PARAMETER;
        }

-       if ( p_ioctl->in.h_qp )
-       {
-               /* Get the kernel QP handle. */
-               p_qp = (al_conn_qp_t*)al_hdl_ref(
-                       p_context->h_al, p_ioctl->in.h_qp, AL_OBJ_TYPE_H_QP );
-               if( !p_qp )
-               {
-                       AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-                               ("Invalid QP handle\n"));
-                       return CL_CONNECTION_INVALID;
-               }
-               p_ioctl->in.cid = p_qp->cid;
-               deref_al_obj( &p_qp->qp.obj );
-       }
-
+       cid = p_ioctl->in.cid;
        p_ioctl->out.pdata_len = sizeof(p_ioctl->out.pdata);
-       status = al_cep_get_pdata( p_context->h_al, p_ioctl->in.cid,
+       status = al_cep_get_pdata( p_context->h_al, cid,
         &p_ioctl->out.init_depth, &p_ioctl->out.resp_res,
                (uint8_t*)&p_ioctl->out.pdata_len, p_ioctl->out.pdata );

@@ -953,6 +901,7 @@ proxy_cep_get_pdata(
        return status;
 }

+
 cl_status_t cep_ioctl(
        IN              cl_ioctl_handle_t               h_ioctl,
                OUT     size_t                                  *p_ret_bytes )
@@ -1035,9 +984,6 @@ cl_status_t cep_ioctl(
                break;
        case UAL_CEP_POLL:
                cl_status = proxy_cep_poll( p_context, h_ioctl, p_ret_bytes );
-               break;
-       case UAL_CEP_GET_REQ_CID:
-               cl_status = proxy_cep_get_req_cid( p_context, h_ioctl, p_ret_bytes );
                break;
        case UAL_CEP_GET_PDATA:
                cl_status = proxy_cep_get_pdata( p_context, h_ioctl, p_ret_bytes );
Index: core/al/kernel/al_proxy_ndi.c
===================================================================
--- core/al/kernel/al_proxy_ndi.c       (revision 2061)
+++ core/al/kernel/al_proxy_ndi.c       (working copy)
@@ -322,7 +322,7 @@ __ndi_req_cm(
        p_context = (al_dev_open_context_t*)p_open_context;

        /* Validate user parameters. */
-       if( cl_ioctl_in_size( h_ioctl ) < sizeof(ual_ndi_req_cm_ioctl_in_t))
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(ual_ndi_req_cm_ioctl_in_t) )
        {
                cl_status = CL_INVALID_PARAMETER;
                goto exit;
@@ -351,7 +351,7 @@ __ndi_req_cm(
        }

        /* perform the ioctl */
-       cl_status = ndi_req_cm( h_qp, h_ioctl );
+       cl_status = ndi_req_cm( p_context->h_al, h_ioctl );

 err:
        deref_al_obj( &h_qp->obj );
@@ -370,7 +370,6 @@ __ndi_rep_cm(
        cl_status_t cl_status;
        ib_qp_handle_t h_qp = NULL;
        al_dev_open_context_t *p_context;
-       net32_t cid;
        ual_ndi_rep_cm_ioctl_in_t *p_rep =
                (ual_ndi_rep_cm_ioctl_in_t*)cl_ioctl_in_buf( h_ioctl );
        UNUSED_PARAM(p_ret_bytes);
@@ -386,6 +385,9 @@ __ndi_rep_cm(
                goto exit;
        }

+       AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+               ("CID = %d\n", p_rep->cid) );
+
        /* Get and validate QP handle */
        h_qp = (ib_qp_handle_t)al_hdl_ref( p_context->h_al, p_rep->h_qp, AL_OBJ_TYPE_H_QP );
        if( !h_qp )
@@ -406,12 +408,9 @@ __ndi_rep_cm(
                cl_status = CL_INVALID_PARAMETER;
                goto err;
        }
-
-       /* Get and validate CID */
-       cid = p_rep->cid;

        /* perform the ioctls */
-       cl_status = ndi_rep_cm( h_qp, h_ioctl );
+       cl_status = ndi_rep_cm( p_context->h_al, h_ioctl );

 err:
        deref_al_obj( &h_qp->obj );
@@ -455,7 +454,6 @@ __ndi_rej_cm(
                goto exit;
        }

-       al_destroy_cep( p_context->h_al, &p_rej->cid, FALSE );
        ntstatus = STATUS_SUCCESS;

 exit:
@@ -470,10 +468,8 @@ __ndi_rtu_cm(
                OUT     size_t                                  *p_ret_bytes )
 {
        cl_status_t cl_status;
-       ib_qp_handle_t h_qp = NULL;
+       nd_csq_t* p_csq;
        al_dev_open_context_t *p_context;
-       ual_ndi_rtu_cm_ioctl_in_t *p_rtu =
-               (ual_ndi_rtu_cm_ioctl_in_t*)cl_ioctl_in_buf( h_ioctl );

        UNUSED_PARAM(p_ret_bytes);

@@ -482,32 +478,28 @@ __ndi_rtu_cm(
        p_context = (al_dev_open_context_t*)p_open_context;

        /* Validate user parameters. */
-       if( cl_ioctl_in_size( h_ioctl ) < sizeof(ual_ndi_rtu_cm_ioctl_in_t))
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(net32_t) )
        {
                cl_status = CL_INVALID_PARAMETER;
                goto exit;
        }

-       /* Validate QP handle */
-       h_qp = (ib_qp_handle_t)al_hdl_ref( p_context->h_al, p_rtu->h_qp, AL_OBJ_TYPE_H_QP );
-       if( !h_qp )
+       p_csq = kal_cep_get_context(
+               p_context->h_al,
+               *(net32_t*)cl_ioctl_in_buf( h_ioctl ),
+               nd_cm_handler,
+               nd_csq_ref
+               );
+       if( p_csq == NULL )
        {
                cl_status = CL_INVALID_HANDLE;
                goto exit;
        }

-       /* Check QP type */
-       if( h_qp->type != IB_QPT_RELIABLE_CONN )
-       {
-               cl_status = CL_INVALID_HANDLE;
-               goto err;
-       }
-
        /* perform the ioctl */
-       cl_status = ndi_rtu_cm( h_qp, h_ioctl );
+       cl_status = ndi_rtu_cm( p_csq, h_ioctl );

-err:
-       deref_al_obj( &h_qp->obj );
+       nd_csq_release( p_csq );

 exit:
        AL_EXIT( AL_DBG_NDI );
@@ -521,7 +513,7 @@ __ndi_dreq_cm(
                OUT     size_t                                  *p_ret_bytes )
 {
        cl_status_t cl_status;
-       ib_qp_handle_t h_qp = NULL;
+       nd_csq_t *p_csq;
        al_dev_open_context_t *p_context;

        UNUSED_PARAM(p_ret_bytes);
@@ -531,37 +523,206 @@ __ndi_dreq_cm(
        p_context = (al_dev_open_context_t*)p_open_context;

        /* Validate user parameters. */
-       if( cl_ioctl_in_size( h_ioctl ) < sizeof(uint64_t))
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(net32_t) )
        {
                cl_status = CL_INVALID_PARAMETER;
                goto exit;
        }

-       /* Validate QP handle */
-       h_qp = (ib_qp_handle_t)al_hdl_ref( p_context->h_al,
-               *(uint64_t*)cl_ioctl_in_buf( h_ioctl ), AL_OBJ_TYPE_H_QP );
-       if( !h_qp )
+       /* Validate CID */
+       p_csq = (nd_csq_t*)kal_cep_get_context(
+               p_context->h_al,
+               *(net32_t*)cl_ioctl_in_buf( h_ioctl ),
+               nd_cm_handler,
+               nd_csq_ref
+               );
+
+       if( p_csq == NULL )
        {
                cl_status = CL_CONNECTION_INVALID;
                goto exit;
        }

-       /* Check QP type */
-       if( h_qp->type != IB_QPT_RELIABLE_CONN )
+       /* perform the ioctl */
+       cl_status = ndi_dreq_cm( p_csq, h_ioctl );
+
+       nd_csq_release( p_csq );
+
+exit:
+       AL_EXIT( AL_DBG_NDI );
+       return cl_status;
+}
+/* IOCTL handler: find the nd_csq_t for the CID in the input buffer and insert the IRP into its CSQ with context NDI_CM_CONNECTED_DREQ_RCVD. */
+static NTSTATUS
+__ndi_notify_dreq_cm(
+       IN              void                                    *p_open_context,
+       IN              cl_ioctl_handle_t               h_ioctl,
+               OUT     size_t                                  *p_ret_bytes )
+{
+       NTSTATUS status;
+       nd_csq_t *p_csq;
+       al_dev_open_context_t *p_context;
+
+       UNUSED_PARAM(p_ret_bytes);
+
+       AL_ENTER( AL_DBG_NDI );
+
+       p_context = (al_dev_open_context_t*)p_open_context;
+
+       /* Validate user parameters. */
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(net32_t) )     /* The input buffer must hold at least a CID. */
         {
-               cl_status = CL_CONNECTION_INVALID;
-               goto err;
+               status = STATUS_INVALID_PARAMETER;
+               goto exit;
+       }
+
+       /* Validate CID */
+       p_csq = (nd_csq_t*)kal_cep_get_context(
+               p_context->h_al,
+               *(net32_t*)cl_ioctl_in_buf( h_ioctl ),
+               nd_cm_handler,  /* Presumably used to confirm the CEP is an ND one - TODO confirm. */
+               nd_csq_ref      /* Presumably invoked on the context to take the reference released below - TODO confirm. */
+               );
+
+       if( p_csq == NULL )
+       {
+               status = STATUS_CONNECTION_INVALID;
+               goto exit;
         }

        /* perform the ioctl */
-               cl_status = ndi_dreq_cm( h_qp, h_ioctl );
+       status = IoCsqInsertIrpEx(
+               &p_csq->csq,
+               h_ioctl,
+               NULL,
+               (VOID*)(ULONG_PTR)NDI_CM_CONNECTED_DREQ_RCVD    /* Insert context handed to the CSQ insert callback. */
+               );

-err:
-       deref_al_obj( &h_qp->obj );
+       nd_csq_release( p_csq );        /* Pairs with the lookup above. */

 exit:
        AL_EXIT( AL_DBG_NDI );
-       return cl_status;
+       return status;  /* The IoCsqInsertIrpEx result is returned unmodified. */
+}
+/* IOCTL handler: find the nd_csq_t for the CID in the input buffer and call ndi_cancel_cm_irps on it. */
+static cl_status_t
+__ndi_cancel_cm_irps(
+       IN              void                                    *p_open_context,
+       IN              cl_ioctl_handle_t               h_ioctl,
+               OUT     size_t                                  *p_ret_bytes )
+{
+       nd_csq_t *p_csq;
+       al_dev_open_context_t *p_context;
+
+       UNUSED_PARAM(p_ret_bytes);
+
+       AL_ENTER( AL_DBG_NDI );
+
+       p_context = (al_dev_open_context_t*)p_open_context;
+
+       /* Validate user parameters. */
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(net32_t) )     /* The input buffer must hold at least a CID. */
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return STATUS_INVALID_PARAMETER;        /* NOTE(review): STATUS_* value returned from a cl_status_t function - confirm the two are interchangeable in kernel builds. */
+       }
+
+       /* Validate CID */
+       p_csq = (nd_csq_t*)kal_cep_get_context(
+               p_context->h_al,
+               *(net32_t*)cl_ioctl_in_buf( h_ioctl ),
+               nd_cm_handler,
+               nd_csq_ref      /* Presumably invoked on the context to take the reference released below - TODO confirm. */
+               );
+
+       if( p_csq == NULL )
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return STATUS_UNSUCCESSFUL;
+       }
+
+       /* perform the ioctl */
+       ndi_cancel_cm_irps( p_csq );
+       nd_csq_release( p_csq );        /* Pairs with the lookup above. */
+
+       AL_EXIT( AL_DBG_NDI );
+       return STATUS_SUCCESS;  /* ndi_cancel_cm_irps returns void, so a valid CID always yields success. */
+}
+/* IOCTL handler: validate the buffer sizes, point the compare buffer at the kernel copy, and forward to ndi_listen_cm. */
+static cl_status_t
+__ndi_listen_cm(
+       IN              void                                    *p_open_context,
+       IN              cl_ioctl_handle_t               h_ioctl,
+               OUT     size_t                                  *p_ret_bytes )
+{
+       al_dev_open_context_t *p_context;
+       ual_cep_listen_ioctl_t *p_listen =
+               (ual_cep_listen_ioctl_t*)cl_ioctl_in_buf( h_ioctl );    /* Only the pointer is taken here; it is dereferenced after the size check. */
+       net32_t* p_cid =
+               (net32_t*)cl_ioctl_out_buf( h_ioctl );
+
+       AL_ENTER( AL_DBG_NDI );
+
+       p_context = (al_dev_open_context_t*)p_open_context;
+
+       /* Validate user parameters. */
+       if( cl_ioctl_in_size( h_ioctl ) < sizeof(*p_listen) ||
+               cl_ioctl_out_size( h_ioctl ) != sizeof(*p_cid) )        /* Output must be exactly one CID; input may be larger than the request. */
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return CL_INVALID_PARAMETER;
+       }
+
+       /* Set the private data compare buffer to our kernel copy. */
+       if( p_listen->cep_listen.p_cmp_buf )    /* Non-NULL means a compare was requested; the caller's pointer value itself is discarded. */
+               p_listen->cep_listen.p_cmp_buf = p_listen->compare;
+
+       AL_EXIT( AL_DBG_NDI );  /* The exit trace is emitted before ndi_listen_cm runs. */
+       return ndi_listen_cm( p_context->h_al, &p_listen->cep_listen, p_cid, p_ret_bytes );
+}
+/* IOCTL handler: find the nd_csq_t for the CID in the input buffer and pass the IRP to ndi_get_req_cm. */
+static cl_status_t
+__ndi_get_req_cm(
+       IN              void                                    *p_open_context,
+       IN              cl_ioctl_handle_t               h_ioctl,
+               OUT     size_t                                  *p_ret_bytes )
+{
+       al_dev_open_context_t *p_context;
+       nd_csq_t *p_csq;
+       NTSTATUS status;
+
+       AL_ENTER( AL_DBG_NDI );
+
+       UNREFERENCED_PARAMETER( p_ret_bytes );
+
+       p_context = (al_dev_open_context_t*)p_open_context;
+
+       /* Validate user parameters. */
+       if( cl_ioctl_in_size( h_ioctl ) != sizeof(net32_t) ||
+               cl_ioctl_out_size( h_ioctl ) != sizeof(net32_t) )       /* Input is the listen CID; output receives a CID as well. */
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return CL_INVALID_PARAMETER;
+       }
+
+       /* Validate CID */
+       p_csq = (nd_csq_t*)kal_cep_get_context(
+               p_context->h_al,
+               *(net32_t*)cl_ioctl_in_buf( h_ioctl ),
+               nd_cm_handler,
+               nd_csq_ref      /* Presumably invoked on the context to take the reference released below - TODO confirm. */
+               );
+
+       if( p_csq == NULL )
+       {
+               AL_EXIT( AL_DBG_NDI );
+               return STATUS_UNSUCCESSFUL;     /* NOTE(review): STATUS_* value returned from a cl_status_t function - confirm the two are interchangeable in kernel builds. */
+       }
+
+       status = ndi_get_req_cm( p_csq, h_ioctl );
+       nd_csq_release( p_csq );        /* Pairs with the lookup above. */
+       AL_EXIT( AL_DBG_NDI );
+       return status;
 }

 cl_status_t
@@ -621,9 +782,23 @@ ndi_ioctl(
             h_ioctl->IoStatus.Status = CL_SUCCESS;
         h_ioctl->IoStatus.Information = 0;

+               AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_NDI,
+                       ("UAL_NDI_NOOP completed with %08x\n", h_ioctl->IoStatus.Status) );
         IoCompleteRequest( h_ioctl, IO_NETWORK_INCREMENT );
         cl_status = CL_PENDING;
         break;
+       case UAL_NDI_NOTIFY_DREQ:
+               cl_status = __ndi_notify_dreq_cm( p_context, h_ioctl, p_ret_bytes );
+               break;
+       case UAL_NDI_CANCEL_CM_IRPS:
+               cl_status = __ndi_cancel_cm_irps( p_context, h_ioctl, p_ret_bytes );
+               break;
+       case UAL_NDI_LISTEN_CM:
+               cl_status = __ndi_listen_cm( p_context, h_ioctl, p_ret_bytes );
+               break;
+       case UAL_NDI_GET_REQ_CM:
+               cl_status = __ndi_get_req_cm( p_context, h_ioctl, p_ret_bytes );
+               break;
        default:
                cl_status = CL_INVALID_PARAMETER;
                break;
Index: core/al/al_qp.h
===================================================================
--- core/al/al_qp.h     (revision 2061)
+++ core/al/al_qp.h     (working copy)
@@ -135,10 +135,6 @@ typedef struct _ib_qp
                IN              const   ib_qp_handle_t                          h_qp,
                IN              const   ib_mcast_req_t* const           p_mcast_req );

-#ifdef CL_KERNEL
-       ndi_qp_csq_t                            *p_irp_queue;
-#endif
-
 }      ib_qp_t;


Index: core/al/al_cm_cep.h
===================================================================
--- core/al/al_cm_cep.h (revision 2061)
+++ core/al/al_cm_cep.h (working copy)
@@ -116,7 +116,7 @@ kal_cep_alloc(
        IN                              ib_al_handle_t                          h_al,
        IN      OUT                     net32_t* const                          p_cid );

-void
+ib_api_status_t
 kal_cep_config(
        IN                              ib_al_handle_t                          h_al,
        IN                              net32_t                                         cid,
@@ -184,22 +184,11 @@ kal_cep_pre_rep(
        IN                              uint8_t                                         rnr_nak_timeout,
        IN      OUT                     ib_qp_mod_t* const                      p_init OPTIONAL );

-ib_api_status_t
-kal_cep_config_pre_rep_copy_cid(
-       IN                              ib_al_handle_t                          h_al,
-       IN                              net32_t                                         cid,
-       IN                              al_pfn_cep_cb_t                         pfn_cb,
-       IN                              void*                                           context,
-       IN                              ib_pfn_destroy_cb_t                     pfn_destroy_cb,
-       IN              const   iba_cm_rep* const                       p_cm_rep,
-       IN                              uint8_t                                         rnr_nak_timeout,
-       IN      OUT                     net32_t* const                          p_cid,
-               OUT                     ib_qp_mod_t* const                      p_init );
-
 void
 kal_cep_destroy(
        IN                              ib_al_handle_t                          h_al,
-       IN                              net32_t                                         cid );
+       IN                              net32_t                                         cid,
+       IN                              NTSTATUS                                        status );
 #endif

 ib_api_status_t
@@ -337,13 +326,6 @@ al_cep_queue_irp(
        IN                              IRP* const                                      p_irp );

 NTSTATUS
-al_cep_get_cid(
-       IN              ib_al_handle_t                                          h_al,
-       IN              net32_t                 const                           cid,
-       IN              PIRP                                                            h_ioctl
-       );
-
-NTSTATUS
 al_cep_get_pdata(
        IN                              ib_al_handle_t                          h_al,
        IN                              net32_t                                         cid,
@@ -352,6 +334,12 @@ al_cep_get_pdata(
        IN      OUT                     uint8_t                                         *p_psize,
                OUT                     uint8_t*                                        pdata );

+void*
+kal_cep_get_context(
+       IN                              ib_al_handle_t                          h_al,
+       IN                              net32_t                                         cid,
+       IN                              al_pfn_cep_cb_t                         pfn_cb,
+       IN                              ib_pfn_destroy_cb_t                     pfn_addref );
 #endif /* CL_KERNEL */


Index: core/al/al_dev.h
===================================================================
--- core/al/al_dev.h    (revision 2061)
+++ core/al/al_dev.h    (working copy)
@@ -55,7 +55,7 @@
 #define AL_DEVICE_NAME L"\\Device\\ibal"
 #define        ALDEV_KEY               (0x3B)  /* Matches FILE_DEVICE_INFINIBAND from wdm.h */

-#define AL_IOCTL_VERSION                       (11)
+#define AL_IOCTL_VERSION                       (12)

 /* max number of devices with non-default pkey */
 #define        MAX_NUM_PKEY    16
@@ -353,7 +353,6 @@ typedef enum _ual_cep_ops
        ual_cep_get_timewait,
        ual_cep_get_event,
        ual_cep_poll,
-       ual_cep_get_req_cid,
        ual_cep_get_pdata,

        al_cep_maxops
@@ -408,6 +407,9 @@ typedef enum _al_ndi_ops
        ual_ndi_dreq_cm_ioctl_cmd,
     ual_ndi_noop,
     ual_ndi_notify_dreq_cmd,
+       ual_ndi_cancel_cm_irps,
+       ual_ndi_listen_cm_cmd,
+       ual_ndi_get_req_cm_cmd,

        al_ndi_maxops

@@ -440,6 +442,9 @@ typedef enum _al_ioc_device_config
 #define UAL_NDI_DREQ_CM                        IOCTL_CODE(ALDEV_KEY, ual_ndi_dreq_cm_ioctl_cmd)
 #define UAL_NDI_NOOP            IOCTL_CODE(ALDEV_KEY, ual_ndi_noop)
 #define UAL_NDI_NOTIFY_DREQ     IOCTL_CODE(ALDEV_KEY, ual_ndi_notify_dreq_cmd)
+#define UAL_NDI_CANCEL_CM_IRPS IOCTL_CODE(ALDEV_KEY, ual_ndi_cancel_cm_irps)
+#define UAL_NDI_LISTEN_CM              IOCTL_CODE(ALDEV_KEY, ual_ndi_listen_cm_cmd)
+#define UAL_NDI_GET_REQ_CM             IOCTL_CODE(ALDEV_KEY, ual_ndi_get_req_cm_cmd)

 /*
  * Various Operation Allowable on the System Helper
@@ -541,7 +546,6 @@ typedef enum _al_ioc_device_config
 #define UAL_CEP_GET_TIMEWAIT   IOCTL_CODE(ALDEV_KEY, ual_cep_get_timewait)
 #define UAL_CEP_GET_EVENT      IOCTL_CODE(ALDEV_KEY, ual_cep_get_event)
 #define UAL_CEP_POLL           IOCTL_CODE(ALDEV_KEY, ual_cep_poll)
-#define UAL_CEP_GET_REQ_CID    IOCTL_CODE(ALDEV_KEY, ual_cep_get_req_cid)
 #define UAL_CEP_GET_PDATA      IOCTL_CODE(ALDEV_KEY, ual_cep_get_pdata)


Index: core/al/al_qp.c
===================================================================
--- core/al/al_qp.c     (revision 2061)
+++ core/al/al_qp.c     (working copy)
@@ -327,10 +327,6 @@ create_qp(
                        break;
                }
                status = init_conn_qp( (al_conn_qp_t*)h_qp, h_pd, p_qp_create, p_umv_buf );
-#ifdef CL_KERNEL
-               if( status == IB_SUCCESS && !NT_SUCCESS( ndi_qp_init(h_qp) ) )
-                       status = IB_ERROR;
-#endif
                break;

        case IB_QPT_UNRELIABLE_DGRM:
@@ -1117,9 +1113,6 @@ destroying_qp(
        case IB_QPT_RELIABLE_CONN:
        case IB_QPT_UNRELIABLE_CONN:
                al_destroy_cep( h_qp->obj.h_al, &((al_conn_qp_t*)h_qp)->cid, FALSE );
-#ifdef CL_KERNEL
-               ndi_qp_destroy( h_qp );
-#endif

                /* Fall through. */
        case IB_QPT_UNRELIABLE_DGRM:
@@ -1236,10 +1229,6 @@ free_qp(

        CL_ASSERT( p_obj );
        h_qp = PARENT_STRUCT( p_obj, ib_qp_t, obj );
-
-#ifdef CL_KERNEL
-       ndi_qp_free( h_qp );
-#endif

        destroy_al_obj( p_obj );
        cl_free( h_qp );
Index: core/al/user/ual_cm_cep.c
===================================================================
--- core/al/user/ual_cm_cep.c   (revision 2061)
+++ core/al/user/ual_cm_cep.c   (working copy)
@@ -272,9 +272,8 @@ __create_ucep(
        /* Create a kernel CEP only if we don't already have a CID. */
        if( cid == AL_INVALID_CID )
        {
-               uint64_t cep_context = (ULONG_PTR)context;
-               if( !DeviceIoControl( g_al_device, UAL_CREATE_CEP, &cep_context,
-                       sizeof(cep_context), &ioctl, sizeof(ioctl), &bytes_ret, NULL ) ||
+               if( !DeviceIoControl( g_al_device, UAL_CREATE_CEP, NULL,
+                       0, &ioctl, sizeof(ioctl), &bytes_ret, NULL ) ||
                        bytes_ret != sizeof(ioctl) )
                {
                        __destroy_ucep( p_cep );
Index: inc/iba/ib_al_ioctl.h
===================================================================
--- inc/iba/ib_al_ioctl.h       (revision 2061)
+++ inc/iba/ib_al_ioctl.h       (working copy)
@@ -3089,7 +3089,6 @@ typedef union _ual_cep_get_pdata_ioctl
 {
        struct _ual_cep_get_pdata_ioctl_in
        {
-               uint64_t                                h_qp;
                net32_t                                 cid;

        }       in;
@@ -3106,9 +3105,6 @@ typedef union _ual_cep_get_pdata_ioctl
 }      ual_cep_get_pdata_ioctl_t;
 /*
 * FIELDS
-*      h_qp
-*              A handle to the QP to modify.
-*
 *      in.cid
 *              The CID for the target CEP.
 *
@@ -3483,6 +3479,7 @@ typedef struct _ual_ndi_req_cm_ioctl_in
     ib_path_rec_t               path;
        uint64_t                                        h_qp;
        net64_t                                         guid;
+       net32_t                                         cid;
        uint16_t                                        dst_port;
     uint8_t                     resp_res;
     uint8_t                     init_depth;
@@ -3493,7 +3490,7 @@ typedef struct _ual_ndi_req_cm_ioctl_in
 }      ual_ndi_req_cm_ioctl_in_t;
 /*
 * NOTES
-*      The output parameter is the new QP state (RTS).
+*      There is no output parameter.
 *
 * FIELDS
 *      h_qp
@@ -3502,6 +3499,9 @@ typedef struct _ual_ndi_req_cm_ioctl_in
 *      guid
 *              Local port GUID to which to bind to.
 *
+*      cid
+*              CID of the CEP to use for the connection request.
+*
 *      dst_port
 *              Destination port number.
 *
@@ -3600,38 +3600,6 @@ typedef struct _ual_ndi_rej_cm_ioctl_in
 *
 *      pdata
 *              Private data in format RDMA CM
-*
-*****/
-
-
-/****s* User-mode Access Layer/ual_ndi_rtu_cm_ioctl_in_t
-* NAME
-*      ual_ndi_rtu_cm_ioctl_in_t
-*
-* DESCRIPTION
-*      IOCTL structure containing the input parameters
-*      sending CM RTU response .
-*
-* SYNOPSIS
-*/
-typedef struct _ual_ndi_rtu_cm_ioctl_in
-{
-       uint64_t                                        h_qp;
-
-}      ual_ndi_rtu_cm_ioctl_in_t;
-/*
-* NOTES
-*      The output parameter is the new QP state (RTS).
-*
-* FIELDS
-*      h_qp
-*              A handle to the QP to modify.
-*
-*      init_depth
-*              The maximum number of outstanding RDMA read/atomic operations.
-*
-*      resp_res
-*              The maximum number of RDMA read/atomic operations from the recipient.
 *
 *****/

-------------- next part --------------
A non-text attachment was scrubbed...
Name: nd_cm.patch
Type: application/octet-stream
Size: 95535 bytes
Desc: nd_cm.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090326/d397cb77/attachment.obj>


More information about the ofw mailing list