[ofw] ***SPAM*** [PATCH] Fix CEP destruction
Leonid Keller
leonid at mellanox.co.il
Wed Jul 9 03:40:20 PDT 2008
Applied in 1356. Thank you.
> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier
> Sent: Wednesday, July 09, 2008 2:15 AM
> To: ofw at lists.openfabrics.org
> Subject: [ofw] ***SPAM*** [PATCH] Fix CEP destruction
>
> In my previous patch "Fix race reading/setting connection ID"
> I incorrectly stated that the patch changed the semantics of
> creation/destruction of the CEPs by providing the destroy
> callback at CEP creation time. It didn't, but this patch does.
>
> Note that it also backs out the "Cleanup CEPs after child
> objects have been destroyed" changes, as those actually
> introduced the following issue:
>
> UM listen CEPs are not tracked in AL's handle table, but can
> queue MADs which take a reference on the AL instance. AL's
> destroying callback must clean up the CEPs to free the MADs in
> order for the ref count to reach zero.
>
> The root problem, and the iterations of the fix that are
> apparent in the patch sequence, have to do with race conditions
> when cleaning up QPs while CM messages are being received and
> processed. First there was the issue of the CID stored in
> the QP having two reserved states and races
> checking/assigning this value. This was fixed (successfully)
> by pushing checks into the CEP manager, protected by the CEP
> manager's spinlock. Next was the issue that a reference on
> the QP is taken when the CEP is bound to a QP, but if AL was
> destroyed the CEP cleanup in AL would blow away the CEPs
> before the QPs were done being destroyed. This would leak a
> reference count on the QP since the CEP was destroyed without
> a destroy callback. The change that added a cleanup callback
> to AL was the first (failed) attempt to fix this (for the
> reasons listed above). This patch is the successful attempt
> to fix this, as it sets the destroy callback at creation
> time. No matter what path destroys the CEP, if a destroy
> callback was provided (because some object has a reference for
> the CEP), the destroy callback will always be invoked.
>
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
>
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\al.c trunk\core\al\al.c
> --- old\core\al\al.c Tue Jul 08 10:15:55 2008
> +++ trunk\core\al\al.c Tue Jul 08 16:00:59 2008
> @@ -116,16 +116,11 @@ destroying_al(
> }
>
> cl_spinlock_release( &p_obj->lock ); -}
> -
>
> -void
> -cleanup_al(
> - IN al_obj_t
> *p_obj )
> -{
> /* Cleanup any left-over connections. */
> - al_cep_cleanup_al( PARENT_STRUCT( p_obj, ib_al_t, obj ) );
> + al_cep_cleanup_al( h_al );
> }
> +
>
>
> static void
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\al.h trunk\core\al\al.h
> --- old\core\al\al.h Tue Jul 08 10:15:55 2008
> +++ trunk\core\al\al.h Tue Jul 08 16:00:59 2008
> @@ -109,11 +109,6 @@ destroying_al(
>
>
> void
> -cleanup_al(
> - IN al_obj_t
> *p_obj );
> -
> -
> -void
> free_al(
> IN al_obj_t
> *p_obj );
>
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\al_cm_cep.h trunk\core\al\al_cm_cep.h
> --- old\core\al\al_cm_cep.h Wed Jul 02 09:53:25 2008
> +++ trunk\core\al\al_cm_cep.h Tue Jul 08 15:33:53 2008
> @@ -98,6 +98,7 @@ al_create_cep(
> IN ib_al_handle_t
> h_al,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb OPTIONAL,
> IN OUT net32_t* const
> p_cid );
> /*
> * NOTES
> @@ -112,8 +113,7 @@ void
> al_destroy_cep(
> IN ib_al_handle_t
> h_al,
> IN OUT net32_t* const
> p_cid,
> - IN boolean_t
> reusable,
> - IN ib_pfn_destroy_cb_t
> pfn_destroy_cb );
> + IN boolean_t
> reusable );
> /*
> *********/
>
> @@ -143,6 +143,7 @@ al_cep_pre_rep(
> IN ib_al_handle_t
> h_al,
> IN net32_t
> cid,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb OPTIONAL,
> IN const ib_cm_rep_t* const
> p_cm_rep,
> IN OUT net32_t* const
> p_cid,
> OUT ib_qp_mod_t* const
> p_init );
> @@ -153,6 +154,7 @@ al_cep_pre_rep_ex(
> IN net32_t
> cid,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb OPTIONAL,
> IN const ib_cm_rep_t* const
> p_cm_rep,
> IN OUT net32_t* const
> p_cid,
> OUT ib_qp_mod_t* const
> p_init );
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\al_cm_qp.c trunk\core\al\al_cm_qp.c
> --- old\core\al\al_cm_qp.c Wed Jul 02 09:53:25 2008
> +++ trunk\core\al\al_cm_qp.c Tue Jul 08 15:33:53 2008
> @@ -280,7 +280,7 @@ __proc_listen(
> CL_ASSERT( p_mad->attr_id == CM_REQ_ATTR_ID ||
> p_mad->attr_id == CM_SIDR_REQ_ATTR_ID );
> /* Destroy the new CEP as it won't ever be
> reported to the user. */
> - al_destroy_cep( p_listen->obj.h_al, &new_cid,
> FALSE, NULL );
> + al_destroy_cep( p_listen->obj.h_al, &new_cid, FALSE );
> }
>
> AL_EXIT( AL_DBG_CM );
> @@ -319,8 +319,7 @@ __proc_conn_timeout(
> /* Unbind the QP from the CEP. */
> __cep_timewait_qp( h_qp );
>
> - al_destroy_cep(
> - h_qp->obj.h_al, &((al_conn_qp_t*)h_qp)->cid,
> TRUE, deref_al_obj );
> + al_destroy_cep( h_qp->obj.h_al, &((al_conn_qp_t*)h_qp)->cid,
> + TRUE );
>
> /* Invoke the callback. */
> ((al_conn_qp_t*)h_qp)->pfn_cm_rej_cb( &rej_rec ); @@
> -376,8 +375,7 @@ __proc_dconn_timeout(
>
> __cep_timewait_qp( h_qp );
>
> - al_destroy_cep(
> - h_qp->obj.h_al, &((al_conn_qp_t*)h_qp)->cid,
> TRUE, deref_al_obj );
> + al_destroy_cep( h_qp->obj.h_al, &((al_conn_qp_t*)h_qp)->cid,
> + TRUE );
>
> /* Call the user back. */
> ((al_conn_qp_t*)h_qp)->pfn_cm_drep_cb( &drep_rec );
> @@ -509,8 +507,7 @@ __proc_rej(
> */
> __cep_timewait_qp( p_cm->h_qp );
>
> - al_destroy_cep(
> - p_cm->h_al,
> &((al_conn_qp_t*)p_cm->h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( p_cm->h_al,
> + &((al_conn_qp_t*)p_cm->h_qp)->cid, TRUE );
>
> /* Call the user back. */
> ((al_conn_qp_t*)p_cm->h_qp)->pfn_cm_rej_cb(
> &rej_rec ); @@ -608,8 +605,7 @@ __proc_drep(
>
> __cep_timewait_qp( p_cm->h_qp );
>
> - al_destroy_cep(
> - p_cm->h_al,
> &((al_conn_qp_t*)p_cm->h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( p_cm->h_al,
> &((al_conn_qp_t*)p_cm->h_qp)->cid,
> + TRUE );
>
> ((al_conn_qp_t*)p_cm->h_qp)->pfn_cm_drep_cb( &drep_rec );
>
> @@ -1041,7 +1037,7 @@ __cep_conn_req(
> p_qp = (al_conn_qp_t*)p_cm_req->h_qp;
>
> /* Get a CEP and bind it to the QP. */
> - status = al_create_cep( h_al, __cm_handler, p_qp,
> &p_qp->cid );
> + status = al_create_cep( h_al, __cm_handler, p_qp,
> deref_al_obj,
> + &p_qp->cid );
> if( status != IB_SUCCESS )
> {
> AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, @@
> -1076,7 +1072,7 @@ __cep_conn_req(
> AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
> ("al_cep_send_req returned %s.\n",
> ib_get_err_str(status)) );
> err:
> - al_destroy_cep( h_al, &p_qp->cid, TRUE,
> deref_al_obj );
> + al_destroy_cep( h_al, &p_qp->cid, TRUE );
> }
>
> /* wait on event if synchronous operation */ @@
> -1277,7 +1273,7 @@ __cep_conn_rep(
> AL_ENTER( AL_DBG_CM );
>
> status = al_cep_pre_rep(
> - h_cm.h_al, h_cm.cid, p_cm_rep->h_qp, p_cm_rep,
> + h_cm.h_al, h_cm.cid, p_cm_rep->h_qp, deref_al_obj,
> + p_cm_rep,
> &((al_conn_qp_t*)p_cm_rep->h_qp)->cid, &qp_mod );
> switch( status )
> {
> @@ -1291,7 +1287,7 @@ __cep_conn_rep(
>
> default:
> al_cep_rej( h_cm.h_al, h_cm.cid,
> IB_REJ_INSUF_RESOURCES, NULL, 0, NULL, 0 );
> - al_destroy_cep( h_cm.h_al, &h_cm.cid, FALSE, NULL );
> + al_destroy_cep( h_cm.h_al, &h_cm.cid, FALSE );
>
> AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
> ("al_cep_pre_rep returned %s.\n",
> ib_get_err_str( status )) ); @@ -1327,7 +1323,7 @@ err:
> /* Reject and abort the connection. */
> al_cep_rej( h_cm.h_al, h_cm.cid,
> IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );
> al_destroy_cep(
> - h_cm.h_al,
> &((al_conn_qp_t*)p_cm_rep->h_qp)->cid, TRUE, deref_al_obj );
> + h_cm.h_al,
> + &((al_conn_qp_t*)p_cm_rep->h_qp)->cid, TRUE );
> }
>
> AL_EXIT( AL_DBG_CM );
> @@ -1391,7 +1387,7 @@ ib_cm_rep(
> cid = h_cm_req.cid;
> al_cep_rej(
> h_cm_req.h_al, h_cm_req.cid,
> IB_REJ_INSUF_QP, NULL, 0, NULL, 0 );
> - al_destroy_cep( h_cm_req.h_al, &cid, FALSE, NULL );
> + al_destroy_cep( h_cm_req.h_al, &cid, FALSE );
>
> AL_EXIT( AL_DBG_CM );
> return status;
> @@ -1460,7 +1456,7 @@ err:
> __cep_timewait_qp( h_cm_rep.h_qp );
>
> al_destroy_cep(
> - h_cm_rep.h_al,
> &((al_conn_qp_t*)h_cm_rep.h_qp)->cid, TRUE, deref_al_obj );
> + h_cm_rep.h_al,
> + &((al_conn_qp_t*)h_cm_rep.h_qp)->cid, TRUE );
>
> AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
> ("al_cep_rtu returned %s.\n",
> ib_get_err_str( status )) ); @@ -1524,12 +1520,12 @@ ib_cm_rej(
> __cep_timewait_qp( h_cm.h_qp );
>
> al_destroy_cep(
> - h_cm.h_al,
> &((al_conn_qp_t*)h_cm.h_qp)->cid, TRUE, deref_al_obj );
> + h_cm.h_al, &((al_conn_qp_t*)h_cm.h_qp)->cid,
> + TRUE );
> }
> else
> {
> cid = h_cm.cid;
> - al_destroy_cep( h_cm.h_al, &cid, FALSE, NULL );
> + al_destroy_cep( h_cm.h_al, &cid, FALSE );
> }
>
> AL_EXIT( AL_DBG_CM );
> @@ -1600,7 +1596,7 @@ ib_cm_dreq(
> __cep_timewait_qp( p_cm_dreq->h_qp );
>
> al_destroy_cep( p_cm_dreq->h_qp->obj.h_al,
> -
> &((al_conn_qp_t*)p_cm_dreq->h_qp)->cid, TRUE, deref_al_obj );
> +
> &((al_conn_qp_t*)p_cm_dreq->h_qp)->cid, TRUE );
> status = IB_SUCCESS;
> }
>
> @@ -1646,7 +1642,7 @@ ib_cm_drep(
> __cep_timewait_qp( h_cm_dreq.h_qp );
>
> al_destroy_cep( h_cm_dreq.h_al,
> -
> &((al_conn_qp_t*)h_cm_dreq.h_qp)->cid, TRUE, deref_al_obj );
> + &((al_conn_qp_t*)h_cm_dreq.h_qp)->cid, TRUE );
> }
>
> AL_EXIT( AL_DBG_CM );
> @@ -1797,8 +1793,7 @@ __destroying_listen(
> p_listen = PARENT_STRUCT( p_obj, al_listen_t, obj );
>
> /* Destroy the listen's CEP. */
> - al_destroy_cep(
> - p_obj->h_al, &p_listen->cid, TRUE, deref_al_obj );
> + al_destroy_cep( p_obj->h_al, &p_listen->cid, TRUE );
> }
>
>
> @@ -1864,7 +1859,7 @@ __cep_listen(
>
> /* Create a CEP to listen on. */
> p_listen->cid = AL_INVALID_CID;
> - status = al_create_cep( h_al, __cm_handler, p_listen,
> &p_listen->cid );
> + status = al_create_cep( h_al, __cm_handler, p_listen,
> + deref_al_obj, &p_listen->cid );
> if( status != IB_SUCCESS )
> {
> p_listen->obj.pfn_destroy( &p_listen->obj,
> NULL ); diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\al_qp.c trunk\core\al\al_qp.c
> --- old\core\al\al_qp.c Wed Jul 02 09:53:25 2008
> +++ trunk\core\al\al_qp.c Tue Jul 08 15:33:53 2008
> @@ -1116,7 +1116,7 @@ destroying_qp(
>
> case IB_QPT_RELIABLE_CONN:
> case IB_QPT_UNRELIABLE_CONN:
> - al_destroy_cep( h_qp->obj.h_al,
> &((al_conn_qp_t*)h_qp)->cid, FALSE, deref_al_obj );
> + al_destroy_cep( h_qp->obj.h_al,
> + &((al_conn_qp_t*)h_qp)->cid, FALSE );
> #ifdef CL_KERNEL
> ndi_qp_destroy( h_qp );
> #endif
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_cm_cep.c trunk\core\al\kernel\al_cm_cep.c
> --- old\core\al\kernel\al_cm_cep.c Wed Jul 02 09:53:24 2008
> +++ trunk\core\al\kernel\al_cm_cep.c Tue Jul 08 15:33:53 2008
> @@ -4060,6 +4060,7 @@ al_create_cep(
> IN ib_al_handle_t
> h_al,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN OUT net32_t* const
> p_cid )
> {
> kcep_t *p_cep;
> @@ -4085,6 +4086,7 @@ al_create_cep(
>
> __bind_cep( p_cep, h_al, pfn_cb, context );
>
> + p_cep->pfn_destroy_cb = pfn_destroy_cb;
> *p_cid = p_cep->cid;
>
> KeReleaseInStackQueuedSpinLock( &hdl ); @@ -4102,13
> +4104,13 @@ void al_destroy_cep(
> IN ib_al_handle_t
> h_al,
> IN OUT net32_t* const
> p_cid,
> - IN boolean_t
> reusable,
> - IN ib_pfn_destroy_cb_t
> pfn_destroy_cb )
> + IN boolean_t
> reusable )
> {
> net32_t cid = *p_cid;
> kcep_t *p_cep;
> KLOCK_QUEUE_HANDLE hdl;
> void *context;
> + ib_pfn_destroy_cb_t pfn_destroy_cb;
> int32_t ref_cnt;
>
> AL_ENTER( AL_DBG_CM );
> @@ -4133,8 +4135,7 @@ al_destroy_cep(
> }
>
> context = p_cep->context;
> -
> - p_cep->pfn_destroy_cb = pfn_destroy_cb;
> + pfn_destroy_cb = p_cep->pfn_destroy_cb;
>
> /* Cancel any queued IRP */
> __cep_complete_irp( p_cep, STATUS_CANCELLED,
> IO_NO_INCREMENT ); @@ -4994,6 +4995,7 @@ al_cep_pre_rep(
> IN ib_al_handle_t
> h_al,
> IN net32_t
> cid,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN const ib_cm_rep_t* const
> p_cm_rep,
> IN OUT net32_t* const
> p_cid,
> OUT ib_qp_mod_t* const
> p_init )
> @@ -5028,6 +5030,7 @@ al_cep_pre_rep(
>
> if( status == IB_SUCCESS )
> {
> + p_cep->pfn_destroy_cb = pfn_destroy_cb;
> *p_cid = cid;
> }
>
> @@ -5043,6 +5046,7 @@ al_cep_pre_rep_ex(
> IN net32_t
> cid,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN const ib_cm_rep_t* const
> p_cm_rep,
> IN OUT net32_t* const
> p_cid,
> OUT ib_qp_mod_t* const
> p_init )
> @@ -5078,6 +5082,7 @@ al_cep_pre_rep_ex(
> if( status == IB_SUCCESS )
> {
> p_cep->pfn_cb = pfn_cb;
> + p_cep->pfn_destroy_cb = pfn_destroy_cb;
> *p_cid = cid;
> }
>
> @@ -6305,7 +6310,7 @@ al_cep_cleanup_al(
> */
> cid = PARENT_STRUCT( p_item, kcep_t, al_item )->cid;
> cl_spinlock_release( &h_al->obj.lock );
> - al_destroy_cep( h_al, &cid, FALSE, NULL );
> + al_destroy_cep( h_al, &cid, FALSE );
> cl_spinlock_acquire( &h_al->obj.lock );
> }
> cl_spinlock_release( &h_al->obj.lock ); diff -up -r
> -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_mgr.c trunk\core\al\kernel\al_mgr.c
> --- old\core\al\kernel\al_mgr.c Tue Jul 08 10:15:55 2008
> +++ trunk\core\al\kernel\al_mgr.c Tue Jul 08 16:00:59 2008
> @@ -414,7 +414,7 @@ ib_open_al(
>
> /* Initialize the base object. */
> status = init_al_obj( &h_al->obj, NULL, FALSE,
> - destroying_al, cleanup_al, free_al );
> + destroying_al, NULL, free_al );
> if( status != IB_SUCCESS )
> {
> free_al( &h_al->obj );
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_ndi_cm.c trunk\core\al\kernel\al_ndi_cm.c
> --- old\core\al\kernel\al_ndi_cm.c Tue Jul 08 15:21:33 2008
> +++ trunk\core\al\kernel\al_ndi_cm.c Tue Jul 08 16:08:14 2008
> @@ -416,6 +416,7 @@ static VOID __ndi_complete_cancelled_irp
> ndi_qp_csq_t *p_ndi_csq = (ndi_qp_csq_t*)Csq;
> ib_qp_handle_t h_qp = p_ndi_csq->h_qp;
> KIRQL irql;
> + ib_query_handle_t h_query;
>
> AL_ENTER( AL_DBG_NDI );
>
> @@ -423,29 +424,21 @@ static VOID __ndi_complete_cancelled_irp
> {
> case UAL_NDI_REQ_CM:
> __ndi_acquire_lock( Csq, &irql );
> - if( p_ndi_csq->state != NDI_CM_INVALID )
> - {
> - switch( p_ndi_csq->state )
> - {
> - case NDI_CM_CONNECTING_ATS_SENT:
> - case NDI_CM_CONNECTING_QPR_SENT:
> /*
> - * Note that al_cancel_sa_req
> must be synchronized with any potential
> - * SA callback.
> + * Note that al_cancel_sa_req is synchronized
> with any potential
> + * SA callback by the CSQ lock.
> */
> - al_cancel_sa_req(
> &h_qp->p_irp_queue->h_query->sa_req );
> - break;
> +#pragma warning( disable:4305 )
> + h_query = InterlockedExchangePointer(
> +&h_qp->p_irp_queue->h_query, NULL ); #pragma warning( default:4305 )
> + if( h_query != NULL )
> + al_cancel_sa_req( &h_query->sa_req );
>
> - default:
> - CL_ASSERT( p_ndi_csq->state ==
> NDI_CM_CONNECTING_ATS_SENT ||
> - p_ndi_csq->state ==
> NDI_CM_CONNECTING_QPR_SENT ||
> - p_ndi_csq->state ==
> NDI_CM_CONNECTING_REQ_SENT );
> - }
> - p_ndi_csq->state = NDI_CM_IDLE;
> - }
> /* Always try to destroy the CEP. The CEP
> manager handles invalid CIDs. */
> - al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( qp_get_al( h_qp ),
> + &((al_conn_qp_t*)h_qp)->cid, TRUE );
> +
> + if( p_ndi_csq->state != NDI_CM_INVALID )
> + p_ndi_csq->state = NDI_CM_IDLE;
>
> __ndi_release_lock( Csq, irql );
>
> @@ -681,18 +674,9 @@ __ndi_proc_rej(
> __ndi_complete_irp( h_qp,
> p_irp, STATUS_CONNECTION_REFUSED );
>
> al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + qp_get_al( h_qp ),
> + &((al_conn_qp_t*)h_qp)->cid, TRUE );
> break;
>
> - // TODO: REP IRPs never get queued in the CSQ. Can
> we delete?
> - //case UAL_NDI_REP_CM:
> - // if( h_qp->p_irp_queue->state !=
> NDI_CM_INVALID )
> - // h_qp->p_irp_queue->state =
> NDI_CM_IDLE;
> - // __ndi_complete_irp( h_qp, p_irp,
> STATUS_CONNECTION_ABORTED );
> - // al_destroy_cep(
> - // qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> - // break;
> -
> case UAL_NDI_DREQ_CM:
> __ndi_queue_drep( p_irp );
> break;
> @@ -777,8 +761,7 @@ __ndi_do_drep(
> /* Store the timestamp after which the QP exits timewait. */
> h_qp->timewait = cl_get_time_stamp() + timewait_us;
>
> - al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid,
> + TRUE );
>
> /* bring QP to error state */
> cl_memclr( &qp_mod, sizeof(qp_mod) ); @@ -865,17
> +848,18 @@ __ndi_cm_handler(
> AL_PRINT(
> TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
> ("REP timeouted for
> CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> cid, h_al, h_qp, new_cid ) );
> - // TODO: REP IRPs don't get queued int he
> IRP queue, so don't complete some random IRP (like a DREQ)
> - //__ndi_complete_irp_ex(
> h_qp, STATUS_CONNECTION_ABORTED );
> break;
>
> case CM_DREQ_ATTR_ID:
> + AL_PRINT(
> TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
> + ("DREQ timeouted for
> CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> + cid, h_al, h_qp, new_cid ) );
> __ndi_proc_drep( h_qp );
> break;
>
> default:
> AL_PRINT( TRACE_LEVEL_ERROR,
> AL_DBG_ERROR,
> - ("Unhandled MAD attr
> ID %d for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> + ("Unhandled failed
> MAD attr ID
> + %d for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> p_mad->attr_id, cid,
> h_al, h_qp, new_cid ) );
> break;
> }
> @@ -912,12 +896,22 @@ __ndi_cm_handler(
> break;
>
> case CM_DREP_ATTR_ID:
> + AL_PRINT(
> TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
> + ("DREP received for
> CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> + cid, h_al, h_qp, new_cid ) );
> __ndi_proc_drep( h_qp );
> break;
>
> + case CM_RTU_ATTR_ID:
> + AL_PRINT(
> TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
> + ("RTU received for
> CEP with cid %d, h_al %p, context %p, new_cid %d.\n",
> + cid, h_al, h_qp, new_cid ) );
> + break;
> +
> default:
> AL_PRINT( TRACE_LEVEL_ERROR,
> AL_DBG_ERROR,
> - ("Invalid CM recv MAD
> attribute ID %d.\n", p_mad->attr_id) );
> + ("Unhandled MAD attr
> ID %d for CEP with cid %d, h_al %p, context %p, new_cid %d .\n",
> + p_mad->attr_id, cid,
> h_al, h_qp,
> + new_cid ) );
> }
> }
>
> @@ -1002,7 +996,8 @@ __ndi_send_req(
>
> /* Get a CEP and bind it to the QP. */
> status = al_create_cep(
> - qp_get_al( h_qp ), __ndi_cm_handler, h_qp,
> &((al_conn_qp_t*)h_qp)->cid );
> + qp_get_al( h_qp ), __ndi_cm_handler, h_qp,
> deref_al_obj,
> + &((al_conn_qp_t*)h_qp)->cid );
> if( status != IB_SUCCESS )
> {
> h_qp->p_irp_queue->state = NDI_CM_IDLE; @@
> -1043,8 +1038,7 @@ __ndi_send_req(
> return STATUS_SUCCESS;
>
> error:
> - al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid,
> + TRUE );
>
> switch( status )
> {
> @@ -1070,12 +1064,11 @@ static void AL_API __ndi_pr_query_cb(
> ib_query_rec_t
> *p_query_rec )
> {
> - ib_api_status_t status;
> cl_ioctl_handle_t p_irp;
> uint8_t pkt_life;
> ib_path_rec_t *p_path_rec;
> ib_qp_handle_t h_qp =
> (ib_qp_handle_t)p_query_rec->query_context;
> - NTSTATUS nt_status;
> + NTSTATUS status;
> KIRQL irql;
>
> AL_ENTER( AL_DBG_NDI );
> @@ -1090,26 +1083,44 @@ __ndi_pr_query_cb(
> goto exit;
> }
>
> - status = p_query_rec->status;
> - if( status != IB_SUCCESS || !p_query_rec->result_cnt )
> +#pragma warning( disable:4305 )
> + InterlockedExchangePointer(
> &h_qp->p_irp_queue->h_query, NULL );
> +#pragma warning( default:4305 )
> +
> + if( p_query_rec->status != IB_SUCCESS ||
> p_query_rec->result_cnt
> + == 0 )
> {
> __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );
> if( h_qp->p_irp_queue->state != NDI_CM_INVALID )
> h_qp->p_irp_queue->state = NDI_CM_IDLE;
> __ndi_release_lock( &h_qp->p_irp_queue->csq, irql );
> - switch( status )
> + switch( p_query_rec->status )
> {
> case IB_TIMEOUT:
> - __ndi_complete_irp( h_qp, p_irp,
> STATUS_TIMEOUT );
> + case IB_CANCELED:
> + status = ib_to_ntstatus(
> p_query_rec->status );
> break;
>
> - case IB_CANCELED:
> - __ndi_complete_irp( h_qp, p_irp,
> STATUS_CANCELLED );
> + case IB_REMOTE_ERROR:
> + CL_ASSERT( p_query_rec->p_result_mad );
> + switch(
> p_query_rec->p_result_mad->p_mad_buf->status )
> + {
> + case IB_MAD_STATUS_BUSY:
> + case IB_SA_MAD_STATUS_NO_RESOURCES:
> + status = STATUS_TIMEOUT;
> break;
>
> default:
> - __ndi_complete_irp( h_qp, p_irp,
> STATUS_HOST_UNREACHABLE );
> + status = STATUS_INVALID_PARAMETER_1 +
> +
> (p_query_rec->p_result_mad->p_mad_buf->status & 0xFF);
> + break;
> + }
> + break;
> +
> + default:
> + status = STATUS_HOST_UNREACHABLE;
> + break;
> }
> + __ndi_complete_irp( h_qp, p_irp, status );
> goto exit;
> }
>
> @@ -1127,16 +1138,16 @@ __ndi_pr_query_cb(
>
> p_irp->Tail.Overlay.DriverContext[1] = p_path_rec;
>
> - nt_status = IoCsqInsertIrpEx(
> + status = IoCsqInsertIrpEx(
> &h_qp->p_irp_queue->csq,
> p_irp,
> NULL,
> (VOID*)(ULONG_PTR)NDI_CM_CONNECTING_REQ_SENT
> );
> - if( nt_status != STATUS_SUCCESS )
> + if( status != STATUS_SUCCESS )
> {
> p_irp->Tail.Overlay.DriverContext[1] = NULL;
> - __ndi_complete_irp( h_qp, p_irp, nt_status );
> + __ndi_complete_irp( h_qp, p_irp, status );
> }
> else
> {
> @@ -1148,7 +1159,6 @@ __ndi_pr_query_cb(
> }
>
> exit:
> -
> if( p_query_rec->p_result_mad )
> ib_put_mad( p_query_rec->p_result_mad );
>
> @@ -1157,7 +1167,6 @@ exit:
> }
>
>
> -
> /*
> * Send asynchronous query to the SA for a path record.
> *
> @@ -1198,9 +1207,7 @@ __ndi_pr_query(
> p_req->guid, ib_gid_get_guid(
> &p_req->gids.dest_gid )) );
>
> ref_al_obj( &h_qp->obj ); /* take path
> query reference */
> -
> status = ib_query( qp_get_al( h_qp ), &query_req,
> &h_qp->p_irp_queue->h_query );
> -
> if( status != IB_SUCCESS )
> {
> h_qp->p_irp_queue->state = NDI_CM_IDLE; @@
> -1237,6 +1244,10 @@ __ndi_ats_query_cb(
> goto exit;
> }
>
> +#pragma warning( disable:4305 )
> + InterlockedExchangePointer(
> &h_qp->p_irp_queue->h_query, NULL );
> +#pragma warning( default:4305 )
> +
> if( p_query_rec->status != IB_SUCCESS ||
> !p_query_rec->result_cnt )
> {
> __ndi_acquire_lock( &h_qp->p_irp_queue->csq,
> &irql ); @@ -1462,8 +1473,7 @@ err:
>
> __cep_timewait_qp( h_qp );
>
> - al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( qp_get_al( h_qp ),
> + &((al_conn_qp_t*)h_qp)->cid, TRUE );
>
> AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
> ("al_cep_rtu returned %s.\n",
> ib_get_err_str( status )) ); @@ -1572,8 +1582,7 @@ err:
> /* transit QP to error state */
> __cep_timewait_qp( h_qp );
>
> - al_destroy_cep(
> - qp_get_al( h_qp ),
> &((al_conn_qp_t*)h_qp)->cid, TRUE, deref_al_obj );
> + al_destroy_cep( qp_get_al( h_qp ),
> + &((al_conn_qp_t*)h_qp)->cid, TRUE );
>
> AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
> ("al_cep_rtu returned %s.\n",
> ib_get_err_str( status )) ); @@ -1663,14 +1672,14 @@ __ndi_send_rep(
>
> /* prepare Passive CEP for connection */
> status = al_cep_pre_rep_ex(
> - qp_get_al( h_qp ), p_rep->cid,
> __ndi_cm_handler, h_qp, &cm_rep,
> - &((al_conn_qp_t*)h_qp)->cid, &qp_mod );
> + qp_get_al( h_qp ), p_rep->cid,
> __ndi_cm_handler, h_qp, deref_al_obj,
> + &cm_rep, &((al_conn_qp_t*)h_qp)->cid, &qp_mod );
> if( status != IB_SUCCESS )
> {
> IoFreeWorkItem(
> p_irp->Tail.Overlay.DriverContext[1] );
> p_irp->Tail.Overlay.DriverContext[1] = NULL;
> deref_al_obj( &h_qp->obj ); /* Release work
> item reference. */
> - al_destroy_cep( qp_get_al( h_qp ),
> &p_rep->cid, FALSE, NULL );
> + al_destroy_cep( qp_get_al( h_qp ),
> &p_rep->cid, FALSE );
> deref_al_obj( &h_qp->obj ); /* Release CEP
> reference. */
> AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
> ("al_cep_pre_rep_ex returned %s.\n",
> ib_get_err_str( status )) ); diff -up -r -X
> trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_proxy_cep.c trunk\core\al\kernel\al_proxy_cep.c
> --- old\core\al\kernel\al_proxy_cep.c Wed Jul 02 09:53:25 2008
> +++ trunk\core\al\kernel\al_proxy_cep.c Tue Jul 08 15:33:53 2008
> @@ -53,7 +53,7 @@ proxy_create_cep(
> OUT size_t
> *p_ret_bytes )
> {
> al_dev_open_context_t *p_context;
> - void* VOID_PTR64 *
> p_user_context; //TODO use ioctl here
> + void* user_context;
> ual_create_cep_ioctl_t *p_ioctl;
>
> AL_ENTER( AL_DBG_CM );
> @@ -62,19 +62,19 @@ proxy_create_cep(
> p_ioctl = (ual_create_cep_ioctl_t*)cl_ioctl_out_buf(
> h_ioctl );
>
> /* Validate user parameters. */
> - if( cl_ioctl_in_size( h_ioctl ) != sizeof(void* VOID_PTR64) ||
> + if( cl_ioctl_in_size( h_ioctl ) != sizeof(uint64_t) ||
> cl_ioctl_out_size( h_ioctl ) !=
> sizeof(ual_create_cep_ioctl_t) )
> {
> AL_EXIT( AL_DBG_CM );
> return CL_INVALID_PARAMETER;
> }
>
> - p_user_context = cl_ioctl_in_buf( h_ioctl );
> + user_context = *(void**)cl_ioctl_in_buf( h_ioctl );
>
> /* We use IRPs as notification mechanism so the
> callback is NULL. */
> p_ioctl->cid = AL_INVALID_CID;
> p_ioctl->status = al_create_cep( p_context->h_al, NULL,
> - *p_user_context, &p_ioctl->cid );
> + user_context, NULL, &p_ioctl->cid );
>
> *p_ret_bytes = sizeof(ual_create_cep_ioctl_t);
>
> @@ -124,7 +124,7 @@ proxy_destroy_cep(
> }
>
> al_destroy_cep( p_context->h_al,
> - (net32_t*)cl_ioctl_in_buf( h_ioctl ), TRUE, NULL );
> + (net32_t*)cl_ioctl_in_buf( h_ioctl ), TRUE );
>
> AL_EXIT( AL_DBG_CM );
> return CL_SUCCESS;
> @@ -205,7 +205,7 @@ proxy_cep_pre_req(
> p_ioctl->in.cm_req.p_req_pdata = p_ioctl->in.pdata;
>
> /* Get the kernel QP handle. */
> - h_qp = (ib_qp_handle_t VOID_PTR64)al_hdl_ref(
> + h_qp = (ib_qp_handle_t)al_hdl_ref(
> p_context->h_al,
> (uint64_t)p_ioctl->in.cm_req.h_qp, AL_OBJ_TYPE_H_QP );
> if( !h_qp )
> {
> @@ -291,7 +291,7 @@ proxy_cep_pre_rep(
> p_ioctl->in.cm_rep.p_rep_pdata = p_ioctl->in.pdata;
>
> /* Get the kernel QP handle. */
> - h_qp = (ib_qp_handle_t VOID_PTR64)al_hdl_ref(
> + h_qp = (ib_qp_handle_t)al_hdl_ref(
> p_context->h_al,
> (uint64_t)p_ioctl->in.cm_rep.h_qp, AL_OBJ_TYPE_H_QP );
> if( !h_qp )
> {
> @@ -303,7 +303,7 @@ proxy_cep_pre_rep(
>
> cid = AL_INVALID_CID;
> p_ioctl->out.status = al_cep_pre_rep(
> p_context->h_al, p_ioctl->in.cid,
> - p_ioctl->in.context, &p_ioctl->in.cm_rep,
> &cid, &p_ioctl->out.init );
> + p_ioctl->in.context, NULL, &p_ioctl->in.cm_rep, &cid,
> + &p_ioctl->out.init );
>
> deref_al_obj( &h_qp->obj );
>
> @@ -551,7 +551,7 @@ proxy_cep_lap(
> p_ioctl->pdata;
>
> /* Get the kernel QP handle. */
> - h_qp = (ib_qp_handle_t VOID_PTR64)al_hdl_ref(
> + h_qp = (ib_qp_handle_t)al_hdl_ref(
> p_context->h_al,
> (uint64_t)p_ioctl->cm_lap.h_qp, AL_OBJ_TYPE_H_QP );
> if( !h_qp )
> {
> @@ -604,7 +604,7 @@ proxy_cep_pre_apr(
> p_ioctl->in.cm_apr.p_apr_pdata = p_ioctl->in.pdata;
>
> /* Get the kernel QP handle. */
> - h_qp = (ib_qp_handle_t VOID_PTR64)al_hdl_ref(
> + h_qp = (ib_qp_handle_t)al_hdl_ref(
> p_context->h_al,
> (uint64_t)p_ioctl->in.cm_apr.h_qp, AL_OBJ_TYPE_H_QP );
> if( !h_qp )
> {
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_proxy_ndi.c trunk\core\al\kernel\al_proxy_ndi.c
> --- old\core\al\kernel\al_proxy_ndi.c Tue Jul 08 15:21:33 2008
> +++ trunk\core\al\kernel\al_proxy_ndi.c Tue Jul 08 15:33:53 2008
> @@ -458,7 +458,7 @@ __ndi_rej_cm(
> goto exit;
> }
>
> - al_destroy_cep( p_context->h_al, &cid, FALSE, NULL );
> + al_destroy_cep( p_context->h_al, &p_rej->cid, FALSE );
> h_ioctl->IoStatus.Status = STATUS_SUCCESS;
>
> exit:
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\user\ual_cm_cep.c trunk\core\al\user\ual_cm_cep.c
> --- old\core\al\user\ual_cm_cep.c Tue Jul 08 10:15:55 2008
> +++ trunk\core\al\user\ual_cm_cep.c Tue Jul 08 15:33:53 2008
> @@ -221,7 +221,7 @@ al_cep_cleanup_al(
> */
> cid = PARENT_STRUCT( p_item, ucep_t, al_item )->cid;
> cl_spinlock_release( &h_al->obj.lock );
> - al_destroy_cep( h_al, &cid, FALSE, NULL );
> + al_destroy_cep( h_al, &cid, FALSE );
> cl_spinlock_acquire( &h_al->obj.lock );
> }
> cl_spinlock_release( &h_al->obj.lock ); @@ -246,6
> +246,7 @@ __create_ucep(
> IN net32_t
> cid,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN OUT net32_t* const
> p_cid )
> {
> ucep_t *p_cep;
> @@ -331,9 +332,10 @@ __create_ucep(
> if( *p_cid != AL_INVALID_CID )
> {
> cl_spinlock_release( &h_al->obj.lock );
> - al_destroy_cep( h_al, &cid, TRUE, NULL );
> + al_destroy_cep( h_al, &cid, TRUE );
> return IB_INVALID_STATE;
> }
> + p_cep->pfn_destroy_cb = pfn_destroy_cb;
> *p_cid = p_cep->cid;
> }
>
> @@ -351,13 +353,15 @@ al_create_cep(
> IN ib_al_handle_t
> h_al,
> IN al_pfn_cep_cb_t
> pfn_cb,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN OUT net32_t* const
> p_cid )
> {
> ib_api_status_t status;
>
> AL_ENTER( AL_DBG_CM );
>
> - status = __create_ucep( h_al, AL_INVALID_CID, pfn_cb,
> context, p_cid );
> + status = __create_ucep(
> + h_al, AL_INVALID_CID, pfn_cb, context,
> pfn_destroy_cb,
> + p_cid );
>
> AL_EXIT( AL_DBG_CM );
> return status;
> @@ -372,8 +376,7 @@ void
> al_destroy_cep(
> IN ib_al_handle_t
> h_al,
> IN OUT net32_t* const
> p_cid,
> - IN boolean_t
> reusable,
> - IN ib_pfn_destroy_cb_t
> pfn_destroy_cb OPTIONAL )
> + IN boolean_t
> reusable )
> {
> ucep_t *p_cep;
> DWORD bytes_ret;
> @@ -415,8 +418,6 @@ invalid:
>
> cl_spinlock_release( &gp_cep_mgr->obj.lock );
>
> - p_cep->pfn_destroy_cb = pfn_destroy_cb;
> -
> /*
> * Remove from the AL instance. Note that once removed, all
> * callbacks for an item will stop.
> @@ -605,6 +606,7 @@ al_cep_pre_rep(
> IN ib_al_handle_t
> h_al,
> IN net32_t
> cid,
> IN void*
> context,
> + IN ib_pfn_destroy_cb_t
> pfn_destroy_cb,
> IN const ib_cm_rep_t* const
> p_cm_rep,
> IN OUT net32_t* const
> p_cid,
> OUT ib_qp_mod_t* const
> p_init )
> @@ -681,6 +683,7 @@ al_cep_pre_rep(
> cl_spinlock_release( &h_al->obj.lock );
> return IB_INVALID_STATE;
> }
> + p_cep->pfn_destroy_cb = pfn_destroy_cb;
> *p_cid = p_cep->cid;
> *p_init = ioctl.out.init;
> cl_spinlock_release( &h_al->obj.lock ); @@
> -1364,7 +1367,7 @@ al_cep_poll(
> {
> /* Need to create a new CEP for user-mode. */
> status = __create_ucep( p_cep->h_al,
> ioctl.new_cid,
> - p_cep->pfn_cb, NULL, NULL );
> + p_cep->pfn_cb, NULL, NULL, NULL );
> if( status != IB_SUCCESS )
> {
> DeviceIoControl( g_al_device,
> UAL_DESTROY_CEP,
>
>
>
>
More information about the ofw
mailing list