[ofw] [PATCH] Limit time spent at DISPATCH_LEVEL when processing MADs
Leonid Keller
leonid at mellanox.co.il
Sun Jul 13 04:36:58 PDT 2008
Applied in 1393. Thank you.
> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier
> Sent: Saturday, July 12, 2008 3:47 AM
> To: ofw at lists.openfabrics.org
> Subject: [ofw] [PATCH] Limit time spent at DISPATCH_LEVEL
> when processing MADs
>
> In working with some switch SMs, I found that under stress
> the QP0 MAD processing could get delayed by eternal QP1 MAD
> processing (from hitting the SA for path queries). This
> caused the SMs to remove the node from the fabric (multicast
> group membership, etc) because it appeared unreachable.
>
> This patch adds a DPC for QP0 and QP1 to the SMI, and limits
> the SMI to processing 16 MADs before re-queueing the DPC to
> the DPC queue.
>
> Because QP0 processing is crucial to the fabric operating
> properly, the QP0 DPC is set to high importance so that it
> always gets queued to the front of the DPC queue, pre-empting
> any QP1 processing (among other things).
>
> In any case, with this patch all my changes have now been
> mailed to the list.
>
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
>
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_smi.c trunk\core\al\kernel\al_smi.c
> --- old\core\al\kernel\al_smi.c Fri Jul 11 16:24:31 2008
> +++ trunk\core\al\kernel\al_smi.c Fri Jul 11 16:07:29 2008
> @@ -141,6 +141,22 @@ spl_qp_send_comp_cb(
> IN void
> *cq_context );
>
> void
> +spl_qp_send_dpc_cb(
> + IN KDPC *p_dpc,
> + IN void *context,
> + IN void *arg1,
> + IN void *arg2
> + );
> +
> +void
> +spl_qp_recv_dpc_cb(
> + IN KDPC *p_dpc,
> + IN void *context,
> + IN void *arg1,
> + IN void *arg2
> + );
> +
> +void
> spl_qp_recv_comp_cb(
> IN const ib_cq_handle_t
> h_cq,
> IN void
> *cq_context );
> @@ -554,7 +570,17 @@ create_spl_qp_svc(
> cl_qlist_init( &p_spl_qp_svc->send_queue );
> cl_qlist_init( &p_spl_qp_svc->recv_queue );
> cl_spinlock_init(&p_spl_qp_svc->cache_lock);
> -
> +
> + /* Initialize the DPCs. */
> + KeInitializeDpc( &p_spl_qp_svc->send_dpc, spl_qp_send_dpc_cb, p_spl_qp_svc );
> + KeInitializeDpc( &p_spl_qp_svc->recv_dpc, spl_qp_recv_dpc_cb, p_spl_qp_svc );
> +
> + if( qp_type == IB_QPT_QP0 )
> + {
> + KeSetImportanceDpc( &p_spl_qp_svc->send_dpc, HighImportance );
> + KeSetImportanceDpc( &p_spl_qp_svc->recv_dpc, HighImportance );
> + }
> +
> #if defined( CL_USE_MUTEX )
> /* Initialize async callbacks and flags for
> send/receive processing. */
> p_spl_qp_svc->send_async_queued = FALSE;
> @@ -2461,6 +2487,8 @@ spl_qp_send_comp_cb(
>
> AL_ENTER( AL_DBG_SMI );
>
> + UNREFERENCED_PARAMETER( h_cq );
> +
> CL_ASSERT( cq_context );
> p_spl_qp_svc = cq_context;
>
> @@ -2477,21 +2505,55 @@ spl_qp_send_comp_cb(
> cl_spinlock_release( &p_spl_qp_svc->obj.lock );
>
> #else
> + cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
> + if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
> + {
> + cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> + AL_EXIT( AL_DBG_SMI );
> + return;
> + }
> + cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
> + cl_spinlock_release( &p_spl_qp_svc->obj.lock );
>
> - /* Invoke the callback directly. */
> + /* Queue the DPC. */
> CL_ASSERT( h_cq == p_spl_qp_svc->h_send_cq );
> - spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_SEND );
> + KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
> #endif
> +
> + AL_EXIT( AL_DBG_SMI );
> +}
> +
> +
> +void
> +spl_qp_send_dpc_cb(
> + IN KDPC *p_dpc,
> + IN void *context,
> + IN void *arg1,
> + IN void *arg2
> + )
> +{
> + spl_qp_svc_t* p_spl_qp_svc;
> +
> + AL_ENTER( AL_DBG_SMI );
> +
> + CL_ASSERT( context );
> + p_spl_qp_svc = context;
> +
> + UNREFERENCED_PARAMETER( p_dpc );
> + UNREFERENCED_PARAMETER( arg1 );
> + UNREFERENCED_PARAMETER( arg2 );
> +
> + spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_send_cq, IB_WC_SEND );
>
> /* Continue processing any queued MADs on the QP. */
> special_qp_resume_sends( p_spl_qp_svc->h_qp );
>
> -#endif
> + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
>
> - AL_EXIT( AL_DBG_SMI );
> + AL_EXIT( AL_DBG_SMI );
> }
>
>
> -
> #if defined( CL_USE_MUTEX )
> void
> spl_qp_send_async_cb(
> @@ -2536,6 +2598,8 @@ spl_qp_recv_comp_cb(
>
> AL_ENTER( AL_DBG_SMI );
>
> + UNREFERENCED_PARAMETER( h_cq );
> +
> CL_ASSERT( cq_context );
> p_spl_qp_svc = cq_context;
>
> @@ -2552,16 +2616,51 @@ spl_qp_recv_comp_cb(
> cl_spinlock_release( &p_spl_qp_svc->obj.lock );
>
> #else
> + cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
> + if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
> + {
> + cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> + AL_EXIT( AL_DBG_SMI );
> + return;
> + }
> + cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
> + cl_spinlock_release( &p_spl_qp_svc->obj.lock );
>
> + /* Queue the DPC. */
> CL_ASSERT( h_cq == p_spl_qp_svc->h_recv_cq );
> - spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_RECV );
> -
> + KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
> #endif
>
> AL_EXIT( AL_DBG_SMI );
> }
>
>
> +void
> +spl_qp_recv_dpc_cb(
> + IN KDPC *p_dpc,
> + IN void *context,
> + IN void *arg1,
> + IN void *arg2
> + )
> +{
> + spl_qp_svc_t* p_spl_qp_svc;
> +
> + AL_ENTER( AL_DBG_SMI );
> +
> + CL_ASSERT( context );
> + p_spl_qp_svc = context;
> +
> + UNREFERENCED_PARAMETER( p_dpc );
> + UNREFERENCED_PARAMETER( arg1 );
> + UNREFERENCED_PARAMETER( arg2 );
> +
> + spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_recv_cq, IB_WC_RECV );
> +
> + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> +
> + AL_EXIT( AL_DBG_SMI );
> +}
> +
>
> #if defined( CL_USE_MUTEX )
> void
> @@ -2589,7 +2688,7 @@ spl_qp_recv_async_cb(
> #endif
>
>
> -
> +#define SPL_QP_MAX_POLL 16
> /*
> * Special QP completion handler.
> */
> @@ -2607,6 +2706,7 @@ spl_qp_comp(
> ib_mad_element_t* p_mad_element;
> ib_smp_t* p_smp;
> ib_api_status_t status;
> + int max_poll = SPL_QP_MAX_POLL;
>
> AL_ENTER( AL_DBG_SMI_CB );
>
> @@ -2625,7 +2725,7 @@ spl_qp_comp(
>
> wc.p_next = NULL;
> /* Process work completions. */
> - while( ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
> + while( max_poll && ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
> {
> /* Process completions one at a time. */
> CL_ASSERT( p_done_wc );
> @@ -2713,13 +2813,25 @@ spl_qp_comp(
> spl_qp_svc_reset( p_spl_qp_svc );
> }
> p_free_wc = &wc;
> + --max_poll;
> }
>
> - /* Rearm the CQ. */
> - status = ib_rearm_cq( h_cq, FALSE );
> - CL_ASSERT( status == IB_SUCCESS );
> + if( max_poll == 0 )
> + {
> + /* We already have an in_use_cnt reference - use it to queue the DPC. */
> + if( wc_type == IB_WC_SEND )
> + KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
> + else
> + KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
> + }
> + else
> + {
> + /* Rearm the CQ. */
> + status = ib_rearm_cq( h_cq, FALSE );
> + CL_ASSERT( status == IB_SUCCESS );
>
> - cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> + }
> AL_EXIT( AL_DBG_SMI_CB );
> }
>
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id:
> old\core\al\kernel\al_smi.h trunk\core\al\kernel\al_smi.h
> --- old\core\al\kernel\al_smi.h Fri Jul 11 16:24:31 2008
> +++ trunk\core\al\kernel\al_smi.h Fri Jul 11 16:07:29 2008
> @@ -157,6 +157,9 @@ typedef struct _spl_qp_svc
> ib_pool_key_t pool_key;
> ib_mad_svc_handle_t h_mad_svc;
>
> + KDPC send_dpc;
> + KDPC recv_dpc;
> +
> } spl_qp_svc_t;
>
>
>
More information about the ofw
mailing list