[ofw] [PATCH] Limit time spent at DISPATCH_LEVEL when processing MADs

Tzachi Dar tzachid at mellanox.co.il
Sun Jul 13 00:23:12 PDT 2008


Hi Fab,

For the long term, we should probably change the way that we work:

Today's machines always come with more than one core. I'm quite sure that
in the situation you are facing, there are cores that are not
doing anything, while multiple tasks are competing for one core.
The natural solution is to use more than one core for these requests.

I believe that once we move to MSI on Server 2008, this should
become more natural.

Thanks
Tzachi

> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org 
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier
> Sent: Saturday, July 12, 2008 3:47 AM
> To: ofw at lists.openfabrics.org
> Subject: [ofw] [PATCH] Limit time spent at DISPATCH_LEVEL 
> when processingMADs
> 
> In working with some switch SMs, I found that under stress 
> the QP0 MAD processing could get delayed by eternal QP1 MAD 
> processing (from hitting the SA for path queries).  This 
> caused the SMs to remove the node from the fabric (multicast 
> group membership, etc) because it appeared unreachable.
> 
> This patch adds a DPC for QP0 and QP1 to the SMI, and limits 
> the SMI to processing 16 MADs before re-queueing the DPC to 
> the DPC queue.
> 
> Because QP0 processing is crucial to the fabric operating 
> properly, the QP0 DPC is set to high importance so that it 
> always gets queued to the front of the DPC queue, pre-empting 
> any QP1 processing (among other things).
> 
> In any case, with this patch all my changes have now been 
> mailed to the list.
> 
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
> 
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: 
> old\core\al\kernel\al_smi.c trunk\core\al\kernel\al_smi.c
> --- old\core\al\kernel\al_smi.c Fri Jul 11 16:24:31 2008
> +++ trunk\core\al\kernel\al_smi.c       Fri Jul 11 16:07:29 2008
> @@ -141,6 +141,22 @@ spl_qp_send_comp_cb(
>         IN                              void                  
>                           *cq_context );
> 
>  void
> +spl_qp_send_dpc_cb(
> +    IN              KDPC                        *p_dpc,
> +    IN              void                        *context,
> +    IN              void                        *arg1,
> +    IN              void                        *arg2
> +    );
> +
> +void
> +spl_qp_recv_dpc_cb(
> +    IN              KDPC                        *p_dpc,
> +    IN              void                        *context,
> +    IN              void                        *arg1,
> +    IN              void                        *arg2
> +    );
> +
> +void
>  spl_qp_recv_comp_cb(
>         IN              const   ib_cq_handle_t                
>           h_cq,
>         IN                              void                  
>                           *cq_context );
> @@ -554,7 +570,17 @@ create_spl_qp_svc(
>         cl_qlist_init( &p_spl_qp_svc->send_queue );
>         cl_qlist_init( &p_spl_qp_svc->recv_queue );
>         cl_spinlock_init(&p_spl_qp_svc->cache_lock);
> -
> +
> +    /* Initialize the DPCs. */
> +    KeInitializeDpc( &p_spl_qp_svc->send_dpc, 
> spl_qp_send_dpc_cb, p_spl_qp_svc );
> +    KeInitializeDpc( &p_spl_qp_svc->recv_dpc, spl_qp_recv_dpc_cb, 
> + p_spl_qp_svc );
> +
> +    if( qp_type == IB_QPT_QP0 )
> +    {
> +        KeSetImportanceDpc( &p_spl_qp_svc->send_dpc, 
> HighImportance );
> +        KeSetImportanceDpc( &p_spl_qp_svc->recv_dpc, 
> HighImportance );
> +    }
> +
>  #if defined( CL_USE_MUTEX )
>         /* Initialize async callbacks and flags for 
> send/receive processing. */
>         p_spl_qp_svc->send_async_queued = FALSE; @@ -2461,6 
> +2487,8 @@ spl_qp_send_comp_cb(
> 
>         AL_ENTER( AL_DBG_SMI );
> 
> +       UNREFERENCED_PARAMETER( h_cq );
> +
>         CL_ASSERT( cq_context );
>         p_spl_qp_svc = cq_context;
> 
> @@ -2477,21 +2505,55 @@ spl_qp_send_comp_cb(
>         cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> 
>  #else
> +    cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
> +       if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
> +       {
> +               cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> +        AL_EXIT( AL_DBG_SMI );
> +               return;
> +       }
> +       cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
> +       cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> 
> -       /* Invoke the callback directly. */
> +    /* Queue the DPC. */
>         CL_ASSERT( h_cq == p_spl_qp_svc->h_send_cq );
> -       spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_SEND );
> +    KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL ); #endif
> +
> +       AL_EXIT( AL_DBG_SMI );
> +}
> +
> +
> +void
> +spl_qp_send_dpc_cb(
> +    IN              KDPC                        *p_dpc,
> +    IN              void                        *context,
> +    IN              void                        *arg1,
> +    IN              void                        *arg2
> +    )
> +{
> +       spl_qp_svc_t*                   p_spl_qp_svc;
> +
> +       AL_ENTER( AL_DBG_SMI );
> +
> +       CL_ASSERT( context );
> +       p_spl_qp_svc = context;
> +
> +    UNREFERENCED_PARAMETER( p_dpc );
> +    UNREFERENCED_PARAMETER( arg1 );
> +    UNREFERENCED_PARAMETER( arg2 );
> +
> +       spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_send_cq, 
> IB_WC_SEND 
> + );
> 
>         /* Continue processing any queued MADs on the QP. */
>         special_qp_resume_sends( p_spl_qp_svc->h_qp );
> 
> -#endif
> +    cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> 
> -       AL_EXIT( AL_DBG_SMI );
> +    AL_EXIT( AL_DBG_SMI );
>  }
> 
> 
> -
>  #if defined( CL_USE_MUTEX )
>  void
>  spl_qp_send_async_cb(
> @@ -2536,6 +2598,8 @@ spl_qp_recv_comp_cb(
> 
>         AL_ENTER( AL_DBG_SMI );
> 
> +       UNREFERENCED_PARAMETER( h_cq );
> +
>         CL_ASSERT( cq_context );
>         p_spl_qp_svc = cq_context;
> 
> @@ -2552,16 +2616,51 @@ spl_qp_recv_comp_cb(
>         cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> 
>  #else
> +    cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
> +       if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
> +       {
> +               cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> +        AL_EXIT( AL_DBG_SMI );
> +               return;
> +       }
> +       cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
> +       cl_spinlock_release( &p_spl_qp_svc->obj.lock );
> 
> +    /* Queue the DPC. */
>         CL_ASSERT( h_cq == p_spl_qp_svc->h_recv_cq );
> -       spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_RECV );
> -
> +    KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
>  #endif
> 
>         AL_EXIT( AL_DBG_SMI );
>  }
> 
> 
> +void
> +spl_qp_recv_dpc_cb(
> +    IN              KDPC                        *p_dpc,
> +    IN              void                        *context,
> +    IN              void                        *arg1,
> +    IN              void                        *arg2
> +    )
> +{
> +       spl_qp_svc_t*                   p_spl_qp_svc;
> +
> +       AL_ENTER( AL_DBG_SMI );
> +
> +       CL_ASSERT( context );
> +       p_spl_qp_svc = context;
> +
> +    UNREFERENCED_PARAMETER( p_dpc );
> +    UNREFERENCED_PARAMETER( arg1 );
> +    UNREFERENCED_PARAMETER( arg2 );
> +
> +       spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_recv_cq, 
> IB_WC_RECV 
> + );
> +
> +    cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> +
> +    AL_EXIT( AL_DBG_SMI );
> +}
> +
> 
>  #if defined( CL_USE_MUTEX )
>  void
> @@ -2589,7 +2688,7 @@ spl_qp_recv_async_cb(  #endif
> 
> 
> -
> +#define SPL_QP_MAX_POLL 16
>  /*
>   * Special QP completion handler.
>   */
> @@ -2607,6 +2706,7 @@ spl_qp_comp(
>         ib_mad_element_t*               p_mad_element;
>         ib_smp_t*                               p_smp;
>         ib_api_status_t                 status;
> +    int                     max_poll = SPL_QP_MAX_POLL;
> 
>         AL_ENTER( AL_DBG_SMI_CB );
> 
> @@ -2625,7 +2725,7 @@ spl_qp_comp(
> 
>         wc.p_next = NULL;
>         /* Process work completions. */
> -       while( ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == 
> IB_SUCCESS )
> +       while( max_poll && ib_poll_cq( h_cq, &p_free_wc, 
> &p_done_wc ) == 
> + IB_SUCCESS )
>         {
>                 /* Process completions one at a time. */
>                 CL_ASSERT( p_done_wc );
> @@ -2713,13 +2813,25 @@ spl_qp_comp(
>                         spl_qp_svc_reset( p_spl_qp_svc );
>                 }
>                 p_free_wc = &wc;
> +        --max_poll;
>         }
> 
> -       /* Rearm the CQ. */
> -       status = ib_rearm_cq( h_cq, FALSE );
> -       CL_ASSERT( status == IB_SUCCESS );
> +    if( max_poll == 0 )
> +    {
> +        /* We already have an in_use_cnt reference - use it 
> to queue the DPC. */
> +        if( wc_type == IB_WC_SEND )
> +            KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
> +        else
> +            KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
> +    }
> +    else
> +    {
> +           /* Rearm the CQ. */
> +           status = ib_rearm_cq( h_cq, FALSE );
> +           CL_ASSERT( status == IB_SUCCESS );
> 
> -       cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> +           cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
> +    }
>         AL_EXIT( AL_DBG_SMI_CB );
>  }
> 
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: 
> old\core\al\kernel\al_smi.h trunk\core\al\kernel\al_smi.h
> --- old\core\al\kernel\al_smi.h Fri Jul 11 16:24:31 2008
> +++ trunk\core\al\kernel\al_smi.h       Fri Jul 11 16:07:29 2008
> @@ -157,6 +157,9 @@ typedef struct _spl_qp_svc
>         ib_pool_key_t                           pool_key;
>         ib_mad_svc_handle_t                     h_mad_svc;
> 
> +    KDPC                        send_dpc;
> +    KDPC                        recv_dpc;
> +
>  }      spl_qp_svc_t;
> 
> 
> 



More information about the ofw mailing list