[ofw] [PATCH] Limit time spent at DISPATCH_LEVEL when processing MADs

Fab Tillier ftillier at windows.microsoft.com
Fri Jul 11 17:46:36 PDT 2008


In working with some switch SMs, I found that under stress, QP0 MAD processing could be delayed indefinitely by QP1 MAD processing (from hitting the SA for path queries).  This caused the SMs to remove the node from the fabric (multicast group membership, etc.) because it appeared unreachable.

This patch adds send and receive DPCs for QP0 and QP1 to the SMI, and limits the SMI to processing 16 MADs per DPC run before re-queueing the DPC.

Because QP0 processing is crucial to proper fabric operation, the QP0 DPCs are set to high importance so that they are always queued at the front of the DPC queue, preempting QP1 processing (among other things).
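
Since the change is spread across several hunks below, here is a condensed sketch of the pattern for reference.  The EXAMPLE_* / Example* names and the two poll/rearm helpers are stand-ins, not the real code: the patch uses spl_qp_svc_t, ib_poll_cq, and ib_rearm_cq, and additionally guards the DPC with the object lock and the in_use_cnt reference count, which the sketch omits.

    /* Sketch only -- condensed from the patch below.  EXAMPLE_SVC and the
     * poll/rearm helpers are stand-ins; the real code also takes an
     * in_use_cnt reference under the object lock before queueing. */
    #include <wdm.h>

    #define MAX_POLL_BUDGET 16          /* MADs processed per DPC run */

    typedef struct _EXAMPLE_SVC
    {
        KDPC    dpc;                    /* one per CQ in the real code */
    } EXAMPLE_SVC;

    BOOLEAN ExamplePollOne( EXAMPLE_SVC *svc );  /* stand-in: poll one CQE */
    VOID    ExampleRearmCq( EXAMPLE_SVC *svc );  /* stand-in: rearm the CQ */

    /* CQ completion callback: defer all MAD processing to the DPC. */
    VOID
    ExampleCompCb( EXAMPLE_SVC *svc )
    {
        KeInsertQueueDpc( &svc->dpc, NULL, NULL );
    }

    /* KDEFERRED_ROUTINE: drain at most MAX_POLL_BUDGET completions at
     * DISPATCH_LEVEL, then either yield by re-queueing ourselves or
     * rearm the CQ and wait for the next interrupt. */
    VOID
    ExampleDpcCb( KDPC *p_dpc, void *context, void *arg1, void *arg2 )
    {
        EXAMPLE_SVC *svc = context;
        int         budget = MAX_POLL_BUDGET;

        UNREFERENCED_PARAMETER( p_dpc );
        UNREFERENCED_PARAMETER( arg1 );
        UNREFERENCED_PARAMETER( arg2 );

        while( budget && ExamplePollOne( svc ) )
            budget--;

        if( budget == 0 )
            KeInsertQueueDpc( &svc->dpc, NULL, NULL ); /* budget spent: yield */
        else
            ExampleRearmCq( svc );      /* CQ drained: rearm for interrupts */
    }

    VOID
    ExampleInit( EXAMPLE_SVC *svc, BOOLEAN is_qp0 )
    {
        KeInitializeDpc( &svc->dpc, ExampleDpcCb, svc );
        if( is_qp0 )                    /* QP0 jumps the DPC queue */
            KeSetImportanceDpc( &svc->dpc, HighImportance );
    }

Note that when the budget is spent, the CQ is deliberately left unarmed; the re-queued DPC polls again, and only the pass that actually drains the CQ rearms it.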

In any case, with this patch all my changes have now been mailed to the list.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\core\al\kernel\al_smi.c trunk\core\al\kernel\al_smi.c
--- old\core\al\kernel\al_smi.c Fri Jul 11 16:24:31 2008
+++ trunk\core\al\kernel\al_smi.c       Fri Jul 11 16:07:29 2008
@@ -141,6 +141,22 @@ spl_qp_send_comp_cb(
        IN                              void                                            *cq_context );

 void
+spl_qp_send_dpc_cb(
+    IN              KDPC                        *p_dpc,
+    IN              void                        *context,
+    IN              void                        *arg1,
+    IN              void                        *arg2
+    );
+
+void
+spl_qp_recv_dpc_cb(
+    IN              KDPC                        *p_dpc,
+    IN              void                        *context,
+    IN              void                        *arg1,
+    IN              void                        *arg2
+    );
+
+void
 spl_qp_recv_comp_cb(
        IN              const   ib_cq_handle_t                          h_cq,
        IN                              void                                            *cq_context );
@@ -554,7 +570,17 @@ create_spl_qp_svc(
        cl_qlist_init( &p_spl_qp_svc->send_queue );
        cl_qlist_init( &p_spl_qp_svc->recv_queue );
        cl_spinlock_init(&p_spl_qp_svc->cache_lock);
-
+
+    /* Initialize the DPCs. */
+    KeInitializeDpc( &p_spl_qp_svc->send_dpc, spl_qp_send_dpc_cb, p_spl_qp_svc );
+    KeInitializeDpc( &p_spl_qp_svc->recv_dpc, spl_qp_recv_dpc_cb, p_spl_qp_svc );
+
+    if( qp_type == IB_QPT_QP0 )
+    {
+        KeSetImportanceDpc( &p_spl_qp_svc->send_dpc, HighImportance );
+        KeSetImportanceDpc( &p_spl_qp_svc->recv_dpc, HighImportance );
+    }
+
 #if defined( CL_USE_MUTEX )
        /* Initialize async callbacks and flags for send/receive processing. */
        p_spl_qp_svc->send_async_queued = FALSE;
@@ -2461,6 +2487,8 @@ spl_qp_send_comp_cb(

        AL_ENTER( AL_DBG_SMI );

+       UNREFERENCED_PARAMETER( h_cq );
+
        CL_ASSERT( cq_context );
        p_spl_qp_svc = cq_context;

@@ -2477,21 +2505,55 @@ spl_qp_send_comp_cb(
        cl_spinlock_release( &p_spl_qp_svc->obj.lock );

 #else
+    cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
+       if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
+       {
+               cl_spinlock_release( &p_spl_qp_svc->obj.lock );
+        AL_EXIT( AL_DBG_SMI );
+               return;
+       }
+       cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
+       cl_spinlock_release( &p_spl_qp_svc->obj.lock );

-       /* Invoke the callback directly. */
+    /* Queue the DPC. */
        CL_ASSERT( h_cq == p_spl_qp_svc->h_send_cq );
-       spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_SEND );
+    KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
+#endif
+
+       AL_EXIT( AL_DBG_SMI );
+}
+
+
+void
+spl_qp_send_dpc_cb(
+    IN              KDPC                        *p_dpc,
+    IN              void                        *context,
+    IN              void                        *arg1,
+    IN              void                        *arg2
+    )
+{
+       spl_qp_svc_t*                   p_spl_qp_svc;
+
+       AL_ENTER( AL_DBG_SMI );
+
+       CL_ASSERT( context );
+       p_spl_qp_svc = context;
+
+    UNREFERENCED_PARAMETER( p_dpc );
+    UNREFERENCED_PARAMETER( arg1 );
+    UNREFERENCED_PARAMETER( arg2 );
+
+       spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_send_cq, IB_WC_SEND );

        /* Continue processing any queued MADs on the QP. */
        special_qp_resume_sends( p_spl_qp_svc->h_qp );

-#endif
+    cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );

-       AL_EXIT( AL_DBG_SMI );
+    AL_EXIT( AL_DBG_SMI );
 }


-
 #if defined( CL_USE_MUTEX )
 void
 spl_qp_send_async_cb(
@@ -2536,6 +2598,8 @@ spl_qp_recv_comp_cb(

        AL_ENTER( AL_DBG_SMI );

+       UNREFERENCED_PARAMETER( h_cq );
+
        CL_ASSERT( cq_context );
        p_spl_qp_svc = cq_context;

@@ -2552,16 +2616,51 @@ spl_qp_recv_comp_cb(
        cl_spinlock_release( &p_spl_qp_svc->obj.lock );

 #else
+    cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
+       if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
+       {
+               cl_spinlock_release( &p_spl_qp_svc->obj.lock );
+        AL_EXIT( AL_DBG_SMI );
+               return;
+       }
+       cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
+       cl_spinlock_release( &p_spl_qp_svc->obj.lock );

+    /* Queue the DPC. */
        CL_ASSERT( h_cq == p_spl_qp_svc->h_recv_cq );
-       spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_RECV );
-
+    KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
 #endif

        AL_EXIT( AL_DBG_SMI );
 }


+void
+spl_qp_recv_dpc_cb(
+    IN              KDPC                        *p_dpc,
+    IN              void                        *context,
+    IN              void                        *arg1,
+    IN              void                        *arg2
+    )
+{
+       spl_qp_svc_t*                   p_spl_qp_svc;
+
+       AL_ENTER( AL_DBG_SMI );
+
+       CL_ASSERT( context );
+       p_spl_qp_svc = context;
+
+    UNREFERENCED_PARAMETER( p_dpc );
+    UNREFERENCED_PARAMETER( arg1 );
+    UNREFERENCED_PARAMETER( arg2 );
+
+       spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_recv_cq, IB_WC_RECV );
+
+    cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+
+    AL_EXIT( AL_DBG_SMI );
+}
+

 #if defined( CL_USE_MUTEX )
 void
@@ -2589,7 +2688,7 @@ spl_qp_recv_async_cb(
 #endif


-
+#define SPL_QP_MAX_POLL 16
 /*
  * Special QP completion handler.
  */
@@ -2607,6 +2706,7 @@ spl_qp_comp(
        ib_mad_element_t*               p_mad_element;
        ib_smp_t*                               p_smp;
        ib_api_status_t                 status;
+    int                     max_poll = SPL_QP_MAX_POLL;

        AL_ENTER( AL_DBG_SMI_CB );

@@ -2625,7 +2725,7 @@ spl_qp_comp(

        wc.p_next = NULL;
        /* Process work completions. */
-       while( ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
+       while( max_poll && ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
        {
                /* Process completions one at a time. */
                CL_ASSERT( p_done_wc );
@@ -2713,13 +2813,25 @@ spl_qp_comp(
                        spl_qp_svc_reset( p_spl_qp_svc );
                }
                p_free_wc = &wc;
+        --max_poll;
        }

-       /* Rearm the CQ. */
-       status = ib_rearm_cq( h_cq, FALSE );
-       CL_ASSERT( status == IB_SUCCESS );
+    if( max_poll == 0 )
+    {
+        /* We already have an in_use_cnt reference - use it to queue the DPC. */
+        if( wc_type == IB_WC_SEND )
+            KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
+        else
+            KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
+    }
+    else
+    {
+           /* Rearm the CQ. */
+           status = ib_rearm_cq( h_cq, FALSE );
+           CL_ASSERT( status == IB_SUCCESS );

-       cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+           cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+    }
        AL_EXIT( AL_DBG_SMI_CB );
 }

diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\core\al\kernel\al_smi.h trunk\core\al\kernel\al_smi.h
--- old\core\al\kernel\al_smi.h Fri Jul 11 16:24:31 2008
+++ trunk\core\al\kernel\al_smi.h       Fri Jul 11 16:07:29 2008
@@ -157,6 +157,9 @@ typedef struct _spl_qp_svc
        ib_pool_key_t                           pool_key;
        ib_mad_svc_handle_t                     h_mad_svc;

+    KDPC                        send_dpc;
+    KDPC                        recv_dpc;
+
 }      spl_qp_svc_t;

