[ofw] [PATCH] Limit time spent at DISPATCH_LEVEL when processing MADs
Fab Tillier
ftillier at windows.microsoft.com
Fri Jul 11 17:46:36 PDT 2008
In working with some switch SMs, I found that under stress, QP0 MAD processing could be delayed by continual QP1 MAD processing (from hitting the SA for path queries). This caused the SMs to remove the node from the fabric (multicast group membership, etc.) because it appeared unreachable.
This patch adds a DPC for QP0 and QP1 to the SMI, and limits the SMI to processing 16 MADs at a time before re-queueing the DPC for further processing.
Because QP0 processing is crucial to proper fabric operation, the QP0 DPC is set to high importance so that it is always queued at the front of the DPC queue, pre-empting any QP1 processing (among other things).
In any case, with this patch all my changes have now been mailed to the list.
Signed-off-by: Fab Tillier <ftillier at microsoft.com>
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\core\al\kernel\al_smi.c trunk\core\al\kernel\al_smi.c
--- old\core\al\kernel\al_smi.c Fri Jul 11 16:24:31 2008
+++ trunk\core\al\kernel\al_smi.c Fri Jul 11 16:07:29 2008
@@ -141,6 +141,22 @@ spl_qp_send_comp_cb(
IN void *cq_context );
void
+spl_qp_send_dpc_cb(
+ IN KDPC *p_dpc,
+ IN void *context,
+ IN void *arg1,
+ IN void *arg2
+ );
+
+void
+spl_qp_recv_dpc_cb(
+ IN KDPC *p_dpc,
+ IN void *context,
+ IN void *arg1,
+ IN void *arg2
+ );
+
+void
spl_qp_recv_comp_cb(
IN const ib_cq_handle_t h_cq,
IN void *cq_context );
@@ -554,7 +570,17 @@ create_spl_qp_svc(
cl_qlist_init( &p_spl_qp_svc->send_queue );
cl_qlist_init( &p_spl_qp_svc->recv_queue );
cl_spinlock_init(&p_spl_qp_svc->cache_lock);
-
+
+ /* Initialize the DPCs. */
+ KeInitializeDpc( &p_spl_qp_svc->send_dpc, spl_qp_send_dpc_cb, p_spl_qp_svc );
+ KeInitializeDpc( &p_spl_qp_svc->recv_dpc, spl_qp_recv_dpc_cb, p_spl_qp_svc );
+
+ if( qp_type == IB_QPT_QP0 )
+ {
+ KeSetImportanceDpc( &p_spl_qp_svc->send_dpc, HighImportance );
+ KeSetImportanceDpc( &p_spl_qp_svc->recv_dpc, HighImportance );
+ }
+
#if defined( CL_USE_MUTEX )
/* Initialize async callbacks and flags for send/receive processing. */
p_spl_qp_svc->send_async_queued = FALSE;
@@ -2461,6 +2487,8 @@ spl_qp_send_comp_cb(
AL_ENTER( AL_DBG_SMI );
+ UNREFERENCED_PARAMETER( h_cq );
+
CL_ASSERT( cq_context );
p_spl_qp_svc = cq_context;
@@ -2477,21 +2505,55 @@ spl_qp_send_comp_cb(
cl_spinlock_release( &p_spl_qp_svc->obj.lock );
#else
+ cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
+ if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
+ {
+ cl_spinlock_release( &p_spl_qp_svc->obj.lock );
+ AL_EXIT( AL_DBG_SMI );
+ return;
+ }
+ cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
+ cl_spinlock_release( &p_spl_qp_svc->obj.lock );
- /* Invoke the callback directly. */
+ /* Queue the DPC. */
CL_ASSERT( h_cq == p_spl_qp_svc->h_send_cq );
- spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_SEND );
+ KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
+#endif
+
+ AL_EXIT( AL_DBG_SMI );
+}
+
+
+void
+spl_qp_send_dpc_cb(
+ IN KDPC *p_dpc,
+ IN void *context,
+ IN void *arg1,
+ IN void *arg2
+ )
+{
+ spl_qp_svc_t* p_spl_qp_svc;
+
+ AL_ENTER( AL_DBG_SMI );
+
+ CL_ASSERT( context );
+ p_spl_qp_svc = context;
+
+ UNREFERENCED_PARAMETER( p_dpc );
+ UNREFERENCED_PARAMETER( arg1 );
+ UNREFERENCED_PARAMETER( arg2 );
+
+ spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_send_cq, IB_WC_SEND );
/* Continue processing any queued MADs on the QP. */
special_qp_resume_sends( p_spl_qp_svc->h_qp );
-#endif
+ cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
- AL_EXIT( AL_DBG_SMI );
+ AL_EXIT( AL_DBG_SMI );
}
-
#if defined( CL_USE_MUTEX )
void
spl_qp_send_async_cb(
@@ -2536,6 +2598,8 @@ spl_qp_recv_comp_cb(
AL_ENTER( AL_DBG_SMI );
+ UNREFERENCED_PARAMETER( h_cq );
+
CL_ASSERT( cq_context );
p_spl_qp_svc = cq_context;
@@ -2552,16 +2616,51 @@ spl_qp_recv_comp_cb(
cl_spinlock_release( &p_spl_qp_svc->obj.lock );
#else
+ cl_spinlock_acquire( &p_spl_qp_svc->obj.lock );
+ if( p_spl_qp_svc->state != SPL_QP_ACTIVE )
+ {
+ cl_spinlock_release( &p_spl_qp_svc->obj.lock );
+ AL_EXIT( AL_DBG_SMI );
+ return;
+ }
+ cl_atomic_inc( &p_spl_qp_svc->in_use_cnt );
+ cl_spinlock_release( &p_spl_qp_svc->obj.lock );
+ /* Queue the DPC. */
CL_ASSERT( h_cq == p_spl_qp_svc->h_recv_cq );
- spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_RECV );
-
+ KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
#endif
AL_EXIT( AL_DBG_SMI );
}
+void
+spl_qp_recv_dpc_cb(
+ IN KDPC *p_dpc,
+ IN void *context,
+ IN void *arg1,
+ IN void *arg2
+ )
+{
+ spl_qp_svc_t* p_spl_qp_svc;
+
+ AL_ENTER( AL_DBG_SMI );
+
+ CL_ASSERT( context );
+ p_spl_qp_svc = context;
+
+ UNREFERENCED_PARAMETER( p_dpc );
+ UNREFERENCED_PARAMETER( arg1 );
+ UNREFERENCED_PARAMETER( arg2 );
+
+ spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_recv_cq, IB_WC_RECV );
+
+ cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+
+ AL_EXIT( AL_DBG_SMI );
+}
+
#if defined( CL_USE_MUTEX )
void
@@ -2589,7 +2688,7 @@ spl_qp_recv_async_cb(
#endif
-
+#define SPL_QP_MAX_POLL 16
/*
* Special QP completion handler.
*/
@@ -2607,6 +2706,7 @@ spl_qp_comp(
ib_mad_element_t* p_mad_element;
ib_smp_t* p_smp;
ib_api_status_t status;
+ int max_poll = SPL_QP_MAX_POLL;
AL_ENTER( AL_DBG_SMI_CB );
@@ -2625,7 +2725,7 @@ spl_qp_comp(
wc.p_next = NULL;
/* Process work completions. */
- while( ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
+ while( max_poll && ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS )
{
/* Process completions one at a time. */
CL_ASSERT( p_done_wc );
@@ -2713,13 +2813,25 @@ spl_qp_comp(
spl_qp_svc_reset( p_spl_qp_svc );
}
p_free_wc = &wc;
+ --max_poll;
}
- /* Rearm the CQ. */
- status = ib_rearm_cq( h_cq, FALSE );
- CL_ASSERT( status == IB_SUCCESS );
+ if( max_poll == 0 )
+ {
+ /* We already have an in_use_cnt reference - use it to queue the DPC. */
+ if( wc_type == IB_WC_SEND )
+ KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL );
+ else
+ KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL );
+ }
+ else
+ {
+ /* Rearm the CQ. */
+ status = ib_rearm_cq( h_cq, FALSE );
+ CL_ASSERT( status == IB_SUCCESS );
- cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+ cl_atomic_dec( &p_spl_qp_svc->in_use_cnt );
+ }
AL_EXIT( AL_DBG_SMI_CB );
}
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\core\al\kernel\al_smi.h trunk\core\al\kernel\al_smi.h
--- old\core\al\kernel\al_smi.h Fri Jul 11 16:24:31 2008
+++ trunk\core\al\kernel\al_smi.h Fri Jul 11 16:07:29 2008
@@ -157,6 +157,9 @@ typedef struct _spl_qp_svc
ib_pool_key_t pool_key;
ib_mad_svc_handle_t h_mad_svc;
+ KDPC send_dpc;
+ KDPC recv_dpc;
+
} spl_qp_svc_t;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smi_dpc.patch
Type: application/octet-stream
Size: 6957 bytes
Desc: smi_dpc.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080711/c880c54f/attachment.obj>
More information about the ofw
mailing list