[ofw] patch: Fix a race in the cl_timer code that caused deadlocks in opensm

Wed Jun 23 12:32:17 PDT 2010

Ok, so HTML format mails suck for responding to patches...

timer.patch:

>Index: a/core/complib/user/cl_timer.c
>===================================================================
>--- a/core/complib/user/cl_timer.c	(revision 5977)
>+++ b/core/complib/user/cl_timer.c	(revision 5978)
>@@ -33,25 +33,27 @@
> 
> #include "complib/cl_timer.h"
> 
>-
> static void CALLBACK
> __timer_callback( 
> 	IN cl_timer_t* const p_timer,
> 	IN BOOLEAN timer_signalled )
> {
> 	/* timer_signalled is always TRUE, and has no value. */
>+	
>+	DWORD thread_id = GetCurrentThreadId();
> 	CL_ASSERT( timer_signalled );
>+

Don't need this blank line.

> 	UNUSED_PARAM( timer_signalled );
> 
>+	CL_ASSERT(thread_id!=0);

Move this assert down, reference p_timer->thread_id, and you can eliminate the local variable.

>+
> 	p_timer->timeout_time = 0;
>-	p_timer->thread_id = GetCurrentThreadId();
>+	p_timer->thread_id = thread_id;
> 
> 	(p_timer->pfn_callback)( (void*)p_timer->context );
> 
>-	p_timer->thread_id = 0;
> }
> 
>-
> void
> cl_timer_construct(
> 	IN	cl_timer_t* const	p_timer )

Where's the call to cl_spinlock_construct?

 >Index: a/inc/complib/cl_timer.h
>===================================================================
>--- a/inc/complib/cl_timer.h	(revision 5977)
>+++ b/inc/complib/cl_timer.h	(revision 5978)
>@@ -45,8 +45,8 @@
> 
> 
> #include <complib/cl_types.h>
>+#include <complib/cl_spinlock.h>

This belongs in the cl_timer_osd.h, not here.

cl_timer.c.patch & cl_timer_osd.h should have been a single patch, as they are related.  No need for per-file patches.

>--- a/core/complib/user/cl_timer.c	Wed Jun 23 11:09:38 2010
>+++ b/core/complib/user/cl_timer.c	Wed Jun 23 11:13:04 2010
>@@ -59,6 +60,7 @@
> 	IN	cl_timer_t* const	p_timer )
> {
> 	p_timer->h_timer = NULL;
>+	p_timer->pfn_callback = NULL;
> 	p_timer->timeout_time = 0;
> 	p_timer->thread_id = 0;

Where's the call to cl_spinlock_construct?

> }
>@@ -89,6 +96,12 @@
> 	CL_ASSERT( p_timer );
> 	
> 	cl_timer_stop( p_timer );
>+	if ( p_timer->pfn_callback )

Don't use the callback as a flag.  If you properly construct the spinlocks, you don't need the check.

>+	{
>+		/* construct does not set these, _init does */

construct should construct, init should init.

>+		cl_spinlock_destroy( &p_timer->spinlock );
>+		cl_spinlock_destroy( &p_timer->cb_serialize );
>+	}
> }
> 
>

kernel-timer.patch:

>diff --git a/trunk/core/complib/kernel/cl_timer.c b/trunk/core/complib/kernel/cl_timer.c
>index 34cae8e..ecee4a6 100644
>--- a/trunk/core/complib/kernel/cl_timer.c
>+++ b/trunk/core/complib/kernel/cl_timer.c
>@@ -45,9 +45,13 @@ __timer_callback(
> 	UNUSED_PARAM( arg1 );
> 	UNUSED_PARAM( arg2 );
> 
>+	cl_spinlock_acquire( &p_timer->spinlock );
> 	p_timer->timeout_time = 0;

The only case where you could end up with multiple callbacks executing is if you call cl_timer_start from the callback.  It might be easier to flag that you're in a callback, store the new timeout time, and delay calling KeSetTimer until the callback unwinds.  You could add a flag here to indicate that you're in the timer callback, that you check in cl_timer_start to control whether you call KeSetTimer...

>+	cl_spinlock_release( &p_timer->spinlock );
> 
>+	KeAcquireGuardedMutex( &p_timer->cb_lock );

timer callback is in a DPC, at DISPATCH_LEVEL.  KeAcquireGuardedMutex must be <= APC_LEVEL.

> 	(p_timer->pfn_callback)( (void*)p_timer->context );

Check the timeout here, and if non-zero, call KeSetTimer, then clear the callback flag.  Note that similar logic could be used in the user-mode timer to prevent multiple timer callbacks from running concurrently, while also avoiding extra threads spinning on the callback spinlock.

>+	KeReleaseGuardedMutex( &p_timer->cb_lock );
> }
> 
> 
>@@ -75,6 +79,8 @@ cl_timer_init(
> 
> 	KeInitializeTimer( &p_timer->timer );
> 	KeInitializeDpc( &p_timer->dpc, __timer_callback, p_timer );
>+	cl_spinlock_init( &p_timer->spinlock );

Just use a KSPINLOCK here, no?  Then you can use the in-stack queued spinlocks if the lock might be contentious.

>+	KeInitializeGuardedMutex( &p_timer->cb_lock );
> 
> 	return( CL_SUCCESS );
> }
>@@ -108,6 +114,7 @@ cl_timer_start(
> 	IN	const uint32_t		time_ms )
> {
> 	LARGE_INTEGER	due_time;
>+	uint64_t		timeout_time;
> 
> 	CL_ASSERT( p_timer );
> 	CL_ASSERT( p_timer->pfn_callback );
>@@ -115,11 +122,13 @@ cl_timer_start(
> 
> 	/* Due time is in 100 ns increments.  Negative for relative time. */
> 	due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);
>+	timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
> 
>-	/* Store the timeout time in the timer object. */
>-	p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
>-
>+	cl_spinlock_acquire( &p_timer->spinlock );
>+	p_timer->timeout_time = timeout_time;

Check the 'in callback' flag here to conditionally call KeSetTimer.

> 	KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
>+	cl_spinlock_release( &p_timer->spinlock );
>+
> 	return( CL_SUCCESS );
> }
> 
>@@ -129,19 +138,25 @@ cl_timer_trim(
> 	IN	cl_timer_t* const	p_timer,
> 	IN	const uint32_t		time_ms )
> {
>+	LARGE_INTEGER	due_time;
> 	uint64_t		timeout_time;
> 
> 	CL_ASSERT( p_timer );
> 	CL_ASSERT( p_timer->pfn_callback );
> 
>-	/* Calculate the timeout time in the timer object. */
>+	/* Due time is in 100 ns increments.  Negative for relative time. */
>+	due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);

You can delay calculating this until the check that the new timeout time is before the current timeout time.

> 	timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
> 
> 	/* Only pull in the timeout time. */
>-	if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
>-		return( CL_SUCCESS );
>-
>-	return cl_timer_start( p_timer, time_ms );
>+	cl_spinlock_acquire( &p_timer->spinlock );
>+	if( !p_timer->timeout_time || p_timer->timeout_time > timeout_time )
>+	{
>+		p_timer->timeout_time = timeout_time;

Same here, check the 'in callback' flag and defer calling KeSetTimer if set.

>+		KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
>+	}
>+	cl_spinlock_release( &p_timer->spinlock );
>+	return( CL_SUCCESS );
> }
> 
> 
>diff --git a/trunk/inc/kernel/complib/cl_timer_osd.h b/trunk/inc/kernel/complib/cl_timer_osd.h
>index 4cb99b0..6817a90 100644
>--- a/trunk/inc/kernel/complib/cl_timer_osd.h
>+++ b/trunk/inc/kernel/complib/cl_timer_osd.h
>@@ -38,7 +38,7 @@
> 
> 
> #include "complib/cl_types.h"
>-
>+#include "complib/cl_spinlock.h"
> 
> /* Timer object definition. */
> typedef struct _cl_timer
>@@ -48,6 +48,8 @@ typedef struct _cl_timer
> 	cl_pfn_timer_callback_t	pfn_callback;
> 	const void				*context;
> 	uint64_t				timeout_time;
>+	cl_spinlock_t			spinlock;
>+	KGUARDED_MUTEX			cb_lock;
> 
> } cl_timer_t;
> 
>
>

Smith, Stan wrote on Wed, 23 Jun 2010 at 11:46:39

> From: Smith, Stan [mailto:stan.smith at intel.com]
> Sent: Wednesday, June 23, 2010 11:47 AM
> To: Tzachi Dar; ofw at lists.openfabrics.org
> Cc: Yevgeny Kliteynik; Uri Habusha; Fab Tillier
> Subject: RE: [ofw] patch: Fix a race in the cl_timer code that caused
> deadlocks in opensm
> 
> Separated patch files so cut-n-paste into a single patch file did not
> introduce any problems.
> From svn trunk\
> Apply your timer patch as patch -p1 < timer.patch
> Apply:
>    patch -p1 < cl_timer_osd.h.patch    user-mode serialize
>    patch -p1 < cl_timer.c.patch           user-mode serialize
>    patch -p2 < kernel_timer.patch       sean's kernel patches
>    
>    if using opensm 3.3.6 (vnedor-umad) patch -p1 < umad.cpp.patch
>