[ofw] patch: Fix a race in the cl_timer code that caused deadlocks in opensm

Hefty, Sean sean.hefty at intel.com
Tue Jun 22 12:43:43 PDT 2010


> My patch fixes this as it makes the timer thread safe.

I think it's a step in the right directly, but the thread_id still isn't thread safe, making its usage racy.  Because a second timer_callback may be ready to run, it's possible for the user to call cl_timer_stop from the timer callback, return from that call, then immediately have their timer callback function invoked again.

This is my latest patch to the kernel based on what you did for user space.  I think we'll want a similar change to user space, including serializing the callback and changing how cl_timer_trim is implemented to avoid locking/unlocking twice.  We're in the process of testing this kernel change, along with something similar to user space. 

---
trunk/core/complib/kernel/cl_timer.c    |   36 ++++++++++++++++++++++---------
 trunk/inc/kernel/complib/cl_timer_osd.h |    4 +++
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/trunk/core/complib/kernel/cl_timer.c b/trunk/core/complib/kernel/cl_timer.c
index 34cae8e..ecee4a6 100644
--- a/trunk/core/complib/kernel/cl_timer.c
+++ b/trunk/core/complib/kernel/cl_timer.c
@@ -45,9 +45,13 @@ __timer_callback(
 	UNUSED_PARAM( arg1 );
 	UNUSED_PARAM( arg2 );
 
+	cl_spinlock_acquire( &p_timer->spinlock );
 	p_timer->timeout_time = 0;
+	cl_spinlock_release( &p_timer->spinlock );
 
+	KeAcquireGuardedMutex( &p_timer->cb_lock );
 	(p_timer->pfn_callback)( (void*)p_timer->context );
+	KeReleaseGuardedMutex( &p_timer->cb_lock );
 }
 
 
@@ -75,6 +79,8 @@ cl_timer_init(
 
 	KeInitializeTimer( &p_timer->timer );
 	KeInitializeDpc( &p_timer->dpc, __timer_callback, p_timer );
+	cl_spinlock_init( &p_timer->spinlock );
+	KeInitializeGuardedMutex( &p_timer->cb_lock );
 
 	return( CL_SUCCESS );
 }
@@ -108,6 +114,7 @@ cl_timer_start(
 	IN	const uint32_t		time_ms )
 {
 	LARGE_INTEGER	due_time;
+	uint64_t		timeout_time;
 
 	CL_ASSERT( p_timer );
 	CL_ASSERT( p_timer->pfn_callback );
@@ -115,11 +122,13 @@ cl_timer_start(
 
 	/* Due time is in 100 ns increments.  Negative for relative time. */
 	due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);
+	timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
 
-	/* Store the timeout time in the timer object. */
-	p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
-
+	cl_spinlock_acquire( &p_timer->spinlock );
+	p_timer->timeout_time = timeout_time;
 	KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
+	cl_spinlock_release( &p_timer->spinlock );
+
 	return( CL_SUCCESS );
 }
 
@@ -129,19 +138,25 @@ cl_timer_trim(
 	IN	cl_timer_t* const	p_timer,
 	IN	const uint32_t		time_ms )
 {
+	LARGE_INTEGER	due_time;
 	uint64_t		timeout_time;
 
 	CL_ASSERT( p_timer );
 	CL_ASSERT( p_timer->pfn_callback );
 
-	/* Calculate the timeout time in the timer object. */
+	/* Due time is in 100 ns increments.  Negative for relative time. */
+	due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);
 	timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
 
 	/* Only pull in the timeout time. */
-	if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
-		return( CL_SUCCESS );
-
-	return cl_timer_start( p_timer, time_ms );
+	cl_spinlock_acquire( &p_timer->spinlock );
+	if( !p_timer->timeout_time || p_timer->timeout_time > timeout_time )
+	{
+		p_timer->timeout_time = timeout_time;
+		KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
+	}
+	cl_spinlock_release( &p_timer->spinlock );
+	return( CL_SUCCESS );
 }
 
 
@@ -154,7 +169,8 @@ cl_timer_stop(
 	CL_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL );
 	
 	/* Cancel the timer.  This also cancels any queued DPCs for the timer. */
-	KeCancelTimer( &p_timer->timer );
-
+	cl_spinlock_acquire( &p_timer->spinlock );
 	p_timer->timeout_time = 0;
+	KeCancelTimer( &p_timer->timer );
+	cl_spinlock_release( &p_timer->spinlock );
 }
diff --git a/trunk/inc/kernel/complib/cl_timer_osd.h b/trunk/inc/kernel/complib/cl_timer_osd.h
index 4cb99b0..6817a90 100644
--- a/trunk/inc/kernel/complib/cl_timer_osd.h
+++ b/trunk/inc/kernel/complib/cl_timer_osd.h
@@ -38,7 +38,7 @@
 
 
 #include "complib/cl_types.h"
-
+#include "complib/cl_spinlock.h"
 
 /* Timer object definition. */
 typedef struct _cl_timer
@@ -48,6 +48,8 @@ typedef struct _cl_timer
 	cl_pfn_timer_callback_t	pfn_callback;
 	const void				*context;
 	uint64_t				timeout_time;
+	cl_spinlock_t			spinlock;
+	KGUARDED_MUTEX			cb_lock;
 
 } cl_timer_t;
 




More information about the ofw mailing list