[ofw] patch: Fix a race in the cl_timer code that caused deadlocks in opensm

Fab Tillier ftillier at microsoft.com
Wed Jun 23 09:32:42 PDT 2010


If the cl_timer_construct code calls cl_spinlock_construct, it will always be safe to call cl_spinlock_destroy from cl_timer_destroy.  Note also that you aren't checking the return code for cl_spinlock_init...

-Fab

From: Smith, Stan [mailto:stan.smith at intel.com]
Sent: Wednesday, June 23, 2010 7:16 AM
To: Tzachi Dar; ofw at lists.openfabrics.org
Cc: Yevgeny Kliteynik; Uri Habusha; Fab Tillier
Subject: RE: [ofw] patch: Fix a race in the cl_timer code that caused deadlocks in opensm

Hello,
  In testing on our 52 node HPC Edition cluster using opensm 3.3.6 (vendor-umad) I found your patch by itself insufficient to resolve the lost MAD problem which occurs when rebooting many compute nodes (DHCP assigned IPoIB IPv4 address) simultaneously.
Your patch by itself did work at 39 nodes, although failed consistently at 52 nodes.
Only after applying your patch plus Sean's kernel cl_timer patch and my own cl_timer serialization patch can 52 nodes reboot simultaneously and reach full IPoIB operational status (no lost MADs, all ports active and all compute nodes get DHCP address assigned correctly).
Will be testing opensm 3.3.6 (vendor-ibal) later today.

BTW, the system installs, uninstalls performs without noticeable delays. Will do some performance comparisons later today.

I suspect the system 'might' get by without the user-mode serialization patch although it's there as the Linux cl_timer callbacks are serialized.
Will perform experiments in the near future.

stan.

PS: Sorry about the diffs as my patch is integrated with your original patch. Important part is the lock acquire before invoking callback() then lock release.

--- a/inc/user/complib/cl_timer_osd.h Wed Jun 23 06:48:25 2010
+++ b/inc/user/complib/cl_timer_osd.h Wed Jun 23 06:48:46 2010
@@ -47,6 +47,8 @@
  const void    *context;
  uint64_t    timeout_time;
  DWORD     thread_id;
+ cl_spinlock_t   spinlock;
+ cl_spinlock_t   cb_serialize;

 } cl_timer_t;

--- b/core/complib/user/cl_timer.c Wed Jun 23 06:50:14 2010
+++ b/core/complib/user/cl_timer.c Tue Jun 22 15:02:02 2010
@@ -33,30 +33,34 @@

 #include "complib/cl_timer.h"

-
 static void CALLBACK
 __timer_callback(
  IN cl_timer_t* const p_timer,
  IN BOOLEAN timer_signalled )
 {
  /* timer_signalled is always TRUE, and has no value. */
+
+ DWORD thread_id = GetCurrentThreadId();
  CL_ASSERT( timer_signalled );
+
  UNUSED_PARAM( timer_signalled );

+ CL_ASSERT(thread_id!=0);
+
  p_timer->timeout_time = 0;
- p_timer->thread_id = GetCurrentThreadId();
+ p_timer->thread_id = thread_id;

+ cl_spinlock_acquire(&p_timer->cb_serialize);
  (p_timer->pfn_callback)( (void*)p_timer->context );
-
- p_timer->thread_id = 0;
+ cl_spinlock_release(&p_timer->cb_serialize);
 }

-
 void
 cl_timer_construct(
  IN cl_timer_t* const p_timer )
 {
  p_timer->h_timer = NULL;
+ p_timer->pfn_callback = NULL;
  p_timer->timeout_time = 0;
  p_timer->thread_id = 0;
 }
@@ -75,8 +79,9 @@
  cl_timer_construct( p_timer );

  p_timer->pfn_callback = pfn_callback;
+ cl_spinlock_init(&p_timer->cb_serialize);
  p_timer->context = context;
- return( CL_SUCCESS );
+ return cl_spinlock_init(&p_timer->spinlock);
 }


@@ -87,6 +92,12 @@
  CL_ASSERT( p_timer );

  cl_timer_stop( p_timer );
+ if ( p_timer->pfn_callback )
+ {
+  /* construct does not set these, _init does */
+  cl_spinlock_destroy( &p_timer->spinlock );
+  cl_spinlock_destroy( &p_timer->cb_serialize );
+ }
 }


@@ -97,6 +108,8 @@
 {
  CL_ASSERT( p_timer );

+ cl_spinlock_acquire(&p_timer->spinlock);
+
  cl_timer_stop( p_timer );

  p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
@@ -104,9 +117,12 @@
  if( !CreateTimerQueueTimer( &p_timer->h_timer, NULL, __timer_callback,
   p_timer, time_ms, 0, WT_EXECUTEINIOTHREAD ) )
  {
+
+  cl_spinlock_release(&p_timer->spinlock);
   return( CL_ERROR );
  }

+ cl_spinlock_release(&p_timer->spinlock);
  return( CL_SUCCESS );
 }

@@ -117,26 +133,34 @@
  IN const uint32_t  time_ms )
 {
  uint64_t  timeout_time;
+ cl_status_t  status;

  CL_ASSERT( p_timer );
  CL_ASSERT( p_timer->pfn_callback );

+ cl_spinlock_acquire(&p_timer->spinlock);
+
  /* Calculate the timeout time in the timer object. */
  timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);

  /* Only pull in the timeout time. */
- if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
+ if( p_timer->timeout_time && p_timer->timeout_time < timeout_time ) {
+  cl_spinlock_release(&p_timer->spinlock);
   return( CL_SUCCESS );
+ }

- return cl_timer_start( p_timer, time_ms );
+ status = cl_timer_start( p_timer, time_ms );
+
+ cl_spinlock_release(&p_timer->spinlock);
+ return status;
 }

-
 void
 cl_timer_stop(
  IN cl_timer_t* const p_timer )
 {
  CL_ASSERT( p_timer );
+ cl_spinlock_acquire(&p_timer->spinlock);

  if( p_timer->h_timer && p_timer->thread_id != GetCurrentThreadId() )
  {
@@ -145,6 +169,8 @@
   p_timer->h_timer = NULL;
  }
  p_timer->timeout_time = 0;
+
+ cl_spinlock_release(&p_timer->spinlock);
 }



________________________________
From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Tzachi Dar
Sent: Tuesday, June 22, 2010 12:51 AM
To: ofw at lists.openfabrics.org
Cc: Yevgeny Kliteynik
Subject: [ofw] patch: Fix a race in the cl_timer code that caused deadlocks in opensm
While debugging a deadlock in the opensm executable, we have found that there is a problem in the cl_timer code:

The problem starts from the fact that there can be two call backs that are running simultaneously.
On each call back that runs there is the following code:

static void CALLBACK
__timer_callback(
                IN cl_timer_t* const p_timer,
                IN BOOLEAN timer_signalled )
{
                /* timer_signalled is always TRUE, and has no value. */
                CL_ASSERT( timer_signalled );
                UNUSED_PARAM( timer_signalled );

                p_timer->timeout_time = 0;
                p_timer->thread_id = GetCurrentThreadId();

                (p_timer->pfn_callback)( (void*)p_timer->context );

                p_timer->thread_id = 0;
}

This should promise that if a callback is running, p_timer->thread_id  will  have the thread_id of the running thread.
This field is later used when cl_timer_stop() is being called in order to prevent deadlocks. (please note that cl_timer_stop() is being called from cl_timer_start(). Please also note that the deadlock happens if the caller of cl_stop() is actually executing in the context of the call back itself).

The problem happens when two callbacks will be running and than the p_timer->thread_id doesn't show the correct field.

The fix itself is to sync the start and stop calls and now only one callback will be running (actually, there is one exception to this code: if from the callback, a new timer is started, the new callback might be running on the same time, but: (1) stop has already being called. (2) the p_timer->thread_id is not being touched after the callback.

Thanks
Tzachi




Index: B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/core/complib/user/cl_timer.c
===================================================================
--- B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/core/complib/user/cl_timer.c (revision 5977)
+++ B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/core/complib/user/cl_timer.c              (revision 5978)
@@ -33,25 +33,27 @@

 #include "complib/cl_timer.h"

-
 static void CALLBACK
 __timer_callback(
                IN cl_timer_t* const p_timer,
                IN BOOLEAN timer_signalled )
 {
                /* timer_signalled is always TRUE, and has no value. */
+
+             DWORD thread_id = GetCurrentThreadId();
                CL_ASSERT( timer_signalled );
+
                UNUSED_PARAM( timer_signalled );

+             CL_ASSERT(thread_id!=0);
+
                p_timer->timeout_time = 0;
-              p_timer->thread_id = GetCurrentThreadId();
+             p_timer->thread_id = thread_id;

                (p_timer->pfn_callback)( (void*)p_timer->context );

-              p_timer->thread_id = 0;
 }

-
 void
 cl_timer_construct(
                IN           cl_timer_t* const            p_timer )
@@ -76,7 +78,8 @@

                p_timer->pfn_callback = pfn_callback;
                p_timer->context = context;
-              return( CL_SUCCESS );
+             return cl_spinlock_init(&p_timer->spinlock);
+
 }


@@ -97,6 +100,8 @@
 {
                CL_ASSERT( p_timer );

+             cl_spinlock_acquire(&p_timer->spinlock);
+
                cl_timer_stop( p_timer );

                p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
@@ -104,9 +109,12 @@
                if( !CreateTimerQueueTimer( &p_timer->h_timer, NULL, __timer_callback,
                                p_timer, time_ms, 0, WT_EXECUTEINIOTHREAD ) )
                {
+
+                             cl_spinlock_release(&p_timer->spinlock);
                                return( CL_ERROR );
                }

+             cl_spinlock_release(&p_timer->spinlock);
                return( CL_SUCCESS );
 }

@@ -117,26 +125,34 @@
                IN           const uint32_t                   time_ms )
 {
                uint64_t                               timeout_time;
+             cl_status_t                          status;

                CL_ASSERT( p_timer );
                CL_ASSERT( p_timer->pfn_callback );

+             cl_spinlock_acquire(&p_timer->spinlock);
+
                /* Calculate the timeout time in the timer object. */
                timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);

                /* Only pull in the timeout time. */
-              if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
+             if( p_timer->timeout_time && p_timer->timeout_time < timeout_time ) {
+                             cl_spinlock_release(&p_timer->spinlock);
                                return( CL_SUCCESS );
+             }

-              return cl_timer_start( p_timer, time_ms );
+             status = cl_timer_start( p_timer, time_ms );
+
+             cl_spinlock_release(&p_timer->spinlock);
+             return status;
 }

-
 void
 cl_timer_stop(
                IN           cl_timer_t* const            p_timer )
 {
                CL_ASSERT( p_timer );
+             cl_spinlock_acquire(&p_timer->spinlock);

                if( p_timer->h_timer && p_timer->thread_id != GetCurrentThreadId() )
                {
@@ -145,6 +161,8 @@
                                p_timer->h_timer = NULL;
                }
                p_timer->timeout_time = 0;
+
+             cl_spinlock_release(&p_timer->spinlock);
 }


Index: B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/complib/cl_timer.h
===================================================================
--- B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/complib/cl_timer.h               (revision 5977)
+++ B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/complib/cl_timer.h            (revision 5978)
@@ -45,8 +45,8 @@


 #include <complib/cl_types.h>
+#include <complib/cl_spinlock.h>

-
 /****h* Component Library/Timer
 * NAME
 *            Timer
Index: B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/user/complib/cl_timer_osd.h
===================================================================
--- B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/user/complib/cl_timer_osd.h          (revision 5977)
+++ B:/users/tzachid/MLNX_WinOF-2_1_1/MLNX_WinOF-2_1_1/inc/user/complib/cl_timer_osd.h       (revision 5978)
@@ -47,8 +47,9 @@
                const void                                                           *context;
                uint64_t                                                               timeout_time;
                DWORD                                                                                thread_id;
+             cl_spinlock_t                                      spinlock;

 } cl_timer_t;


-#endif // _CL_TIMER_OSD_H_
\ No newline at end of file
+#endif                // _CL_TIMER_OSD_H_
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20100623/457b2758/attachment.html>


More information about the ofw mailing list