[openib-general] [PATCH] Opensm - locking order issue

Yael Kalka yael at mellanox.co.il
Wed Sep 28 00:45:56 PDT 2005


Hi Hal,

During one of our Windows runs we encountered a deadlock in opensm.
This caused us to review the different locks in the opensm code, and
we found 2 places that might cause deadlock.
In the osm_state_mgr_process the order of the locks is:
1. osm_state_mgr_t.state_lock
2. p_lock (same pointer for the different sm managers)

We noticed 2 places where this order wasn't kept, and a deadlock might
occure. This happened where inside the p_lock osm_state_mgr_process
function was called.

Attached is a patch resolving this issue.

Thanks,
Yael

Signed-off-by:  Yael Kalka <yael at mellanox.co.il>

Index: opensm/osm_sminfo_rcv.c
===================================================================
--- opensm/osm_sminfo_rcv.c	(revision 3590)
+++ opensm/osm_sminfo_rcv.c	(working copy)
@@ -425,13 +425,18 @@ __osm_sminfo_rcv_process_set_request(
 
 
 /**********************************************************************
+ * Return a signal with which to call the osm_state_mgr_process.
+ * This is done since we are locked by p_rcv->p_lock in this function, 
+ * and thus cannot call osm_state_mgr_process (that locks the state_lock).
+ * If return OSM_SIGNAL_NONE - do not call osm_state_mgr_process.
  **********************************************************************/
-void
+osm_signal_t
 __osm_sminfo_rcv_process_get_sm(
   IN const osm_sminfo_rcv_t*  const p_rcv,
   IN const osm_remote_sm_t*   const p_sm )
 {
   const ib_sm_info_t*         p_smi;
+  osm_signal_t ret_val = OSM_SIGNAL_NONE;
 
   OSM_LOG_ENTER( p_rcv->p_log, __osm_sminfo_rcv_process_get_sm );
 
@@ -459,8 +464,7 @@ __osm_sminfo_rcv_process_get_sm(
     case IB_SMINFO_STATE_NOTACTIVE:
       break;
     case IB_SMINFO_STATE_MASTER:
-      osm_state_mgr_process( p_rcv->p_state_mgr,
-                             OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED );
+      ret_val = OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED;
       /*  save on the p_sm_state_mgr the guid of the current master. */
       osm_log( p_rcv->p_log, OSM_LOG_VERBOSE,
                "__osm_sminfo_rcv_process_get_sm: "
@@ -473,8 +477,7 @@ __osm_sminfo_rcv_process_get_sm(
       if ( __osm_sminfo_rcv_remote_sm_is_higher(p_rcv, p_smi) == TRUE )
       {
         /*  the remote is a higher sm - need to stop sweeping */
-        osm_state_mgr_process( p_rcv->p_state_mgr,
-                               OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED );
+        ret_val = OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED;
         /*  save on the p_sm_state_mgr the guid of the higher SM we found - */
         /*  we will poll it - as long as it lives - we should be in Standby. */
         osm_log( p_rcv->p_log, OSM_LOG_VERBOSE,
@@ -548,6 +551,7 @@ __osm_sminfo_rcv_process_get_sm(
   }
 
   OSM_LOG_EXIT( p_rcv->p_log );
+  return ret_val;
 }
 
 
@@ -566,6 +570,7 @@ __osm_sminfo_rcv_process_get_response(
   ib_net64_t               port_guid;
   osm_remote_sm_t*         p_sm;
   ib_api_status_t          status;
+  osm_signal_t             process_get_sm_ret_val = OSM_SIGNAL_NONE;
 
   OSM_LOG_ENTER( p_rcv->p_log, __osm_sminfo_rcv_process_get_response );
 
@@ -664,10 +669,16 @@ __osm_sminfo_rcv_process_get_response(
     p_sm->smi = *p_smi;
   }
 
-  __osm_sminfo_rcv_process_get_sm( p_rcv, p_sm );
+  process_get_sm_ret_val = __osm_sminfo_rcv_process_get_sm( p_rcv, p_sm );
 
  Exit:
   CL_PLOCK_RELEASE( p_rcv->p_lock );
+  
+  /* If process_get_sm_ret_val != OSM_SIGNAL_NONE then we have to signal
+   * to the state_mgr with that signal. */
+  if (process_get_sm_ret_val != OSM_SIGNAL_NONE)
+    osm_state_mgr_process( p_rcv->p_state_mgr,
+                           process_get_sm_ret_val );
   OSM_LOG_EXIT( p_rcv->p_log );
 }
 
Index: opensm/osm_node_info_rcv.c
===================================================================
--- opensm/osm_node_info_rcv.c	(revision 3590)
+++ opensm/osm_node_info_rcv.c	(working copy)
@@ -726,8 +726,6 @@ __osm_ni_rcv_process_new(
            cl_ntoh64( p_ni->node_guid ),
            cl_ntoh64( p_smp->trans_id ) );
 
-  osm_state_mgr_process( p_rcv->p_state_mgr, OSM_SIGNAL_CHANGE_DETECTED );
-
   p_node = osm_node_new( p_madw );
   if( p_node == NULL )
   {
@@ -985,6 +983,7 @@ osm_ni_rcv_process(
   ib_node_info_t *p_ni;
   ib_smp_t *p_smp;
   osm_node_t *p_node;
+  boolean_t  process_new_flag = FALSE;
 
   OSM_LOG_ENTER( p_rcv->p_log, osm_ni_rcv_process );
 
@@ -1026,11 +1025,23 @@ osm_ni_rcv_process(
   osm_dump_node_info( p_rcv->p_log, p_ni, OSM_LOG_DEBUG );
 
   if( p_node == (osm_node_t*)cl_qmap_end(p_guid_tbl) )
+  {
     __osm_ni_rcv_process_new( p_rcv, p_madw );
+    process_new_flag = TRUE;
+  }
   else
     __osm_ni_rcv_process_existing( p_rcv, p_node, p_madw );
 
   CL_PLOCK_RELEASE( p_rcv->p_lock );
+  
+  /* 
+   * If we processed a new node - need to signal to the state_mgr that
+   * change detected. BUT - we cannot call the osm_state_mgr_process
+   * from within the lock of p_rcv->p_lock (can cause a deadlock).
+   */
+  if ( process_new_flag )
+    osm_state_mgr_process( p_rcv->p_state_mgr, OSM_SIGNAL_CHANGE_DETECTED );
+
  Exit:
   OSM_LOG_EXIT( p_rcv->p_log );
 }




More information about the general mailing list