[openib-general] [PATCH] OpenSM - SA Client Not Detecting SM Change

Jan Daley jdaley at systemfabricworks.com
Fri Aug 6 14:09:05 PDT 2004


Hi,

The SM's LID was cached in the bind call and then used for all
subsequent SA queries.  If a different SM became master, all queries
would fail until a rebind occurred.  This change stops caching the
SM's LID at bind time and instead queries the port for the current
SM LID on each send.

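Also fixes a memory leak in __osmv_get_lid_and_sm_lid_by_port_guid:
the attribute array (p_attr_array) is now released with cl_free()
just before the function's Exit label.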


Index: opensm/osm_vendor_mlx_sa.c
===================================================================
--- opensm/osm_vendor_mlx_sa.c	(revision 590)
+++ opensm/osm_vendor_mlx_sa.c	(working copy)
@@ -87,8 +87,6 @@
   osm_mad_pool_t    *p_mad_pool;
   uint64_t           port_guid;
   cl_event_t         sync_event;
-  uint16_t           lid;
-  uint16_t           sm_lid;
 } osmv_sa_bind_info_t;
 
 
/***********************************************************************
******
@@ -317,6 +315,8 @@
     }
   }
 
+  cl_free(p_attr_array);
+  
  Exit:
   return ( status );
 }
@@ -332,7 +332,6 @@
 {
   osm_bind_info_t bind_info;
   osm_log_t *p_log = p_vend->p_log;
-  ib_api_status_t status = IB_SUCCESS;
   osmv_sa_bind_info_t *p_sa_bind_info;
   cl_status_t cl_status;
 
@@ -368,6 +367,7 @@
   p_sa_bind_info->p_log = p_log;
   p_sa_bind_info->p_mad_pool = p_mad_pool;
   p_sa_bind_info->p_vendor = p_vend;
+  p_sa_bind_info->port_guid = port_guid;
 
   /* Bind to the lower level */
   p_sa_bind_info->h_bind =
@@ -388,22 +388,6 @@
     goto Exit;
   }
 
-  /* obtain the sm_lid from the vendor */
-  status =
-    __osmv_get_lid_and_sm_lid_by_port_guid(
-      p_vend, port_guid,
-      &p_sa_bind_info->lid,
-      &p_sa_bind_info->sm_lid);
-  if (status != IB_SUCCESS)
-  {
-    cl_free(p_sa_bind_info);
-    p_sa_bind_info = OSM_BIND_INVALID_HANDLE;
-    osm_log( p_log, OSM_LOG_ERROR,
-             "osm_vendor_bind_sa: ERR 0507: "
-             "Fail to obtain the sm lid.\n" );
-    goto Exit;
-  }
-
   /* initialize the sync_event */
   cl_event_construct( &p_sa_bind_info->sync_event );
   cl_status = cl_event_init( &p_sa_bind_info->sync_event, TRUE );
@@ -480,9 +464,25 @@
   static atomic32_t trans_id;
   boolean_t         sync;
   osmv_query_req_t   *p_query_req_copy;
+  uint16_t          local_lid;
+  uint16_t          sm_lid;
 
   OSM_LOG_ENTER( p_log, __osmv_send_sa_req );
 
+  status = __osmv_get_lid_and_sm_lid_by_port_guid(
+    p_bind->p_vendor,
+    p_bind->port_guid,
+    &local_lid,
+    &sm_lid);
+
+  if (IB_SUCCESS != status)
+  {
+    osm_log( p_log, OSM_LOG_ERROR,
+             "__osmv_send_sa_req: ERR 1103: "
+             "Unable to get SM's LID.\n" );
+    goto Exit;
+  }
+
   /* Get a MAD wrapper for the send */
   p_madw = osm_mad_pool_get(
     p_bind->p_mad_pool,
@@ -535,9 +535,13 @@
   /*
     Provide the address to send to
   */
-  p_madw->mad_addr.dest_lid = cl_hton16(p_bind->sm_lid);
+
+  __osmv_get_lid_and_sm_lid_by_port_guid(p_bind->p_vendor, p_bind->port_guid,
+                          &local_lid, &sm_lid);
+
+  p_madw->mad_addr.dest_lid = cl_hton16(sm_lid);
   p_madw->mad_addr.addr_type.smi.source_lid =
-    cl_hton16(p_bind->lid);
+    cl_hton16(local_lid);
   p_madw->mad_addr.addr_type.gsi.remote_qp = CL_HTON32(1);
   p_madw->resp_expected = TRUE;
   p_madw->fail_msg = CL_DISP_MSGID_NONE;






Jan Daley
System Fabric Works
(512) 343-6101 x 13







More information about the general mailing list