[ofa-general] [PATCH 2/4] opensm: duplicated GUID/moved port detection improvements

Sasha Khapyorsky sashak at voltaire.com
Sun Aug 12 14:23:44 PDT 2007


When, during the discovery phase of an OpenSM heavy sweep, an already
discovered port is reconnected to a different location so that OpenSM
finds it again, this is recognized as a duplicated GUID (different
nodes with the same GUID), and OpenSM reports a fatal error and exits
immediately.

This patch tries to do a better job of distinguishing a genuinely
duplicated GUID from a regular port move as described above. It resends
the NodeInfo query to the old location. This doesn't resolve all possible
reconnections, for instance:

A: sm -> .. -> s1:N -> h1    B: sm -> .. -> s1:N -> h1
               s1:M -> s2                   s1:M -> s1

If such a change is made after switch s2 has been discovered, node h1
will be detectable via both paths: the original one (via s1) and the new
one (via s2, which is s1 now).

To solve this in the case where the targeted node is a switch, we query
NodeInfo and match the remote port rather than the targeted node itself.
Unfortunately this cannot be used with a CA because it doesn't forward
MADs, so when the check fails we request a new discovery.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 opensm/include/opensm/osm_madw.h  |    3 ++
 opensm/opensm/osm_node_info_rcv.c |   70 +++++++++++++++++++++++++++++++-----
 opensm/opensm/osm_port_info_rcv.c |    1 +
 opensm/opensm/osm_state_mgr.c     |    2 +
 4 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/opensm/include/opensm/osm_madw.h b/opensm/include/opensm/osm_madw.h
index 948a2ae..6d1ab04 100644
--- a/opensm/include/opensm/osm_madw.h
+++ b/opensm/include/opensm/osm_madw.h
@@ -146,6 +146,9 @@ typedef struct _osm_ni_context
 {
   ib_net64_t			node_guid;
   uint8_t			port_num;
+  ib_net64_t			dup_node_guid;
+  uint8_t			dup_port_num;
+  unsigned			dup_count;
 } osm_ni_context_t;
 /*
 * FIELDS
diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c
index 8a5ecb6..715c0f4 100644
--- a/opensm/opensm/osm_node_info_rcv.c
+++ b/opensm/opensm/osm_node_info_rcv.c
@@ -137,6 +137,37 @@ report_duplicated_guid(
   }
 }
 
+static void requery_dup_node_info( /* Sends another NodeInfo Get for a port flagged as a possible duplicated GUID. */
+  IN const osm_ni_rcv_t* const p_rcv, /* supplies p_gen_req for the request and p_log for error logging */
+  osm_physp_t *p_physp, unsigned count) /* p_physp: suspect port (its p_remote_physp supplies the path); count: stored as dup_count */
+{
+    osm_madw_context_t context;
+    osm_dr_path_t path;
+    cl_status_t status; /* NOTE(review): compared with IB_SUCCESS and passed to ib_get_err_str() below - confirm intended type */
+
+    path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp); /* work on a copy of the remote port's DR path; p_remote_physp is dereferenced without a NULL check */
+    osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num); /* append the remote port's own port number as the next hop */
+
+    context.ni_context.node_guid = p_physp->p_remote_physp->p_node->node_info.port_guid; /* NOTE(review): node_guid is filled from port_guid - confirm this is intended */
+    context.ni_context.port_num = p_physp->p_remote_physp->port_num;
+    context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid; /* node GUID of the suspect port's node */
+    context.ni_context.dup_port_num = p_physp->port_num; /* port number of the suspect port */
+    context.ni_context.dup_count = count; /* caller-supplied attempt counter, carried in the request context */
+
+    status = osm_req_get(p_rcv->p_gen_req, /* request NodeInfo along the extended path */
+                         &path,
+                         IB_MAD_ATTR_NODE_INFO,
+                         0,
+                         CL_DISP_MSGID_NONE,
+                         &context);
+
+    if(status != IB_SUCCESS) /* a failure is only logged; nothing is returned to the caller */
+      osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+               "requery_dup_node_info: ERR 0D02: "
+               "Failure initiating NodeInfo request (%s)\n",
+               ib_get_err_str(status) );
+}
+
 /**********************************************************************
  The plock must be held before calling this function.
 **********************************************************************/
@@ -199,20 +230,39 @@ __osm_ni_rcv_set_links(
     goto _exit;
   }
 
-  if( osm_node_has_any_link( p_node, port_num ) &&
-      p_rcv->p_subn->force_immediate_heavy_sweep == FALSE )
+  if (osm_node_has_any_link(p_node, port_num) &&
+      p_rcv->p_subn->force_immediate_heavy_sweep == FALSE &&
+      (!p_ni_context->dup_count ||
+       (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
+        p_ni_context->dup_port_num == port_num)))
   {
     /*
       Uh oh...
-      This means that we found 2 nodes with the same guid,
-      or a 12x link with lane reversal that is not configured correctly.
-      If the force_immediate_heavy_sweep == TRUE, then this might be a case
-      of port being moved (causing trap 128), and thus rediscovered.
-      In this case, just continue. There will be another heavy sweep
-      immediately after, when the subnet is stable again.
+      This could be reconnected ports, but also duplicated GUID
+      (2 nodes have the same guid) or a 12x link with lane reversal
+      that is not configured correctly.
+      We will try to recover by querying NodeInfo again.
+      In order to catch even fast port moving to new location(s) and
+      back we will count up to 5.
+      Some crazy reconnections (newly created switch loop right before
+      targeted CA) will not be caught this way. So in worst case -
+      report GUID duplication and request new discovery.
+      When switch node is targeted NodeInfo querying will be done in
+      opposite order, this is much stronger check, unfortunately it is
+      impossible with CAs.
     */
-
-    report_duplicated_guid(p_rcv, p_node, port_num, p_ni_context);
+    p_physp = osm_node_get_physp_ptr(p_node, port_num);
+    if (p_ni_context->dup_count > 5)
+    {
+      report_duplicated_guid(p_rcv, p_node, port_num, p_ni_context);
+      p_rcv->p_subn->force_immediate_heavy_sweep = TRUE;
+    }
+    else if (p_node->sw)
+      requery_dup_node_info(p_rcv, p_physp->p_remote_physp,
+                            p_ni_context->dup_count + 1);
+    else
+      requery_dup_node_info(p_rcv, p_physp,
+                            p_ni_context->dup_count + 1);
   }
 
   /*
diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c
index 3965b88..93b49b2 100644
--- a/opensm/opensm/osm_port_info_rcv.c
+++ b/opensm/opensm/osm_port_info_rcv.c
@@ -297,6 +297,7 @@ __osm_pi_rcv_process_switch_port(
 
         osm_dr_path_extend( &path, osm_physp_get_port_num( p_physp ) );
 
+        memset(&context, 0, sizeof(context));
         context.ni_context.node_guid = osm_node_get_node_guid( p_node );
         context.ni_context.port_num = osm_physp_get_port_num( p_physp );
 
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index 6a2045e..f98a377 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -931,6 +931,7 @@ __osm_state_mgr_sweep_hop_1(
    {
    case IB_NODE_TYPE_CA:
    case IB_NODE_TYPE_ROUTER:
+      memset(&context, 0, sizeof(context));
       context.ni_context.node_guid = osm_node_get_node_guid( p_node );
       context.ni_context.port_num = port_num;
 
@@ -966,6 +967,7 @@ __osm_state_mgr_sweep_hop_1(
          if( ib_port_info_get_port_state( &( p_ext_physp->port_info ) ) >
              IB_LINK_DOWN )
          {
+            memset(&context, 0, sizeof(context));
             context.ni_context.node_guid = osm_node_get_node_guid( p_node );
             context.ni_context.port_num = port_num;
 
-- 
1.5.3.rc2.38.g11308




More information about the general mailing list