[ofa-general] [PATCH v4] Add a Node Description check on light sweep.
Ira Weiny
weiny2 at llnl.gov
Mon Aug 18 14:43:06 PDT 2008
Revised with Sasha's comments.
Ira
>From 098718557b18b6e47cceec15a65b1706722999aa Mon Sep 17 00:00:00 2001
From: Ira K. Weiny <weiny2 at llnl.gov>
Date: Wed, 30 Jul 2008 17:28:30 -0700
Subject: [PATCH] Add a Node Description check on light sweep.
A Node Description check on light sweep will ensure that the ND has been found
for each node. This case covers the condition where a ND message is
dropped/lost for some reason and OpenSM is left with a valid configured node
which is not named correctly.
This is not the same as a node which has changed it's Node Descriptioin. In
this case the node needs to send a trap.
Signed-off-by: Ira Weiny <weiny2 at llnl.gov>
---
opensm/include/opensm/osm_base.h | 11 ++++++++
opensm/opensm/osm_node.c | 2 +-
opensm/opensm/osm_state_mgr.c | 49 ++++++++++++++++++++++++++++++++++++++
3 files changed, 61 insertions(+), 1 deletions(-)
diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h
index 3793804..2e8def7 100644
--- a/opensm/include/opensm/osm_base.h
+++ b/opensm/include/opensm/osm_base.h
@@ -640,6 +640,17 @@ BEGIN_C_DECLS
*/
#define OSM_NO_PATH 0xFF
/**********/
+/****d* OpenSM: Base/OSM_NODE_DESC_UNKNOWN
+* NAME
+* OSM_NODE_DESC_UNKNOWN
+*
+* DESCRIPTION
+* Value indicating the Node Description is not set and is "unknown"
+*
+* SYNOPSIS
+*/
+#define OSM_NODE_DESC_UNKNOWN "<unknown>"
+/**********/
/****d* OpenSM: Base/osm_thread_state_t
* NAME
* osm_thread_state_t
diff --git a/opensm/opensm/osm_node.c b/opensm/opensm/osm_node.c
index d99c656..123feb8 100644
--- a/opensm/opensm/osm_node.c
+++ b/opensm/opensm/osm_node.c
@@ -136,7 +136,7 @@ osm_node_t *osm_node_new(IN const osm_madw_t * const p_madw)
osm_node_init_physp(p_node, p_madw);
if (p_ni->node_type == IB_NODE_TYPE_SWITCH)
node_init_physp0(p_node, p_madw);
- p_node->print_desc = strdup("<unknown>");
+ p_node->print_desc = strdup(OSM_NODE_DESC_UNKNOWN);
return (p_node);
}
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index 3cdb2cf..86a728c 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -516,6 +516,51 @@ static void query_sm_info(cl_map_item_t *item, void *cxt)
}
/**********************************************************************
+ During a light sweep check each node to see if the node descriptor is valid
+ if not issue a ND query.
+**********************************************************************/
+static void __osm_state_mgr_get_node_desc(IN cl_map_item_t * const p_object,
+ IN void *context)
+{
+ osm_physp_t *p_physp = NULL;
+ osm_node_t *const p_node = (osm_node_t *) p_object;
+ ib_api_status_t status = IB_SUCCESS;
+ osm_madw_context_t mad_context;
+ osm_sm_t *sm = (osm_sm_t *)context;
+
+ OSM_LOG_ENTER(sm->p_log);
+
+ CL_ASSERT(p_node);
+
+ if (p_node->print_desc && strcmp(p_node->print_desc, OSM_NODE_DESC_UNKNOWN))
+ /* if ND is valid, do nothing */
+ goto exit;
+
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+ "ERR 3319: Unknown node description for node GUID "
+ "0x%016" PRIx64 ". Reissuing ND query\n",
+ cl_ntoh64(osm_node_get_node_guid (p_node)));
+
+ /* get a physp to request from. */
+ p_physp = osm_node_get_any_physp_ptr(p_node);
+
+ mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node);
+
+ status = osm_req_get(sm,
+ osm_physp_get_dr_path_ptr(p_physp),
+ IB_MAD_ATTR_NODE_DESC,
+ 0, CL_DISP_MSGID_NONE, &mad_context);
+ if (status != IB_SUCCESS)
+ OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+ "ERR 331B: Failure initiating NodeDescription request "
+ "(%s)\n", ib_get_err_str(status));
+
+exit:
+ OSM_LOG_EXIT(sm->p_log);
+}
+
+
+/**********************************************************************
Initiates a lightweight sweep of the subnet.
Used during normal sweeps after the subnet is up.
**********************************************************************/
@@ -550,6 +595,10 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm)
cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm);
CL_PLOCK_RELEASE(sm->p_lock);
+ CL_PLOCK_ACQUIRE(sm->p_lock);
+ cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, __osm_state_mgr_get_node_desc, sm);
+ CL_PLOCK_RELEASE(sm->p_lock);
+
/* now scan the list of physical ports that were not down but have no remote port */
CL_PLOCK_ACQUIRE(sm->p_lock);
p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
--
1.5.4.5
More information about the general
mailing list