[ofa-general] [PATCH 2/2] opensm: OpenSM state machine rework
Sasha Khapyorsky
sashak at voltaire.com
Wed Jan 30 10:56:06 PST 2008
Instead of the tricky state machine, this patch implements a plain-flow
do_sweep() function which uses the wait_for_pending_transactions() blocker.
One of the goals of this patch is to preserve the original OpenSM
behavior.
Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
opensm/include/opensm/osm_base.h | 51 +--
opensm/include/opensm/osm_sm.h | 1 +
opensm/opensm/osm_console.c | 64 --
opensm/opensm/osm_helper.c | 51 +--
opensm/opensm/osm_node_info_rcv.c | 11 +-
opensm/opensm/osm_perfmgr.c | 12 +-
opensm/opensm/osm_port_info_rcv.c | 3 +-
opensm/opensm/osm_sm.c | 3 +
opensm/opensm/osm_sm_mad_ctrl.c | 6 +-
opensm/opensm/osm_sminfo_rcv.c | 17 +-
opensm/opensm/osm_state_mgr.c | 1176 ++++++++---------------------------
opensm/opensm/osm_sw_info_rcv.c | 3 +-
opensm/opensm/osm_sweep_fail_ctrl.c | 2 +-
13 files changed, 286 insertions(+), 1114 deletions(-)
diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h
index aaf9930..6f784ca 100644
--- a/opensm/include/opensm/osm_base.h
+++ b/opensm/include/opensm/osm_base.h
@@ -753,39 +753,7 @@ typedef enum _osm_sm_state {
OSM_SM_STATE_NO_STATE = 0,
OSM_SM_STATE_INIT,
OSM_SM_STATE_IDLE,
- OSM_SM_STATE_SWEEP_LIGHT,
- OSM_SM_STATE_SWEEP_LIGHT_WAIT,
- OSM_SM_STATE_SWEEP_HEAVY_SELF,
- OSM_SM_STATE_SWEEP_HEAVY_SUBNET,
- OSM_SM_STATE_SET_SM_UCAST_LID,
- OSM_SM_STATE_SET_SM_UCAST_LID_WAIT,
- OSM_SM_STATE_SET_SM_UCAST_LID_DONE,
- OSM_SM_STATE_SET_SUBNET_UCAST_LIDS,
- OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_WAIT,
- OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE,
- OSM_SM_STATE_SET_UCAST_TABLES,
- OSM_SM_STATE_SET_UCAST_TABLES_WAIT,
- OSM_SM_STATE_SET_UCAST_TABLES_DONE,
- OSM_SM_STATE_SET_MCAST_TABLES,
- OSM_SM_STATE_SET_MCAST_TABLES_WAIT,
- OSM_SM_STATE_SET_MCAST_TABLES_DONE,
- OSM_SM_STATE_SET_LINK_PORTS,
- OSM_SM_STATE_SET_LINK_PORTS_WAIT,
- OSM_SM_STATE_SET_LINK_PORTS_DONE,
- OSM_SM_STATE_SET_ARMED,
- OSM_SM_STATE_SET_ARMED_WAIT,
- OSM_SM_STATE_SET_ARMED_DONE,
- OSM_SM_STATE_SET_ACTIVE,
- OSM_SM_STATE_SET_ACTIVE_WAIT,
OSM_SM_STATE_STANDBY,
- OSM_SM_STATE_SUBNET_UP,
- OSM_SM_STATE_PROCESS_REQUEST,
- OSM_SM_STATE_PROCESS_REQUEST_WAIT,
- OSM_SM_STATE_PROCESS_REQUEST_DONE,
- OSM_SM_STATE_MASTER_OR_HIGHER_SM_DETECTED,
- OSM_SM_STATE_SET_PKEY,
- OSM_SM_STATE_SET_PKEY_WAIT,
- OSM_SM_STATE_SET_PKEY_DONE,
OSM_SM_STATE_MAX
} osm_sm_state_t;
/***********/
@@ -804,17 +772,14 @@ typedef enum _osm_sm_state {
*/
#define OSM_SIGNAL_NONE 0
#define OSM_SIGNAL_SWEEP 1
-#define OSM_SIGNAL_CHANGE_DETECTED 2
-#define OSM_SIGNAL_NO_PENDING_TRANSACTIONS 3
-#define OSM_SIGNAL_DONE 4
-#define OSM_SIGNAL_DONE_PENDING 5
-#define OSM_SIGNAL_LIGHT_SWEEP_FAIL 6
-#define OSM_SIGNAL_IDLE_TIME_PROCESS 7
-#define OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST 8
-#define OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED 9
-#define OSM_SIGNAL_EXIT_STBY 10
-#define OSM_SIGNAL_PERFMGR_SWEEP 11
-#define OSM_SIGNAL_MAX 12
+#define OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST 2
+#define OSM_SIGNAL_EXIT_STBY 3
+#define OSM_SIGNAL_PERFMGR_SWEEP 4
+#define OSM_SIGNAL_MAX 4
+
+/* status values for sweep managers - can be removed later */
+#define OSM_SIGNAL_DONE 16
+#define OSM_SIGNAL_DONE_PENDING 17
typedef uintn_t osm_signal_t;
/***********/
diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h
index e0b3d01..2cdbdd0 100644
--- a/opensm/include/opensm/osm_sm.h
+++ b/opensm/include/opensm/osm_sm.h
@@ -120,6 +120,7 @@ typedef struct osm_sm {
cl_timer_t sweep_timer;
cl_event_wheel_t trap_aging_tracker;
cl_thread_t sweeper;
+ unsigned master_sm_found;
osm_subn_t *p_subn;
osm_db_t *p_db;
osm_vendor_t *p_vendor;
diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
index d0a632f..bb2a68d 100644
--- a/opensm/opensm/osm_console.c
+++ b/opensm/opensm/osm_console.c
@@ -300,72 +300,8 @@ static char *sm_state_mgr_str(osm_sm_state_t state)
return ("Init");
case OSM_SM_STATE_IDLE:
return ("Idle");
- case OSM_SM_STATE_SWEEP_LIGHT:
- return ("Sweep Light");
- case OSM_SM_STATE_SWEEP_LIGHT_WAIT:
- return ("Sweep Light Wait");
- case OSM_SM_STATE_SWEEP_HEAVY_SELF:
- return ("Sweep Heavy Self");
- case OSM_SM_STATE_SWEEP_HEAVY_SUBNET:
- return ("Sweep Heavy Subnet");
- case OSM_SM_STATE_SET_SM_UCAST_LID:
- return ("Set SM UCAST LID");
- case OSM_SM_STATE_SET_SM_UCAST_LID_WAIT:
- return ("Set SM UCAST LID Wait");
- case OSM_SM_STATE_SET_SM_UCAST_LID_DONE:
- return ("Set SM UCAST LID Done");
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS:
- return ("Set Subnet UCAST LIDS");
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_WAIT:
- return ("Set Subnet UCAST LIDS Wait");
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE:
- return ("Set Subnet UCAST LIDS Done");
- case OSM_SM_STATE_SET_UCAST_TABLES:
- return ("Set UCAST Tables");
- case OSM_SM_STATE_SET_UCAST_TABLES_WAIT:
- return ("Set UCAST Tables Wait");
- case OSM_SM_STATE_SET_UCAST_TABLES_DONE:
- return ("Set UCAST Tables Done");
- case OSM_SM_STATE_SET_MCAST_TABLES:
- return ("Set MCAST Tables");
- case OSM_SM_STATE_SET_MCAST_TABLES_WAIT:
- return ("Set MCAST Tables Wait");
- case OSM_SM_STATE_SET_MCAST_TABLES_DONE:
- return ("Set MCAST Tables Done");
- case OSM_SM_STATE_SET_LINK_PORTS:
- return ("Set Link Ports");
- case OSM_SM_STATE_SET_LINK_PORTS_WAIT:
- return ("Set Link Ports Wait");
- case OSM_SM_STATE_SET_LINK_PORTS_DONE:
- return ("Set Link Ports Done");
- case OSM_SM_STATE_SET_ARMED:
- return ("Set Armed");
- case OSM_SM_STATE_SET_ARMED_WAIT:
- return ("Set Armed Wait");
- case OSM_SM_STATE_SET_ARMED_DONE:
- return ("Set Armed Done");
- case OSM_SM_STATE_SET_ACTIVE:
- return ("Set Active");
- case OSM_SM_STATE_SET_ACTIVE_WAIT:
- return ("Set Active Wait");
case OSM_SM_STATE_STANDBY:
return ("Standby");
- case OSM_SM_STATE_SUBNET_UP:
- return ("Subnet Up");
- case OSM_SM_STATE_PROCESS_REQUEST:
- return ("Process Request");
- case OSM_SM_STATE_PROCESS_REQUEST_WAIT:
- return ("Process Request Wait");
- case OSM_SM_STATE_PROCESS_REQUEST_DONE:
- return ("Process Request Done");
- case OSM_SM_STATE_MASTER_OR_HIGHER_SM_DETECTED:
- return ("Master or Higher SM Detected");
- case OSM_SM_STATE_SET_PKEY:
- return ("Set PKey");
- case OSM_SM_STATE_SET_PKEY_WAIT:
- return ("Set PKey Wait");
- case OSM_SM_STATE_SET_PKEY_DONE:
- return ("Set PKey Done");
default:
return ("Unknown State");
}
diff --git a/opensm/opensm/osm_helper.c b/opensm/opensm/osm_helper.c
index 1ea86b9..bd345bc 100644
--- a/opensm/opensm/osm_helper.c
+++ b/opensm/opensm/osm_helper.c
@@ -2062,56 +2062,17 @@ const char *const __osm_sm_state_str[] = {
"OSM_SM_STATE_NO_STATE", /* 0 */
"OSM_SM_STATE_INIT", /* 1 */
"OSM_SM_STATE_IDLE", /* 2 */
- "OSM_SM_STATE_SWEEP_LIGHT", /* 3 */
- "OSM_SM_STATE_SWEEP_LIGHT_WAIT", /* 4 */
- "OSM_SM_STATE_SWEEP_HEAVY_SELF", /* 5 */
- "OSM_SM_STATE_SWEEP_HEAVY_SUBNET", /* 6 */
- "OSM_SM_STATE_SET_SM_UCAST_LID", /* 7 */
- "OSM_SM_STATE_SET_SM_UCAST_LID_WAIT", /* 8 */
- "OSM_SM_STATE_SET_SM_UCAST_LID_DONE", /* 9 */
- "OSM_SM_STATE_SET_SUBNET_UCAST_LIDS", /* 10 */
- "OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_WAIT", /* 11 */
- "OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE", /* 12 */
- "OSM_SM_STATE_SET_UCAST_TABLES", /* 13 */
- "OSM_SM_STATE_SET_UCAST_TABLES_WAIT", /* 14 */
- "OSM_SM_STATE_SET_UCAST_TABLES_DONE", /* 15 */
- "OSM_SM_STATE_SET_MCAST_TABLES", /* 16 */
- "OSM_SM_STATE_SET_MCAST_TABLES_WAIT", /* 17 */
- "OSM_SM_STATE_SET_MCAST_TABLES_DONE", /* 18 */
- "OSM_SM_STATE_SET_LINK_PORTS", /* 19 */
- "OSM_SM_STATE_SET_LINK_PORTS_WAIT", /* 20 */
- "OSM_SM_STATE_SET_LINK_PORTS_DONE", /* 21 */
- "OSM_SM_STATE_SET_ARMED", /* 22 */
- "OSM_SM_STATE_SET_ARMED_WAIT", /* 23 */
- "OSM_SM_STATE_SET_ARMED_DONE", /* 24 */
- "OSM_SM_STATE_SET_ACTIVE", /* 25 */
- "OSM_SM_STATE_SET_ACTIVE_WAIT", /* 26 */
- "OSM_SM_STATE_STANDBY", /* 27 */
- "OSM_SM_STATE_SUBNET_UP", /* 28 */
- "OSM_SM_STATE_PROCESS_REQUEST", /* 29 */
- "OSM_SM_STATE_PROCESS_REQUEST_WAIT", /* 30 */
- "OSM_SM_STATE_PROCESS_REQUEST_DONE", /* 31 */
- "OSM_SM_STATE_MASTER_OR_HIGHER_SM_DETECTED", /* 32 */
- "OSM_SM_STATE_SET_PKEY", /* 33 */
- "OSM_SM_STATE_SET_PKEY_WAIT", /* 34 */
- "OSM_SM_STATE_SET_PKEY_DONE", /* 35 */
- "UNKNOWN STATE!!" /* 36 */
+ "OSM_SM_STATE_STANDBY", /* 3 */
+ "UNKNOWN STATE!!" /* 4 */
};
const char *const __osm_sm_signal_str[] = {
"OSM_SIGNAL_NONE", /* 0 */
"OSM_SIGNAL_SWEEP", /* 1 */
- "OSM_SIGNAL_CHANGE_DETECTED", /* 2 */
- "OSM_SIGNAL_NO_PENDING_TRANSACTIONS", /* 3 */
- "OSM_SIGNAL_DONE", /* 4 */
- "OSM_SIGNAL_DONE_PENDING", /* 5 */
- "OSM_SIGNAL_LIGHT_SWEEP_FAIL", /* 6 */
- "OSM_SIGNAL_IDLE_TIME_PROCESS", /* 7 */
- "OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST", /* 8 */
- "OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED", /* 9 */
- "OSM_SIGNAL_EXIT_STBY", /* 10 */
- "OSM_SIGNAL_PERFMGR_SWEEP", /* 11 */
- "UNKNOWN SIGNAL!!" /* 12 */
+ "OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST", /* 2 */
+ "OSM_SIGNAL_EXIT_STBY", /* 3 */
+ "OSM_SIGNAL_PERFMGR_SWEEP", /* 4 */
+ "UNKNOWN SIGNAL!!" /* 5 */
};
/**********************************************************************
diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c
index 3ac8d1f..2106aa2 100644
--- a/opensm/opensm/osm_node_info_rcv.c
+++ b/opensm/opensm/osm_node_info_rcv.c
@@ -814,7 +814,6 @@ void osm_ni_rcv_process(IN void *context, IN void *data)
ib_node_info_t *p_ni;
ib_smp_t *p_smp;
osm_node_t *p_node;
- boolean_t process_new_flag = FALSE;
CL_ASSERT(sm);
@@ -856,20 +855,12 @@ void osm_ni_rcv_process(IN void *context, IN void *data)
if (!p_node) {
__osm_ni_rcv_process_new(sm, p_madw);
- process_new_flag = TRUE;
+ sm->p_subn->force_heavy_sweep = 1;
} else
__osm_ni_rcv_process_existing(sm, p_node, p_madw);
CL_PLOCK_RELEASE(sm->p_lock);
- /*
- * If we processed a new node - need to signal to the SM that
- * change detected.
- */
- if (process_new_flag)
- osm_sm_signal(&sm->p_subn->p_osm->sm,
- OSM_SIGNAL_CHANGE_DETECTED);
-
Exit:
OSM_LOG_EXIT(sm->p_log);
}
diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
index dd6e662..9480ad7 100644
--- a/opensm/opensm/osm_perfmgr.c
+++ b/opensm/opensm/osm_perfmgr.c
@@ -740,7 +740,6 @@ static void reset_switch_count(cl_map_item_t * const p_map_item, void *cxt)
static int perfmgr_discovery(osm_opensm_t * osm)
{
- unsigned signals = osm->sm.signal_mask;
int ret;
CL_PLOCK_ACQUIRE(&osm->lock);
@@ -772,17 +771,10 @@ static int perfmgr_discovery(osm_opensm_t * osm)
if (wait_for_pending_transactions(&osm->stats))
goto _exit;
- _drop:
+_drop:
osm_drop_mgr_process(&osm->sm.drop_mgr);
- _exit:
- /* dirty hack: cleanup signal mask -
- * this will not be needed later with both discoveries merged */
- cl_spinlock_acquire(&osm->sm.signal_lock);
- osm->sm.signal_mask &= ~(OSM_SIGNAL_NO_PENDING_TRANSACTIONS |
- OSM_SIGNAL_CHANGE_DETECTED);
- osm->sm.signal_mask |= signals;
- cl_spinlock_release(&osm->sm.signal_lock);
+_exit:
return ret;
}
diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c
index e56ba51..356cd56 100644
--- a/opensm/opensm/osm_port_info_rcv.c
+++ b/opensm/opensm/osm_port_info_rcv.c
@@ -578,8 +578,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
"GUID 0x%" PRIx64 " port 0x%016" PRIx64
", Commencing heavy sweep\n",
cl_ntoh64(node_guid), cl_ntoh64(port_guid));
- osm_sm_signal(&sm->p_subn->p_osm->sm,
- OSM_SIGNAL_CHANGE_DETECTED);
+ sm->p_subn->force_heavy_sweep = 1;
goto Exit;
}
diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
index f2d259d..019fa51 100644
--- a/opensm/opensm/osm_sm.c
+++ b/opensm/opensm/osm_sm.c
@@ -121,6 +121,9 @@ static void __osm_sm_sweeper(IN void *p_ptr)
continue;
}
+ if (osm_exit_flag)
+ break;
+
cl_spinlock_acquire(&p_sm->signal_lock);
signals = p_sm->signal_mask;
p_sm->signal_mask = 0;
diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c
index c6624a1..efbe97a 100644
--- a/opensm/opensm/osm_sm_mad_ctrl.c
+++ b/opensm/opensm/osm_sm_mad_ctrl.c
@@ -103,11 +103,7 @@ __osm_sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * const p_ctrl,
Signal the subnet manager.
*/
osm_log(p_ctrl->p_log, OSM_LOG_DEBUG,
- "__osm_sm_mad_ctrl_retire_trans_mad: "
- "signal OSM_SIGNAL_NO_PENDING_TRANSACTIONS\n");
-
- osm_sm_signal(&p_ctrl->p_subn->p_osm->sm,
- OSM_SIGNAL_NO_PENDING_TRANSACTIONS);
+ "__osm_sm_mad_ctrl_retire_trans_mad: wire is clean.\n");
#ifdef HAVE_LIBPTHREAD
pthread_cond_signal(&p_ctrl->p_stats->cond);
#else
diff --git a/opensm/opensm/osm_sminfo_rcv.c b/opensm/opensm/osm_sminfo_rcv.c
index 63cc393..4b4c2b1 100644
--- a/opensm/opensm/osm_sminfo_rcv.c
+++ b/opensm/opensm/osm_sminfo_rcv.c
@@ -346,7 +346,6 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm,
IN const osm_remote_sm_t * const p_sm)
{
const ib_sm_info_t *p_smi;
- osm_signal_t ret_val = OSM_SIGNAL_NONE;
OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_get_sm);
@@ -370,7 +369,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm,
case IB_SMINFO_STATE_NOTACTIVE:
break;
case IB_SMINFO_STATE_MASTER:
- ret_val = OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED;
+ sm->master_sm_found = 1;
/* save on the p_sm_state_mgr the guid of the current master. */
osm_log(sm->p_log, OSM_LOG_VERBOSE,
"__osm_sminfo_rcv_process_get_sm: "
@@ -383,8 +382,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm,
if (__osm_sminfo_rcv_remote_sm_is_higher(sm, p_smi)
== TRUE) {
/* the remote is a higher sm - need to stop sweeping */
- ret_val =
- OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED;
+ sm->master_sm_found = 1;
/* save on the sm_state_mgr the guid of the higher SM we found - */
/* we will poll it - as long as it lives - we should be in Standby. */
osm_log(sm->p_log, OSM_LOG_VERBOSE,
@@ -456,7 +454,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm,
}
OSM_LOG_EXIT(sm->p_log);
- return ret_val;
+ return 0;
}
/**********************************************************************
@@ -471,7 +469,6 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm,
osm_port_t *p_port;
ib_net64_t port_guid;
osm_remote_sm_t *p_sm;
- osm_signal_t process_get_sm_ret_val = OSM_SIGNAL_NONE;
OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_get_response);
@@ -558,17 +555,11 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm,
*/
p_sm->smi = *p_smi;
- process_get_sm_ret_val = __osm_sminfo_rcv_process_get_sm(sm, p_sm);
+ __osm_sminfo_rcv_process_get_sm(sm, p_sm);
_unlock_and_exit:
CL_PLOCK_RELEASE(sm->p_lock);
- /* If process_get_sm_ret_val != OSM_SIGNAL_NONE then we have to signal
- * to the SM with that signal. */
- if (process_get_sm_ret_val != OSM_SIGNAL_NONE)
- osm_sm_signal(&sm->p_subn->p_osm->sm,
- process_get_sm_ret_val);
-
Exit:
OSM_LOG_EXIT(sm->p_log);
}
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index 3746389..4dcb584 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -1332,998 +1332,336 @@ int wait_for_pending_transactions(osm_stats_t * stats)
return osm_exit_flag;
}
-void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr,
- IN osm_signal_t signal)
+static void do_sweep(osm_sm_t * sm)
{
ib_api_status_t status;
osm_remote_sm_t *p_remote_sm;
- osm_signal_t tmp_signal;
- CL_ASSERT(p_mgr);
-
- OSM_LOG_ENTER(p_mgr->p_log, osm_state_mgr_process);
+ sm->master_sm_found = 0;
- /* if we are exiting do nothing */
- if (osm_exit_flag)
- signal = OSM_SIGNAL_NONE;
-
- while (signal != OSM_SIGNAL_NONE) {
- if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) {
- osm_log(p_mgr->p_log, OSM_LOG_DEBUG,
- "osm_state_mgr_process: "
- "Received signal %s in state %s\n",
- osm_get_sm_signal_str(signal),
- osm_get_sm_state_str(p_mgr->state));
+ /*
+ * If we already have switches, then try a light sweep.
+ * Otherwise, this is probably our first discovery pass
+ * or we are connected in loopback. In both cases do a
+ * heavy sweep.
+ * Note: If we are connected in loopback we want a heavy
+ * sweep, since we will not be getting any traps if there is
+ * a lost connection.
+ */
+ /* if we are in DISCOVERING state - this means it is either in
+ * initializing or wake up from STANDBY - run the heavy sweep */
+ if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
+ && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
+ && sm->p_subn->opt.force_heavy_sweep == FALSE
+ && sm->p_subn->force_heavy_sweep == FALSE
+ && sm->p_subn->subnet_initialization_error == FALSE
+ && (__osm_state_mgr_light_sweep_start(&sm->state_mgr) == IB_SUCCESS)) {
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
+ if (!sm->p_subn->force_heavy_sweep) {
+ __osm_state_mgr_light_sweep_done_msg(&sm->state_mgr);
+ return;
}
+ }
- /*
- * If we're already sweeping and we get the signal to sweep,
- * just ignore it harmlessly.
- */
- if ((p_mgr->state != OSM_SM_STATE_IDLE)
- && (p_mgr->state != OSM_SM_STATE_STANDBY)
- && (signal == OSM_SIGNAL_SWEEP)) {
- break;
- }
+ /* go to heavy sweep */
+_repeat_discovery:
- switch (p_mgr->state) {
- case OSM_SM_STATE_IDLE:
- switch (signal) {
- case OSM_SIGNAL_SWEEP:
- /*
- * If the osm_sm_state_mgr is in NOT-ACTIVE state -
- * stay in IDLE
- */
- if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) {
- osm_vendor_set_sm(p_mgr->p_mad_ctrl->h_bind, FALSE);
- goto Idle;
- }
+ /* First of all - unset all flags */
+ sm->p_subn->force_heavy_sweep = FALSE;
+ sm->p_subn->subnet_initialization_error = FALSE;
- /*
- * If the osm_sm_state_mgr is in INIT state - signal
- * it with a INIT signal to move it to DISCOVERY state.
- */
- if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_INIT)
- osm_sm_state_mgr_process(p_mgr->
- p_sm_state_mgr,
- OSM_SM_SIGNAL_INIT);
-
- /*
- * If we already have switches, then try a light sweep.
- * Otherwise, this is probably our first discovery pass
- * or we are connected in loopback. In both cases do a
- * heavy sweep.
- * Note: If we are connected in loopback we want a heavy
- * sweep, since we will not be getting any traps if there is
- * a lost connection.
- */
- /* if we are in DISCOVERING state - this means it is either in
- * initializing or wake up from STANDBY - run the heavy sweep */
- if (cl_qmap_count(&p_mgr->p_subn->sw_guid_tbl)
- && p_mgr->p_subn->sm_state !=
- IB_SMINFO_STATE_DISCOVERING
- && p_mgr->p_subn->opt.force_heavy_sweep ==
- FALSE
- && p_mgr->p_subn->force_heavy_sweep == FALSE
- && p_mgr->p_subn->subnet_initialization_error == FALSE) {
- if (__osm_state_mgr_light_sweep_start(p_mgr) == IB_SUCCESS) {
- p_mgr->state = OSM_SM_STATE_SWEEP_LIGHT;
- }
- } else {
- /* First of all - if force_heavy_sweep is TRUE then
- * need to unset it */
- p_mgr->p_subn->force_heavy_sweep = FALSE;
- /* If subnet_initialization_error is TRUE then
- * need to unset it. */
- p_mgr->p_subn->subnet_initialization_error = FALSE;
-
- /* rescan configuration updates */
- status = osm_subn_rescan_conf_files(p_mgr->p_subn);
- if (status != IB_SUCCESS) {
- osm_log(p_mgr->p_log,
- OSM_LOG_ERROR,
- "osm_state_mgr_process: ERR 331A: "
- "osm_subn_rescan_conf_file failed\n");
- }
-
- if (p_mgr->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
- p_mgr->p_subn->need_update = 1;
-
- status = __osm_state_mgr_sweep_hop_0(p_mgr);
- if (status == IB_SUCCESS) {
- p_mgr->state = OSM_SM_STATE_SWEEP_HEAVY_SELF;
- }
- }
- Idle:
- signal = OSM_SIGNAL_NONE;
- break;
+ /* rescan configuration updates */
+ status = osm_subn_rescan_conf_files(sm->p_subn);
+ if (status != IB_SUCCESS)
+ osm_log(sm->p_log, OSM_LOG_ERROR,
+ "osm_state_mgr_process: ERR 331A: "
+ "osm_subn_rescan_conf_file failed\n");
- case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST:
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
- break;
+ if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
+ sm->p_subn->need_update = 1;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ status = __osm_state_mgr_sweep_hop_0(&sm->state_mgr);
+ if (status != IB_SUCCESS ||
+ wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- case OSM_SM_STATE_PROCESS_REQUEST:
- switch (signal) {
- case OSM_SIGNAL_IDLE_TIME_PROCESS:
- signal = osm_mcast_mgr_process_mgroups(p_mgr->p_mcast_mgr);
- switch (signal) {
- case OSM_SIGNAL_NONE:
- p_mgr->state = OSM_SM_STATE_IDLE;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ if (__osm_state_mgr_is_sm_port_down(&sm->state_mgr) == TRUE) {
+ __osm_state_mgr_sm_port_down_msg(&sm->state_mgr);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_PROCESS_REQUEST_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST_DONE;
- break;
+ /* Run the drop manager - we want to clear all records */
+ osm_drop_mgr_process(&sm->drop_mgr);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_PROCESS_REQUEST_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- if (p_mgr->p_subn->force_heavy_sweep) {
- /*
- * Do not read next item from the idle queue.
- * Immediate heavy sweep is requested, so it's
- * more important.
- * Besides, there is a chance that after the
- * heavy sweep complition, idle queue processing
- * that SM would have performed here will be obsolete.
- */
- if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG))
- osm_log(p_mgr->p_log, OSM_LOG_DEBUG,
- "osm_state_mgr_process: "
- "interrupting idle time queue processing - heavy sweep requested\n");
- signal = OSM_SIGNAL_NONE;
- p_mgr->state = OSM_SM_STATE_IDLE;
- break;
- }
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SWEEP_LIGHT:
- switch (signal) {
- case OSM_SIGNAL_LIGHT_SWEEP_FAIL:
- case OSM_SIGNAL_CHANGE_DETECTED:
- /*
- * Nothing else to do yet except change state.
- */
- p_mgr->state = OSM_SM_STATE_SWEEP_LIGHT_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
+ /* Move to DISCOVERING state */
+ osm_sm_state_mgr_process(&sm->sm_state_mgr,
+ OSM_SM_SIGNAL_DISCOVER);
+ return;
+ }
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /*
- * No change was detected on the subnet.
- * We can return to the idle state.
- */
- __osm_state_mgr_light_sweep_done_msg(p_mgr);
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
- break;
+ status = __osm_state_mgr_sweep_hop_1(&sm->state_mgr);
+ if (status != IB_SUCCESS ||
+ wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ /* discovery completed - check other sm presense */
+ if (sm->master_sm_found) {
+ sm->state_mgr.state = OSM_SM_STATE_STANDBY;
+ /*
+ * Call the sm_state_mgr with signal
+ * MASTER_OR_HIGHER_SM_DETECTED_DONE
+ */
+ osm_sm_state_mgr_process(&sm->sm_state_mgr,
+ OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE);
+ __osm_state_mgr_standby_msg(&sm->state_mgr);
+ return;
+ }
- case OSM_SM_STATE_SWEEP_LIGHT_WAIT:
- switch (signal) {
- case OSM_SIGNAL_LIGHT_SWEEP_FAIL:
- case OSM_SIGNAL_CHANGE_DETECTED:
- /*
- * Nothing to do here. One subnet change typcially
- * begets another.... But need to wait for all transactions to
- * complete
- */
- break;
+ /* if new sweep requested - don't bother with the rest */
+ if (sm->p_subn->force_heavy_sweep)
+ goto _repeat_discovery;
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /*
- * A change was detected on the subnet.
- * Initiate a heavy sweep.
- */
- if (__osm_state_mgr_sweep_hop_0(p_mgr) == IB_SUCCESS) {
- p_mgr->state = OSM_SM_STATE_SWEEP_HEAVY_SELF;
- }
- break;
+ __osm_state_mgr_sweep_heavy_done_msg(&sm->state_mgr);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- break;
+ /* If we are MASTER - get the highest remote_sm, and
+ * see if it is higher than our local sm.
+ */
+ if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
+ p_remote_sm = __osm_state_mgr_get_highest_sm(&sm->state_mgr);
+ if (p_remote_sm != NULL) {
+ /* report new ports (trap 64) before leaving MASTER */
+ __osm_state_mgr_report_new_ports(&sm->state_mgr);
+
+ /* need to handover the mastership
+ * to the remote sm, and move to standby */
+ __osm_state_mgr_send_handover(&sm->state_mgr, p_remote_sm);
+ osm_sm_state_mgr_process(&sm->sm_state_mgr,
+ OSM_SM_SIGNAL_HANDOVER_SENT);
+ sm->state_mgr.state = OSM_SM_STATE_STANDBY;
+ return;
+ } else {
+ /* We are the highest sm - check to see if there is
+ * a remote SM that is in master state. */
+ p_remote_sm =
+ __osm_state_mgr_exists_other_master_sm(&sm->state_mgr);
+ if (p_remote_sm != NULL) {
+ /* There is a remote SM that is master.
+ * need to wait for that SM to relinquish control
+ * of its portion of the subnet. C14-60.2.1.
+ * Also - need to start polling on that SM. */
+ sm->sm_state_mgr.p_polling_sm = p_remote_sm;
+ osm_sm_state_mgr_process(&sm->sm_state_mgr,
+ OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
+ return;
}
- signal = OSM_SIGNAL_NONE;
- break;
+ }
+ }
- case OSM_SM_STATE_SWEEP_HEAVY_SELF:
- switch (signal) {
- case OSM_SIGNAL_CHANGE_DETECTED:
- /*
- * Nothing to do here. One subnet change typcially
- * begets another.... But need to wait for all transactions
- */
- signal = OSM_SIGNAL_NONE;
- break;
+ /* Need to continue with lid assignment */
+ osm_drop_mgr_process(&sm->drop_mgr);
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- if (__osm_state_mgr_is_sm_port_down(p_mgr) == TRUE) {
- __osm_state_mgr_sm_port_down_msg(p_mgr);
-
- /* Run the drop manager - we want to clear all records */
- osm_drop_mgr_process(p_mgr->p_drop_mgr);
-
- /* Move to DISCOVERING state */
- osm_sm_state_mgr_process(p_mgr->
- p_sm_state_mgr,
- OSM_SM_SIGNAL_DISCOVER);
-
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
- } else {
- if (__osm_state_mgr_sweep_hop_1(p_mgr)
- == IB_SUCCESS) {
- p_mgr->state = OSM_SM_STATE_SWEEP_HEAVY_SUBNET;
- }
- signal = OSM_SIGNAL_NONE;
- }
- break;
+ /*
+ * If we are not MASTER already - this means that we are
+ * in discovery state. call osm_sm_state_mgr with signal
+ * DISCOVERY_COMPLETED
+ */
+ if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
+ osm_sm_state_mgr_process(&sm->sm_state_mgr,
+ OSM_SM_SIGNAL_DISCOVERY_COMPLETED);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ osm_pkey_mgr_process(sm->p_subn->p_osm);
- /*
- * There is no 'OSM_SM_STATE_SWEEP_HEAVY_WAIT' state since we
- * know that there are outstanding transactions on the wire already...
- */
- case OSM_SM_STATE_SWEEP_HEAVY_SUBNET:
- switch (signal) {
- case OSM_SIGNAL_CHANGE_DETECTED:
- /*
- * Nothing to do here. One subnet change typically
- * begets another....
- */
- signal = OSM_SIGNAL_NONE;
- break;
+ osm_qos_setup(sm->p_subn->p_osm);
- case OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED:
- p_mgr->state = OSM_SM_STATE_MASTER_OR_HIGHER_SM_DETECTED;
- break;
+ /* try to restore SA DB (this should be before lid_mgr
+ because we may want to disable clients reregistration
+ when SA DB is restored) */
+ osm_sa_db_file_load(sm->p_subn->p_osm);
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /* if new sweep requiested - don't bother with the rest */
- if (p_mgr->p_subn->force_heavy_sweep) {
- p_mgr->state = OSM_SM_STATE_IDLE;
- signal = OSM_SIGNAL_SWEEP;
- break;
- }
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- __osm_state_mgr_sweep_heavy_done_msg(p_mgr);
-
- /* If we are MASTER - get the highest remote_sm, and
- * see if it is higher than our local sm. If
- */
- if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
- p_remote_sm = __osm_state_mgr_get_highest_sm(p_mgr);
- if (p_remote_sm != NULL) {
- /* report new ports (trap 64) before leaving MASTER */
- __osm_state_mgr_report_new_ports(p_mgr);
-
- /* need to handover the mastership
- * to the remote sm, and move to standby */
- __osm_state_mgr_send_handover(p_mgr, p_remote_sm);
- osm_sm_state_mgr_process(p_mgr->
- p_sm_state_mgr,
- OSM_SM_SIGNAL_HANDOVER_SENT);
- p_mgr->state = OSM_SM_STATE_STANDBY;
- signal = OSM_SIGNAL_NONE;
- break;
- } else {
- /* We are the highest sm - check to see if there is
- * a remote SM that is in master state. */
- p_remote_sm =
- __osm_state_mgr_exists_other_master_sm(p_mgr);
- if (p_remote_sm != NULL) {
- /* There is a remote SM that is master.
- * need to wait for that SM to relinquish control
- * of its portion of the subnet. C14-60.2.1.
- * Also - need to start polling on that SM. */
- p_mgr->p_sm_state_mgr->
- p_polling_sm = p_remote_sm;
- osm_sm_state_mgr_process
- (p_mgr->
- p_sm_state_mgr,
- OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
- break;
- }
- }
- }
+ osm_lid_mgr_process_sm(&sm->lid_mgr);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- /* Need to continue with lid assignment */
- osm_drop_mgr_process(p_mgr->p_drop_mgr);
+ __osm_state_mgr_set_sm_lid_done_msg(&sm->state_mgr);
+ __osm_state_mgr_notify_lid_change(&sm->state_mgr);
- p_mgr->state = OSM_SM_STATE_SET_PKEY;
+ osm_lid_mgr_process_subnet(&sm->lid_mgr);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- /*
- * If we are not MASTER already - this means that we are
- * in discovery state. call osm_sm_state_mgr with signal
- * DISCOVERY_COMPLETED
- */
- if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
- osm_sm_state_mgr_process(p_mgr->
- p_sm_state_mgr,
- OSM_SM_SIGNAL_DISCOVERY_COMPLETED);
+ /* At this point we need to check the consistency of
+ * the port_lid_tbl under the subnet. There might be
+ * errors in it if PortInfo Set reqeusts didn't reach
+ * their destination. */
+ __osm_state_mgr_check_tbl_consistency(&sm->state_mgr);
- /* the returned signal might be DONE or DONE_PENDING */
- signal = osm_pkey_mgr_process(p_mgr->p_subn->p_osm);
+ __osm_state_mgr_lid_assign_msg(&sm->state_mgr);
- /* the returned signal is always DONE */
- tmp_signal = osm_qos_setup(p_mgr->p_subn->p_osm);
+ /*
+ * Proceed with unicast forwarding table configuration.
+ * First - send trap 64 on newly discovered endports
+ */
+ __osm_state_mgr_report_new_ports(&sm->state_mgr);
- if (tmp_signal == OSM_SIGNAL_DONE_PENDING)
- signal = OSM_SIGNAL_DONE_PENDING;
+ osm_ucast_mgr_process(&sm->ucast_mgr);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- /* try to restore SA DB (this should be before lid_mgr
- because we may want to disable clients reregistration
- when SA DB is restored) */
- osm_sa_db_file_load(p_mgr->p_subn->p_osm);
+ /* We are done setting all LFTs so clear the ignore existing.
+ * From now on, as long as we are still master, we want to
+ * take into account these lfts. */
+ sm->p_subn->ignore_existing_lfts = FALSE;
- break;
+ __osm_state_mgr_switch_config_msg(&sm->state_mgr);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ if (!sm->p_subn->opt.disable_multicast) {
+ osm_mcast_mgr_process(&sm->mcast_mgr);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
+ __osm_state_mgr_multicast_config_msg(&sm->state_mgr);
+ }
- case OSM_SM_STATE_SET_PKEY:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_PKEY_DONE;
- break;
+ /*
+ * The LINK_PORTS state is required since we can not count on
+ * the port state change MADs to succeed. This is an artifact
+ * of the spec defining state change from state X to state X
+ * as an error. The hardware then is not required to process
+ * other parameters provided by the Set(PortInfo) Packet.
+ */
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_PKEY_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
+ osm_link_mgr_process(&sm->link_mgr, IB_LINK_NO_CHANGE);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ __osm_state_mgr_links_ports_msg(&sm->state_mgr);
- case OSM_SM_STATE_SET_PKEY_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_PKEY_DONE;
- break;
+ osm_link_mgr_process(&sm->link_mgr, IB_LINK_ARMED);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ __osm_state_mgr_links_armed_msg(&sm->state_mgr);
- case OSM_SM_STATE_SET_PKEY_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_SM_UCAST_LID;
- signal = osm_lid_mgr_process_sm(p_mgr->p_lid_mgr);
- break;
+ osm_link_mgr_process(&sm->link_mgr, IB_LINK_ACTIVE);
+ if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+ return;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ /*
+ * The sweep completed!
+ */
- case OSM_SM_STATE_SET_SM_UCAST_LID:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_SM_UCAST_LID_DONE;
- break;
+ /* in any case we zero this flag */
+ sm->p_subn->coming_out_of_standby = FALSE;
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_SM_UCAST_LID_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
+ /* If there were errors - then the subnet is not really up */
+ if (sm->p_subn->subnet_initialization_error == TRUE)
+ __osm_state_mgr_init_errors_msg(&sm->state_mgr);
+ else {
+ /* The subnet is up correctly - set the first_time_master_sweep
+ * flag (if it is on) to FALSE. */
+ if (sm->p_subn->first_time_master_sweep == TRUE)
+ sm->p_subn->first_time_master_sweep = FALSE;
+ sm->p_subn->need_update = 0;
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ osm_dump_all(sm->p_subn->p_osm);
+ __osm_state_mgr_up_msg(&sm->state_mgr);
- case OSM_SM_STATE_SET_SM_UCAST_LID_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_SM_UCAST_LID_DONE;
- break;
+ if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE))
+ osm_sa_db_file_dump(sm->p_subn->p_osm);
+ }
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ /*
+ * Finally signal the subnet up event
+ */
+ cl_event_signal(&sm->subnet_up_event);
- case OSM_SM_STATE_SET_SM_UCAST_LID_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- __osm_state_mgr_set_sm_lid_done_msg(p_mgr);
- __osm_state_mgr_notify_lid_change(p_mgr);
- p_mgr->state = OSM_SM_STATE_SET_SUBNET_UCAST_LIDS;
- signal = osm_lid_mgr_process_subnet(p_mgr->p_lid_mgr);
- break;
+ /* if we got a signal to force heavy sweep or errors
+ * in the middle of the sweep - try another sweep. */
+ if (sm->p_subn->force_heavy_sweep
+ || sm->p_subn->subnet_initialization_error)
+ osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
+}
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+static void do_process_mgrp_queue(osm_sm_t * sm)
+{
+ osm_mcast_mgr_process_mgroups(&sm->mcast_mgr);
+ wait_for_pending_transactions(&sm->p_subn->p_osm->stats);
+}
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- /*
- * The LID Manager is done processing.
- * There are no outstanding transactions, so we
- * can move on to configuring the forwarding tables.
- */
- p_mgr->state = OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE;
- break;
+void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr,
+ IN osm_signal_t signal)
+{
+ CL_ASSERT(p_mgr);
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The LID Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
+ OSM_LOG_ENTER(p_mgr->p_log, osm_state_mgr_process);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
+ if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG))
+ osm_log(p_mgr->p_log, OSM_LOG_DEBUG,
+ "osm_state_mgr_process: "
+ "Received signal %s in state %s\n",
+ osm_get_sm_signal_str(signal),
+ osm_get_sm_state_str(p_mgr->state));
+ switch (p_mgr->state) {
+ case OSM_SM_STATE_IDLE:
+ switch (signal) {
+ case OSM_SIGNAL_SWEEP:
/*
- * In this state, the Unicast Manager has completed processing,
- * but there are still transactions on the wire. Therefore,
- * wait here until the wire clears.
+ * If the osm_sm_state_mgr is in NOT-ACTIVE state -
+ * stay in IDLE
*/
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /*
- * The LID Manager is done processing.
- * There are no outstanding transactions, so we
- * can move on to configuring the forwarding tables.
- */
- p_mgr->state = OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_SUBNET_UCAST_LIDS_DONE:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /* At this point we need to check the consistency of
- * the port_lid_tbl under the subnet. There might be
- * errors in it if PortInfo Set reqeusts didn't reach
- * their destination. */
- __osm_state_mgr_check_tbl_consistency(p_mgr);
-
- __osm_state_mgr_lid_assign_msg(p_mgr);
-
- /*
- * OK, the wire is clear, so proceed with
- * unicast forwarding table configuration.
- * First - send trap 64 on newly discovered endports
- */
- __osm_state_mgr_report_new_ports(p_mgr);
-
- p_mgr->state = OSM_SM_STATE_SET_UCAST_TABLES;
- signal = osm_ucast_mgr_process(p_mgr->p_ucast_mgr);
-
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_UCAST_TABLES:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_UCAST_TABLES_DONE;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The Unicast Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_UCAST_TABLES_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_UCAST_TABLES_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_UCAST_TABLES_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
+ if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) {
+ osm_vendor_set_sm(p_mgr->p_mad_ctrl->h_bind, FALSE);
break;
}
- break;
-
- case OSM_SM_STATE_SET_UCAST_TABLES_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- /* We are done setting all LFTs so clear the ignore existing.
- * From now on, as long as we are still master, we want to
- * take into account these lfts. */
- p_mgr->p_subn->ignore_existing_lfts = FALSE;
-
- __osm_state_mgr_switch_config_msg(p_mgr);
-
- if (!p_mgr->p_subn->opt.disable_multicast) {
- p_mgr->state = OSM_SM_STATE_SET_MCAST_TABLES;
- signal = osm_mcast_mgr_process(p_mgr->p_mcast_mgr);
- } else {
- p_mgr->state = OSM_SM_STATE_SET_LINK_PORTS;
- signal =
- osm_link_mgr_process(p_mgr->
- p_link_mgr, IB_LINK_NO_CHANGE);
- }
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_MCAST_TABLES:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_MCAST_TABLES_DONE;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The Multicast Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_MCAST_TABLES_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_MCAST_TABLES_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_MCAST_TABLES_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_MCAST_TABLES_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- __osm_state_mgr_multicast_config_msg(p_mgr);
-
- p_mgr->state = OSM_SM_STATE_SET_LINK_PORTS;
- signal = osm_link_mgr_process(p_mgr->p_link_mgr, IB_LINK_NO_CHANGE);
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
/*
- * The LINK_PORTS state is required since we can not count on
- * the port state change MADs to succeed. This is an artifact
- * of the spec defining state change from state X to state X
- * as an error. The hardware then is not required to process
- * other parameters provided by the Set(PortInfo) Packet.
+ * If the osm_sm_state_mgr is in INIT state - signal
+ * it with an INIT signal to move it to DISCOVERY state.
*/
- case OSM_SM_STATE_SET_LINK_PORTS:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_LINK_PORTS_DONE;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The Link Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_LINK_PORTS_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_LINK_PORTS_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_LINK_PORTS_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_LINK_PORTS_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
-
- __osm_state_mgr_links_ports_msg(p_mgr);
-
- p_mgr->state = OSM_SM_STATE_SET_ARMED;
- signal = osm_link_mgr_process(p_mgr->p_link_mgr, IB_LINK_ARMED);
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_ARMED:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- p_mgr->state = OSM_SM_STATE_SET_ARMED_DONE;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The Link Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_ARMED_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_ARMED_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_SET_ARMED_DONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_ARMED_DONE:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
-
- __osm_state_mgr_links_armed_msg(p_mgr);
-
- p_mgr->state = OSM_SM_STATE_SET_ACTIVE;
- signal = osm_link_mgr_process(p_mgr->p_link_mgr, IB_LINK_ACTIVE);
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_ACTIVE:
- switch (signal) {
- case OSM_SIGNAL_DONE:
- /*
- * Don't change the signal, just the state.
- */
- p_mgr->state = OSM_SM_STATE_SUBNET_UP;
- break;
-
- case OSM_SIGNAL_DONE_PENDING:
- /*
- * The Link Manager is done processing.
- * There are outstanding transactions, so we
- * must wait for the wire to clear.
- */
- p_mgr->state = OSM_SM_STATE_SET_ACTIVE_WAIT;
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SET_ACTIVE_WAIT:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /*
- * Don't change the signal, just the state.
- */
- p_mgr->state = OSM_SM_STATE_SUBNET_UP;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_SUBNET_UP:
- switch (signal) {
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- case OSM_SIGNAL_DONE:
- /*
- * The sweep completed!
- */
-
- /* in any case we zero this flag */
- p_mgr->p_subn->coming_out_of_standby = FALSE;
-
- /* If there were errors - then the subnet is not really up */
- if (p_mgr->p_subn->subnet_initialization_error == TRUE) {
- __osm_state_mgr_init_errors_msg(p_mgr);
- } else {
- /* The subnet is up correctly - set the first_time_master_sweep flag
- * (if it is on) to FALSE. */
- if (p_mgr->p_subn->first_time_master_sweep == TRUE) {
- p_mgr->p_subn->first_time_master_sweep = FALSE;
- }
- p_mgr->p_subn->need_update = 0;
-
- osm_dump_all(p_mgr->p_subn->p_osm);
- __osm_state_mgr_up_msg(p_mgr);
-
- if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE))
- osm_sa_db_file_dump(p_mgr->p_subn->p_osm);
- }
- p_mgr->state = OSM_SM_STATE_PROCESS_REQUEST;
- signal = OSM_SIGNAL_IDLE_TIME_PROCESS;
-
- /*
- * Finally signal the subnet up event
- */
- status =
- cl_event_signal(p_mgr->p_subnet_up_event);
- if (status != IB_SUCCESS) {
- osm_log(p_mgr->p_log, OSM_LOG_ERROR,
- "osm_state_mgr_process: ERR 3319: "
- "Invalid SM state %u\n",
- p_mgr->state);
- }
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- break;
-
- case OSM_SM_STATE_MASTER_OR_HIGHER_SM_DETECTED:
- switch (signal) {
- case OSM_SIGNAL_CHANGE_DETECTED:
- /*
- * Nothing to do here. One subnet change typically
- * begets another....
- */
- break;
-
- case OSM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED:
- /*
- * If we lost once, we might lose again. Nothing to do.
- */
- break;
-
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- p_mgr->state = OSM_SM_STATE_STANDBY;
- /*
- * Call the sm_state_mgr with signal
- * MASTER_OR_HIGHER_SM_DETECTED_DONE
- */
+ if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_INIT)
osm_sm_state_mgr_process(p_mgr->p_sm_state_mgr,
- OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE);
- __osm_state_mgr_standby_msg(p_mgr);
- break;
+ OSM_SM_SIGNAL_INIT);
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- break;
- }
- signal = OSM_SIGNAL_NONE;
+ do_sweep(p_mgr->sm);
break;
- case OSM_SM_STATE_STANDBY:
- switch (signal) {
- case OSM_SIGNAL_EXIT_STBY:
- /*
- * Need to force re-write of sm_base_lid to all ports
- * to do that we want all the ports to be considered
- * foriegn
- */
- signal = OSM_SIGNAL_SWEEP;
- __osm_state_mgr_clean_known_lids(p_mgr);
- p_mgr->state = OSM_SM_STATE_IDLE;
- break;
-
- case OSM_SIGNAL_NO_PENDING_TRANSACTIONS:
- /*
- * Nothing to do here - need to stay at this state
- */
- signal = OSM_SIGNAL_NONE;
- break;
-
- default:
- __osm_state_mgr_signal_error(p_mgr, signal);
- signal = OSM_SIGNAL_NONE;
- break;
- }
- /* stay with the same signal - so we can start the sweep */
+ case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST:
+ do_process_mgrp_queue(p_mgr->sm);
break;
default:
- CL_ASSERT(FALSE);
- osm_log(p_mgr->p_log, OSM_LOG_ERROR,
- "osm_state_mgr_process: ERR 3320: "
- "Invalid SM state %u\n", p_mgr->state);
- p_mgr->state = OSM_SM_STATE_IDLE;
- signal = OSM_SIGNAL_NONE;
+ __osm_state_mgr_signal_error(p_mgr, signal);
break;
}
+ break;
- /* if we got a signal to force immediate heavy sweep in the middle of the sweep -
- * try another sweep. */
- if ((p_mgr->p_subn->force_heavy_sweep) &&
- (p_mgr->state == OSM_SM_STATE_IDLE)) {
- signal = OSM_SIGNAL_SWEEP;
- }
- /* if we got errors during the initialization in the middle of the sweep -
- * try another sweep. */
- if ((p_mgr->p_subn->subnet_initialization_error) &&
- (p_mgr->state == OSM_SM_STATE_IDLE)) {
- signal = OSM_SIGNAL_SWEEP;
+ case OSM_SM_STATE_STANDBY:
+ switch (signal) {
+ case OSM_SIGNAL_EXIT_STBY:
+ /*
+ * Need to force re-write of sm_base_lid to all ports
+ * to do that we want all the ports to be considered
+ * foreign
+ */
+ __osm_state_mgr_clean_known_lids(p_mgr);
+ p_mgr->state = OSM_SM_STATE_IDLE;
+ osm_sm_signal(p_mgr->sm, OSM_SIGNAL_SWEEP);
+ break;
+ default:
+ __osm_state_mgr_signal_error(p_mgr, signal);
+ break;
}
+ /* the sweep itself is started by the OSM_SIGNAL_SWEEP queued on EXIT_STBY above */
+ break;
+ default:
+ CL_ASSERT(FALSE);
+ osm_log(p_mgr->p_log, OSM_LOG_ERROR,
+ "osm_state_mgr_process: ERR 3320: "
+ "Invalid SM state %u\n", p_mgr->state);
+ break;
}
OSM_LOG_EXIT(p_mgr->p_log);
diff --git a/opensm/opensm/osm_sw_info_rcv.c b/opensm/opensm/osm_sw_info_rcv.c
index dbf8b8c..2cc887a 100644
--- a/opensm/opensm/osm_sw_info_rcv.c
+++ b/opensm/opensm/osm_sw_info_rcv.c
@@ -562,8 +562,7 @@ void osm_si_rcv_process(IN void *context, IN void *data)
if (__osm_si_rcv_process_existing
(sm, p_node, p_madw)) {
CL_PLOCK_RELEASE(sm->p_lock);
- osm_sm_signal(&sm->p_subn->p_osm->sm,
- OSM_SIGNAL_CHANGE_DETECTED);
+ sm->p_subn->force_heavy_sweep = 1;
goto Exit;
}
}
diff --git a/opensm/opensm/osm_sweep_fail_ctrl.c b/opensm/opensm/osm_sweep_fail_ctrl.c
index 92b3165..3a5190f 100644
--- a/opensm/opensm/osm_sweep_fail_ctrl.c
+++ b/opensm/opensm/osm_sweep_fail_ctrl.c
@@ -65,7 +65,7 @@ static void __osm_sweep_fail_ctrl_disp_callback(IN void *context,
/*
Notify the state manager that we had a light sweep failure.
*/
- osm_sm_signal(p_ctrl->sm, OSM_SIGNAL_LIGHT_SWEEP_FAIL);
+ p_ctrl->sm->p_subn->force_heavy_sweep = 1;
OSM_LOG_EXIT(p_ctrl->sm->p_log);
}
--
1.5.4.rc2.60.gb2e62
More information about the general
mailing list