[openib-general] [PATCH] Opensm - fix lmc algorithm
Yael Kalka
yael at mellanox.co.il
Thu Oct 27 06:04:25 PDT 2005
Hi Hal,
We noticed a problem in the lmc assignment algorithm.
In the current code, when opensm is run with lmc > 0, it goes into
an infinite loop.
While debugging, we found that the problem is in the lid assignment,
so we changed the algorithm. The change is in the
__osm_lid_mgr_init_sweep function.
We have done some testing of the new code, and the lmc assignment
appears to work correctly with the fix.
Thanks,
Yael
Signed-off-by: Yael Kalka <yael at mellanox.co.il>
Index: opensm/osm_lid_mgr.c
===================================================================
--- opensm/osm_lid_mgr.c (revision 3848)
+++ opensm/osm_lid_mgr.c (working copy)
@@ -337,7 +337,7 @@ __osm_lid_mgr_init_sweep(
uint16_t max_defined_lid;
uint16_t max_persistent_lid;
uint16_t max_discovered_lid;
- uint16_t lid, l;
+ uint16_t lid;
uint16_t disc_min_lid;
uint16_t disc_max_lid;
uint16_t db_min_lid;
@@ -349,16 +349,23 @@ __osm_lid_mgr_init_sweep(
osm_port_t *p_port;
cl_qmap_t *p_port_guid_tbl;
uint8_t lmc_num_lids = (uint8_t)(1 << p_mgr->p_subn->opt.lmc);
+ uint16_t lmc_mask;
+ uint16_t req_lid, num_lids;
OSM_LOG_ENTER( p_mgr->p_log, __osm_lid_mgr_init_sweep );
+ if (p_mgr->p_subn->opt.lmc)
+ lmc_mask = ~((1 << p_mgr->p_subn->opt.lmc) - 1);
+ else
+ lmc_mask = 0xffff;
+
/* if we came out of standby we need to discard any previous guid 2 lid
info we might had */
if ( p_mgr->p_subn->coming_out_of_standby == TRUE )
{
osm_db_clear( p_mgr->p_g2l );
for (lid = 0; lid < cl_ptr_vector_get_size(&p_mgr->used_lids); lid++)
- cl_ptr_vector_set(&p_mgr->used_lids, lid, NULL);
+ cl_ptr_vector_set(p_persistent_vec, lid, NULL);
}
/* we need to cleanup the empty ranges list */
@@ -375,7 +382,7 @@ __osm_lid_mgr_init_sweep(
/* we if are on the first sweep and in re-assign lids mode
we should ignore all the available info and simply define one
- hufe empty range */
+ huge empty range */
if ((p_mgr->p_subn->first_time_master_sweep == TRUE) &&
(p_mgr->p_subn->opt.reassign_lids == TRUE ))
{
@@ -398,6 +405,34 @@ __osm_lid_mgr_init_sweep(
osm_port_get_lid_range_ho(p_port, &disc_min_lid, &disc_max_lid);
for (lid = disc_min_lid; lid <= disc_max_lid; lid++)
cl_ptr_vector_set(p_discovered_vec, lid, p_port );
+ /* make sure the guid2lid entry is valid. If not - clean it. */
+ if (!osm_db_guid2lid_get( p_mgr->p_g2l,
+ cl_ntoh64(osm_port_get_guid(p_port)),
+ &db_min_lid, &db_max_lid))
+ {
+ if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) !=
+ IB_NODE_TYPE_SWITCH)
+ num_lids = lmc_num_lids;
+ else
+ num_lids = 1;
+
+ if ((num_lids != 1) &&
+ (((db_min_lid & lmc_mask) != db_min_lid) ||
+ (db_max_lid - db_min_lid + 1 < num_lids)) )
+ {
+ /* Not aligned, or not wide enough - remove the entry */
+ osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
+ "__osm_lid_mgr_init_sweep: "
+ "Cleaning persistent entry for guid:0x%016" PRIx64
+ " illegal range:[0x%x:0x%x] \n",
+ cl_ntoh64(osm_port_get_guid(p_port)), db_min_lid,
+ db_max_lid );
+ osm_db_guid2lid_delete( p_mgr->p_g2l,
+ cl_ntoh64(osm_port_get_guid(p_port)));
+ for ( lid = db_min_lid ; lid <= db_max_lid ; lid++ )
+ cl_ptr_vector_set(p_persistent_vec, lid, NULL);
+ }
+ }
}
/*
@@ -434,7 +469,7 @@ __osm_lid_mgr_init_sweep(
{
is_free = TRUE;
/* first check to see if the lid is used by a persistent assignment */
- if ((lid < max_persistent_lid) && cl_ptr_vector_get(p_persistent_vec, lid))
+ if ((lid <= max_persistent_lid) && cl_ptr_vector_get(p_persistent_vec, lid))
{
osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
"__osm_lid_mgr_init_sweep: "
@@ -442,62 +477,86 @@ __osm_lid_mgr_init_sweep(
lid);
is_free = FALSE;
}
-
- /* check the discovered port if there is one */
- if ((lid < max_discovered_lid) &&
- (p_port = (osm_port_t *)cl_ptr_vector_get(p_discovered_vec, lid)))
+ else
{
- /* get the lid range of that port - but we know how many lids we
- are about to assign to it */
- osm_port_get_lid_range_ho(p_port, &disc_min_lid, &disc_max_lid);
- if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) !=
- IB_NODE_TYPE_SWITCH)
- disc_max_lid = disc_min_lid + lmc_num_lids - 1;
-
+ /* check this is a discovered port */
+ CL_ASSERT(lid <= max_discovered_lid);
+ if ((p_port = (osm_port_t *)cl_ptr_vector_get(p_discovered_vec, lid)))
+ {
+ /* we have a port. Now lets see if we can preserve its lid range. */
+ /* For that - we need to make sure:
+ 1. The port has a (legal) persistency entry. Then the local lid
+ is free (we will use the persistency value).
+ 2. Can the port keep its local assignment?
+ a. Make sure the lid is aligned.
+ b. Make sure all needed lids (for the lmc) are free according
+ to the persistency table.
+ */
/* qualify the guid of the port is not persistently mapped to
another range */
if (!osm_db_guid2lid_get( p_mgr->p_g2l,
cl_ntoh64(osm_port_get_guid(p_port)),
&db_min_lid, &db_max_lid))
{
- /* ok there is an asignment - is it the same ? */
- if ((disc_min_lid == db_min_lid) && (disc_max_lid == db_max_lid))
- {
osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
"__osm_lid_mgr_init_sweep: "
- "[0x%04x,0x%04x] is not free as it was discovered "
- " and mapped by the persistent db.\n",
- disc_min_lid, disc_max_lid);
- is_free = FALSE;
+ "0x%04x is free as it was discovered "
+ "but mapped by the persistent db to [0x%04x:0x%04x].\n",
+ lid, db_min_lid, db_max_lid);
+ }
+ else
+ {
+ /* can the port keep its assignment ? */
+ /* get the lid range of that port, and the required number
+ of lids we are about to assign to it */
+ osm_port_get_lid_range_ho(p_port, &disc_min_lid, &disc_max_lid);
+ if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) !=
+ IB_NODE_TYPE_SWITCH)
+ {
+ disc_max_lid = disc_min_lid + lmc_num_lids - 1;
+ num_lids = lmc_num_lids;
}
else
{
+ num_lids = 1;
+ }
+ /* Make sure the lid is aligned */
+ if ((num_lids != 1) && ((disc_min_lid & lmc_mask) != disc_min_lid))
+ {
+ /* The lid cannot be used */
osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
"__osm_lid_mgr_init_sweep: "
- "[0x%04x,0x%04x] is free as it was discovered"
- " but mapped to range: [0x%x:0x%x] by the persistent db.\n",
- disc_min_lid, disc_max_lid, db_min_lid, db_max_lid);
- for (l = disc_min_lid; l <= disc_max_lid; l++)
- cl_ptr_vector_set(p_discovered_vec, l, NULL);
- }
+ "0x%04x is free as it was discovered "
+ "but not alligned. \n",
+ lid );
}
else
{
+ /* check that all needed lids are not persistently mapped */
+ is_free = FALSE;
+ for ( req_lid = disc_min_lid + 1 ; req_lid <= disc_max_lid ; req_lid++ )
+ {
+ if ((req_lid <= max_persistent_lid) && cl_ptr_vector_get(p_persistent_vec, req_lid))
+ {
osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
"__osm_lid_mgr_init_sweep: "
- "0x%04x is not free as it was discovered"
- " and there is no persistent db entry for it.\n",
+ "0x%04x is free as it was discovered "
+ "but mapped. \n",
lid);
- is_free = FALSE;
+ is_free = TRUE;
+ break;
+ }
}
-
- /* if there is more then one lid on that port - and the discovered port
- is going to retain its lids advance to the max lid */
if (is_free == FALSE)
{
+ /* This port will use its local lid, and consume the entire required lid range.
+ Thus we can skip that range. */
lid = disc_max_lid;
}
}
+ }
+ }
+ }
if (is_free)
{
@@ -1300,7 +1359,6 @@ osm_lid_mgr_process_subnet(
/* the proc returns the fact it sent a set port info */
if (__osm_lid_mgr_set_physp_pi( p_mgr, p_physp, cl_hton16( min_lid_ho )))
p_mgr->send_set_reqs = TRUE;
-
}
} /* all ports */
More information about the general
mailing list