[ofa-general] Re: [PATCH][10] opensm: hook mesh code into lash (updated)

Sasha Khapyorsky sashak at voltaire.com
Sun Nov 30 13:34:02 PST 2008


On 16:41 Tue 11 Nov     , Robert Pearson wrote:
> Sasha,
> 
> Here is the tenth patch implementing the mesh analysis algorithm.
> I am resending it because I inadvertently left a bug in the last version.
> 
> This patch
>       - hooks mesh code into lash
>       - replaces sw->phys_connections by the equivalent switch->node->links
>       - replaces sw->num_connections by the equivalent
> switch->node->num_links
>       - replaces sw->virtual_physical_port_table by
> switch->node->links[]->ports
> 
> When the do_mesh_analysis flag is not set there is no change to the function
> except To replace the variables with variables in node that have the same
> size. In this Case the port table in link_t will always have just one port.
> 
> When the do_mesh_analysis flag is set multiple physical links will collapse
> to a Single logical link with a port list with more than one element.
> 
>       - fixed bug, mesh not set in osm_do_mesh_analysis

I think it should be fixed in related patch.

>       - rewrote connect switches to use variables in node
>       - in log Lane requirements (%d) exceed available lanes (%d)
>         Arguments were reversed, fixed

Nice finding.

>       - compute physical egress port in routine get_next_port
>         Which will use round robin if there are more than one
>         Physical links between switches
>       - changed printf's to OSM_LOG's in mesh.c
> 
> Regards,
> 
> Bob Pearson
> 
> Signed-off-by: Bob Pearson <rpearson at systemfabricworks.com>
> ----
> diff --git a/opensm/include/opensm/osm_ucast_lash.h
> b/opensm/include/opensm/osm_ucast_lash.h
> index c037571..f3bde5d 100644
> --- a/opensm/include/opensm/osm_ucast_lash.h
> +++ b/opensm/include/opensm/osm_ucast_lash.h
> @@ -82,9 +82,6 @@ typedef struct _switch {
>  		unsigned lane;
>  	} *routing_table;
>  	mesh_node_t *node;
> -	unsigned int num_connections;
> -	int *virtual_physical_port_table;
> -	int *phys_connections;
>  } switch_t;
>  
>  typedef struct _lash {
> diff --git a/opensm/opensm/osm_mesh.c b/opensm/opensm/osm_mesh.c
> index a248522..dbe3eeb 100644
> --- a/opensm/opensm/osm_mesh.c
> +++ b/opensm/opensm/osm_mesh.c
> @@ -750,7 +750,7 @@ static void make_geometry(lash_t *p_lash, int sw)
>  					continue;
>  
>  				if (l2 == -1) {
> -					printf("ERROR no reverse link\n");
> +					OSM_LOG(p_log, OSM_LOG_DEBUG, "ERROR
> no reverse link\n");
>  					continue;
>  				}
>  
> @@ -919,6 +919,7 @@ static int reorder_links(lash_t *p_lash, int sw)
>   */
>  static int measure_geometry(lash_t *p_lash, int seed)
>  {
> +	osm_log_t *p_log = &p_lash->p_osm->log;
>  	int i, j, k;
>  	int sw;
>  	switch_t *s, *s1;
> @@ -942,7 +943,7 @@ static int measure_geometry(lash_t *p_lash, int seed)
>  				assigned_axes++;
>  	}
>  
> -	printf("lash: %d/%d unassigned/assigned axes\n", unassigned_axes,
> assigned_axes);
> +	OSM_LOG(p_log, OSM_LOG_DEBUG, "%d/%d unassigned/assigned axes\n",
> unassigned_axes, assigned_axes);
>  
>  	do {
>  		change = 0;
> @@ -1069,8 +1070,7 @@ int osm_do_mesh_analysis(lash_t *p_lash)
>  	int i;
>  	mesh_t *mesh;
>  	switch_t *s;
> -
> -	OSM_LOG_ENTER(p_log);
> +	char buf[256], *p;
>  
>  	/*
>  	 * allocate per mesh data structures
> @@ -1080,6 +1080,8 @@ int osm_do_mesh_analysis(lash_t *p_lash)
>  		return -1;
>  	}
>  
> +	mesh = p_lash->mesh;
> +
>  	/*
>  	 * get local metric and invariant for each switch
>  	 * also classify each switch
> @@ -1099,36 +1101,41 @@ int osm_do_mesh_analysis(lash_t *p_lash)
>  
>  	s = p_lash->switches[max_class_type];
>  
> -	printf("lash: found %d node type%s\n", mesh->num_class,
> (mesh->num_class == 1)? "" : "s");
> -	printf("lash: %snode type is ", (mesh->num_class == 1)? "" : "most
> common ");
> +	OSM_LOG(p_log, OSM_LOG_INFO, "found %d node type%s\n",
> mesh->num_class, (mesh->num_class == 1)? "" : "s");
> +
> +	p = buf;
> +	p += sprintf( p, "%snode type is ", (mesh->num_class == 1)? "" :
> "most common ");
>  
>  	if (s->node->type) {
>  		struct _mesh_info *t = &mesh_info[s->node->type];
>  
>  		for (i = 0; i < t->dimension; i++) {
> -			printf("%s%d%s", i? "X" : "", t->size[i],
> +			p += sprintf(p, "%s%d%s", i? " x " : "", t->size[i],
>  				(t->size[i] == 6)? "+" : "");

Would snprintf() be more suitable here in order to prevent potential
overflow? (This is a nit - dimension value is limited now in mesh_info
structure).

>  		}
> -		printf(" mesh\n");
> +		p += sprintf(p, " mesh\n");
>  
>  		p_lash->mesh->dimension = t->dimension;
>  	} else {
> -		printf("unknown geometry\n");
> +		p += sprintf(p, "unknown geometry\n");
>  	}
>  
> +	OSM_LOG(p_log, OSM_LOG_INFO, "%s", buf);
> +
>  	if (s->node->type) {
>  		make_geometry(p_lash, max_class_type);
>  
>  		if (measure_geometry(p_lash, max_class_type))
>  			return -1;
>  
> -		printf("lash: found ");
> +		p = buf;
> +		p += sprintf(p, "found ");
>  		for (i = 0; i < mesh->dimension; i++)
> -			printf("%s%d", i? "X" : "", mesh->size[i]);
> -		printf(" mesh\n");
> -	}
> +			p += sprintf(p, "%s%d", i? " x " : "",
> mesh->size[i]);
> +		p += sprintf(p, " mesh\n");
>  
> -	OSM_LOG_EXIT(p_log);
> +		OSM_LOG(p_log, OSM_LOG_INFO, "%s", buf);
> +	}
>  
>  	return 0;
>  }
> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index 95dbcc2..660ad56 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -67,16 +67,53 @@ static cdg_vertex_t *create_cdg_vertex(unsigned
> num_switches)
>  static void connect_switches(lash_t * p_lash, int sw1, int sw2, int
> phy_port_1)
>  {
>  	osm_log_t *p_log = &p_lash->p_osm->log;
> -	unsigned num = p_lash->switches[sw1]->num_connections;
> +	unsigned num = p_lash->switches[sw1]->node->num_links;
> +	switch_t *s1 = p_lash->switches[sw1];
> +	mesh_node_t *node = s1->node;
> +	switch_t *s2;
> +	link_t *l;
> +	int i;
> +
> +	/*
> +	 * if doing mesh analysis:
> +	 *  - do not consider connections to self
> +	 *  - collapse multiple connections between
> +	 *    pair of switches to a single locical link
> +	 */
> +	if (p_lash->p_osm->subn.opt.do_mesh_analysis) {
> +		if (sw1 == sw2)
> +			return;

This 'if (sw1 == sw2)' is related for non mesh case too, right?

Sasha

> +
> +		/* see if we are alredy linked to sw2 */
> +		for (i = 0; i < num; i++) {
> +			l = node->links[i];
> +
> +			if (node->links[i]->switch_id == sw2) {
> +				l->ports[l->num_ports++] = phy_port_1;
> +				return;
> +			}
> +		}
> +	}
> +
> +	l = node->links[num];
> +	l->switch_id = sw2;
> +	l->link_id = -1;
> +	l->ports[l->num_ports++] = phy_port_1;
> +
> +	s2 = p_lash->switches[sw2];
> +	for (i = 0; i < s2->node->num_links; i++) {
> +		if (s2->node->links[i]->switch_id == sw1) {
> +			s2->node->links[i]->link_id = num;
> +			l->link_id = i;
> +			break;
> +		}
> +	}
>  
> -	p_lash->switches[sw1]->phys_connections[num] = sw2;
> -	p_lash->switches[sw1]->virtual_physical_port_table[num] =
> phy_port_1;
> -	p_lash->switches[sw1]->num_connections++;
> +	node->num_links++;
>  
>  	OSM_LOG(p_log, OSM_LOG_VERBOSE,
>  		"LASH connect: %d, %d, %d\n", sw1, sw2,
>  		phy_port_1);
> -
>  }
>  
>  static osm_switch_t *get_osm_switch_from_port(osm_port_t * port)
> @@ -148,7 +185,7 @@ static int cycle_exists(cdg_vertex_t * start,
> cdg_vertex_t * current,
>  
>  static inline int get_next_switch(lash_t *p_lash, int sw, int link)
>  {
> -	return p_lash->switches[sw]->phys_connections[link];
> +	return p_lash->switches[sw]->node->links[link]->switch_id;
>  }
>  
>  static void remove_semipermanent_depend_for_sp(lash_t * p_lash, int sw,
> @@ -233,8 +270,8 @@ static int get_phys_connection(switch_t *sw, int
> switch_to)
>  {
>  	unsigned int i = 0;
>  
> -	for (i = 0; i < sw->num_connections; i++)
> -		if (sw->phys_connections[i] == switch_to)
> +	for (i = 0; i < sw->node->num_links; i++)
> +		if (sw->node->links[i]->switch_id == switch_to)
>  			return i;
>  	return i;
>  }
> @@ -252,8 +289,8 @@ static void shortest_path(lash_t * p_lash, int ir)
>  
>  	while (!cl_is_list_empty(&bfsq)) {
>  		dequeue(&bfsq, &sw);
> -		for (i = 0; i < sw->num_connections; i++) {
> -			swi = switches[sw->phys_connections[i]];
> +		for (i = 0; i < sw->node->num_links; i++) {
> +			swi = switches[sw->node->links[i]->switch_id];
>  			if (swi->q_state == UNQUEUED) {
>  				enqueue(&bfsq, swi);
>  				sw->dij_channels[sw->used_channels++] =
> swi->id;
> @@ -614,25 +651,8 @@ static switch_t *switch_create(lash_t * p_lash,
> unsigned id, osm_switch_t * p_sw
>  		return NULL;
>  	}
>  
> -	sw->virtual_physical_port_table = malloc(num_ports * sizeof(int));
> -	if (!sw->virtual_physical_port_table) {
> -		free(sw->dij_channels);
> -		free(sw);
> -		return NULL;
> -	}
> -
> -	sw->phys_connections = malloc(num_ports * sizeof(int));
> -	if (!sw->phys_connections) {
> -		free(sw->virtual_physical_port_table);
> -		free(sw->dij_channels);
> -		free(sw);
> -		return NULL;
> -	}
> -
>  	sw->routing_table = malloc(num_switches *
> sizeof(sw->routing_table[0]));
>  	if (!sw->routing_table) {
> -		free(sw->phys_connections);
> -		free(sw->virtual_physical_port_table);
>  		free(sw->dij_channels);
>  		free(sw);
>  		return NULL;
> @@ -643,18 +663,13 @@ static switch_t *switch_create(lash_t * p_lash,
> unsigned id, osm_switch_t * p_sw
>  		sw->routing_table[i].lane = NONE;
>  	}
>  
> -	for (i = 0; i < num_ports; i++) {
> -		sw->virtual_physical_port_table[i] = -1;
> -		sw->phys_connections[i] = NONE;
> -	}
> -
> -	if (osm_mesh_node_create(p_lash, sw))
> -		return NULL;
> -
>  	sw->p_sw = p_sw;
>  	if (p_sw)
>  		p_sw->priv = sw;
>  
> +	if (osm_mesh_node_create(p_lash, sw))
> +		return NULL;
> +
>  	return sw;
>  }
>  
> @@ -664,10 +679,6 @@ static void switch_delete(switch_t * sw)
>  
>  	if (sw->dij_channels)
>  		free(sw->dij_channels);
> -	if (sw->virtual_physical_port_table)
> -		free(sw->virtual_physical_port_table);
> -	if (sw->phys_connections)
> -		free(sw->phys_connections);
>  	if (sw->routing_table)
>  		free(sw->routing_table);
>  	if (sw->p_sw)
> @@ -972,7 +983,7 @@ Error_Not_Enough_Lanes:
>  	status = -1;
>  	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
>  		"Lane requirements (%d) exceed available lanes (%d)\n",
> -		p_lash->vl_min, lanes_needed);
> +		lanes_needed, p_lash->vl_min);
>  Exit:
>  	if (switch_bitmap)
>  		free(switch_bitmap);
> @@ -985,6 +996,21 @@ static unsigned get_lash_id(osm_switch_t * p_sw)
>  	return ((switch_t *) p_sw->priv)->id;
>  }
>  
> +int get_next_port(switch_t *sw, int link)
> +{
> +	link_t *l = sw->node->links[link];
> +	int port = l->next_port++;
> +
> +	/*
> +	 * note if not doing mesh analysis
> +	 * then num_ports is always 1
> +	 */
> +	if (l->next_port >= l->num_ports)
> +		l->next_port = 0;
> +
> +	return l->ports[port];
> +}
> +
>  static void populate_fwd_tbls(lash_t * p_lash)
>  {
>  	osm_log_t *p_log = &p_lash->p_osm->log;
> @@ -1036,9 +1062,7 @@ static void populate_fwd_tbls(lash_t * p_lash)
>  				    (uint8_t) sw->
>  
> routing_table[dst_lash_switch_id].out_link;
>  				uint8_t physical_egress_port =
> -				    (uint8_t) sw->
> -				    virtual_physical_port_table
> -				    [lash_egress_port];
> +					get_next_port(sw, lash_egress_port);
>  
>  				p_sw->lft_buf[lid] = physical_egress_port;
>  				OSM_LOG(p_log, OSM_LOG_VERBOSE,
> 
> 



More information about the general mailing list